def main(filein, fileout):
    """Write one declension line per (stem, gender) pair found in *filein*.

    Each non-blank line of *filein* (UTF-8) is parsed with ``Input``.  For
    every gender character in the record's ``hgenderstr`` the stem/key pair
    is derived with ``deduce_gender_stem`` and declined with
    ``s_file_init_alt_helper``.  Every resulting declension is written to
    *fileout* (UTF-8) as ``"<hstem> <gender>:<decl>"`` so the file is
    comparable with huet_noun_tables.txt.  When no declension is produced,
    a placeholder table of 24 ``nil`` entries is written instead.

    Args:
        filein: path of the input file.
        fileout: path of the output file (overwritten).

    Returns:
        None.  A summary line with the number of lines written is printed.
    """
    # Placeholder table used when the helper yields nothing for a gender.
    nil_decl = '[' + ' '.join(['nil'] * 24) + ']'
    nin = 0   # physical input line number (blank lines included)
    nout = 0  # number of lines written to fileout
    # 'with' guarantees both files are closed even if a record fails.
    with codecs.open(filein, "r", "utf-8") as f, \
            codecs.open(fileout, "w", "utf-8") as fout:
        for line in f:
            line = line.rstrip('\r\n')
            nin = nin + 1
            if line == '':
                continue
            rec = Input(line)
            subanta = rec.hstem
            genderstr = rec.hgenderstr
            words1 = rec.mwkey2  # e.g. gaRi-mat
            for g in genderstr:
                try:
                    (key1, key2) = deduce_gender_stem(
                        subanta, words1, g, genderstr, rec.mwtype)
                    outputs = s_file_init_alt_helper(key1, g, key2, dbg=False)
                except (NameError, MyException) as err:
                    # Best effort: report the failing case and fall through
                    # to the 'nil' placeholder below.
                    print("\ncase= %s line= %s" % (nin, line))
                    print(err)
                    outputs = None
                # Normalise the helper result to a list of strings.
                if isinstance(outputs, list):
                    pass
                elif outputs is None:
                    outputs = []
                elif ';' in outputs:
                    outputs = outputs.split(';')
                else:
                    outputs = [outputs]
                for output in outputs:
                    # Change form so comparable with huet_noun_tables.txt:
                    # only the fifth ':'-separated field is kept.
                    (_p0, _p1, _p2, _p3, decl) = output.split(':')
                    fout.write("%s\n" % ("%s %s:%s" % (subanta, g, decl)))
                    nout = nout + 1
                if not outputs:
                    fout.write("%s\n" % ("%s %s:%s" % (subanta, g, nil_decl)))
                    nout = nout + 1
    print("%s lines written to %s" % (nout, fileout))
def main(filein, fileout):
    """Convert the records of *filein* into declension lines in *fileout*.

    Every non-blank UTF-8 line of *filein* is turned into an ``Input``
    record.  For each gender character of ``rec.hgenderstr`` the function
    calls ``deduce_gender_stem`` and then ``s_file_init_alt_helper``; each
    declension returned is written as ``"<hstem> <gender>:<decl>"`` (the
    form used by huet_noun_tables.txt).  If nothing is returned, or the
    helper raises ``NameError``/``MyException``, a table of 24 ``nil``
    entries is written for that gender.

    Args:
        filein: path of the input file.
        fileout: path of the output file (overwritten).

    Returns:
        None.  The count of written lines is printed at the end.
    """
    # Built once: the all-'nil' table emitted when no declension exists.
    nil_decl = '[' + ' '.join(['nil'] * 24) + ']'
    nin = 0   # input line counter, counts blank lines too
    nout = 0  # output line counter
    # Context managers close both files on success and on error alike.
    with codecs.open(filein, "r", "utf-8") as f, \
            codecs.open(fileout, "w", "utf-8") as fout:
        for line in f:
            line = line.rstrip('\r\n')
            nin = nin + 1
            if line == '':
                continue
            rec = Input(line)
            subanta = rec.hstem
            genderstr = rec.hgenderstr
            words1 = rec.mwkey2  # e.g. gaRi-mat
            for g in genderstr:
                try:
                    (key1, key2) = deduce_gender_stem(
                        subanta, words1, g, genderstr, rec.mwtype)
                    outputs = s_file_init_alt_helper(key1, g, key2, dbg=False)
                except (NameError, MyException) as err:
                    # Report and continue; the placeholder is written below.
                    print("\ncase= %s line= %s" % (nin, line))
                    print(err)
                    outputs = None
                # The helper may return a list, None, a ';'-joined string
                # or a single string; reduce all of these to a list.
                if isinstance(outputs, list):
                    pass
                elif outputs is None:
                    outputs = []
                elif ';' in outputs:
                    outputs = outputs.split(';')
                else:
                    outputs = [outputs]
                for output in outputs:
                    # Keep only the fifth ':'-separated field so the line
                    # is comparable with huet_noun_tables.txt.
                    (_p0, _p1, _p2, _p3, decl) = output.split(':')
                    fout.write("%s\n" % ("%s %s:%s" % (subanta, g, decl)))
                    nout = nout + 1
                if not outputs:
                    fout.write("%s\n" % ("%s %s:%s" % (subanta, g, nil_decl)))
                    nout = nout + 1
    print("%s lines written to %s" % (nout, fileout))
def s_file_init_alt1(intab, indir, outtab, outdir, n1, n2):
    """Decline the stems on input lines n1..n2 and write the results.

    Reads ``indir/intab`` (UTF-8).  Lines are numbered from 1, blank lines
    included; only non-blank lines whose number lies in ``[n1, n2]`` are
    processed.  A line looks like::

        gaRimat : S m : <MW=gaRi-mat,83017,1>

    The first field is the stem, the second token of the second field is
    the gender, and the second token of the third field is the dictionary
    key.  Each string returned by ``s_file_init_alt_helper`` is written as
    one line of ``outdir/outtab`` (UTF-8).  A warning is printed when the
    helper yields nothing.

    Args:
        intab: input file name.
        indir: directory containing *intab*.
        outtab: output file name.
        outdir: directory receiving *outtab*.
        n1: first input line number to process (inclusive).
        n2: last input line number to process (inclusive).

    Raises:
        SystemExit: with code 1 when the form/gender field of a selected
            line is missing or has fewer than two tokens.
    """
    nin = 0  # physical input line number
    filein = "%s/%s" % (indir, intab)
    fileout = "%s/%s" % (outdir, outtab)
    # 'with' closes both files on every exit path, including SystemExit.
    with codecs.open(filein, "r", "utf-8") as f, \
            codecs.open(fileout, "w", "utf-8") as fout:
        for line in f:
            line = line.rstrip('\r\n')
            nin = nin + 1
            if line == '':
                continue
            if not (n1 <= nin <= n2):
                continue
            words = line.split(':')
            subanta = words[0].strip()  # gaRimat
            try:
                fg = re.findall(r"[^ ]+", words[1])  # ["S", "m"]
                # gender is what s_file_init_alt_helper expects
                g = fg[1]
            except IndexError:
                # Either there is no second ':' field or it lacks a gender.
                print("Case %s Problem with fg.Line= %s" % (nin, line))
                raise SystemExit(1)
            dictstr = words[2].strip()  # <MW=gaRi-mat,83017,1>
            dictwords = re.findall(r"[^ <=,>]+", dictstr)
            words1 = dictwords[1]  # gaRi-mat
            try:
                outputs = s_file_init_alt_helper(subanta, g, words1, dbg=False)
            except (NameError, MyException) as err:
                print("\ncase= %s line= %s" % (nin, line))
                print(err)
                outputs = None
            # Normalise the helper result to a list of strings.
            if isinstance(outputs, list):
                pass
            elif outputs is None:
                outputs = []
            elif ';' in outputs:
                outputs = outputs.split(';')
            else:
                outputs = [outputs]
            for output in outputs:
                fout.write("%s\n" % output)
            if not outputs:
                print("Warning %s %s %s : %s" % (subanta, fg, words1, g))
def s_file_init_alt1(intab, indir, outtab, outdir, n1, n2):
    """Run ``s_file_init_alt_helper`` over a line range of an input table.

    ``indir/intab`` is read as UTF-8.  Input lines are counted from 1
    (blank lines count); a line is processed only if it is non-blank and
    its number is between *n1* and *n2* inclusive.  Expected line shape::

        gaRimat : S m : <MW=gaRi-mat,83017,1>

    i.e. ``stem : form gender : <dict=key,...>``.  The helper is called
    with the stem, the gender and the dictionary key; each string it
    returns becomes one line of ``outdir/outtab`` (UTF-8).  When it returns
    nothing, a warning is printed instead.

    Args:
        intab: input file name.
        indir: directory containing *intab*.
        outtab: output file name.
        outdir: directory receiving *outtab*.
        n1: first input line number to process (inclusive).
        n2: last input line number to process (inclusive).

    Raises:
        SystemExit: with code 1 when a selected line has no form/gender
            field or that field has fewer than two tokens.
    """
    nin = 0  # input line counter, counts blank lines too
    filein = "%s/%s" % (indir, intab)
    fileout = "%s/%s" % (outdir, outtab)
    # Context managers ensure the files are closed even on early exit.
    with codecs.open(filein, "r", "utf-8") as f, \
            codecs.open(fileout, "w", "utf-8") as fout:
        for line in f:
            line = line.rstrip('\r\n')
            nin = nin + 1
            if line == '':
                continue
            if not (n1 <= nin <= n2):
                continue
            words = line.split(':')
            subanta = words[0].strip()  # gaRimat
            try:
                fg = re.findall(r"[^ ]+", words[1])  # ["S", "m"]
                # gender is what s_file_init_alt_helper expects
                g = fg[1]
            except IndexError:
                # Missing second field, or no gender token inside it.
                print("Case %s Problem with fg.Line= %s" % (nin, line))
                raise SystemExit(1)
            dictstr = words[2].strip()  # <MW=gaRi-mat,83017,1>
            dictwords = re.findall(r"[^ <=,>]+", dictstr)
            words1 = dictwords[1]  # gaRi-mat
            try:
                outputs = s_file_init_alt_helper(subanta, g, words1, dbg=False)
            except (NameError, MyException) as err:
                print("\ncase= %s line= %s" % (nin, line))
                print(err)
                outputs = None
            # The helper may return a list, None, a ';'-joined string or a
            # single string; reduce all of these to a list.
            if isinstance(outputs, list):
                pass
            elif outputs is None:
                outputs = []
            elif ';' in outputs:
                outputs = outputs.split(';')
            else:
                outputs = [outputs]
            for output in outputs:
                fout.write("%s\n" % output)
            if not outputs:
                print("Warning %s %s %s : %s" % (subanta, fg, words1, g))