예제 #1
0
def main(filein, fileout):
    """Convert the records in ``filein`` into declension lines in ``fileout``.

    Both files are handled as UTF-8.  Every non-blank input line is parsed
    with ``Input``; for each element ``g`` of the record's ``hgenderstr``
    the pair returned by ``deduce_gender_stem`` is passed to
    ``s_file_init_alt_helper``.  Each result is written as
    ``"<hstem> <g>:<decl>"``, where ``decl`` is the last of the five
    ':'-separated fields of the helper output.  When the helper yields
    nothing, or raises ``NameError``/``MyException`` (reported on stdout),
    a placeholder table of 24 ``nil`` entries is written instead.

    Args:
        filein: path of the input file.
        fileout: path of the output file (overwritten).

    Raises:
        ValueError: if a helper output does not consist of exactly five
            ':'-separated fields.
    """
    nout = 0
    # Context managers guarantee both files are closed even when parsing or
    # a helper raises part-way through the input.
    with codecs.open(filein, "r", "utf-8") as f:
        with codecs.open(fileout, "w", "utf-8") as fout:
            # nin is the 1-based physical line number (blank lines count).
            for nin, line in enumerate(f, 1):
                line = line.rstrip('\r\n')
                if line == '':
                    continue
                rec = Input(line)
                subanta = rec.hstem
                genderstr = rec.hgenderstr
                words1 = rec.mwkey2  # e.g. gaRi-mat
                for g in genderstr:
                    try:
                        (key1, key2) = deduce_gender_stem(
                            subanta, words1, g, genderstr, rec.mwtype)
                        outputs = s_file_init_alt_helper(
                            key1, g, key2, dbg=False)
                    except (NameError, MyException) as err:
                        # Single-argument print(...) emits the same text
                        # under the Python 2 statement and the Python 3
                        # function.
                        print("\ncase= %s line= %s" % (nin, line))
                        print(err)
                        outputs = None
                    # Normalise the helper result to a list of strings:
                    # it may be a list, None, or a ';'-joined string.
                    if outputs is None:
                        outputs = []
                    elif not isinstance(outputs, list):
                        if ';' in outputs:
                            outputs = outputs.split(';')
                        else:
                            outputs = [outputs]
                    if not outputs:
                        # No declension available: emit a 24-slot table of
                        # 'nil' so every (stem, gender) still gets a line.
                        decl = '[' + ' '.join(['nil'] * 24) + ']'
                        fout.write("%s %s:%s\n" % (subanta, g, decl))
                        nout += 1
                        continue
                    for output in outputs:
                        # change form so comparable with huet_noun_tables.txt
                        (_p0, _p1, _p2, _p3, decl) = output.split(':')
                        fout.write("%s %s:%s\n" % (subanta, g, decl))
                        nout += 1
    print("%s lines written to %s" % (nout, fileout))
예제 #2
0
def main(filein, fileout):
    """Convert the records in ``filein`` into declension lines in ``fileout``.

    Both files are handled as UTF-8.  Every non-blank input line is parsed
    with ``Input``; for each element ``g`` of the record's ``hgenderstr``
    the pair returned by ``deduce_gender_stem`` is passed to
    ``s_file_init_alt_helper``.  Each result is written as
    ``"<hstem> <g>:<decl>"``, where ``decl`` is the last of the five
    ':'-separated fields of the helper output.  When the helper yields
    nothing, or raises ``NameError``/``MyException`` (reported on stdout),
    a placeholder table of 24 ``nil`` entries is written instead.

    Args:
        filein: path of the input file.
        fileout: path of the output file (overwritten).

    Raises:
        ValueError: if a helper output does not consist of exactly five
            ':'-separated fields.
    """
    nout = 0
    # Context managers guarantee both files are closed even when parsing or
    # a helper raises part-way through the input.
    with codecs.open(filein, "r", "utf-8") as f:
        with codecs.open(fileout, "w", "utf-8") as fout:
            # nin is the 1-based physical line number (blank lines count).
            for nin, line in enumerate(f, 1):
                line = line.rstrip('\r\n')
                if line == '':
                    continue
                rec = Input(line)
                subanta = rec.hstem
                genderstr = rec.hgenderstr
                words1 = rec.mwkey2  # e.g. gaRi-mat
                for g in genderstr:
                    try:
                        (key1, key2) = deduce_gender_stem(
                            subanta, words1, g, genderstr, rec.mwtype)
                        outputs = s_file_init_alt_helper(
                            key1, g, key2, dbg=False)
                    except (NameError, MyException) as err:
                        # Single-argument print(...) emits the same text
                        # under the Python 2 statement and the Python 3
                        # function.
                        print("\ncase= %s line= %s" % (nin, line))
                        print(err)
                        outputs = None
                    # Normalise the helper result to a list of strings:
                    # it may be a list, None, or a ';'-joined string.
                    if outputs is None:
                        outputs = []
                    elif not isinstance(outputs, list):
                        if ';' in outputs:
                            outputs = outputs.split(';')
                        else:
                            outputs = [outputs]
                    if not outputs:
                        # No declension available: emit a 24-slot table of
                        # 'nil' so every (stem, gender) still gets a line.
                        decl = '[' + ' '.join(['nil'] * 24) + ']'
                        fout.write("%s %s:%s\n" % (subanta, g, decl))
                        nout += 1
                        continue
                    for output in outputs:
                        # change form so comparable with huet_noun_tables.txt
                        (_p0, _p1, _p2, _p3, decl) = output.split(':')
                        fout.write("%s %s:%s\n" % (subanta, g, decl))
                        nout += 1
    print("%s lines written to %s" % (nout, fileout))
def s_file_init_alt1(intab, indir, outtab, outdir, n1, n2):
    """Run ``s_file_init_alt_helper`` over a line range of an input table.

    Reads ``indir/intab`` (UTF-8) and writes ``outdir/outtab`` (UTF-8).
    Only non-blank lines whose 1-based line number lies in ``[n1, n2]`` are
    processed.  A line is expected to look like::

        gaRimat : S m : <MW=gaRi-mat,83017,1>

    i.e. stem, a field whose second space-separated token is the gender,
    and a dictionary reference whose second token is the key passed to the
    helper.  Every helper output is written on its own line; if there is
    none, a warning is printed to stdout.

    Args:
        intab: input file name, relative to ``indir``.
        indir: directory containing the input file.
        outtab: output file name, relative to ``outdir``.
        outdir: directory for the output file.
        n1: first line number (inclusive) to process.
        n2: last line number (inclusive) to process.

    Raises:
        SystemExit: with code 1 when a selected line has no second
            ':'-separated field or that field has fewer than two tokens.
    """
    filein = "%s/%s" % (indir, intab)
    fileout = "%s/%s" % (outdir, outtab)
    # Context managers close both files on every exit path, including the
    # SystemExit raised for malformed lines.
    with codecs.open(filein, "r", "utf-8") as f:
        with codecs.open(fileout, "w", "utf-8") as fout:
            # nin is the 1-based physical line number (blank lines count).
            for nin, line in enumerate(f, 1):
                line = line.rstrip('\r\n')
                if line == '':
                    continue
                if not (n1 <= nin <= n2):
                    continue
                words = line.split(':')
                subanta = words[0].strip()  # gaRimat
                try:
                    fg = re.findall(r"[^ ]+", words[1])  # ["S","m"]
                    # gender is what the current version of
                    # s_file_init_alt_helper expects
                    g = fg[1]
                except IndexError:
                    # Single-argument print(...) emits the same text under
                    # the Python 2 statement and the Python 3 function.
                    print("Case %s Problem with fg.Line= %s" % (nin, line))
                    raise SystemExit(1)
                dictstr = words[2].strip()  # <MW=gaRi-mat,83017,1>
                dictwords = re.findall(r"[^ <=,>]+", dictstr)
                words1 = dictwords[1]  # gaRi-mat
                try:
                    outputs = s_file_init_alt_helper(
                        subanta, g, words1, dbg=False)
                except (NameError, MyException) as err:
                    print("\ncase= %s line= %s" % (nin, line))
                    print(err)
                    outputs = None
                # Normalise the helper result to a list of strings: it may
                # be a list, None, or a ';'-joined string.
                if outputs is None:
                    outputs = []
                elif not isinstance(outputs, list):
                    if ';' in outputs:
                        outputs = outputs.split(';')
                    else:
                        outputs = [outputs]
                if not outputs:
                    print("Warning %s %s %s : %s" % (subanta, fg, words1, g))
                    continue
                for output in outputs:
                    fout.write("%s\n" % output)
def s_file_init_alt1(intab, indir, outtab, outdir, n1, n2):
    """Run ``s_file_init_alt_helper`` over a line range of an input table.

    Reads ``indir/intab`` (UTF-8) and writes ``outdir/outtab`` (UTF-8).
    Only non-blank lines whose 1-based line number lies in ``[n1, n2]`` are
    processed.  A line is expected to look like::

        gaRimat : S m : <MW=gaRi-mat,83017,1>

    i.e. stem, a field whose second space-separated token is the gender,
    and a dictionary reference whose second token is the key passed to the
    helper.  Every helper output is written on its own line; if there is
    none, a warning is printed to stdout.

    Args:
        intab: input file name, relative to ``indir``.
        indir: directory containing the input file.
        outtab: output file name, relative to ``outdir``.
        outdir: directory for the output file.
        n1: first line number (inclusive) to process.
        n2: last line number (inclusive) to process.

    Raises:
        SystemExit: with code 1 when a selected line has no second
            ':'-separated field or that field has fewer than two tokens.
    """
    filein = "%s/%s" % (indir, intab)
    fileout = "%s/%s" % (outdir, outtab)
    # Context managers close both files on every exit path, including the
    # SystemExit raised for malformed lines.
    with codecs.open(filein, "r", "utf-8") as f:
        with codecs.open(fileout, "w", "utf-8") as fout:
            # nin is the 1-based physical line number (blank lines count).
            for nin, line in enumerate(f, 1):
                line = line.rstrip('\r\n')
                if line == '':
                    continue
                if not (n1 <= nin <= n2):
                    continue
                words = line.split(':')
                subanta = words[0].strip()  # gaRimat
                try:
                    fg = re.findall(r"[^ ]+", words[1])  # ["S","m"]
                    # gender is what the current version of
                    # s_file_init_alt_helper expects
                    g = fg[1]
                except IndexError:
                    # Single-argument print(...) emits the same text under
                    # the Python 2 statement and the Python 3 function.
                    print("Case %s Problem with fg.Line= %s" % (nin, line))
                    raise SystemExit(1)
                dictstr = words[2].strip()  # <MW=gaRi-mat,83017,1>
                dictwords = re.findall(r"[^ <=,>]+", dictstr)
                words1 = dictwords[1]  # gaRi-mat
                try:
                    outputs = s_file_init_alt_helper(
                        subanta, g, words1, dbg=False)
                except (NameError, MyException) as err:
                    print("\ncase= %s line= %s" % (nin, line))
                    print(err)
                    outputs = None
                # Normalise the helper result to a list of strings: it may
                # be a list, None, or a ';'-joined string.
                if outputs is None:
                    outputs = []
                elif not isinstance(outputs, list):
                    if ';' in outputs:
                        outputs = outputs.split(';')
                    else:
                        outputs = [outputs]
                if not outputs:
                    print("Warning %s %s %s : %s" % (subanta, fg, words1, g))
                    continue
                for output in outputs:
                    fout.write("%s\n" % output)