Ejemplo n.º 1
0
 def print_GF(token,ms,mp,fs,fp):
     """Emit a GF lexicon entry for a 4-form adjective via mk4A.

     Prints either a bare abstract-syntax declaration (token-only mode) or a
     concrete ``lin`` rule, then blacklists the token so it is not emitted twice.
     """
     token_id = createGFToken(token, u"A")
     if not ApertiumLexformGroupProcessor.can_print(token_id):
         return
     if ApertiumLexformGroupProcessor.printOnlyTokens:
         uniprint(token_id + u" : A ;")
     else:
         # mk4A takes the forms in the order ms fs mp fp — presumably the
         # GF RGL convention; note this differs from the parameter order.
         quoted = u" ".join(u"\"" + form + u"\"" for form in (ms, fs, mp, fp))
         uniprint(u"lin " + token_id + u" = mk4A " + quoted + u" ;")
     # don't print an entry with the same token again
     ApertiumLexformGroupProcessor.add_to_blacklist(token_id)
Ejemplo n.º 2
0
 def print_GF(lemma,surface):
     """Emit a GF lexicon entry for an adverb via mkAdv.

     Prints either a bare abstract-syntax declaration (token-only mode) or a
     concrete ``lin`` rule, then blacklists the token against re-emission.
     """
     token_id = createGFToken(lemma, u"Adv")
     if ApertiumLexformGroupProcessor.can_print(token_id):
         if ApertiumLexformGroupProcessor.printOnlyTokens:
             line = token_id + u" : Adv ;"
         else:
             line = u"lin " + token_id + u" = mkAdv \"" + surface + u"\" ;"
         uniprint(line)
         # don't print an entry with the same token again
         ApertiumLexformGroupProcessor.add_to_blacklist(token_id)
Ejemplo n.º 3
0
 def print_GF_genitive(token, sgForm, plForm, sgGenForm, plGenForm):
     """Emit a GF lexicon entry for a noun with genitive forms via mkN.

     Prints either a bare abstract-syntax declaration (token-only mode) or a
     concrete ``lin`` rule with all four quoted forms, then blacklists the token.
     """
     token_id = createGFToken(token, u"N")
     if not ApertiumLexformGroupProcessor.can_print(token_id):
         return
     if ApertiumLexformGroupProcessor.printOnlyTokens:
         uniprint(token_id + u" : N ;")
     else:
         surface_forms = (sgForm, plForm, sgGenForm, plGenForm)
         quoted = u" ".join(u"\"" + form + u"\"" for form in surface_forms)
         uniprint(u"lin " + token_id + u" = mkN " + quoted + u" ;")
     # don't print an entry with the same token again
     ApertiumLexformGroupProcessor.add_to_blacklist(token_id)
Ejemplo n.º 4
0
 def print_GF_synthetic(lemma,baseform,comparativeForm):
     """Emit a GF lexicon entry for an adjective with a synthetic comparative.

     Uses ``mkA base comparative``. Prints either a bare abstract-syntax
     declaration (token-only mode) or a concrete ``lin`` rule, then blacklists
     the token so it is not emitted twice.
     """
     token_id=createGFToken(lemma,u"A")
     if ApertiumLexformGroupProcessor.can_print(token_id):
         if ApertiumLexformGroupProcessor.printOnlyTokens:
             uniprint(token_id+u" : A ;")
         else:
             # u-prefix all literals, consistent with the sibling print_GF
             # helpers — avoids mixing str and unicode under Python 2.
             uniprint(u"lin "+token_id+u" = mkA \""+baseform+u"\" \""+comparativeForm+u"\" ;")
         #don't print an entry with the same token again
         ApertiumLexformGroupProcessor.add_to_blacklist(token_id)
Ejemplo n.º 5
0
 def print_GF(token,gender,sgForm,plForm):
     """Emit a GF lexicon entry for a noun with explicit gender via mkN.

     Prints either a bare abstract-syntax declaration (token-only mode) or a
     concrete ``lin`` rule, then blacklists the token against re-emission.
     NOTE(review): gender must be u"m" or u"f" — any other value raises
     KeyError when the full rule is printed.
     """
     genderNames = {u"m": u"masculine", u"f": u"feminine"}
     token_id = createGFToken(token, u"N")
     if not ApertiumLexformGroupProcessor.can_print(token_id):
         return
     if ApertiumLexformGroupProcessor.printOnlyTokens:
         uniprint(token_id + u" : N ;")
     else:
         # quoted surface forms followed by the (unquoted) GF gender oper
         pieces = [u"\"" + form + u"\"" for form in (sgForm, plForm)]
         pieces.append(genderNames[gender])
         uniprint(u"lin " + token_id + u" = mkN " + u" ".join(pieces) + u" ;")
     # don't print an entry with the same token again
     ApertiumLexformGroupProcessor.add_to_blacklist(token_id)
Ejemplo n.º 6
0
 def print_GF(token,gender,surfaceForm):
     """Emit a GF lexicon entry for a proper noun via mkPN.

     Prints either a bare abstract-syntax declaration (token-only mode) or a
     concrete ``lin`` rule, then blacklists the token against re-emission.
     NOTE(review): gender must be u"m" or u"f" — any other value raises
     KeyError when the full rule is printed.
     """
     genderNames = {u"m": u"masculine", u"f": u"feminine"}
     token_id = createGFToken(token, u"PN")
     if ApertiumLexformGroupProcessor.can_print(token_id):
         if ApertiumLexformGroupProcessor.printOnlyTokens:
             line = token_id + u" : PN ;"
         else:
             line = (u"lin " + token_id + u" = mkPN \"" + surfaceForm
                     + u"\" " + genderNames[gender] + u" ;")
         uniprint(line)
         # don't print an entry with the same token again
         ApertiumLexformGroupProcessor.add_to_blacklist(token_id)
Ejemplo n.º 7
0
 def print_GF(lemma, forms, valency=u"V"):
     """Emit a GF lexicon entry for a verb, optionally wrapped in a valency oper.

     A plain V is emitted as ``mkV form1 form2 ...``; any other valency wraps
     that in ``mk<valency> ( ... )``. Unknown valencies are reported via
     uniprint and the entry is skipped. Printed tokens are blacklisted.
     """
     if valency not in (u"V", u"V2", u"V3", u"VS", u"VV", u"VA", u"VQ"):
         uniprint(u"unknown valency: " + valency, True)
         return
     token_id = createGFToken(lemma, valency)
     if not ApertiumLexformGroupProcessor.can_print(token_id):
         return
     if ApertiumLexformGroupProcessor.printOnlyTokens:
         uniprint(token_id + u" : " + valency + u" ;")
     else:
         if valency == u"V":
             wrapOpen, wrapClose = u"", u""
         else:
             wrapOpen, wrapClose = u"mk" + valency + u" (", u")"
         quoted = u" ".join([u"\"" + form + u"\"" for form in forms])
         # spacing (double blanks around the wrapper) kept exactly as emitted
         uniprint(u"lin " + token_id + u" =  " + wrapOpen + u" mkV "
                  + quoted + u" " + wrapClose + u"  ;")
     # don't print an entry with the same token again
     ApertiumLexformGroupProcessor.add_to_blacklist(token_id)
Ejemplo n.º 8
0
 def print_GF(token,forms,valency=u"V"):
     """Emit a GF lexicon entry for a verb via the verboV/allforms_67 paradigm.

     A plain V is emitted unwrapped; V3 uses the special ``dirdirV3 ( ... )``
     wrapper and every other valency uses ``mk<valency> ( ... )``. Unknown
     valencies are reported via uniprint and the entry is skipped. Printed
     tokens are blacklisted against re-emission.
     """
     if valency not in (u"V", u"V2", u"V3", u"VS", u"VV", u"VA", u"VQ"):
         uniprint(u"unknown valency: " + valency, True)
         return
     token_id = createGFToken(token, valency)
     if not ApertiumLexformGroupProcessor.can_print(token_id):
         return
     if ApertiumLexformGroupProcessor.printOnlyTokens:
         uniprint(token_id + u" : " + valency + u" ;")
     else:
         # V3 is special-cased; other non-plain valencies use the generic oper
         if valency == u"V":
             wrapOpen, wrapClose = u"", u""
         elif valency == u"V3":
             wrapOpen, wrapClose = u"dirdirV3 (", u")"
         else:
             wrapOpen, wrapClose = u"mk" + valency + u" (", u")"
         quoted = u" ".join([u"\"" + form + u"\"" for form in forms])
         uniprint(u"lin " + token_id + u" = " + wrapOpen + u" verboV  (allforms_67 "
                  + quoted + u" ) " + wrapClose + u" ;")
     # don't print an entry with the same token again
     ApertiumLexformGroupProcessor.add_to_blacklist(token_id)
Ejemplo n.º 9
0
            cls.valencyDict[entry.lemma].append(entry.valency)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Generates adjective for GF monolingual lexicon')
    parser.add_argument('--lang')
    parser.add_argument('--category')
    parser.add_argument('--bildic_tl_expanded_file')
    parser.add_argument('--print_only_tokens',action='store_true')
    parser.add_argument('--black_list')
    parser.add_argument('--valencies')
    parser.add_argument('--no_multiwords',action='store_true')

    args = parser.parse_args(sys.argv[1:])
    
    ApertiumLexformGroupProcessor.set_ignore_multiwords(args.no_multiwords)
    
    if args.print_only_tokens:
        ApertiumLexformGroupProcessor.printOnlyTokens=True
    
    if args.bildic_tl_expanded_file:
        #actual_process bilingual dictionary
        #we want as a result a dict() in python which contains, for each TL lemma
        #the different SL lemmas which is mapped from in the bilingual dictionary
        
        fileDesc=open(args.bildic_tl_expanded_file,'r')
        MultipleLineEntriesProcessor.process(SpaEngBilingualProcessor,BilingualDicLineEntry,fileDesc)
        if args.category == "adj":    
            SpaAdjectivesProcessor.engAdjectivesDict=SpaEngBilingualProcessor.bilingualDictionaryRepresentation
        elif args.category == "n":
            SpaNounsProcessor.engNounsDict=SpaEngBilingualProcessor.bilingualDictionaryRepresentation