예제 #1
0
 def set_orth(self,Orth,dosound=False):
     """Store ``Orth`` as the orthography and optionally derive a reading.

     Args:
         Orth: Orthographic form to store in ``self.orth``.
         dosound: When true, also try to set ``self.reading`` from ``Orth``.

     ``self.reading`` is left untouched when ``dosound`` is false, and also
     when ``Orth`` passes none of the three character-type checks below.
     """
     self.orth=Orth
     if not dosound:
         return
     if myModule.all_of_chartypes_p(Orth,['katakana']):
         # Input that passes the katakana-only check is used as the reading as is.
         self.reading=Orth
     elif myModule.all_of_chartypes_p(Orth,['hiragana']):
         # NOTE(review): kana2kana_wd presumably converts hiragana to
         # katakana here -- confirm against myModule.
         self.reading=myModule.kana2kana_wd(Orth)
     elif myModule.all_of_chartypes_p(Orth,['han','hiragana','katakana']):
         # Pass an argument list and no shell: the script path and Orth each
         # arrive as exactly one argv entry, so spaces or shell
         # metacharacters in HomeDir (or Orth) cannot split or alter the
         # command the way the former hand-quoted shell string could.
         Script=HomeDir+'/myProgs/scripts/kakasi_katakana.sh'
         Completed=subprocess.run([Script,Orth],stdout=subprocess.PIPE)
         # The script's stdout, stripped of surrounding whitespace, is the reading.
         self.reading=Completed.stdout.strip().decode()
예제 #2
0
 def synchronise_sound(self):
     """Rewrite ``self.orth`` from ``self.reading`` based on the current orthography.

     The current ``self.orth`` is tested against three character-type checks
     in order (katakana only, hiragana only, han plus hiragana). The first
     check that passes decides how the new orthography is built; when none
     passes, ``self.orth`` simply becomes ``self.reading``.
     """
     Current=self.orth

     if myModule.all_of_chartypes_p(Current,['katakana']):
         self.orth=self.reading
         return

     if myModule.all_of_chartypes_p(Current,['hiragana']):
         self.orth=myModule.kana2kana_wd(self.reading)
         return

     if not myModule.all_of_chartypes_p(Current,['han','hiragana']):
         # Fallback for anything not covered by the checks above.
         self.orth=self.reading
         return

     # Mixed han/hiragana: keep the leading part of the current orthography
     # and rebuild the trailing part from the reading.
     # NOTE(review): the index returned for the orthography is also used to
     # slice self.reading, which assumes both strings line up at that
     # index -- confirm against identify_kana_boundary and the callers.
     Cut=identify_kana_boundary(Current)
     Tail=myModule.kana2kana_wd(self.reading[Cut:])
     self.orth=Current[:Cut]+Tail
예제 #3
0
def kanji_hiragana_combo(Strs):
    """Tell whether ``Strs`` is a one-kanji-string plus hiragana combination.

    Returns True only when all three conditions hold, checked in this order:
    exactly one string passes the "at least one han" check, at least one
    string passes the "all hiragana" check, and no string passes the
    "at least one katakana" check. Otherwise returns False.

    ``Strs`` is iterated once per condition that gets evaluated.
    """
    HanCount=sum(1 for Item in Strs if myModule.at_least_one_of_chartypes_p(Item,['han']))
    # ``and`` short-circuits, so later scans only run when earlier ones pass.
    return (
        HanCount==1
        and any(myModule.all_of_chartypes_p(Item,['hiragana']) for Item in Strs)
        and not any(myModule.at_least_one_of_chartypes_p(Item,['katakana']) for Item in Strs)
    )
예제 #4
0
def all_hiragana_p(Strs):
    """Return True when every string in ``Strs`` passes the all-hiragana check.

    Scanning stops at the first string that fails. An empty ``Strs`` gives
    True.
    """
    return all(myModule.all_of_chartypes_p(Item,['hiragana']) for Item in Strs)
예제 #5
0
def filter_and_output_probunamb(GenHomStats):
    """Bucket the items of ``GenHomStats`` and write out the unambiguous ones.

    Each item is appended to exactly one of four lists (``UniOrthStats``,
    ``UnambStats``, ``OrthAmbStats``, ``CatAmbStats``). The ``__dict__`` of
    every item in ``UnambStats`` is then written as one ``repr`` per line, in
    descending ``domcatfreq`` order.

    Args:
        GenHomStats: Iterable of objects; the code reads their
            ``superorthsfreqs``, ``domcat``, ``homstats``, ``orthsfreqs`` and
            ``domcatfreq`` attributes.

    Output goes to the file named by the module-level ``Args.out_fp`` when
    that is truthy, otherwise to ``sys.stdout``.
    """
    OrthAmbStats=[];CatAmbStats=[];UnambStats=[];UniOrthStats=[]    
    for GenHomStat in GenHomStats:
        # Items with a single entry in superorthsfreqs are set aside first.
        if len(GenHomStat.superorthsfreqs)==1:
            UniOrthStats.append(GenHomStat)
            continue
        if GenHomStat.domcat:
            DomCat=GenHomStat.domcat
            # Unambiguous when, among the dominant category's orthographies,
            # at most one passes the "at least one han" check and none passes
            # the "all katakana" check.
            if len([Orth for Orth in GenHomStat.homstats[DomCat].orthsfreqs.keys() if myModule.at_least_one_of_chartypes_p(Orth,['han'])]) <= 1 and len([Orth for Orth in GenHomStat.homstats[DomCat].orthsfreqs.keys() if myModule.all_of_chartypes_p(Orth,['katakana'])]) == 0:
                UnambStats.append(GenHomStat)
            else:
                # NOTE(review): this reads GenHomStat.orthsfreqs[DomCat], while
                # the test above reads GenHomStat.homstats[DomCat].orthsfreqs --
                # confirm the difference is intended. The meaning of the 500
                # argument is defined by count_homophones.pseudo_unambiguous
                # and is not visible here.
                DomOrth=count_homophones.pseudo_unambiguous(GenHomStat.orthsfreqs[DomCat],500)
                if DomOrth:
                    UnambStats.append(GenHomStat)
                else:
                    OrthAmbStats.append(GenHomStat)
        else:
            # A falsy domcat puts the item in the category-ambiguous list.
            CatAmbStats.append(GenHomStat)

    # NOTE(review): Out is not closed in the lines visible here, and
    # UniOrthStats, OrthAmbStats and CatAmbStats are not read again in them.
    Out=open(Args.out_fp,'wt') if Args.out_fp else sys.stdout
        
    # Highest domcatfreq first.
    SortedUnambStats=sorted(UnambStats,key=lambda a:a.domcatfreq,reverse=True)        
    for UnambStat in SortedUnambStats:
        Out.write(repr(UnambStat.__dict__)+'\n')