def learn_affixes(self, alignment, ft=features):
    """Collect affix candidates from one alignment into ``self.affixes``.

    The alignment is walked pair by pair.  Pairs whose two sides mismatch
    (``compare_features(...) > 1``) and where neither side is a substring
    of the other are concatenated, side by side, into two candidate
    strings.  The first pair that does not mismatch closes the run: each
    candidate is then recorded with the other one as its partner, together
    with the segments bordering the run.  A run still open when the
    alignment ends is recorded with an empty right edge.

    Args:
        alignment: sequence of 2-item pairs of strings.
        ft: feature table handed to ``compare_features`` and to
            ``Affix.add_enviro``; defaults to the module-level ``features``.

    Returns:
        None.  ``self.affixes`` is updated in place.
    """
    def record_affix(affix, partner, l_edge, r_edge):
        # Store one candidate, or bump the counters of a known one.
        # Candidates starting with '_' are skipped
        # (presumably '_' is the gap marker of the aligner -- TODO confirm).
        if re.match('_', affix):
            return
        if affix in self.affixes:
            known = self.affixes[affix]
            known.count += 1
            known.partner += [partner]
        else:
            known = self.affixes[affix] = Affix(affix, partner)
        # The environment is registered for new and known affixes alike.
        known.add_enviro(r_edge, 'r', ft)
        known.add_enviro(l_edge, 'l', ft)

    in_run = False              # True while a mismatching run is open
    aff1, aff2 = '', ''         # material collected from side 0 / side 1
    l_edge = ''                 # side-0 segment just before the open run
    previous = ('', '')         # pair preceding the current one

    for pair in alignment:
        # Use the caller's feature table here too, so that the comparison
        # and the recorded environments are based on the same table.
        if compare_features(pair[0], pair[1], ft=ft) > 1:
            # A mismatching pair where one side contains the other neither
            # extends nor closes the run.
            # NOTE(review): nesting reconstructed from whitespace-collapsed
            # source -- confirm against the original layout.
            if pair[0] not in pair[1] and pair[1] not in pair[0]:
                aff1 += pair[0]
                aff2 += pair[1]
                if not in_run:
                    l_edge = previous[0]
                    in_run = True
        elif in_run:
            # The first non-mismatching pair is the right-hand edge.
            record_affix(aff1, aff2, l_edge, pair[0])
            record_affix(aff2, aff1, l_edge, pair[0])
            aff1, aff2 = '', ''
            in_run = False
        previous = pair

    # A run reaching the end of the alignment has an empty right edge.
    if in_run:
        record_affix(aff1, aff2, l_edge, '')
        record_affix(aff2, aff1, l_edge, '')
def verify_affixes(self, alignment, features):
    """Check the affix candidates of one alignment against ``self.affixes``.

    The alignment is segmented exactly as during learning: pairs whose two
    sides mismatch (``compare_features(...) > 1``) and where neither side
    is a substring of the other are concatenated into two candidate
    strings; the first non-mismatching pair (or the end of the alignment)
    closes the run.  Every candidate is then compared with what is stored
    for it in ``self.affixes``.

    Diagnostic lines are printed to standard output while checking.

    Args:
        alignment: sequence of 2-item pairs of strings.
        features: mapping indexed as ``features[segment][feature]``; also
            passed to ``compare_features`` as ``ft``.

    Returns:
        A list of ``((affix, l_edge, r_edge), verdict)`` tuples, one per
        candidate that does not start with ``'_'``.  ``verdict`` is False
        for unknown affixes, otherwise True only when both the left and
        the right environment are accepted.
    """
    def env_matches(expected, segment):
        # True when every feature listed in `expected` has the same value
        # for `segment` in the feature table.
        print(segment)
        for key in expected:
            if features[segment][key] != expected[key]:
                print('%s %s %s' % (key, features[segment][key], expected[key]))
                return False
        return True

    def judge(aff, l_edge, r_edge):
        # Return (candidate, verdict), or None for candidates starting
        # with '_' (presumably the aligner's gap marker -- TODO confirm).
        if re.match('_', aff):
            return None
        candidate = (aff, l_edge, r_edge)
        if aff not in self.affixes:
            return (candidate, False)
        print('affix exists')
        known = self.affixes[aff]
        if known.suffix and r_edge == '':
            # Nothing follows the affix: only the left side can be checked.
            print('suspect it\'s a suffix')
            right = True
            left = env_matches(known.on_left, l_edge)
        elif known.prefix and l_edge == '':
            # Nothing precedes the affix: only the right side can be
            # checked, and it is checked against the following segment.
            left = True
            right = env_matches(known.on_right, r_edge)
        else:
            # Both checks always run (no short-circuit) so that the
            # diagnostics of each side are printed.
            right = env_matches(known.on_right, r_edge)
            left = env_matches(known.on_left, l_edge)
        return (candidate, right and left)

    answers = []
    in_run = False              # True while a mismatching run is open
    aff1, aff2 = '', ''         # material collected from side 0 / side 1
    l_edge = ''                 # side-0 segment just before the open run
    previous = ('', '')         # pair preceding the current one

    for pair in alignment:
        if compare_features(pair[0], pair[1], ft=features) > 1:
            # A mismatching pair where one side contains the other neither
            # extends nor closes the run.
            # NOTE(review): nesting reconstructed from whitespace-collapsed
            # source -- confirm against the original layout.
            if pair[0] not in pair[1] and pair[1] not in pair[0]:
                aff1 += pair[0]
                aff2 += pair[1]
                if not in_run:
                    l_edge = previous[0]
                    in_run = True
        elif in_run:
            # The first non-mismatching pair is the right-hand edge.
            answers.append(judge(aff1, l_edge, pair[0]))
            answers.append(judge(aff2, l_edge, pair[0]))
            aff1, aff2 = '', ''
            in_run = False
        previous = pair

    # A run reaching the end of the alignment has an empty right edge.
    if in_run:
        answers.append(judge(aff1, l_edge, ''))
        answers.append(judge(aff2, l_edge, ''))

    return [answer for answer in answers if answer is not None]