def lesionMorphologicalRules(self, solution): """Sometimes we end up learning to put the morphology into the rewrite rules, e.g. 0 > k/#_ or something like that. This will take a solution and try removing insertion/deletion rules whenever possible, keeping the underlying forms constant but being willing to modify the morphology. """ rules = list(solution.rules) for r in list(solution.rules): if isinstance(r.focus, EmptySpecification) and isinstance(r.structuralChange, ConstantPhoneme) and \ u'#' in unicode(r): print "Candidate for lesion",r candidateRules = [ r_ for r_ in rules if r_ != r ] print "the new rules would be",candidateRules Model.Global() prefixes = [ Morph.sample() for _ in xrange(self.numberOfInflections) ] suffixes = [ Morph.sample() for _ in xrange(self.numberOfInflections) ] self.conditionOnData([ r_.makeConstant(self.bank) for r_ in candidateRules ], [ solution.underlyingForms[x].makeConstant(self.bank) for x in self.data ], prefixes, suffixes, auxiliaryHarness=True) #minimize(sum(wordLength(m) for m in prefixes+suffixes )) try: output = self.solveSketch() print "Lesioning morphological rule", r solution = Solution(prefixes = [ Morph.parse(self.bank, output, p) for p in prefixes ], suffixes = [ Morph.parse(self.bank, output, s) for s in suffixes ], underlyingForms = solution.underlyingForms, rules = candidateRules) rules = solution.rules except SynthesisFailure: print "Turns out that you cannot lesion",r return solution
def sketchJointSolution(self, depth, canAddNewRules = False, costUpperBound = None, fixedRules = None, auxiliaryHarness = False, oldSolution=None): try: Model.Global() if fixedRules == None: rules = [ Rule.sample() for _ in range(depth) ] else: rules = [ r.makeDefinition(self.bank) for r in fixedRules ] stems = [ Morph.sample() for _ in self.data ] prefixes = [ Morph.sample() for _ in range(self.numberOfInflections) ] suffixes = [ Morph.sample() for _ in range(self.numberOfInflections) ] for j,m in enumerate(self.fixedMorphology): if m != None: (p,s) = m condition(wordEqual(prefixes[j],p.makeConstant(self.bank))) condition(wordEqual(suffixes[j],s.makeConstant(self.bank))) if self.wordBoundaries: for prefix, suffix in zip(prefixes, suffixes): condition(Or([wordLength(prefix) == 0, wordLength(suffix) == 0])) morphologicalCosts = [ None if m == None else len(m[0]) + len(m[1]) for m in self.fixedMorphology ] self.minimizeJointCost(rules, stems, prefixes, suffixes, costUpperBound, morphologicalCosts, oldSolution=oldSolution) self.conditionOnData(rules, stems, prefixes, suffixes, auxiliaryHarness = auxiliaryHarness) self.conditionOnPrecomputedMorphology(prefixes, suffixes) output = self.solveSketch() print "Final hole value:",parseMinimalCostValue(output) solution = Solution(prefixes = [ Morph.parse(self.bank, output, p) for p in prefixes ], suffixes = [ Morph.parse(self.bank, output, s) for s in suffixes ], underlyingForms = {x: Morph.parse(self.bank, output, s) for x,s in zip(self.data, stems) }, rules = [ Rule.parse(self.bank, output, r) for r in rules ] if fixedRules == None else fixedRules) solution.showMorphologicalAnalysis() solution.showRules() return solution except SynthesisFailure: if canAddNewRules: depth += 1 print "Expanding rule depth to %d"%depth return self.sketchJointSolution(depth, canAddNewRules = canAddNewRules, auxiliaryHarness = auxiliaryHarness, oldSolution=oldSolution) else: return None
def solveStem(self, ss, morphology):
    """Find the cheapest stem that explains the observed forms `ss`.

    ss: surface forms, one per inflection; None entries are skipped.
    morphology: object exposing `prefixes` and `suffixes`, aligned with
        `ss`.

    For every observed form the solver requires that it matches
    prefix + stem + suffix, and patternCost(stem) is minimized.

    returns: the stem parsed from the solver output.
    """
    Model.Global()
    stem = Morph.sample()

    for prefix, suffix, surface in zip(morphology.prefixes, morphology.suffixes, ss):
        if surface is not None:
            observed = surface.makeConstant(self.bank)
            analysis = concatenate3(prefix.makeConstant(self.bank),
                                    stem,
                                    suffix.makeConstant(self.bank))
            condition(matchPattern(observed, analysis))

    minimize(patternCost(stem))
    output = self.solveSketch()
    return Morph.parse(self.bank, output, stem)
def applyRuleUsingSketch(self,r,u,untilSuffix):
    '''Apply rule `r` to morph `u` by asking the solver for the result.

    u: morph; r: rule; untilSuffix: int

    returns: the morph parsed from the solver output.
    raises: AssertionError when the solver reports the problem
    unsatisfiable or times out.  The failure is printed first.
    '''
    Model.Global()
    result = Morph.sample()
    _r = r.makeDefinition(self.bank)
    condition(wordEqual(result,applyRule(_r,u.makeConstant(self.bank),
                                         Constant(untilSuffix), len(u) + 2)))
    try:
        output = solveSketch(self.bank,
                             max(self.maximumObservationLength, len(u)) + 2,
                             len(u) + 2,
                             showSource=False, minimizeBound=31, timeout=None)
    except SynthesisFailure:
        message = "applyRuleUsingSketch: UNSATISFIABLE for %s %s %s"%(u,r,untilSuffix)
        print(message)
        printSketchFailure()
        # Raise explicitly: `assert False` is stripped under -O, which would
        # fall through to the return below with `output` unbound.
        raise AssertionError(message)
    except SynthesisTimeout:
        message = "applyRuleUsingSketch: TIMEOUT for %s %s %s"%(u,r,untilSuffix)
        print(message)
        raise AssertionError(message)
    return Morph.parse(self.bank, output, result)
def affix():
    """Return an affix for the current model.

    With `useMorphology` set, a fresh unknown morph is sampled; otherwise
    the affix is the empty morph made constant against `self.bank`.

    `useMorphology` and `self` are not parameters; they are read from the
    surrounding scope (presumably this is a nested helper -- confirm).
    """
    if not useMorphology:
        return Morph([]).makeConstant(self.bank)
    return Morph.sample()
def paretoFront(self, depth, k, temperature, useMorphology = False, offFront=0, oldSolutions=[], morphologicalCoefficient = 3, stemBaseline=0, minimizeBits=7): # no idea why we want this #self.maximumObservationLength += 1 def affix(): if useMorphology: return Morph.sample() else: return Morph([]).makeConstant(self.bank) def parseAffix(output, morph): if useMorphology: return Morph.parse(self.bank, output, morph) else: return Morph([]) Model.Global() rules = [ Rule.sample() for _ in range(depth) ] stems = [ Morph.sample() for _ in self.data ] prefixes = [ affix() for _ in range(self.numberOfInflections) ] suffixes = [ affix() for _ in range(self.numberOfInflections) ] for i in range(len(stems)): self.conditionOnStem_1a(rules, stems[i], prefixes, suffixes, self.data[i]) # actually we want this #for r in rules: condition(Not(ruleDoesNothing(r))) stemCostExpression = sum([ wordLength(u) for u in stems ]) - stemBaseline stemCostVariable = unknownInteger(numberOfBits = minimizeBits) condition(stemCostVariable == stemCostExpression) minimize(stemCostExpression) ruleCostExpression = sum([ ruleCost(r) for r in rules ] + [ wordLength(u)*morphologicalCoefficient for u in suffixes + prefixes ]) ruleCostVariable = unknownInteger() condition(ruleCostVariable == ruleCostExpression) if len(rules) > 0 or useMorphology: minimize(ruleCostExpression) solutions = [] solutionCosts = [] if oldSolutions: solutions = oldSolutions[0] solutionCosts = oldSolutions[1] solutionIndex = 0 while solutionIndex < k + offFront: # Excludes solutions we have already found for rc,uc in solutionCosts: if oldSolutions or solutionIndex >= k: # This condition just says that it has to be a # different trade-off. 
Gets things a little bit off of # the front condition(And([ruleCostVariable == rc,stemCostVariable == (uc - stemBaseline)]) == 0) else: # This condition says that it has to actually be on # the pareto - a stronger constraint condition(Or([ruleCostVariable < rc, stemCostVariable < (uc - stemBaseline)])) try: output = self.solveSketch(minimizeBound = int(2**minimizeBits - 1)) except SynthesisFailure: if offFront > 0 and solutionIndex < k: solutionIndex = k print "Nothing on front, moving to things just off of front..." continue else: print "Exiting Pareto procedure early due to unsatisfied" break except SynthesisTimeout: print "Exiting Pareto procedure early due to timeout" break s = Solution(suffixes = [ parseAffix(output, m) for m in suffixes ], prefixes = [ parseAffix(output, m) for m in prefixes ], rules = [ Rule.parse(self.bank, output, r) for r in rules ], underlyingForms = {x: Morph.parse(self.bank, output, m) for x,m in zip(self.data, stems) }).withoutUselessRules() solutions.append(s) print s rc = sum([r.cost() for r in s.rules ] + [len(a)*morphologicalCoefficient for a in s.prefixes + s.suffixes ]) uc = sum([len(u) for u in s.underlyingForms.values() ]) rc = int(rc + 0.5) print "Costs:",(rc,uc) actualCosts = (parseInteger(output, ruleCostVariable), parseInteger(output, stemCostVariable) + stemBaseline) print "Actual costs:",actualCosts if not (actualCosts == (rc,uc)): print output assert actualCosts == (rc,uc) (rc,uc) = actualCosts solutionCosts.append((rc,uc)) solutionIndex += 1 print " pareto: got %d solutions of depth %d"%(len(solutions),depth) if len(solutions) > 0: optimalCost, optimalSolution = min([(uc + float(rc)/temperature, s) for ((rc,uc),s) in zip(solutionCosts, solutions) ]) print "Optimal solution:" print optimalSolution print "Optimal cost:",optimalCost return solutions, solutionCosts
def sampleMorphWithLength(l):
    """Sample an unknown morph and constrain its word length to equal `l`.

    returns: the sampled morph.
    """
    morph = Morph.sample()
    lengthConstraint = wordLength(morph) == l
    condition(lengthConstraint)
    return morph
def solveAlignment(self): Model.Global() prefixes = [Morph.sample() for _ in range(self.numberOfInflections)] suffixes = [Morph.sample() for _ in range(self.numberOfInflections)] stems = [Morph.sample() for _ in self.data] for surfaces, stem in zip(self.data, stems): for (p, s), x in zip(zip(prefixes, suffixes), surfaces): if x is None: continue condition( matchPattern(x.makeConstant(self.bank), concatenate3(p, stem, s))) for i in range(self.numberOfInflections): if all(ss[i] == None for ss in self.data): condition(wordLength(prefixes[i]) == 0) condition(wordLength(suffixes[i]) == 0) # OBJECTIVE: (# inflections) * (stem lengths) + (# data points) * (affix len) # Because we pay for each stem once per inflection, # and pay for each affix once per data point observationsPerStem = float( sum(s is not None for ss in self.data for s in ss)) / len(stems) observationsPerAffix = sum( sum(ss[i] is not None for ss in self.data ) for i in range(self.numberOfInflections) ) \ / float(self.numberOfInflections) print "observations per stem", observationsPerStem print "observations per affix", observationsPerAffix r = observationsPerStem / observationsPerAffix if r < 2 and r > 0.5: ca = 1 cs = 1 elif r >= 2: ca = 1 cs = 2 elif r <= 0.5: ca = 2 cs = 1 else: assert False print "ca = ", ca print "cs = ", cs minimize(sum((patternCost(p) + patternCost(s)) * ca for j,(p,s) in enumerate(zip(prefixes, suffixes))) + \ sum(patternCost(stem) * cs for stem,ss in zip(stems, self.data) )) # for m in prefixes + suffixes: # condition(patternCost(m) < 4) output = self.solveSketch() solution = Solution( rules=[], prefixes=[Morph.parse(self.bank, output, p) for p in prefixes], suffixes=[Morph.parse(self.bank, output, p) for p in suffixes], underlyingForms={ x: Morph.parse(self.bank, output, s) for x, s in zip(self.data, stems) }) for i in range(self.numberOfInflections): if all(ss[i] == None for ss in self.data): print("\t(inflection not seen)") else: print solution.prefixes[i], "+ stem +", 
solution.suffixes[i] return solution
def sketchJointSolution(self, depth, canAddNewRules=True, existingSolutions=None):
    """Solve for one rule and five morphs explaining the counted data.

    Morphs are sampled for the keys 1, 4, 5, 9 and 10.  Each observation
    self.data[j] with count k = self.count[j] must equal the rule applied
    to:
      * morphs[k]                      when k <= 10,
      * morphs[k // 10] + morphs[10]   when k is a multiple of 10,
      * morphs[10] + morphs[k - 10]    when k < 20.

    depth: must be 1.
    canAddNewRules: must be true.
    existingSolutions: earlier single-rule solutions.  The new rule must
        differ from each of their rules, and the morphs are pinned to the
        underlying forms of the first one.

    returns: a Solution keyed by one-element tuples (k,), or None when
    synthesis fails.
    raises: AssertionError for a count that fits none of the cases above.
    """
    assert depth == 1
    assert canAddNewRules
    if existingSolutions is None:
        existingSolutions = []

    Model.Global()
    r = Rule.sample()
    for o in existingSolutions:
        assert len(o.rules) == 1
        condition(Not(ruleEqual(r, o.rules[0].makeConstant(self.bank))))

    morphs = {}
    for key in (1, 4, 5, 9, 10):
        morphs[key] = Morph.sample()

    if existingSolutions:
        for (k, ), v in existingSolutions[0].underlyingForms.iteritems():
            condition(wordEqual(v.makeConstant(self.bank), morphs[k]))

    for j, o in enumerate(self.data):
        k = self.count[j]
        if k <= 10:
            underlying = morphs[k]
        elif k % 10 == 0:
            # Floor division keeps the dict key an int even if true
            # division is in effect.
            underlying = concatenate(morphs[k // 10], morphs[10])
        elif k < 20:
            underlying = concatenate(morphs[10], morphs[k - 10])
        else:
            raise AssertionError("unsupported count %r" % (k,))
        condition(
            wordEqual(
                o.makeConstant(self.bank),
                applyRule(r, underlying, Constant(0),
                          self.maximumObservationLength)))

    minimize(ruleCost(r))
    try:
        output = solveSketch(
            self.bank,
            unroll=self.maximumObservationLength + 2,
            maximumMorphLength=self.maximumObservationLength + 1)
    except SynthesisFailure:
        print("Failed at phonological analysis.")
        return None

    r = Rule.parse(self.bank, output, r)
    print(r.pretty())
    return Solution(rules=[r],
                    prefixes=[],
                    suffixes=[],
                    underlyingForms={(k, ): Morph.parse(self.bank, output, m)
                                     for k, m in morphs.iteritems()})