def extend(self, n):
    """Freeze every existing semantics slot and open a fresh few-shot one.

    All current entries are marked non-learnable, a new ``Semantics`` for the
    latest symbol is appended, and every frozen semantics with positive arity
    is promoted to an invented primitive.
    """
    # NOTE(review): parameter `n` is never used in this body — confirm whether
    # it was meant to control how many slots are added.
    for slot in self.semantics:
        slot.learnable = False
    new_idx = len(SYMBOLS) - 1
    self.semantics.append(Semantics(new_idx, fewshot=True))
    # Promote frozen, non-nullary semantics to primitives.  The freshly
    # appended slot is part of this scan, exactly as in the original code.
    for slot in self.semantics:
        if slot.learnable or slot.arity <= 0:
            continue
        self.primitives.append(Invented(slot.program.prog))
def update_grammar(self):
    """Rebuild the enumeration grammar from solved, learnable semantics.

    Collects each solved program with positive arity as an invented
    primitive and, when the resulting grammar differs from the current one,
    installs it and resets both frontier caches.
    """
    invented = []
    for smt in self.semantics:
        if not (smt.learnable and smt.solved):
            continue
        prog = smt.program
        # A '#' in the printed program means it already embeds an invented
        # primitive, which tends to be expensive to evaluate; adding it as a
        # primitive could slow enumeration a lot, so it is skipped.  (Raising
        # the enumeration timeout would be the alternative.)
        if prog is None or not prog.arity > 0 or '#' in str(prog):
            continue
        invented.append(Invented(prog.prog))
    candidate = Grammar.uniform(self.primitives + invented)
    if candidate != self.grammar:
        self.grammar = candidate
        # Cached frontiers were computed under the old grammar; invalidate.
        self.helmholtzFrontiers = None
        self.allFrontiers = None
        print(
            "Update grammar with invented programs and set frontiers to none."
        )
def memorizeInduce(g, frontiers, **kwargs):
    """Absorb each solved task's best program into the grammar verbatim.

    Every best-posterior program not already present (after uncurrying) among
    the grammar's primitives becomes a new invention.  Frontiers are then
    rewritten in terms of those inventions, with log-priors recomputed under
    the new grammar.  Returns ``(newGrammar, newFrontiers)``.
    """
    known = {p.uncurry() for p in g.primitives}
    solved = {f.bestPosterior.program for f in frontiers if not f.empty}
    fresh = solved - known
    newGrammar = Grammar.uniform(
        list(g.primitives) + [Invented(prog) for prog in fresh])

    def rewrite(program):
        # Swap a raw program for its invented wrapper when one was created.
        if program in fresh:
            return Invented(program).uncurry()
        return program

    newFrontiers = []
    for f in frontiers:
        entries = []
        for e in f:
            np = rewrite(e.program)
            entries.append(FrontierEntry(
                program=np,
                # Prior comes from the *new* grammar; likelihood is unchanged.
                logPrior=newGrammar.logLikelihood(f.task.request, np),
                logLikelihood=e.logLikelihood))
        newFrontiers.append(Frontier(entries, task=f.task))
    return newGrammar, newFrontiers
# NOTE(review): this looks like a stray module-level duplicate of the
# `substitute` closure defined inside `memorizeInduce` above.  At module
# scope the `nonlocal` statement is a SyntaxError (there is no enclosing
# function binding `newInventions`) — confirm this is dead/duplicated code
# and remove it, or keep only the nested copy.
def substitute(p):
    nonlocal newInventions
    # Replace a raw program with its invented, uncurried wrapper when it is
    # one of the newly created inventions; otherwise pass it through.
    if p in newInventions:
        return Invented(p).uncurry()
    return p
def rustInduce(g0, frontiers, _=None, topK=1, pseudoCounts=1.0, aic=1.0,
               structurePenalty=0.001, a=0, CPUs=1, iteration=-1,
               topk_use_only_likelihood=False, vs=False):
    """Compress grammar ``g0`` against ``frontiers`` via the external Rust compressor.

    Serializes the grammar and frontiers to JSON, pipes them to
    ``./rust_compressor/rust_compressor`` over stdin, and parses the JSON
    reply into a new ``Grammar`` and rewritten frontiers.

    Returns ``(g, newFrontiers)``.
    """
    def finite_logp(l):
        # JSON cannot represent -inf; clamp to a large negative finite value.
        return l if l != float("-inf") else -1000
    message = {
        # `vs=True` selects the version-spaces strategy; otherwise
        # fragment grammars.
        "strategy": {
            "version-spaces": {
                "top_i": 50
            }
        } if vs else {
            "fragment-grammars": {}
        },
        "params": {
            "structure_penalty": structurePenalty,
            # The Rust side expects an integer; round to nearest.
            "pseudocounts": int(pseudoCounts + 0.5),
            "topk": topK,
            "topk_use_only_likelihood": topk_use_only_likelihood,
            # infinity is encoded as JSON null.
            "aic": aic if aic != float("inf") else None,
            "arity": a,
        },
        "primitives": [{
            "name": p.name,
            "tp": str(t),
            "logp": finite_logp(l)
        } for l, t, p in g0.productions if p.isPrimitive],
        "inventions": [
            {
                "expression": str(p.body),
                "logp": finite_logp(l)
            }  # -inf is clamped to a finite value by finite_logp
            for l, t, p in g0.productions if p.isInvented
        ],
        "variable_logprob": finite_logp(g0.logVariable),
        "frontiers": [{
            "task_tp": str(f.task.request),
            "solutions": [{
                "expression": str(e.program),
                "logprior": finite_logp(e.logPrior),
                "loglikelihood": e.logLikelihood,
            } for e in f],
        } for f in frontiers],
    }
    eprint("running rust compressor")
    messageJson = json.dumps(message)
    # Dump the request for offline debugging of the compressor.
    with open("jsonDebug", "w") as f:
        f.write(messageJson)
    # Pick the pipe mode by Python version: 3.6+ Popen supports text pipes
    # via `encoding`; 3.5 needs manual bytes encoding.
    # NOTE(review): only the *minor* version is checked — this assumes the
    # major version is 3 and the `else` branch rejects e.g. 3.4; confirm
    # behavior on other interpreters.
    if sys.version_info[1] >= 6:
        p = subprocess.Popen(['./rust_compressor/rust_compressor'],
                             encoding='utf-8',
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE)
    elif sys.version_info[1] == 5:
        p = subprocess.Popen(['./rust_compressor/rust_compressor'],
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE)
        messageJson = bytearray(messageJson, encoding='utf-8')
        # convert messageJson string to bytes
    else:
        eprint("must be python 3.5 or 3.6")
        assert False
    # Send the whole request, then close stdin so the child sees EOF.
    p.stdin.write(messageJson)
    p.stdin.flush()
    p.stdin.close()
    # NOTE(review): returncode is non-None only if the child has already
    # exited at this point; a later failure would not be detected here.
    if p.returncode is not None:
        raise ValueError("rust compressor failed")
    if sys.version_info[1] >= 6:
        resp = json.load(p.stdout)
    elif sys.version_info[1] == 5:
        import codecs
        resp = json.load(codecs.getreader('utf-8')(p.stdout))
    # Pair the returned primitive log-probs with g0's primitives (the
    # compressor preserves their order) and parse the new inventions.
    productions = [(x["logp"], p) for
                   p, x in zip((p for (_, _, p) in g0.productions
                                if p.isPrimitive), resp["primitives"])] + \
                  [(i["logp"], Invented(Program.parse(i["expression"])))
                   for i in resp["inventions"]]
    # JSON null (from the aic=inf encoding on the way in, or missing logp)
    # maps back to -inf.
    productions = [(l if l is not None else float("-inf"), p)
                   for l, p in productions]
    g = Grammar.fromProductions(productions, resp["variable_logprob"],
                                continuationType=g0.continuationType)
    # Rebuild each frontier from the compressor's rewritten solutions;
    # response frontiers are positionally aligned with the input frontiers.
    newFrontiers = [
        Frontier([
            FrontierEntry(Program.parse(s["expression"]),
                          logPrior=s["logprior"],
                          logLikelihood=s["loglikelihood"])
            for s in r["solutions"]
        ], f.task) for f, r in zip(frontiers, resp["frontiers"])
    ]
    return g, newFrontiers
def update_grammar(self):
    """Rebuild the grammar from McCarthy primitives plus solved inventions.

    Every solved semantics whose program has positive arity contributes its
    original program (``prog_ori``) as an invented primitive on top of the
    base ``McCarthyPrimitives()`` set.
    """
    programs = [
        Invented(smt.program.prog_ori) for smt in self.semantics
        # Guard against a solved semantics with no program attached: the
        # sibling update_grammar checks `smt.program is not None` before
        # touching `.arity`, and without it this would raise AttributeError.
        if smt.solved and smt.program is not None and smt.program.arity > 0
    ]
    self.grammar = Grammar.uniform(McCarthyPrimitives() + programs)