def manualLogoTask(name, expression, proto=False, needToTrain=False,
                   supervise=False, lambdaCalculus=False):
    """Build a LOGO Task from a hand-written program.

    The program is parsed, its description length under a uniform grammar is
    reported as a diagnostic, and it is rendered with ``drawLogo`` to obtain
    the target image stored in the task.

    Args:
        name: Task name; also used in log messages.
        expression: Program source text.
        proto: Stored on the task as ``t.proto`` and inside ``t.specialTask``.
        needToTrain: Stored on the task as ``t.mustTrain``.
        supervise: When true, the parsed program is attached as
            ``t.supervisedSolution``.
        lambdaCalculus: When true the expression is parsed with
            ``Program.parse``; otherwise with ``parseLogo``.

    Returns:
        The constructed ``Task`` with request ``arrow(turtle, turtle)`` and a
        single example whose input is ``[0]``.
    """
    p = Program.parse(expression) if lambdaCalculus else parseLogo(expression)

    from dreamcoder.domains.logo.logoPrimitives import primitives
    from dreamcoder.grammar import Grammar

    g = Grammar.uniform(primitives, continuationType=turtle)
    gp = Grammar.uniform(primitives)
    try:
        l = g.logLikelihood(arrow(turtle, turtle), p)
        lp = gp.logLikelihood(arrow(turtle, turtle), p)
        assert l >= lp
        eprint(name, -l, "nats")
    except Exception:
        # Purely diagnostic: a failure here must not prevent task creation.
        # `Exception` (rather than a bare except) still covers the assertion
        # above but lets KeyboardInterrupt / SystemExit propagate.
        eprint("WARNING: could not calculate likelihood of manual logo", p)

    # Rendering can report "timeout"; retry until both renderings succeed.
    attempts = 0
    while True:
        [output, highresolution] = drawLogo(p, p,
                                            resolution=[28, 128],
                                            cost=True)
        if output == "timeout" or highresolution == "timeout":
            attempts += 1
        else:
            break
    if attempts > 0:
        eprint(
            f"WARNING: Took {attempts} attempts to render task {name} within timeout"
        )

    # With cost=True each rendering is indexable as (pixels, cost).
    cost = output[1]
    output = output[0]
    assert highresolution[1] == cost
    highresolution = highresolution[0]
    shape = list(map(int, output))
    highresolution = list(map(float, highresolution))

    t = Task(name, arrow(turtle, turtle), [(([0]), shape)])
    t.mustTrain = needToTrain
    t.proto = proto
    t.specialTask = ("LOGO", {"proto": proto})
    # The recorded cost is the rendered cost plus 5%.
    t.specialTask[1]["cost"] = cost * 1.05
    t.highresolution = highresolution

    if supervise:
        t.supervisedSolution = p

    return t
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on manipulating sequences of numbers.

    Keys consumed (popped) from `args`: "random_seed", "maxTasks",
    "extractor", "hidden" and "split". Everything left over, plus the keys
    added below, is forwarded to `explorationCompression`.
    """
    random.seed(args.pop("random_seed"))

    tasks = make_list_bootstrap_tasks()
    print(tasks)

    maxTasks = args.pop("maxTasks")
    if maxTasks and len(tasks) > maxTasks:
        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        random.shuffle(tasks)
        del tasks[maxTasks:]

    baseGrammar = Grammar.uniform(McCarthyPrimitives())

    extractor = {
        "learned": LearnedFeatureExtractor,
    }[args.pop("extractor")]
    # The hidden size is set on the extractor class itself.
    extractor.H = args.pop("hidden")

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/list/%s" % timestamp
    # Create the directory directly instead of shelling out to `mkdir -p`.
    os.makedirs(outputDirectory, exist_ok=True)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/list" % outputDirectory,
        "evaluationTimeout": 0.0005,
    })

    eprint("Got {} list tasks".format(len(tasks)))

    split = args.pop("split")
    if split:
        for t in tasks:
            # necessary = train_necessary(t)
            # if not necessary:
            #     continue
            # if necessary == "some":
            #     train_some[t.name.split()[0]].append(t)
            # else:
            t.mustTrain = True
        # for k in sorted(train_some):
        #     ts = train_some[k]
        #     random.shuffle(ts)
        #     ts.pop().mustTrain = True

        test, train = testTrainSplit(tasks, split)
        eprint("Alotted {} tasks for training and {} for testing".format(
            len(train), len(test)))
    else:
        train = tasks
        test = []

    explorationCompression(baseGrammar, train, testingTasks=test, **args)
def demoLogoTasks():
    """Write demo artefacts for the LOGO domain under /tmp.

    Samples programs from a uniform grammar and renders them into
    /tmp/dreams_0, then saves and post-processes the high-resolution image of
    every task (and of every `dSLDemo()` task), and finally builds montages.
    """
    # NOTE(review): scipy.misc.imsave has been removed from recent SciPy
    # releases - confirm the SciPy version this is expected to run against.
    import scipy.misc
    import numpy as np

    def as_rows(pixels):
        # Fold the flat pixel list into rows of int(sqrt(len)) entries each.
        side = int(len(pixels)**0.5)
        return np.array(
            [pixels[i:i + side] for i in range(0, len(pixels), side)])

    g0 = Grammar.uniform(primitives, continuationType=turtle)
    eprint("dreaming into /tmp/dreams_0...")

    # Draw up to 1000 samples, keeping only the ones that are not None.
    sample_count = 1000
    programs = []
    for _ in range(sample_count):
        candidate = g0.sample(arrow(turtle, turtle), maximumDepth=20)
        if candidate is not None:
            programs.append(candidate)

    os.system("mkdir -p /tmp/dreams_0")
    for index, program in enumerate(programs):
        with open(f"/tmp/dreams_0/{index}.dream", "w") as handle:
            handle.write(str(program))

    pretty_names = [
        f"/tmp/dreams_0/{index}_pretty.png" for index in range(len(programs))
    ]
    drawLogo(*programs,
             pretty=True,
             smoothPretty=False,
             resolution=512,
             filenames=pretty_names,
             timeout=1)

    # Task groups come from the command line, defaulting to 'all'.
    if len(sys.argv) > 1:
        requested = sys.argv[1:]
    else:
        requested = ['all']
    tasks = makeTasks(requested, proto=False)
    montageTasks(tasks, columns=16, testTrain=True)

    for index, task in enumerate(tasks):
        scipy.misc.imsave('/tmp/logo%d.png' % index,
                          as_rows(task.highresolution))
        logo_safe_name = task.name
        for unsafe in ("=", ' ', '/', "-"):
            logo_safe_name = logo_safe_name.replace(unsafe, "_")
        logo_safe_name = logo_safe_name + ".png"
        #os.system(f"convert /tmp/logo{n}.png -morphology Dilate Octagon /tmp/{logo_safe_name}")
        os.system(
            f"convert /tmp/logo{index}.png -channel RGB -negate /tmp/{logo_safe_name}"
        )
    eprint(len(tasks), "tasks")
    eprint(sum(task.mustTrain for task in tasks), "need to be trained on")

    for demo in dSLDemo():
        scipy.misc.imsave('/tmp/logoDemo%s.png' % demo.name,
                          as_rows(demo.highresolution))
        os.system(
            f"convert /tmp/logoDemo{demo.name}.png -morphology Dilate Octagon /tmp/logoDemo{demo.name}_dilated.png"
        )

    # Montage a random subset of the tasks that must be trained on.
    tasks = [task for task in tasks if task.mustTrain]
    random.shuffle(tasks)
    montageTasks(tasks[:16 * 3], "subset", columns=16)

    montageTasks(rotationalSymmetryDemo(), "rotational")
def __init__(self):
    """Set up the training arguments, base grammar and per-symbol state.

    Arguments are obtained from `commandlineArguments` and then overridden
    with fixed settings. Keys that are handled here ("random_seed",
    "extractor", "hidden", "maxTasks", "split") are removed, and the
    remainder is stored as `self.train_args`.
    """
    args = commandlineArguments(enumerationTimeout=200,
                                activation='tanh',
                                iterations=1,
                                recognitionTimeout=3600,
                                a=3,
                                maximumFrontier=5,
                                topK=2,
                                pseudoCounts=30.0,
                                helmholtzRatio=0.5,
                                structurePenalty=1.,
                                CPUs=min(numberOfCPUs(), 8),
                                extras=list_options)

    # Fixed overrides applied on top of the parsed arguments.
    args['noConsolidation'] = True
    args.pop("random_seed")
    args['contextual'] = True
    args['biasOptimal'] = True
    args['auxiliaryLoss'] = True
    args['activation'] = "relu"
    args['useDSL'] = False

    extractor = {
        "learned": LearnedFeatureExtractor,
    }[args.pop("extractor")]
    # The hidden size is set on the extractor class itself.
    extractor.H = args.pop("hidden")

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "tmp/%s" % timestamp
    # Create the directory directly instead of shelling out to `mkdir -p`.
    os.makedirs(outputDirectory, exist_ok=True)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/hint" % outputDirectory,
        "evaluationTimeout": 0.0005,
    })
    args.pop("maxTasks")
    args.pop("split")

    self.primitives = McCarthyPrimitives()
    baseGrammar = Grammar.uniform(self.primitives)
    self.grammar = baseGrammar
    self.train_args = args
    # One Semantics object per entry of SYMBOLS, identified by its index.
    self.semantics = [Semantics(i) for i in range(len(SYMBOLS))]
    self.allFrontiers = None
    self.helmholtzFrontiers = None
def update_grammar(self):
    """Rebuild the grammar from the base primitives plus solved semantics.

    When the rebuilt grammar differs from the current one it replaces it and
    both cached frontier collections are reset to None.
    """

    def is_exportable(smt):
        # If '#' is in the program, the program uses an invented primitive and
        # is very likely to have a high computation cost. Such programs are not
        # added to the primitives, since they might slow enumeration a lot.
        # This might be resolved by increasing the enumeration time.
        return (smt.learnable and smt.solved and smt.program is not None
                and smt.program.arity > 0 and '#' not in str(smt.program))

    invented = []
    for smt in self.semantics:
        if is_exportable(smt):
            invented.append(Invented(smt.program.prog))

    candidate = Grammar.uniform(self.primitives + invented)
    # self.train_args['enumerationTimeout'] += 100 * len(programs)
    if not candidate != self.grammar:
        return

    self.grammar = candidate
    self.helmholtzFrontiers = None
    self.allFrontiers = None
    print("Update grammar with invented programs and set frontiers to none.")
def memorizeInduce(g, frontiers, **kwargs):
    """Turn every best-posterior program into an invented primitive.

    Returns a pair `(newGrammar, newFrontiers)`: a uniform grammar over the
    old primitives plus the new inventions, and the frontiers rewritten so
    that each memorized program is expressed as its invention and re-scored
    under the new grammar. Extra keyword arguments are accepted and ignored.
    """
    known = {prim.uncurry() for prim in g.primitives}
    solved = {fr.bestPosterior.program for fr in frontiers if not fr.empty}
    fresh = solved - known

    newGrammar = Grammar.uniform(
        list(g.primitives) + [Invented(prog) for prog in fresh])

    def rewrite(prog):
        # Rewrite in terms of the new primitives.
        return Invented(prog).uncurry() if prog in fresh else prog

    newFrontiers = []
    for fr in frontiers:
        entries = []
        for entry in fr:
            rewritten = rewrite(entry.program)
            entries.append(
                FrontierEntry(program=rewritten,
                              logPrior=newGrammar.logLikelihood(
                                  fr.task.request, rewritten),
                              logLikelihood=entry.logLikelihood))
        newFrontiers.append(Frontier(entries, task=fr.task))

    return newGrammar, newFrontiers
def deepcoderProductions(): return [(0.0, prim) for prim in deepcoderPrimitives()] # def flatten_program(p): # string = p.show(False) # num_inputs = string.count('lambda') # string = string.replace('lambda', '') # string = string.replace('(', '') # string = string.replace(')', '') # #remove '_fn' (optional) # for i in range(num_inputs): # string = string.replace('$' + str(num_inputs-i-1),'input_' + str(i)) # string = string.split(' ') # string = list(filter(lambda x: x is not '', string)) # return string if __name__ == "__main__": #g = Grammar.uniform(deepcoderPrimitives()) g = Grammar.fromProductions(deepcoderProductions(), logVariable=.9) request = arrow(tlist(tint), tint, tint) p = g.sample(request) print("request:", request) print("program:") print(prettyProgram(p)) print("flattened_program:") flat = flatten_program(p) print(flat)
helmholtzRatio=0.5, activation="tanh", maximumFrontier=5, a=3, topK=2, pseudoCounts=30.0, extras=rational_options) primitives = [ real, # f1, real_division, real_addition, real_multiplication ] baseGrammar = Grammar.uniform(primitives) random.seed(42) tasks = makeTasks() smooth = arguments.pop('smooth') for t in tasks: t.features = drawFunction(200, 10., t.f) delattr(t, 'f') if smooth: t.likelihoodThreshold = None eprint("Got %d tasks..." % len(tasks)) test, train = testTrainSplit(tasks, 100) random.shuffle(test)
('mapping', [ '(map _ $x)', '(mapi _ $x)', '(flatten $x)', '(map _ (zip (droplast 1 $x) (drop 1 $x)))', '(map _ (drop 1 $x))' ], lambda e: e == '$l') ]) ## features computed in 'predict()' in bin/list_routines_misc.py Primitive.GLOBALS.clear() Grammar.uniform(list_routines_misc.primitives()) pre_features = { 'program_length': lambda _, __, ___, p: Program.parse(p).size(), 'depth': lambda _, __, ___, p: Program.parse(p).depth(), 'apps': lambda _, __, ___, p: list_routines_misc.count_applications(Program.parse(p)) } ## miscellaneous features misc_features = {} concept_examples = {'model': {}, 'dataset': {}} for purpose, num in [('model', 100), ('dataset', 150)]: concepts = os.listdir('analysis/concept_examples/{}'.format(purpose)) for concept in map(lambda n: 'c{:03}'.format(n), range(1, num+1)):
# Primitive("map", arrow(arrow(t0, t1), tlist(t0), tlist(t1)), _map), # Primitive("index", arrow(tint,tlist(t0),t0),None), # Primitive("length", arrow(tlist(t0),tint),None), primitiveRecursion1, #primitiveRecursion2, Primitive("gt?", arrow(tint, tint, tbool), _gt), Primitive("if", arrow(tbool, t0, t0, t0), _if), Primitive("eq?", arrow(tint, tint, tbool), _eq), Primitive("+", arrow(tint, tint, tint), _addition), Primitive("-", arrow(tint, tint, tint), _subtraction), ] + [Primitive(str(j), tint, j) for j in range(2)] if __name__ == "__main__": bootstrapTarget() g = Grammar.uniform(McCarthyPrimitives()) # with open("/home/ellisk/om/ec/experimentOutputs/list_aic=1.0_arity=3_ET=1800_expandFrontier=2.0_it=4_likelihoodModel=all-or-nothing_MF=5_baseline=False_pc=10.0_L=1.0_K=5_rec=False.pickle", "rb") as handle: # b = pickle.load(handle).grammars[-1] # print b p = Program.parse( "(lambda (lambda (lambda (if (empty? $0) empty (cons (+ (car $1) (car $0)) ($2 (cdr $1) (cdr $0)))))))") t = arrow(tlist(tint), tlist(tint), tlist(tint)) # ,tlist(tbool)) print(g.logLikelihood(arrow(t, t), p)) assert False print(b.logLikelihood(arrow(t, t), p)) # p = Program.parse("""(lambda (lambda # (unfold 0 # (lambda (+ (index $0 $2) (index $0 $1))) # (lambda (1+ $0))
bootstrapTarget() equationPrimitives = [ # real, f0, f1, fpi, real_power, real_subtraction, real_addition, real_division, real_multiplication ] + [ Program.parse(n) for n in ["map", "fold", "empty", "cons", "car", "cdr", "zip"] ] baseGrammar = Grammar.uniform(equationPrimitives) eprint("Got %d equation discovery tasks..." % len(tasks)) explorationCompression(baseGrammar, tasks, outputPrefix="experimentOutputs/scientificLaws", evaluationTimeout=0.1, testingTasks=[], **commandlineArguments( compressor="ocaml", featureExtractor=DummyFeatureExtractor, iterations=10, CPUs=numberOfCPUs(), structurePenalty=0.5, helmholtzRatio=0.5,
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on manipulating sequences of numbers.

    Keys consumed (popped) from `args`: "random_seed", "maxTasks",
    "extractor", "hidden" and "split". Everything left over, plus the keys
    added below, is forwarded to `explorationCompression`. The best posterior
    of every task solution in the result is printed at the end.
    """
    random.seed(args.pop("random_seed"))

    tasks = make_list_bootstrap_tasks()
    print(tasks)

    maxTasks = args.pop("maxTasks")
    if maxTasks and len(tasks) > maxTasks:
        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        random.shuffle(tasks)
        del tasks[maxTasks:]

    primitives = McCarthyPrimitives()
    from dreamcoder.program import Program, Invented
    # plus = Program.parse("(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 $1 (incr ($2 $1 (decr0 $0))))))))))")
    # plus = Invented(plus)
    # primitives.append(plus)
    # minus = Program.parse("(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 $1 ($2 (decr0 $1) (decr0 $0)))))))))")
    # minus = Invented(minus)
    # primitives.append(minus)
    # times = Program.parse("(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 0 (#(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 $1 (incr ($2 $1 (decr0 $0)))))))))) $1 ($2 (decr0 $0) $1)))))))))")
    # times = Invented(times)
    # primitives.append(times)

    # Primitives whose name starts with 'fix' get weight 5.0; all others 0.0.
    # (A uniform grammar used to be built here first, but it was overwritten
    # by this one before ever being used.)
    baseGrammar = Grammar(
        0.0, [(5.0 if p.name.startswith('fix') else 0.0, p.infer(), p)
              for p in primitives])

    extractor = {
        "learned": LearnedFeatureExtractor,
    }[args.pop("extractor")]
    # The hidden size is set on the extractor class itself.
    extractor.H = args.pop("hidden")

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/list/%s" % timestamp
    # Create the directory directly instead of shelling out to `mkdir -p`.
    os.makedirs(outputDirectory, exist_ok=True)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/list" % outputDirectory,
        "evaluationTimeout": 0.0005,
    })

    eprint("Got {} list tasks".format(len(tasks)))

    split = args.pop("split")
    if split:
        for t in tasks:
            # necessary = train_necessary(t)
            # if not necessary:
            #     continue
            # if necessary == "some":
            #     train_some[t.name.split()[0]].append(t)
            # else:
            t.mustTrain = True
        # for k in sorted(train_some):
        #     ts = train_some[k]
        #     random.shuffle(ts)
        #     ts.pop().mustTrain = True

        test, train = testTrainSplit(tasks, split)
        eprint("Alotted {} tasks for training and {} for testing".format(
            len(train), len(test)))
    else:
        train = tasks
        test = []

    result = explorationCompression(baseGrammar,
                                    train,
                                    testingTasks=test,
                                    **args)
    print([x.bestPosterior for x in result.taskSolutions.values()])
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on regular expressions.

    Keys consumed (popped) from `args`: "use_ll_cutoff", "tasks", "maxTasks",
    "maxExamples", "split", "use_str_const", "primitives", "extractor",
    "hidden" and "debug"; "likelihoodModel" is deleted. Everything left over,
    plus the keys added below, is forwarded to `explorationCompression`.
    """
    #for dreaming

    #parse use_ll_cutoff
    use_ll_cutoff = args.pop('use_ll_cutoff')
    if use_ll_cutoff is not False:
        #if use_ll_cutoff is a list of strings, then train_ll_cutoff and test_ll_cutoff
        #will be tuples of that string followed by the actual model
        if len(use_ll_cutoff) == 1:
            train_ll_cutoff = use_ll_cutoff[0]  # make_cutoff_model(use_ll_cutoff[0], tasks))
            test_ll_cutoff = use_ll_cutoff[0]  # make_cutoff_model(use_ll_cutoff[0], tasks))
        else:
            assert len(use_ll_cutoff) == 2
            train_ll_cutoff = use_ll_cutoff[0]  #make_cutoff_model(use_ll_cutoff[0], tasks))
            test_ll_cutoff = use_ll_cutoff[1]  #make_cutoff_model(use_ll_cutoff[1], tasks))
    else:
        train_ll_cutoff = None
        test_ll_cutoff = None

    regexTasks = {"old": makeOldTasks,
                  "short": makeShortTasks,
                  "long": makeLongTasks,
                  "words": makeWordTasks,
                  "number": makeNumberTasks,
                  "handpicked": makeHandPickedTasks,
                  "new": makeNewTasks,
                  "newNumber": makeNewNumberTasks
                  }[args.pop("tasks")]

    tasks = regexTasks()  # TODO
    eprint("Generated", len(tasks), "tasks")

    maxTasks = args.pop("maxTasks")
    # Guard against maxTasks being None: comparing an int with None raises
    # TypeError. A numeric maxTasks (including 0) truncates as before.
    if maxTasks is not None and len(tasks) > maxTasks:
        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        seed = 42  # previously this was hardcoded and never changed
        random.seed(seed)
        random.shuffle(tasks)
        del tasks[maxTasks:]

    maxExamples = args.pop("maxExamples")

    split = args.pop("split")
    test, train = testTrainSplit(tasks, split)
    eprint("Split tasks into %d/%d test/train" % (len(test), len(train)))

    test = add_cutoff_values(test, test_ll_cutoff)
    train = add_cutoff_values(train, train_ll_cutoff)
    eprint("added cutoff values to tasks, train: ", train_ll_cutoff, ", test:", test_ll_cutoff )

    if args.pop("use_str_const"):
        assert args["primitives"] == "strConst" or args["primitives"] == "reduced"
        ConstantInstantiateVisitor.SINGLE = \
            ConstantInstantiateVisitor()
        test = add_string_constants(test)
        train = add_string_constants(train)
        eprint("added string constants to test and train")

    for task in test + train:
        # Cap the number of examples per task.
        if len(task.examples) > maxExamples:
            task.examples = task.examples[:maxExamples]

        task.specialTask = ("regex",
                            {"cutoff": task.ll_cutoff,
                             "str_const": task.str_const})
        # Rebuild each example so that its output is a plain list.
        task.examples = [(xs, [y for y in ys])
                         for xs, ys in task.examples]
        task.maxParameters = 1

    # from list stuff
    primtype = args.pop("primitives")
    prims = {"base": basePrimitives,
             "alt1": altPrimitives,
             "alt2": alt2Primitives,
             "easyWords": easyWordsPrimitives,
             "concat": concatPrimitives,
             "reduced": reducedConcatPrimitives,
             "strConst": strConstConcatPrimitives
             }[primtype]

    extractor = {
        "learned": LearnedFeatureExtractor,
        "json": MyJSONFeatureExtractor
    }[args.pop("extractor")]
    # The hidden size is set on the extractor class itself.
    extractor.H = args.pop("hidden")

    #stardecay = args.stardecay
    #stardecay = args.pop('stardecay')
    #decaystr = 'd' + str(stardecay)
    import datetime

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/regex/%s"%timestamp
    # Create the directory directly instead of shelling out to `mkdir -p`.
    os.makedirs(outputDirectory, exist_ok=True)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/regex"%(outputDirectory),
        "evaluationTimeout": 0.005,
        "topk_use_only_likelihood": True,
        "maximumFrontier": 10,
        "compressor": "ocaml"
    })
    ####

    # use the
    #prim_list = prims(stardecay)
    prim_list = prims()
    specials = ["r_kleene", "r_plus", "r_maybe", "r_alt", "r_concat"]
    n_base_prim = len(prim_list) - len(specials)

    # Each special primitive gets probability 0.10; the others share 0.5
    # equally between them.
    productions = [
        (math.log(0.5 / float(n_base_prim)),
         prim) if prim.name not in specials else (
            math.log(0.10),
            prim) for prim in prim_list]

    baseGrammar = Grammar.fromProductions(productions, continuationType=tpregex)
    #baseGrammar = Grammar.uniform(prims())

    #for i in range(100):
    #    eprint(baseGrammar.sample(tpregex))

    #eprint(baseGrammar)
    #explore
    test_stuff = args.pop("debug")
    if test_stuff:
        eprint(baseGrammar)
        eprint("sampled programs from prior:")
        for i in range(100):  #100
            eprint(baseGrammar.sample(test[0].request,maximumDepth=1000))
        eprint("half the probability mass is on higher-order primitives. "
               "Therefore half of enumerated programs should have more than one node. "
               "However, we do not observe this. "
               "Instead we see a very small fraction of programs have more than one node. "
               "So something seems to be wrong with grammar.sample. "
               "Furthermore: observe the large print statement above. "
               "This prints the candidates for sampleDistribution in grammar.sample. "
               "the first element of each tuple is the probability passed into sampleDistribution. "
               "Half of the probability mass should be on the functions, "
               "but instead they are equally weighted with the constants. "
               "If you look at the grammar above, this is an error!!!! ")
        # Debug mode stops here on purpose.
        assert False

    del args["likelihoodModel"]
    explorationCompression(baseGrammar, train,
                           testingTasks = test,
                           **args)
def rustInduce(g0, frontiers, _=None,
               topK=1, pseudoCounts=1.0, aic=1.0,
               structurePenalty=0.001, a=0, CPUs=1, iteration=-1,
               topk_use_only_likelihood=False,
               vs=False):
    """Compress frontiers with the external Rust compressor.

    The grammar and frontiers are serialised to JSON (also written to the
    file "jsonDebug"), sent to ``./rust_compressor/rust_compressor`` on its
    standard input, and the JSON reply is turned back into a grammar and a
    list of frontiers.

    Args:
        g0: Current grammar.
        frontiers: Frontiers whose entries are sent as candidate solutions.
        _: Unused.
        topK, pseudoCounts, aic, structurePenalty, a,
        topk_use_only_likelihood: Forwarded in the "params" section of the
            message (``pseudoCounts`` is rounded to an int; an infinite
            ``aic`` is sent as null).
        CPUs, iteration: Unused.
        vs: Selects the "version-spaces" strategy instead of
            "fragment-grammars".

    Returns:
        ``(grammar, newFrontiers)`` built from the compressor's reply.

    Raises:
        ValueError: If the compressor exits with a non-zero status, or its
            output is not valid JSON (``json.JSONDecodeError`` is a subclass
            of ``ValueError``).
        AssertionError: If running on a Python older than 3.5.
    """
    def finite_logp(l):
        # JSON cannot carry -inf; it is sent as -1000 instead.
        return l if l != float("-inf") else -1000

    message = {
        "strategy": {"version-spaces": {"top_i": 50}}
        if vs else
        {"fragment-grammars": {}},
        "params": {
            "structure_penalty": structurePenalty,
            "pseudocounts": int(pseudoCounts + 0.5),
            "topk": topK,
            "topk_use_only_likelihood": topk_use_only_likelihood,
            "aic": aic if aic != float("inf") else None,
            "arity": a,
        },
        "primitives": [{"name": p.name, "tp": str(t), "logp": finite_logp(l)}
                       for l, t, p in g0.productions if p.isPrimitive],
        "inventions": [{"expression": str(p.body),
                        "logp": finite_logp(l)}
                       for l, t, p in g0.productions if p.isInvented],
        "variable_logprob": finite_logp(g0.logVariable),
        "frontiers": [{
            "task_tp": str(f.task.request),
            "solutions": [{
                "expression": str(e.program),
                "logprior": finite_logp(e.logPrior),
                "loglikelihood": e.logLikelihood,
            } for e in f],
        } for f in frontiers],
    }

    eprint("running rust compressor")

    messageJson = json.dumps(message)

    with open("jsonDebug", "w") as f:
        f.write(messageJson)

    # Compare the whole version tuple, not just the minor number.
    if sys.version_info < (3, 5):
        eprint("must be python 3.5 or newer")
        raise AssertionError("must be python 3.5 or newer")

    # Exchange bytes with the child so that the same code path works on every
    # supported Python version.
    p = subprocess.Popen(
        ['./rust_compressor/rust_compressor'],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE)
    # communicate() writes the message, closes stdin, drains stdout and waits
    # for the process, so returncode is populated afterwards. Previously
    # returncode was inspected before any wait/poll, where it is always None,
    # so the failure check could never trigger.
    output = p.communicate(messageJson.encode('utf-8'))[0]
    if p.returncode != 0:
        raise ValueError("rust compressor failed")

    resp = json.loads(output.decode('utf-8'))

    # The reply lists primitives in the same order they were sent.
    sentPrimitives = [p for (_, _, p) in g0.productions if p.isPrimitive]
    productions = [(x["logp"], p)
                   for p, x in zip(sentPrimitives, resp["primitives"])] + \
                  [(i["logp"], Invented(Program.parse(i["expression"])))
                   for i in resp["inventions"]]
    # A null log-probability in the reply stands for -inf.
    productions = [(l if l is not None else float("-inf"), p)
                   for l, p in productions]
    g = Grammar.fromProductions(productions, resp["variable_logprob"],
                                continuationType=g0.continuationType)
    newFrontiers = [
        Frontier([
            FrontierEntry(Program.parse(s["expression"]),
                          logPrior=s["logprior"],
                          logLikelihood=s["loglikelihood"])
            for s in r["solutions"]
        ], f.task)
        for f, r in zip(frontiers, resp["frontiers"])
    ]
    return g, newFrontiers
#print("self.name", self.name) return self.name def __setstate__(self, state): #for backwards compatibility: if type(state) == dict: pass #do nothing, i don't need to load them if they are old... else: p = Primitive.GLOBALS[state] self.__init__(p.name, p.tp, p.value, p.constraint) if __name__ == '__main__': import time CPrimitive("testCPrim", tint, lambda x: x, 17) g = Grammar.fromProductions(RobustFillProductions()) print(len(g)) request = tprogram p = g.sample(request) print("request:", request) print("program:") print(prettyProgram(p)) s = 'abcdefg' e = p.evaluate([]) #print("prog applied to", s) #print(e(s)) print("flattened_program:") flat = flatten_program(p) print(flat) t = time.time() constraints = Constraint_prop().execute(p)
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on LOGO tasks.

    Depending on the arguments this either visualizes a checkpoint, dreams
    from a checkpoint, animates a checkpoint (each of which exits the
    process), or runs the learning iterator and, when "save" is given, writes
    the invented primitives of the final grammar to that file as JSON.
    """

    # The below legacy global statement is required since prefix_dreams is used by LogoFeatureCNN.
    # TODO(lcary): use argument passing instead of global variables.
    global prefix_dreams

    # The below global statement is required since primitives is modified within main().
    # TODO(lcary): use a function call to retrieve and declare primitives instead.
    global primitives

    visualizeCheckpoint = args.pop("visualize")
    if visualizeCheckpoint is not None:
        with open(visualizeCheckpoint, 'rb') as handle:
            primitives = pickle.load(handle).grammars[-1].primitives
        visualizePrimitives(primitives)
        sys.exit(0)

    dreamCheckpoint = args.pop("dreamCheckpoint")
    dreamDirectory = args.pop("dreamDirectory")

    proto = args.pop("proto")

    if dreamCheckpoint is not None:
        #outputDreams(dreamCheckpoint, dreamDirectory)
        enumerateDreams(dreamCheckpoint, dreamDirectory)
        sys.exit(0)

    animateCheckpoint = args.pop("animate")
    if animateCheckpoint is not None:
        animateSolutions(loadPickle(animateCheckpoint).allFrontiers)
        sys.exit(0)

    target = args.pop("target")
    red = args.pop("reduce")
    save = args.pop("save")
    prefix = args.pop("prefix")
    prefix_dreams = prefix + "/dreams/" + ('_'.join(target)) + "/"
    os.makedirs(prefix_dreams, exist_ok=True)

    tasks = makeTasks(target, proto)
    eprint("Generated", len(tasks), "tasks")

    costMatters = args.pop("cost")
    for t in tasks:
        t.specialTask[1]["costMatters"] = costMatters
        # disgusting hack - include whether cost matters in the dummy input
        if costMatters:
            t.examples = [(([1]), t.examples[0][1])]

    # Start the protonet server from inside its own directory, then give it a
    # few seconds before continuing.
    os.chdir("prototypical-networks")
    subprocess.Popen(["python", "./protonet_server.py"])
    time.sleep(3)
    os.chdir("..")

    test, train = testTrainSplit(tasks, args.pop("split"))
    eprint("Split tasks into %d/%d test/train" % (len(test), len(train)))
    try:
        if test:
            montageTasks(test, "test_")
        montageTasks(train, "train_")
    except Exception:
        # Montages are optional; `Exception` keeps Ctrl-C working.
        eprint(
            "WARNING: couldn't generate montage. Do you have an old version of scipy?"
        )

    # `red is not []` was always true (identity test against a fresh list),
    # so a missing/None value crashed the loop. Iterate only when non-empty.
    for reducing in red or []:
        try:
            with open(reducing, 'r') as f:
                prods = json.load(f)
                for e in prods:
                    e = Program.parse(e)
                    if e.isInvented:
                        primitives.append(e)
        except (EOFError, IOError, json.decoder.JSONDecodeError):
            eprint("Couldn't grab frontier from " + reducing)

    # Remove duplicate primitives while keeping first-seen order.
    primitives = list(OrderedDict((x, True) for x in primitives).keys())
    baseGrammar = Grammar.uniform(primitives, continuationType=turtle)

    eprint(baseGrammar)

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/logo/%s" % timestamp
    # Create directories directly instead of shelling out to `mkdir -p`.
    os.makedirs(outputDirectory, exist_ok=True)

    generator = ecIterator(baseGrammar, train,
                           testingTasks=test,
                           outputPrefix="%s/logo" % outputDirectory,
                           evaluationTimeout=0.01,
                           **args)

    r = None
    for result in generator:
        iteration = len(result.learningCurve)
        dreamDirectory = "%s/dreams_%d" % (outputDirectory, iteration)
        os.makedirs(dreamDirectory, exist_ok=True)
        eprint("Dreaming into directory", dreamDirectory)
        dreamFromGrammar(result.grammars[-1],
                         dreamDirectory)
        r = result

    if r is None:
        # The iterator produced no result, so there is no grammar to export.
        if save is not None:
            eprint("WARNING: no result was produced; nothing saved to", save)
        return

    needsExport = [
        str(z)
        for _, _, z in r.grammars[-1].productions
        if z.isInvented
    ]
    if save is not None:
        with open(save, 'w') as f:
            json.dump(needsExport, f)
response = json.loads(result.decode("utf-8")) for b, entry in enumerate(response): frontiers.append( Frontier([ FrontierEntry(program=Program.parse(p), logPrior=entry["ll"], logLikelihood=0.) for p in entry["programs"] ], task=Task(str(b), request, []))) eprint("Total number of Helmholtz frontiers:", len(frontiers)) return frontiers return get if __name__ == "__main__": g = Grammar.uniform([k1, k0, addition, subtraction, multiplication]) frontiers = helmholtzEnumeration(g, arrow(tint, tint), [[0], [1], [2]], 10.) eprint("average frontier size", mean(len(f.entries) for f in frontiers)) f = DummyFeatureExtractor([]) r = RecognitionModel(f, g, hidden=[], contextual=True) r.trainBiasOptimal(frontiers, frontiers, steps=70) g = r.grammarOfTask(frontiers[0].task).untorch() frontiers = helmholtzEnumeration(g, arrow(tint, tint), [[0], [1], [2]], 10.) for f in frontiers: eprint(f.summarizeFull()) eprint("average frontier size", mean(len(f.entries) for f in frontiers))
# from dreamcoder.domains.draw.makeDrawTasks import drawDrawings
from dreamcoder.domains.draw.drawPrimitives import primitives, taxes, tartist, tangle, tscale, tdist
# from dreamcoder.dreamcoder import ecIterator
from dreamcoder.grammar import Grammar
# from dreamcoder.program import Program
# from dreamcoder.recognition import variable, maybe_cuda
from dreamcoder.task import Task
from dreamcoder.type import arrow
# from dreamcoder.utilities import eprint, testTrainSplit, loadPickle

# Uniform grammar over the draw primitives; default for dreamFromGrammar.
g0 = Grammar.uniform(primitives)


def dreamFromGrammar(g=g0, directory = "", N=50):
    """Sample up to N programs of type `taxes -> taxes` from grammar `g`.

    Samples that come back as None are dropped, so fewer than N programs may
    be returned. `directory` is accepted but not used.
    """
    # request = taxes
    request = arrow(taxes, taxes)

    dreams = []
    for _ in range(N):
        sampled = g.sample(request, maximumDepth=20)
        if sampled is None:
            continue
        dreams.append(sampled)

    # drawDrawings(*programs, filenames)
    return dreams
"non", "l", "erase", "m", "comes", "up", "comparison", "during", "'s value is the largest inclusive, which is strictly less than maximum element in numbers from 1 to the element in `a` which'", "'s value is the biggest (inclusive), which is strictly less than maximum element of range from 1 to the element in `a` which'", "'s value is the highest, which is strictly less than maximum element among sequence of digits of the element in `a` which'"] if __name__ == "__main__": #g = Grammar.uniform(deepcoderPrimitives()) g = Grammar.fromProductions(algolispProductions(), logVariable=.9) #p=Program.parse("(lambda (fn_call filter (list_add_symbol (lambda1_call == (list_add_symbol 1 (list_init_symbol (fn_call mod ( list_add_symbol 2 (list_init_symbol arg1)) ))) ) (list_init_symbol $0)) )") p=Program.parse("(lambda (fn_call filter (list_add_symbol (lambda1_call eq (list_add_symbol (symbol_constant 1) (list_init_symbol (fn_call mod ( list_add_symbol (symbol_constant 2) (list_init_symbol (symbol_constant arg1))) ))) ) (list_init_symbol (symbol_constant $0)))))") print(p) #tree = p.evaluate(["a"]) tree = p.evaluate([]) print(tree("a")) #
if __name__ == '__main__': import argparse parser = argparse.ArgumentParser(description="") parser.add_argument("--domain", '-d', default="text") parser.add_argument("--taskLikelihood", default=False, action='store_true') parser.add_argument("--sampleLikelihood", default=False, action='store_true') parser.add_argument("--test", type=str, default=False) parser.add_argument("--timeout", type=float, default=600) arguments = parser.parse_args() if arguments.domain == "text": tasks = makeTasks() g = Grammar.uniform(text_primitives.primitives + [p for p in bootstrapTarget()]) input_vocabularies = [ list(printable[:-4]) + ['EOE'], list(printable[:-4]) ] test = loadPBETasks("PBE_Strings_Track")[0] fe = Text.LearnedFeatureExtractor(tasks=tasks, testingTasks=test) BATCHSIZE = 16 elif arguments.domain == "regex": g = Grammar.uniform(reducedConcatPrimitives(), continuationType=tpregex) tasks = makeNewTasks() fe = Regex.LearnedFeatureExtractor(tasks)
def make_grammar(g):
    """Clear `Primitive.GLOBALS`, then build a uniform grammar from `g()`."""
    Primitive.GLOBALS.clear()
    # `g` is called only after the registry has been cleared.
    fresh_primitives = g()
    return Grammar.uniform(fresh_primitives)
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on manipulating sequences of numbers.

    `args` is consumed as a mutable mapping.  The following keys are popped
    here: "random_seed", "dataset", "maxTasks", "primitives", "noLength",
    "noMap", "noUnfold", "extractor", "hidden" and "split".  The keys
    "featureExtractor", "outputPrefix" and "evaluationTimeout" are added, and
    everything left over is forwarded as keyword arguments to
    `explorationCompression`.

    Raises:
        KeyError: if "dataset", "primitives" or "extractor" names an entry
            that is not in the corresponding lookup table, or if one of the
            popped keys is missing.
        OSError: if the output directory cannot be created.
    """
    # Seed first: every random call below must happen in this exact order for
    # a given seed to reproduce the same tasks.
    random.seed(args.pop("random_seed"))

    dataset = args.pop("dataset")
    # Table of task loaders; only the selected one is called.
    tasks = {
        "Lucas-old": lambda: retrieveJSONTasks("data/list_tasks.json") + sortBootstrap(),
        "bootstrap": make_list_bootstrap_tasks,
        "sorting": sortBootstrap,
        "Lucas-depth1": lambda: retrieveJSONTasks("data/list_tasks2.json")[:105],
        "Lucas-depth2": lambda: retrieveJSONTasks("data/list_tasks2.json")[:4928],
        "Lucas-depth3": lambda: retrieveJSONTasks("data/list_tasks2.json"),
    }[dataset]()

    maxTasks = args.pop("maxTasks")
    if maxTasks and len(tasks) > maxTasks:
        necessaryTasks = []  # maxTasks will not consider these
        # NOTE(review): no key of the dataset table above starts with
        # "Lucas2.0", so this condition is never true and `necessaryTasks`
        # always stays empty.  Left untouched because enabling it would change
        # which tasks are kept - confirm the intended prefix before fixing.
        if dataset.startswith("Lucas2.0") and dataset != "Lucas2.0-depth1":
            necessaryTasks = tasks[:105]

        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        random.shuffle(tasks)
        del tasks[maxTasks:]
        tasks = necessaryTasks + tasks

    if dataset.startswith("Lucas"):
        # extra tasks for filter
        tasks.extend([
            Task("remove empty lists",
                 arrow(tlist(tlist(tbool)), tlist(tlist(tbool))),
                 [((ls, ), list(filter(lambda l: len(l) > 0, ls)))
                  for _ in range(15)
                  for ls in [[[random.random() < 0.5
                               for _ in range(random.randint(0, 3))]
                              for _ in range(4)]]]),
            Task("keep squares",
                 arrow(tlist(tint), tlist(tint)),
                 [((xs, ), list(filter(lambda x: int(math.sqrt(x))**2 == x, xs)))
                  for _ in range(15)
                  for xs in [[random.choice([0, 1, 4, 9, 16, 25])
                              if random.random() < 0.5
                              else random.randint(0, 9)
                              for _ in range(7)]]]),
            Task("keep primes",
                 arrow(tlist(tint), tlist(tint)),
                 [((xs, ), list(filter(
                     lambda x: x in {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37},
                     xs)))
                  for _ in range(15)
                  for xs in [[random.choice([2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37])
                              if random.random() < 0.5
                              else random.randint(0, 9)
                              for _ in range(7)]]]),
        ])
        # Threshold-based filter tasks.  The lambdas capture `i` by name, which
        # is safe here because each one is consumed by list(filter(...))
        # within the same loop iteration.
        for i in range(4):
            tasks.extend([
                Task("keep eq %s" % i,
                     arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: x == i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]]),
                Task("remove eq %s" % i,
                     arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: x != i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]]),
                Task("keep gt %s" % i,
                     arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: x > i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]]),
                Task("remove gt %s" % i,
                     arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: not x > i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]])
            ])

    def isIdentityTask(t):
        # A task is the identity when every example has exactly one input and
        # that input equals the expected output.
        return all(len(xs) == 1 and xs[0] == y for xs, y in t.examples)

    eprint("Removed", sum(isIdentityTask(t) for t in tasks),
           "tasks that were just the identity function")
    tasks = [t for t in tasks if not isIdentityTask(t)]

    prims = {
        "base": basePrimitives,
        "McCarthy": McCarthyPrimitives,
        "common": bootstrapTarget_extra,
        "noLength": no_length,
        "rich": primitives
    }[args.pop("primitives")]()
    haveLength = not args.pop("noLength")
    haveMap = not args.pop("noMap")
    haveUnfold = not args.pop("noUnfold")
    eprint(f"Including map as a primitive? {haveMap}")
    eprint(f"Including length as a primitive? {haveLength}")
    eprint(f"Including unfold as a primitive? {haveUnfold}")
    # Drop map / unfold / length from the primitive set when disabled.
    baseGrammar = Grammar.uniform([p
                                   for p in prims
                                   if (p.name != "map" or haveMap) and \
                                   (p.name != "unfold" or haveUnfold) and \
                                   (p.name != "length" or haveLength)])

    extractor = {
        "learned": LearnedFeatureExtractor,
    }[args.pop("extractor")]
    # The hidden size is stored on the extractor class itself.
    extractor.H = args.pop("hidden")

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/list/%s" % timestamp
    # Create the directory without going through a shell; unlike the previous
    # `mkdir -p` via os.system, a failure now raises instead of being ignored.
    os.makedirs(outputDirectory, exist_ok=True)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/list" % outputDirectory,
        "evaluationTimeout": 0.0005,
    })

    eprint("Got {} list tasks".format(len(tasks)))
    split = args.pop("split")
    if split:
        # Decide which tasks must end up in the training set.
        train_some = defaultdict(list)
        for t in tasks:
            necessary = train_necessary(t)
            if not necessary:
                continue
            if necessary == "some":
                # Group by the first word of the task name; one member of each
                # group is forced into training below.
                train_some[t.name.split()[0]].append(t)
            else:
                t.mustTrain = True
        for k in sorted(train_some):
            ts = train_some[k]
            random.shuffle(ts)
            ts.pop().mustTrain = True

        test, train = testTrainSplit(tasks, split)
        # Tasks listed in EASYLISTTASKS are never used for testing.
        test = [t for t in test if t.name not in EASYLISTTASKS]

        eprint("Alotted {} tasks for training and {} for testing".format(
            len(train), len(test)))
    else:
        train = tasks
        test = []

    explorationCompression(baseGrammar, train, testingTasks=test, **args)
for char, name in disallowed ] + [ Primitive("r_dot", tpregex, emp_dot_no_letter(corpus)), Primitive("r_d", tpregex, emp_d(corpus)), Primitive("r_s", tpregex, pregex.s), Primitive("r_kleene", arrow(tpregex, tpregex), _kleene), #Primitive("r_plus", arrow(tpregex, tpregex), _plus), #Primitive("r_maybe", arrow(tpregex, tpregex), _maybe), Primitive("r_alt", arrow(tpregex, tpregex, tpregex), _alt), Primitive("r_concat", arrow(tpregex, tpregex, tpregex), _concat), ] if __name__ == '__main__': concatPrimitives() from dreamcoder.program import Program p = Program.parse( "(lambda (r_kleene (lambda (r_maybe (lambda (string_x $0)) $0)) $0))") print(p) print(p.runWithArguments([pregex.String("")])) prims = concatPrimitives() g = Grammar.uniform(prims) for i in range(100): prog = g.sample(arrow(tpregex, tpregex)) preg = prog.runWithArguments([pregex.String("")]) print("preg:", preg.__repr__()) print("sample:", preg.sample())
def ocamlInduce(g, frontiers, _=None,
                topK=1, pseudoCounts=1.0, aic=1.0,
                structurePenalty=0.001, a=0, CPUs=1, bs=1000000, topI=300):
    """Run the external `compression` executable on a grammar and frontiers.

    The grammar and the non-empty frontiers are serialised to JSON, saved
    under ``compressionMessages/`` for later inspection, and piped to the
    `compression` binary located in `get_root_dir()`.  Its JSON answer is
    turned back into a `Grammar` and rescored `Frontier` objects.

    NOTE: the `topK`, `topI` and `CPUs` arguments are currently overridden
    inside the function (to 5, 600 and 2 respectively), so the values passed
    by the caller do not reach the compressor.

    :param g: current grammar; must provide `primitives`, `json()` and
        `continuationType`.
    :param frontiers: frontiers to compress; empty ones are not sent to the
        compressor and are returned unchanged.
    :param _: unused.
    :param pseudoCounts, aic, structurePenalty, a, bs: forwarded in the JSON
        message (`a` under the key "arity").
    :return: `(grammar, frontiers)` where `frontiers` has one entry per input
        frontier, in the original order.
    :raises OSError: if the message directory cannot be created or the
        compressor cannot be started.
    """
    # This is a dirty hack!
    # Memory consumption increases with the number of CPUs
    # And early on we have a lot of stuff to compress
    # If this is the first iteration, only use a fraction of the available CPUs
    topK = 5
    topI = 600
    if all(not p.isInvented for p in g.primitives):
        if a > 3:
            CPUs = max(1, int(CPUs / 6))
        else:
            CPUs = max(1, int(CPUs / 3))
    else:
        CPUs = max(1, int(CPUs / 2))
    # Hard override: whatever was computed above is discarded.
    CPUs = 2

    # X X X FIXME X X X
    # for unknown reasons doing compression all in one go works correctly and doing it with Python and the outer loop causes problems
    iterations = 99  # maximum number of components to add at once

    while True:
        g0 = g

        originalFrontiers = frontiers
        # Remember every frontier by task so that the empty ones, which are
        # not sent to the compressor, can be restored afterwards.
        t2f = {f.task: f for f in frontiers}
        frontiers = [f for f in frontiers if not f.empty]
        message = {"arity": a,
                   "topK": topK,
                   "pseudoCounts": float(pseudoCounts),
                   "aic": aic,
                   "bs": bs,
                   "topI": topI,
                   "structurePenalty": float(structurePenalty),
                   "CPUs": CPUs,
                   "DSL": g.json(),
                   "iterations": iterations,
                   "frontiers": [f.json()
                                 for f in frontiers]}

        message = json.dumps(message)

        # Keep a copy of every message sent to the compressor.
        timestamp = datetime.datetime.now().isoformat()
        os.makedirs("compressionMessages", exist_ok=True)
        fn = "compressionMessages/%s" % timestamp
        with open(fn, "w") as handle:
            handle.write(message)
        eprint("Compression message saved to:", fn)

        # Get relative path
        compressor_file = os.path.join(get_root_dir(), 'compression')
        # An OSError from a missing or non-executable binary propagates to the
        # caller, exactly as the former re-raising handler did.
        process = subprocess.Popen(compressor_file,
                                   stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE)
        # stderr is not piped, so the second element is always None.
        response, _stderr = process.communicate(
            bytes(message, encoding="utf-8"))
        response = json.loads(response.decode("utf-8"))

        # Rebuild the grammar from the compressor's answer, re-inferring the
        # type of every production from its parsed expression.
        g = response["DSL"]
        g = Grammar(g["logVariable"],
                    [(l, p.infer(), p)
                     for production in g["productions"]
                     for l in [production["logProbability"]]
                     for p in [Program.parse(production["expression"])]],
                    continuationType=g0.continuationType)

        # Pair each frontier that was sent with the compressor's rewritten
        # programs (same order), scoring the prior under the new grammar.
        frontiers = {
            original.task: Frontier([
                FrontierEntry(p,
                              logLikelihood=e["logLikelihood"],
                              logPrior=g.logLikelihood(original.task.request, p))
                for e in new["programs"]
                for p in [Program.parse(e["program"])]],
                task=original.task)
            for original, new in zip(frontiers, response["frontiers"])}
        # Restore the original ordering, falling back to the untouched
        # frontier for tasks that were not sent.
        frontiers = [frontiers.get(f.task, t2f[f.task])
                     for f in originalFrontiers]
        # With `iterations` fixed at 99 above this condition is never met, so
        # the loop body runs exactly once.
        if iterations == 1 and len(g) > len(g0):
            eprint("Grammar changed - running another round of consolidation.")
            continue
        else:
            eprint("Finished consolidation.")
            return g, frontiers
def update_grammar(self):
    """Rebuild `self.grammar` from the McCarthy primitives and solved semantics.

    Every entry of `self.semantics` that is marked solved and whose program
    has positive arity contributes `Invented(program.prog_ori)`.  The new
    grammar is uniform over the McCarthy primitives followed by these.
    """
    inventions = []
    for smt in self.semantics:
        # Skip entries that are unsolved or whose program has no arity.
        if not smt.solved:
            continue
        if smt.program.arity > 0:
            inventions.append(Invented(smt.program.prog_ori))
    self.grammar = Grammar.uniform(McCarthyPrimitives() + inventions)