def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on manipulating sequences of numbers.

    `args` is a dict that is consumed in place: the keys "random_seed",
    "maxTasks", "extractor", "hidden" and "split" are popped, the keys
    "featureExtractor", "outputPrefix" and "evaluationTimeout" are added, and
    whatever remains is forwarded as keyword arguments to
    `explorationCompression`.

    Raises KeyError if one of the popped keys is missing or if the requested
    extractor name is unknown.
    """
    random.seed(args.pop("random_seed"))

    tasks = make_list_bootstrap_tasks()
    print(tasks)

    # Optionally cap the number of tasks by keeping a random subset.
    maxTasks = args.pop("maxTasks")
    if maxTasks and len(tasks) > maxTasks:
        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        random.shuffle(tasks)
        del tasks[maxTasks:]

    baseGrammar = Grammar.uniform(McCarthyPrimitives())

    extractor = {
        "learned": LearnedFeatureExtractor,
    }[args.pop("extractor")]
    # NOTE: this sets the attribute on the extractor class itself.
    extractor.H = args.pop("hidden")

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/list/%s" % timestamp
    # Create the directory directly instead of shelling out to `mkdir -p`;
    # unlike os.system, a failure here is reported instead of being ignored.
    os.makedirs(outputDirectory, exist_ok=True)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/list" % outputDirectory,
        "evaluationTimeout": 0.0005,
    })

    eprint("Got {} list tasks".format(len(tasks)))

    split = args.pop("split")
    if split:
        # The selective `train_necessary` logic is disabled in this variant:
        # every task is flagged as mustTrain before splitting.
        for t in tasks:
            t.mustTrain = True

        test, train = testTrainSplit(tasks, split)
        eprint("Alotted {} tasks for training and {} for testing".format(
            len(train), len(test)))
    else:
        train = tasks
        test = []

    explorationCompression(baseGrammar, train, testingTasks=test, **args)
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on manipulating sequences of numbers.

    `args` is a dict that is consumed in place: the keys "random_seed",
    "dataset", "maxTasks", "primitives", "noLength", "noMap", "noUnfold",
    "extractor", "hidden" and "split" are popped, the keys "featureExtractor",
    "outputPrefix" and "evaluationTimeout" are added, and whatever remains is
    forwarded as keyword arguments to `explorationCompression`.

    Raises KeyError if one of the popped keys is missing or if the dataset,
    primitive set or extractor name is unknown.
    """
    random.seed(args.pop("random_seed"))

    dataset = args.pop("dataset")
    tasks = {
        "Lucas-old": lambda: retrieveJSONTasks("data/list_tasks.json") + sortBootstrap(),
        "bootstrap": make_list_bootstrap_tasks,
        "sorting": sortBootstrap,
        "Lucas-depth1": lambda: retrieveJSONTasks("data/list_tasks2.json")[:105],
        "Lucas-depth2": lambda: retrieveJSONTasks("data/list_tasks2.json")[:4928],
        "Lucas-depth3": lambda: retrieveJSONTasks("data/list_tasks2.json"),
    }[dataset]()

    maxTasks = args.pop("maxTasks")
    if maxTasks and len(tasks) > maxTasks:
        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))

        necessaryTasks = []  # maxTasks will not consider these
        # The deeper datasets are supersets of "Lucas-depth1" (the first 105
        # entries of the same JSON file), so those entries are always kept.
        # The previous check looked for a "Lucas2.0" prefix that no dataset
        # key has, which made this branch unreachable.
        if dataset.startswith("Lucas-depth") and dataset != "Lucas-depth1":
            # Take the protected tasks out of the pool so that they cannot be
            # selected a second time by the random truncation below.
            necessaryTasks, tasks = tasks[:105], tasks[105:]

        random.shuffle(tasks)
        del tasks[maxTasks:]
        tasks = necessaryTasks + tasks

    if dataset.startswith("Lucas"):
        primes = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37]
        primeSet = set(primes)

        # extra tasks for filter
        # NOTE: the order of the random.* calls below determines the generated
        # examples for a given seed; do not reorder the comprehensions.
        tasks.extend([
            Task("remove empty lists",
                 arrow(tlist(tlist(tbool)), tlist(tlist(tbool))),
                 [((ls, ), list(filter(lambda sub: len(sub) > 0, ls)))
                  for _ in range(15)
                  for ls in [[[random.random() < 0.5
                               for _ in range(random.randint(0, 3))]
                              for _ in range(4)]]]),
            Task("keep squares",
                 arrow(tlist(tint), tlist(tint)),
                 [((xs, ), list(filter(lambda x: int(math.sqrt(x))**2 == x, xs)))
                  for _ in range(15)
                  for xs in [[random.choice([0, 1, 4, 9, 16, 25])
                              if random.random() < 0.5 else random.randint(0, 9)
                              for _ in range(7)]]]),
            Task("keep primes",
                 arrow(tlist(tint), tlist(tint)),
                 [((xs, ), list(filter(lambda x: x in primeSet, xs)))
                  for _ in range(15)
                  for xs in [[random.choice(primes)
                              if random.random() < 0.5 else random.randint(0, 9)
                              for _ in range(7)]]]),
        ])
        # The lambdas below are consumed immediately by list(filter(...)), so
        # they see the current value of `i` (no late-binding problem).
        for i in range(4):
            tasks.extend([
                Task("keep eq %s" % i,
                     arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: x == i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]]),
                Task("remove eq %s" % i,
                     arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: x != i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]]),
                Task("keep gt %s" % i,
                     arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: x > i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]]),
                Task("remove gt %s" % i,
                     arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: not x > i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]]),
            ])

    def isIdentityTask(t):
        # True when every example has a single input that equals its output.
        return all(len(xs) == 1 and xs[0] == y for xs, y in t.examples)

    nonIdentity = [t for t in tasks if not isIdentityTask(t)]
    eprint("Removed", len(tasks) - len(nonIdentity),
           "tasks that were just the identity function")
    tasks = nonIdentity

    prims = {
        "base": basePrimitives,
        "McCarthy": McCarthyPrimitives,
        "common": bootstrapTarget_extra,
        "noLength": no_length,
        "rich": primitives
    }[args.pop("primitives")]()
    haveLength = not args.pop("noLength")
    haveMap = not args.pop("noMap")
    haveUnfold = not args.pop("noUnfold")
    eprint(f"Including map as a primitive? {haveMap}")
    eprint(f"Including length as a primitive? {haveLength}")
    eprint(f"Including unfold as a primitive? {haveUnfold}")
    # Drop the primitives that were disabled on the command line.
    baseGrammar = Grammar.uniform([p for p in prims
                                   if (p.name != "map" or haveMap) and
                                   (p.name != "unfold" or haveUnfold) and
                                   (p.name != "length" or haveLength)])

    extractor = {
        "learned": LearnedFeatureExtractor,
    }[args.pop("extractor")]
    # NOTE: this sets the attribute on the extractor class itself.
    extractor.H = args.pop("hidden")

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/list/%s" % timestamp
    # Create the directory directly instead of shelling out to `mkdir -p`;
    # unlike os.system, a failure here is reported instead of being ignored.
    os.makedirs(outputDirectory, exist_ok=True)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/list" % outputDirectory,
        "evaluationTimeout": 0.0005,
    })

    eprint("Got {} list tasks".format(len(tasks)))

    split = args.pop("split")
    if split:
        # Tasks for which train_necessary() returns "some" are grouped by the
        # first word of their name and one random member per group is forced
        # into training; any other truthy result forces the task itself.
        train_some = defaultdict(list)
        for t in tasks:
            necessary = train_necessary(t)
            if not necessary:
                continue
            if necessary == "some":
                train_some[t.name.split()[0]].append(t)
            else:
                t.mustTrain = True
        for k in sorted(train_some):
            ts = train_some[k]
            random.shuffle(ts)
            ts.pop().mustTrain = True

        test, train = testTrainSplit(tasks, split)
        test = [t for t in test if t.name not in EASYLISTTASKS]

        eprint("Alotted {} tasks for training and {} for testing".format(
            len(train), len(test)))
    else:
        train = tasks
        test = []

    explorationCompression(baseGrammar, train, testingTasks=test, **args)
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on regular expressions.

    `args` is a dict that is consumed in place: the keys "use_ll_cutoff",
    "tasks", "maxTasks", "maxExamples", "split", "use_str_const",
    "primitives", "extractor", "hidden" and "debug" are popped,
    "likelihoodModel" is discarded, several search settings are added, and
    whatever remains is forwarded as keyword arguments to
    `explorationCompression`.

    Raises KeyError if one of the popped keys is missing or if the task set,
    primitive set or extractor name is unknown. When "debug" is truthy the
    function prints diagnostics and then stops with an AssertionError.
    """
    # for dreaming

    # parse use_ll_cutoff: either False, or a sequence holding one cutoff
    # (shared by train and test) or two cutoffs (train, test).
    use_ll_cutoff = args.pop('use_ll_cutoff')
    if use_ll_cutoff is not False:
        if len(use_ll_cutoff) == 1:
            train_ll_cutoff = use_ll_cutoff[0]
            test_ll_cutoff = use_ll_cutoff[0]
        else:
            assert len(use_ll_cutoff) == 2
            train_ll_cutoff = use_ll_cutoff[0]
            test_ll_cutoff = use_ll_cutoff[1]
    else:
        train_ll_cutoff = None
        test_ll_cutoff = None

    regexTasks = {"old": makeOldTasks,
                  "short": makeShortTasks,
                  "long": makeLongTasks,
                  "words": makeWordTasks,
                  "number": makeNumberTasks,
                  "handpicked": makeHandPickedTasks,
                  "new": makeNewTasks,
                  "newNumber": makeNewNumberTasks
                  }[args.pop("tasks")]

    tasks = regexTasks()  # TODO
    eprint("Generated", len(tasks), "tasks")

    maxTasks = args.pop("maxTasks")
    # A missing limit (None) means "keep everything"; comparing an int with
    # None would raise TypeError.
    if maxTasks is not None and len(tasks) > maxTasks:
        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        seed = 42  # previously this was hardcoded and never changed
        random.seed(seed)
        random.shuffle(tasks)
        del tasks[maxTasks:]

    maxExamples = args.pop("maxExamples")

    split = args.pop("split")
    test, train = testTrainSplit(tasks, split)
    eprint("Split tasks into %d/%d test/train" % (len(test), len(train)))

    test = add_cutoff_values(test, test_ll_cutoff)
    train = add_cutoff_values(train, train_ll_cutoff)
    eprint("added cutoff values to tasks, train: ", train_ll_cutoff, ", test:", test_ll_cutoff )

    if args.pop("use_str_const"):
        assert args["primitives"] == "strConst" or args["primitives"] == "reduced"
        ConstantInstantiateVisitor.SINGLE = \
            ConstantInstantiateVisitor()
        test = add_string_constants(test)
        train = add_string_constants(train)
        eprint("added string constants to test and train")

    for task in test + train:
        if len(task.examples) > maxExamples:
            task.examples = task.examples[:maxExamples]

        task.specialTask = ("regex",
                            {"cutoff": task.ll_cutoff, "str_const": task.str_const})
        # Copy each output sequence into a fresh list.
        task.examples = [(xs, list(ys)) for xs, ys in task.examples]
        task.maxParameters = 1

    # from list stuff
    primtype = args.pop("primitives")
    prims = {"base": basePrimitives,
             "alt1": altPrimitives,
             "alt2": alt2Primitives,
             "easyWords": easyWordsPrimitives,
             "concat": concatPrimitives,
             "reduced": reducedConcatPrimitives,
             "strConst": strConstConcatPrimitives
             }[primtype]

    extractor = {
        "learned": LearnedFeatureExtractor,
        "json": MyJSONFeatureExtractor
    }[args.pop("extractor")]
    # NOTE: this sets the attribute on the extractor class itself.
    extractor.H = args.pop("hidden")

    import datetime

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/regex/%s" % timestamp
    # Create the directory directly instead of shelling out to `mkdir -p`;
    # unlike os.system, a failure here is reported instead of being ignored.
    os.makedirs(outputDirectory, exist_ok=True)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/regex" % (outputDirectory),
        "evaluationTimeout": 0.005,
        "topk_use_only_likelihood": True,
        "maximumFrontier": 10,
        "compressor": "ocaml"
    })

    prim_list = prims()
    # The higher-order regex combinators each get log(0.10); the remaining
    # primitives share a total mass of 0.5 equally.
    specials = ["r_kleene", "r_plus", "r_maybe", "r_alt", "r_concat"]
    n_base_prim = len(prim_list) - len(specials)

    productions = [
        (math.log(0.5 / float(n_base_prim)), prim)
        if prim.name not in specials
        else (math.log(0.10), prim)
        for prim in prim_list
    ]

    baseGrammar = Grammar.fromProductions(productions, continuationType=tpregex)

    test_stuff = args.pop("debug")
    if test_stuff:
        eprint(baseGrammar)
        eprint("sampled programs from prior:")
        for i in range(100):
            eprint(baseGrammar.sample(test[0].request, maximumDepth=1000))
        eprint("half the probability mass is on \n"
               "higher-order primitives. Therefore half of enumerated programs "
               "should have more than one node. However, we do not observe this. "
               "Instead we see a very small fraction of programs have more than "
               "one node. So something seems to be wrong with grammar.sample. "
               "Furthermore: observe the large print statement above. This "
               "prints the candidates for sampleDistribution in grammar.sample. "
               "the first element of each tuple is the probability passed into "
               "sampleDistribution. Half of the probability mass should be on "
               "the functions, but instead they are equally weighted with the "
               "constants. If you look at the grammar above, this is an "
               "error!!!! ")
        # Debug mode deliberately stops here, before any training happens.
        assert False

    # Discard the option without failing when it was never supplied.
    args.pop("likelihoodModel", None)
    explorationCompression(baseGrammar, train,
                           testingTasks=test,
                           **args)
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on manipulating sequences of numbers.

    `args` is a dict that is consumed in place: the keys "random_seed",
    "maxTasks", "extractor", "hidden" and "split" are popped, the keys
    "featureExtractor", "outputPrefix" and "evaluationTimeout" are added, and
    whatever remains is forwarded as keyword arguments to
    `explorationCompression`. The best posterior of every task solution in the
    returned result is printed at the end.

    Raises KeyError if one of the popped keys is missing or if the requested
    extractor name is unknown.
    """
    random.seed(args.pop("random_seed"))

    tasks = make_list_bootstrap_tasks()
    print(tasks)

    # Optionally cap the number of tasks by keeping a random subset.
    maxTasks = args.pop("maxTasks")
    if maxTasks and len(tasks) > maxTasks:
        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        random.shuffle(tasks)
        del tasks[maxTasks:]

    primitives = McCarthyPrimitives()
    # Only referenced by the disabled snippets below; kept so that they can be
    # re-enabled without further changes.
    from dreamcoder.program import Program, Invented
    # plus = Program.parse("(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 $1 (incr ($2 $1 (decr0 $0))))))))))")
    # plus = Invented(plus)
    # primitives.append(plus)
    # minus = Program.parse("(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 $1 ($2 (decr0 $1) (decr0 $0)))))))))")
    # minus = Invented(minus)
    # primitives.append(minus)
    # times = Program.parse("(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 0 (#(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 $1 (incr ($2 $1 (decr0 $0)))))))))) $1 ($2 (decr0 $0) $1)))))))))")
    # times = Invented(times)
    # primitives.append(times)

    # Primitives whose name starts with "fix" get weight 5.0, all others 0.0.
    # (A uniform grammar used to be built first and then immediately
    # overwritten by this one; that dead assignment was dropped.)
    baseGrammar = Grammar(
        0.0,
        [(5.0 if p.name.startswith('fix') else 0.0, p.infer(), p)
         for p in primitives])

    extractor = {
        "learned": LearnedFeatureExtractor,
    }[args.pop("extractor")]
    # NOTE: this sets the attribute on the extractor class itself.
    extractor.H = args.pop("hidden")

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/list/%s" % timestamp
    # Create the directory directly instead of shelling out to `mkdir -p`;
    # unlike os.system, a failure here is reported instead of being ignored.
    os.makedirs(outputDirectory, exist_ok=True)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/list" % outputDirectory,
        "evaluationTimeout": 0.0005,
    })

    eprint("Got {} list tasks".format(len(tasks)))

    split = args.pop("split")
    if split:
        # The selective `train_necessary` logic is disabled in this variant:
        # every task is flagged as mustTrain before splitting.
        for t in tasks:
            t.mustTrain = True

        test, train = testTrainSplit(tasks, split)
        eprint("Alotted {} tasks for training and {} for testing".format(
            len(train), len(test)))
    else:
        train = tasks
        test = []

    result = explorationCompression(baseGrammar, train, testingTasks=test, **args)
    print([x.bestPosterior for x in result.taskSolutions.values()])
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on LOGO tasks.

    `args` is a dict that is consumed in place: the keys "visualize",
    "dreamCheckpoint", "dreamDirectory", "proto", "animate", "target",
    "reduce", "save", "prefix", "cost" and "split" are popped and whatever
    remains is forwarded as keyword arguments to `ecIterator`.

    The "visualize", "dreamCheckpoint" and "animate" options each run a
    one-off action and then terminate the process through `sys.exit(0)`.

    Side effects: rebinds the module globals `prefix_dreams` and `primitives`,
    creates output directories, and starts `protonet_server.py` as a child
    process that is not waited for or terminated here.
    """
    # The below legacy global statement is required since prefix_dreams is used by LogoFeatureCNN.
    # TODO(lcary): use argument passing instead of global variables.
    global prefix_dreams

    # The below global statement is required since primitives is modified within main().
    # TODO(lcary): use a function call to retrieve and declare primitives instead.
    global primitives

    visualizeCheckpoint = args.pop("visualize")
    if visualizeCheckpoint is not None:
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only point this at checkpoints you produced yourself.
        with open(visualizeCheckpoint, 'rb') as handle:
            primitives = pickle.load(handle).grammars[-1].primitives
        visualizePrimitives(primitives)
        sys.exit(0)

    dreamCheckpoint = args.pop("dreamCheckpoint")
    dreamDirectory = args.pop("dreamDirectory")

    proto = args.pop("proto")

    if dreamCheckpoint is not None:
        enumerateDreams(dreamCheckpoint, dreamDirectory)
        sys.exit(0)

    animateCheckpoint = args.pop("animate")
    if animateCheckpoint is not None:
        animateSolutions(loadPickle(animateCheckpoint).allFrontiers)
        sys.exit(0)

    target = args.pop("target")
    red = args.pop("reduce")
    save = args.pop("save")
    prefix = args.pop("prefix")
    prefix_dreams = prefix + "/dreams/" + ('_'.join(target)) + "/"
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(prefix_dreams, exist_ok=True)
    tasks = makeTasks(target, proto)
    eprint("Generated", len(tasks), "tasks")

    costMatters = args.pop("cost")
    for t in tasks:
        t.specialTask[1]["costMatters"] = costMatters
        # disgusting hack - include whether cost matters in the dummy input
        if costMatters:
            t.examples = [(([1]), t.examples[0][1])]

    # Launch the server from inside its own directory without changing the
    # working directory of this process (the old chdir / chdir("..") pair left
    # the process in the wrong place if Popen failed).
    subprocess.Popen(["python", "./protonet_server.py"],
                     cwd="prototypical-networks")
    time.sleep(3)  # give the server a moment to start

    test, train = testTrainSplit(tasks, args.pop("split"))
    eprint("Split tasks into %d/%d test/train" % (len(test), len(train)))
    try:
        if test:
            montageTasks(test, "test_")
        montageTasks(train, "train_")
    except Exception:
        # Montages are best-effort; Exception (rather than a bare except)
        # lets KeyboardInterrupt and SystemExit propagate.
        eprint(
            "WARNING: couldn't generate montage. Do you have an old version of scipy?"
        )

    # `red is not []` compared identity against a fresh list and was therefore
    # always true, so a None value crashed in the loop; test truthiness.
    if red:
        for reducing in red:
            try:
                with open(reducing, 'r') as f:
                    prods = json.load(f)
                    for e in prods:
                        e = Program.parse(e)
                        if e.isInvented:
                            primitives.append(e)
            except (EOFError, IOError, json.decoder.JSONDecodeError):
                eprint("Couldn't grab frontier from " + reducing)

    # Remove duplicate primitives while keeping their first-seen order.
    primitives = list(OrderedDict.fromkeys(primitives))
    baseGrammar = Grammar.uniform(primitives, continuationType=turtle)

    eprint(baseGrammar)

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/logo/%s" % timestamp
    # Create the directory directly instead of shelling out to `mkdir -p`;
    # unlike os.system, a failure here is reported instead of being ignored.
    os.makedirs(outputDirectory, exist_ok=True)

    generator = ecIterator(baseGrammar, train,
                           testingTasks=test,
                           outputPrefix="%s/logo" % outputDirectory,
                           evaluationTimeout=0.01,
                           **args)

    r = None
    for result in generator:
        iteration = len(result.learningCurve)
        dreamDirectory = "%s/dreams_%d" % (outputDirectory, iteration)
        os.makedirs(dreamDirectory, exist_ok=True)
        eprint("Dreaming into directory", dreamDirectory)
        dreamFromGrammar(result.grammars[-1],
                         dreamDirectory)
        r = result

    if r is None:
        # The iterator produced nothing, so there is no final grammar to
        # export (this used to fail with AttributeError on None).
        eprint("WARNING: no results were produced; nothing to export")
        return

    # Invented productions of the final grammar, serialised as strings.
    needsExport = [
        str(z)
        for _, _, z in r.grammars[-1].productions
        if z.isInvented
    ]

    if save is not None:
        with open(save, 'w') as f:
            json.dump(needsExport, f)
] baseGrammar = Grammar.uniform(primitives) random.seed(42) tasks = makeTasks() smooth = arguments.pop('smooth') for t in tasks: t.features = drawFunction(200, 10., t.f) delattr(t, 'f') if smooth: t.likelihoodThreshold = None eprint("Got %d tasks..." % len(tasks)) test, train = testTrainSplit(tasks, 100) random.shuffle(test) test = test[:100] eprint("Training on", len(train), "tasks") if False: hardTasks = [t for t in train if '/' in t.name and '[' in t.name] for clamp in [True, False]: for lr in [0.1, 0.05, 0.5, 1.]: for steps in [50, 100, 200]: for attempts in [10, 50, 100, 200]: for s in [0.1, 0.5, 1, 3]: start = time.time() losses = callCompiled(debugMany, hardTasks, clamp, lr, steps, attempts, s) losses = dict(zip(hardTasks, losses))