def makeNewTasks(include_only=None):
    """Build regex tasks from the 'train' examples of the pickled background columns.

    Loads ``csv_filtered_all_background_novel.p`` from the data directory and
    turns every entry of ``data['background']`` into a ``Task`` named
    ``"Data column no. <i>"``.  Each training example is paired with an empty
    input tuple.

    Args:
        include_only: optional container of column indices.  When truthy only
            those columns are kept; when None or empty every column is used.

    Returns:
        The list of tasks, in column order.
    """
    path = os.path.join(get_data_dir(), "csv_filtered_all_background_novel.p")
    with open(path, 'rb') as fh:
        loaded = dill.load(fh)
    columns = loaded['background']

    # Decide once whether filtering applies (an empty include_only means "all").
    restrict = bool(include_only)

    tasks = []
    for idx, column in enumerate(columns):
        if restrict and idx not in include_only:
            continue
        tasks.append(
            Task("Data column no. " + str(idx),
                 arrow(tpregex, tpregex),
                 [((), example) for example in column['train']]))
    return tasks
def regexHeldOutExamples(task, include_only=None):
    """Return the held-out ('test') examples recorded for ``task``.

    The first call made while the module-level ``REGEXTASKS`` is None loads
    ``csv_filtered_all_background_novel.p`` and caches a mapping from task
    name (``"Data column no. <i>"``) to that column's 'test' examples.  Later
    calls reuse the cache, so ``include_only`` only has an effect on the call
    that populates it.

    Args:
        task: object whose ``name`` is looked up in the cache.
        include_only: optional container of column indices used when the
            cache is built; None or empty means every column.

    Returns:
        The cached example list for ``task.name``.  A name missing from the
        cache raises KeyError from the dictionary lookup.
    """
    global REGEXTASKS
    if REGEXTASKS is None:
        path = os.path.join(get_data_dir(), "csv_filtered_all_background_novel.p")
        with open(path, 'rb') as fh:
            loaded = dill.load(fh)
        columns = loaded['background']

        restrict = bool(include_only)
        built = []
        for idx, column in enumerate(columns):
            if restrict and idx not in include_only:
                continue
            built.append(
                Task("Data column no. " + str(idx),
                     arrow(tpregex, tpregex),
                     [((), example) for example in column['test']]))

        # Publish the cache only once every task has been built.
        by_name = {}
        for held_out in built:
            by_name[held_out.name] = held_out.examples
        REGEXTASKS = by_name

    return REGEXTASKS[task.name]
def manualLogoTask(name, expression, proto=False, needToTrain=False,
                   supervise=False, lambdaCalculus=False):
    """Create a LOGO task whose target image is rendered from a hand-written program.

    Args:
        name: task name.
        expression: program source; parsed with ``Program.parse`` when
            ``lambdaCalculus`` is true, otherwise with ``parseLogo``.
        proto: stored on the task as ``proto`` and inside ``specialTask``.
        needToTrain: stored on the task as ``mustTrain``.
        supervise: when true the parsed program is attached as
            ``supervisedSolution``.
        lambdaCalculus: selects the parser (see ``expression``).

    Returns:
        A ``Task`` of type ``turtle -> turtle`` with one example whose output
        is the low-resolution rendering; the high-resolution rendering is
        attached as ``highresolution``.
    """
    p = Program.parse(expression) if lambdaCalculus else parseLogo(expression)

    from dreamcoder.domains.logo.logoPrimitives import primitives
    from dreamcoder.grammar import Grammar

    g = Grammar.uniform(primitives, continuationType=turtle)
    gp = Grammar.uniform(primitives)
    # Purely diagnostic: report the description length of the program.
    # Failures here (including the sanity assertion) only produce a warning.
    try:
        l = g.logLikelihood(arrow(turtle, turtle), p)
        lp = gp.logLikelihood(arrow(turtle, turtle), p)
        assert l >= lp
        eprint(name, -l, "nats")
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit; ordinary errors are still treated as best-effort.
        eprint("WARNING: could not calculate likelihood of manual logo", p)

    # Render at both resolutions, retrying for as long as either render
    # reports "timeout".  NOTE(review): there is no upper bound on retries.
    attempts = 0
    while True:
        [output, highresolution] = drawLogo(p, p,
                                            resolution=[28, 128],
                                            cost=True)
        if output == "timeout" or highresolution == "timeout":
            attempts += 1
        else:
            break
    if attempts > 0:
        eprint(
            f"WARNING: Took {attempts} attempts to render task {name} within timeout"
        )

    # With cost=True each result is indexed as [pixels, cost].
    cost = output[1]
    output = output[0]
    assert highresolution[1] == cost
    highresolution = highresolution[0]

    shape = list(map(int, output))
    highresolution = list(map(float, highresolution))

    # The single example's input is the list [0] (not a tuple).
    t = Task(name, arrow(turtle, turtle), [(([0]), shape)])
    t.mustTrain = needToTrain
    t.proto = proto
    # The stored cost is the rendering cost inflated by 5%.
    t.specialTask = ("LOGO", {"proto": proto})
    t.specialTask[1]["cost"] = cost * 1.05
    t.highresolution = highresolution

    if supervise:
        t.supervisedSolution = p

    return t
def McCarthyPrimitives():
    """Return the integer-oriented McCarthy (1959 Lisp) primitive set.

    Contains ``if``, ``primitiveRecursion1``, ``primitiveRecursion2``,
    ``positive?``, ``incr``, ``decr`` and the integer constants 0 and 1.
    """
    control = [
        Primitive("if", arrow(tbool, t0, t0, t0), _if),
        primitiveRecursion1,
        primitiveRecursion2,
    ]
    # Disabled in this variant:
    # Primitive("gt?", arrow(tint, tint, tbool), _gt),
    arithmetic = [
        Primitive("positive?", arrow(tint, tbool), _positive),
        Primitive("incr", arrow(tint, tint), _succ),
        Primitive("decr", arrow(tint, tint), _desc),
    ]
    constants = [Primitive(str(j), tint, j) for j in range(2)]
    return control + arithmetic + constants
def easyWordsPrimitives():
    """Return the reduced regex primitive set for the "easy words" setting.

    The set consists of one ``string_<c>`` literal for every character of
    ``printable[10:62]`` that is not in ``disallowed_list``, the character
    classes ``r_d``, ``r_s``, ``r_l`` and ``r_u``, and the combinators
    ``r_kleene``, ``r_plus``, ``r_maybe``, ``r_alt`` and ``r_concat``.
    """
    literals = []
    for ch in printable[10:62]:
        if ch in disallowed_list:
            continue
        literals.append(Primitive("string_" + ch, tpregex, pregex.String(ch)))

    character_classes = [
        Primitive("r_d", tpregex, pregex.d),
        Primitive("r_s", tpregex, pregex.s),
        # Disabled: Primitive("r_w", tpregex, pregex.w),
        Primitive("r_l", tpregex, pregex.l),
        Primitive("r_u", tpregex, pregex.u),
    ]

    combinators = [
        Primitive("r_kleene", arrow(tpregex, tpregex), _kleene),
        Primitive("r_plus", arrow(tpregex, tpregex), _plus),
        Primitive("r_maybe", arrow(tpregex, tpregex), _maybe),
        Primitive("r_alt", arrow(tpregex, tpregex, tpregex), _alt),
        Primitive("r_concat", arrow(tpregex, tpregex, tpregex), _concat),
    ]

    return literals + character_classes + combinators
def retrieveJSONTasks(filename, features=False):
    """Load list-domain tasks from a JSON file.

    The file must hold a list of objects of the form::

        {"name": str,
         "type": {"input" : bool|int|list-of-bool|list-of-int,
                  "output": bool|int|list-of-bool|list-of-int},
         "examples": [{"i": data, "o": data}]}

    Args:
        filename: path of the JSON file.
        features: when truthy, ``list_features`` is computed over the
            examples and passed to the task; otherwise ``features`` is None.

    Returns:
        One ``Task`` (built with ``cache=False``) per JSON entry, in file order.
    """
    with open(filename, "r") as f:
        loaded = json.load(f)

    TP = {
        "bool": tbool,
        "int": tint,
        "list-of-bool": tlist(tbool),
        "list-of-int": tlist(tint),
    }

    def examples_of(entry):
        # Each example becomes ((input,), output).
        return [((ex["i"], ), ex["o"]) for ex in entry["examples"]]

    tasks = []
    for item in loaded:
        task_name = item["name"]
        signature = arrow(TP[item["type"]["input"]], TP[item["type"]["output"]])
        examples = examples_of(item)
        if features:
            # A separately built example list is handed to list_features.
            extracted = list_features(examples_of(item))
        else:
            extracted = None
        tasks.append(
            Task(task_name, signature, examples,
                 features=extracted,
                 cache=False))
    return tasks
def makeTask(name, f):
    """Build a regression task by sampling ``f`` on the grid -5.00 .. 4.99.

    ``f`` is evaluated at 1000 points spaced 0.01 apart.  Points where ``f``
    raises an ordinary exception, or where ``abs(f(x))`` is not below 10, are
    dropped.  If at least 50 points survive, 50 of them are taken at a
    regular stride and wrapped in a ``DifferentiableTask``.

    Args:
        name: task name.
        f: callable taking a float; also stored on the task as ``t.f``.

    Returns:
        The ``DifferentiableTask``, or None when fewer than 50 usable points
        were found.
    """
    xs = [x / 100. for x in range(-500, 500)]
    maximum = 10
    N = 50

    inputs = []
    outputs = []
    for x in xs:
        try:
            y = f(x)
        except Exception:
            # Was ``except BaseException``, which also swallowed
            # KeyboardInterrupt/SystemExit; ordinary evaluation errors
            # (division by zero, domain errors, ...) still skip the point.
            continue
        if abs(y) < maximum:
            inputs.append(float(x))
            outputs.append(float(y))

    if len(inputs) < N:
        return None

    ex = list(zip(inputs, outputs))
    # Thin the samples to N points spread evenly over the surviving range.
    ex = ex[::len(ex) // N][:N]
    t = DifferentiableTask(name,
                           arrow(treal, treal),
                           [((x, ), y) for x, y in ex],
                           BIC=1.,
                           restarts=360,
                           steps=50,
                           likelihoodThreshold=-0.05,
                           temperature=0.1,
                           maxParameters=6,
                           loss=squaredErrorLoss)
    t.f = f
    return t
def dreamFromGrammar(g, directory, N=100):
    """Render "dreams" to ``directory`` as images plus program text.

    Args:
        g: either a ``Grammar``, from which up to ``N`` programs of type
            ``turtle -> turtle`` are sampled (None samples are dropped), or an
            already prepared collection of programs.
        directory: output directory; for program ``n`` the files ``n.png``,
            ``n_pretty.png``, ``n_smooth_pretty.png`` and ``n.dream`` are
            written there.
        N: number of sampling attempts when ``g`` is a grammar.
    """
    if isinstance(g, Grammar):
        programs = []
        for _ in range(N):
            sampled = g.sample(arrow(turtle, turtle), maximumDepth=20)
            if sampled is not None:
                programs.append(sampled)
    else:
        programs = g

    def render(pretty, smooth, targets):
        # One rendering pass over every program with the given flags.
        drawLogo(*programs,
                 pretty=pretty,
                 smoothPretty=smooth,
                 resolution=512,
                 filenames=targets,
                 timeout=1)

    render(False, False,
           [f"{directory}/{n}.png" for n in range(len(programs))])
    render(True, False,
           [f"{directory}/{n}_pretty.png" for n in range(len(programs))])
    render(False, True,
           [f"{directory}/{n}_smooth_pretty.png" for n in range(len(programs))])

    # Save the source of every program next to its images.
    for n, p in enumerate(programs):
        with open(f"{directory}/{n}.dream", "w") as handle:
            handle.write(str(p))
def makeOldTasks():
    """Build one regex task per column of ``data_filtered.json``.

    The JSON file is read from the data directory; its i-th entry supplies
    the examples of the task named ``"Luke data column no.<i>"``.  Every
    example is paired with an empty input tuple.

    Returns:
        The list of tasks, in file order.
    """
    taskfile = os.path.join(get_data_dir(), 'data_filtered.json')

    with open(taskfile) as f:
        task_list = json.load(f)

    # Every column is turned into a task; nothing is filtered out.
    regextasks = [
        Task("Luke data column no." + str(i),
             arrow(tpregex, tpregex),
             [((), example) for example in column])
        for i, column in enumerate(task_list)
    ]

    return regextasks
def makeNumberTasks():
    """Build regex tasks for the number-like columns of ``regex_data_csv_900.p``.

    Only the columns whose index appears in ``decimals_pos_neg_dollar`` are
    turned into tasks.  Each task is named ``"Data column no. <i>"`` and pairs
    every example with an empty input tuple.

    Returns:
        The list of selected tasks, in column order.
    """
    # load new data:
    taskfile = os.path.join(get_data_dir(), "regex_data_csv_900.p")

    with open(taskfile, 'rb') as handle:
        data = dill.load(handle)

    tasklist = data[0]  # a list of indices

    # match_col(data[0],'\d*\.\d*')
    # Not used for selection below; kept as a record of that match.
    raw_decimals = [
        121, 122, 163, 164, 165, 170, 172, 173, 175, 178, 218, 228, 230, 231,
        252, 253, 254, 258, 259, 305, 320, 330, 334, 340, 348, 350, 351, 352,
        353, 355, 357, 358, 361, 363, 364, 371, 380, 382, 409, 410, 411, 447,
        448, 449, 450, 458, 469, 471, 533, 562, 564
    ]

    decimals_pos_neg_dollar = [
        3, 4, 5, 6, 7, 13, 16, 24, 27, 28, 29, 30, 31, 32, 33, 34, 37, 38, 39,
        40, 53, 54, 55, 57, 58, 60, 61, 63, 64, 65, 66, 68, 69, 70, 71, 73, 74,
        77, 78, 80, 81, 103, 104, 105, 106, 107, 109, 110, 111, 112, 113, 114,
        115, 116, 117, 118, 119, 121, 122, 123, 124, 125, 126, 128, 129, 131,
        132, 134, 135, 139, 146, 153, 154, 155, 156, 157, 158, 159, 160, 161,
        162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        176, 177, 178, 180, 181, 182, 183, 184, 185, 186, 193, 194, 195, 204,
        205, 207, 209, 210, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221,
        223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 249, 250, 251, 252,
        253, 254, 255, 256, 258, 259, 260, 261, 263, 266, 267, 270, 271, 272,
        277, 299, 301, 302, 305, 306, 307, 309, 312, 313, 315, 319, 320, 324,
        326, 327, 330, 334, 340, 348, 350, 351, 352, 353, 354, 355, 356, 357,
        358, 361, 362, 363, 364, 368, 371, 373, 377, 380, 382, 400, 401, 402,
        403, 405, 406, 409, 410, 411, 413, 435, 439, 446, 447, 448, 449, 450,
        451, 452, 453, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466,
        469, 470, 471, 477, 498, 500, 502, 503, 507, 512, 518, 519, 520, 532,
        533, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 564, 565, 572,
        577
    ]

    # match_col(data[0],'(\d*,?\d*)+')
    commas = []
    # match_col(data[0],'(\d*,?\d*)+')
    commas_and_all = []

    # full_list = test_list + train_list
    train_list = []
    full_list = decimals_pos_neg_dollar

    # Set membership avoids rescanning the index list for every column.
    selected = frozenset(full_list)
    regextasks = [
        Task("Data column no. " + str(i),
             arrow(tpregex, tpregex),
             [((), example) for example in task])
        for i, task in enumerate(tasklist)
        if i in selected
    ]

    # NOTE(review): train_list is empty, so this loop does nothing today.  If
    # it is ever filled, its entries index positions in the filtered
    # ``regextasks`` list, not column numbers -- confirm that is intended.
    for i in train_list:
        regextasks[i].mustTrain = True

    return regextasks
def basePrimitives():
    """Assemble the full regex primitive set.

    In order, the result holds: a ``string_<c>`` literal for each character
    of ``printable[:-4]`` not in ``disallowed_list``; a ``string_<name>``
    literal for each ``(char, name)`` pair in ``disallowed``; the character
    classes ``r_dot``, ``r_d``, ``r_s``, ``r_w``, ``r_l``, ``r_u``; and the
    combinators ``r_kleene``, ``r_plus``, ``r_maybe``, ``r_alt``, ``r_concat``.
    """
    prims = []

    # Literals that can be named directly after their character.
    for ch in printable[:-4]:
        if ch not in disallowed_list:
            prims.append(Primitive("string_" + ch, tpregex, pregex.String(ch)))

    # Literals that need an explicit name.
    for char, name in disallowed:
        prims.append(Primitive("string_" + name, tpregex, pregex.String(char)))

    # Character classes.
    prims.extend([
        Primitive("r_dot", tpregex, pregex.dot),
        Primitive("r_d", tpregex, pregex.d),
        Primitive("r_s", tpregex, pregex.s),
        Primitive("r_w", tpregex, pregex.w),
        Primitive("r_l", tpregex, pregex.l),
        Primitive("r_u", tpregex, pregex.u),
    ])

    # Combinators.
    prims.extend([
        Primitive("r_kleene", arrow(tpregex, tpregex), _kleene),
        Primitive("r_plus", arrow(tpregex, tpregex), _plus),
        Primitive("r_maybe", arrow(tpregex, tpregex), _maybe),
        Primitive("r_alt", arrow(tpregex, tpregex, tpregex), _alt),
        Primitive("r_concat", arrow(tpregex, tpregex, tpregex), _concat),
    ])

    return prims
def matchEmpericalNoLetterPrimitives(corpus):
    """Return a zero-argument factory for the corpus-fitted, letter-free primitive set.

    Nothing is built until the returned callable is invoked.  Each invocation
    produces, in order: ``empty_string``; a ``string_<c>`` literal for each
    character of ``printable[:-4]`` that is neither in ``disallowed_list``
    nor in ``printable[10:62]``; the named literals from ``disallowed``;
    ``r_dot`` and ``r_d`` built from ``emp_dot_no_letter(corpus)`` and
    ``emp_d(corpus)``; ``r_s``; and the combinators ``r_kleene``, ``r_alt``
    and ``r_concat``.

    Args:
        corpus: passed to ``emp_dot_no_letter`` and ``emp_d`` on every
            invocation of the factory.
    """

    def build():
        excluded = disallowed_list + list(printable[10:62])

        prims = [Primitive("empty_string", tpregex, pregex.String(""))]

        for ch in printable[:-4]:
            if ch not in excluded:
                prims.append(
                    Primitive("string_" + ch, tpregex, pregex.String(ch)))

        for char, name in disallowed:
            prims.append(
                Primitive("string_" + name, tpregex, pregex.String(char)))

        prims.extend([
            Primitive("r_dot", tpregex, emp_dot_no_letter(corpus)),
            Primitive("r_d", tpregex, emp_d(corpus)),
            Primitive("r_s", tpregex, pregex.s),
            Primitive("r_kleene", arrow(tpregex, tpregex), _kleene),
            # Disabled: Primitive("r_plus", arrow(tpregex, tpregex), _plus),
            # Disabled: Primitive("r_maybe", arrow(tpregex, tpregex), _maybe),
            Primitive("r_alt", arrow(tpregex, tpregex, tpregex), _alt),
            Primitive("r_concat", arrow(tpregex, tpregex, tpregex), _concat),
        ])
        return prims

    return build
def McCarthyPrimitives():
    """Return the minimal McCarthy (1959 Lisp) primitive set.

    Only ``0``, ``incr``, ``decr0``, ``if0`` and ``primitiveRecursion2`` are
    enabled; the alternatives listed in the comments are switched off.
    """
    return [
        Primitive("0", tint, 0),
        Primitive("incr", arrow(tint, tint), _incr),
        Primitive("decr0", arrow(tint, tint), _decr0),
        # Disabled: Primitive("if", arrow(tbool, t0, t0, t0), _if),
        # Disabled: Primitive("eq0", arrow(tint, tbool), _eq0),
        Primitive("if0", arrow(t0, t0, t0, t0), _if0),
        # Disabled: primitiveRecursion1,
        primitiveRecursion2,
        # Disabled: Primitive("gt?", arrow(tint, tint, tbool), _gt),
        # Disabled: Primitive("positive?", arrow(tint, tbool), _positive),
        # Disabled: Primitive("+", arrow(tint, tint, tint), _add),
        # Disabled: Primitive("-0", arrow(tint, tint, tint), _minus0),
    ]
def makeWordTasks():
    """Build regex tasks for the word-like columns of ``regex_data_csv_900.p``.

    The selected columns are the union of the hand-collected index lists
    below.  Each task is named ``"Data column no. <i>"`` and pairs every
    example with an empty input tuple.

    Returns:
        The list of selected tasks, in column order (each column at most once).
    """
    # load new data:
    taskfile = os.path.join(get_data_dir(), "regex_data_csv_900.p")

    with open(taskfile, 'rb') as handle:
        data = dill.load(handle)

    tasklist = data[0]  # a list of indices

    all_upper = [0, 2, 8, 9, 10, 11, 12, 17, 18, 19, 20, 22]
    all_lower = [1]

    # match_col(data[0],'\\u(\l+)')
    one_capital_lower_plus = [
        144, 200, 241, 242, 247, 296, 390, 392, 444, 445, 481, 483, 485, 489,
        493, 542, 549, 550, 581
    ]

    # match_col(data[0],'(\l ?)+')
    lower_with_maybe_spaces = [
        1, 42, 47, 99, 100, 102, 201, 246, 248, 293, 294, 345, 437, 545, 590
    ]

    # match_col(data[0],'(\\u\l+ ?)+')
    capital_then_lower_maybe_spaces = [
        144, 200, 241, 242, 247, 296, 390, 392, 395, 438, 444, 445, 481, 483,
        484, 485, 487, 489, 493, 494, 542, 546, 549, 550, 578, 581, 582, 588,
        591, 624, 629
    ]

    # match_col(data[0],'(\\u+ ?)+')
    all_caps_spaces = [
        0, 2, 8, 9, 10, 11, 12, 17, 18, 19, 20, 22, 25, 26, 35, 36, 43, 45,
        46, 49, 50, 52, 56, 59, 87, 89, 95, 101, 140, 147, 148, 149, 199, 332,
        336, 397, 491, 492, 495, 580, 610
    ]

    # one_capital_and_lower = [566, 550, 549, 542, 505, 493, 494, 489, 488, 485, 483, 481, 445, 444, 438, 296, 241, 242, 200, ]
    # all_lower_with_a_space = [545]
    # all_lower_maybe_space = [534]
    # one_capital_lower_maybe_spaces = [259, 262, 263, 264]

    # full_list = test_list + train_list
    train_list = []
    full_list = all_upper + all_lower + one_capital_lower_plus + lower_with_maybe_spaces + capital_then_lower_maybe_spaces + all_caps_spaces

    # The lists overlap; a set gives the same selection without rescanning
    # the concatenated list for every column.
    selected = frozenset(full_list)
    regextasks = [
        Task("Data column no. " + str(i),
             arrow(tpregex, tpregex),
             [((), example) for example in task])
        for i, task in enumerate(tasklist)
        if i in selected
    ]

    # NOTE(review): train_list is empty, so this loop does nothing today.  If
    # it is ever filled, its entries index positions in the filtered
    # ``regextasks`` list, not column numbers -- confirm that is intended.
    for i in train_list:
        regextasks[i].mustTrain = True

    return regextasks
def demoLogoTasks():
    """Produce demo artefacts for the LOGO domain under ``/tmp``.

    In order, this function:
      1. samples up to 1000 programs from a uniform grammar and writes their
         source and "pretty" renderings to ``/tmp/dreams_0``;
      2. builds the task set (named by ``sys.argv[1:]``, or ``'all'`` when no
         arguments are given), montages it and saves each task's
         high-resolution image as ``/tmp/logo<n>.png`` plus a colour-negated
         copy named after the task;
      3. saves and dilates the images of the ``dSLDemo()`` tasks;
      4. montages a shuffled subset of the must-train tasks and the
         ``rotationalSymmetryDemo()`` tasks.

    The work is done through side effects (files, shell commands); nothing is
    returned.
    """
    # NOTE(review): scipy.misc.imsave is not available in recent SciPy
    # releases -- confirm the SciPy version this is expected to run against.
    import scipy.misc
    import numpy as np

    g0 = Grammar.uniform(primitives, continuationType=turtle)
    eprint("dreaming into /tmp/dreams_0...")
    N = 1000
    # Sample N times, dropping attempts for which sample() returned None.
    programs = [
        p for _ in range(N)
        for p in [g0.sample(arrow(turtle, turtle), maximumDepth=20)]
        if p is not None
    ]
    os.system("mkdir -p /tmp/dreams_0")
    # Write the source text of every sampled program.
    for n, p in enumerate(programs):
        with open(f"/tmp/dreams_0/{n}.dream", "w") as handle:
            handle.write(str(p))
    drawLogo(*programs,
             pretty=True,
             smoothPretty=False,
             resolution=512,
             filenames=[
                 f"/tmp/dreams_0/{n}_pretty.png" for n in range(len(programs))
             ],
             timeout=1)

    # Command-line arguments, when present, choose which task groups to build.
    if len(sys.argv) > 1:
        tasks = makeTasks(sys.argv[1:], proto=False)
    else:
        tasks = makeTasks(['all'], proto=False)
    montageTasks(tasks, columns=16, testTrain=True)
    for n, t in enumerate(tasks):
        a = t.highresolution
        # The flat pixel list is cut into rows of width floor(sqrt(len(a))).
        w = int(len(a)**0.5)
        scipy.misc.imsave('/tmp/logo%d.png' % n,
                          np.array([a[i:i + w] for i in range(0, len(a), w)]))
        # Replace "=", space, "/" and "-" so the task name can be a file name.
        logo_safe_name = t.name.replace("=", "_").replace(' ', '_').replace(
            '/', '_').replace("-", "_") + ".png"
        #os.system(f"convert /tmp/logo{n}.png -morphology Dilate Octagon /tmp/{logo_safe_name}")
        # Shells out to the external `convert` command to negate the image.
        os.system(
            f"convert /tmp/logo{n}.png -channel RGB -negate /tmp/{logo_safe_name}"
        )
    eprint(len(tasks), "tasks")
    eprint(sum(t.mustTrain for t in tasks), "need to be trained on")

    for t in dSLDemo():
        a = t.highresolution
        w = int(len(a)**0.5)
        scipy.misc.imsave('/tmp/logoDemo%s.png' % t.name,
                          np.array([a[i:i + w] for i in range(0, len(a), w)]))
        # NOTE(review): t.name is interpolated into a shell command without
        # sanitising -- confirm demo task names are shell-safe.
        os.system(
            f"convert /tmp/logoDemo{t.name}.png -morphology Dilate Octagon /tmp/logoDemo{t.name}_dilated.png"
        )

    # Montage at most 48 randomly chosen must-train tasks.
    tasks = [t for t in tasks if t.mustTrain]
    random.shuffle(tasks)
    montageTasks(tasks[:16 * 3], "subset", columns=16)

    montageTasks(rotationalSymmetryDemo(), "rotational")
def tasksOfPrograms(self, ps, types):
    """Render programs and wrap each rendering in a placeholder task.

    Args:
        ps: programs to render with ``drawLogo`` at resolution 128.
        types: accepted but not used by this implementation.

    Returns:
        A list aligned with the renderings: None where the rendering is a
        string (presumably a failure marker such as a timeout -- confirm
        against ``drawLogo``), otherwise an example-free ``Task`` named
        "Helm" whose ``highresolution`` attribute holds the image.
    """
    rendered = drawLogo(*ps, resolution=128)
    # A single program yields a bare result rather than a list of results.
    if len(ps) == 1:
        rendered = [rendered]

    def as_task(image):
        if isinstance(image, str):
            return None
        helm = Task("Helm", arrow(turtle, turtle), [])
        helm.highresolution = image
        return helm

    return [as_task(image) for image in rendered]
def make_task(self):
    """Turn this object's weighted examples into a ``Task``.

    Returns None when the object is already solved or has no examples.
    Otherwise each ``(example, p)`` pair in ``self.examples`` contributes
    ``round(p * n)`` copies of ``example`` (``n`` is ``total_examples``
    capped at 100), the result is truncated to ``n`` entries, and a task
    named ``str(self.idx)`` with ``arity`` integer arguments and an integer
    result is returned.
    """
    if self.solved or self.total_examples == 0:
        return None

    signature = arrow(*([tint] * (self.arity + 1)))
    budget = min(self.total_examples, 100)
    # examples = random.choices([e for e, _ in self.examples], weights=[p for _, p in self.examples], k=n_examples)
    replicated = [
        example
        for example, weight in self.examples
        for _ in range(int(round(weight * budget)))
    ]
    return Task(str(self.idx), signature, replicated[:budget])
def argumentChoices(t):
    """Return the candidate arguments for a parameter of type ``t``.

    ``turtle`` yields ``[Index(0)]``; ``turtle -> turtle`` yields
    ``subprograms``; ``tint``, ``tangle`` and ``tlength`` are looked up by
    ``str(p)`` in the matching "special" table, falling back to ``numbers``,
    ``angles`` and ``distances`` respectively; any other type yields ``[]``.
    ``p`` and the tables come from the enclosing scope.
    """
    if t == turtle:
        return [Index(0)]
    if t == arrow(turtle, turtle):
        return subprograms
    if t == tint:
        return specialNumbers.get(str(p), numbers)
    if t == tangle:
        return specialAngles.get(str(p), angles)
    if t == tlength:
        return specialDistances.get(str(p), distances)
    return []
def genericType(t):
    """Map a domain-specific type onto its generic counterpart.

    ``real`` and ``positive`` become ``treal``, ``vector`` becomes
    ``tlist(treal)``, ``list`` and arrow types are converted recursively.
    Any other type fails an assertion.
    """
    kind = t.name
    if kind == "real":
        return treal
    if kind == "positive":
        return treal
    if kind == "vector":
        return tlist(treal)
    if kind == "list":
        return tlist(genericType(t.arguments[0]))
    if t.isArrow():
        source, target = t.arguments[0], t.arguments[1]
        return arrow(genericType(source), genericType(target))
    assert False, "could not make type generic: %s" % t
def no_length():
    """Return the extended bootstrap primitives with ``length`` removed.

    This is ``bootstrapTarget()`` minus the primitive named "length"
    (requested by a reviewer), followed by ``*``, ``mod``, ``gt?``, ``eq?``,
    ``is-prime`` and ``is-square``.
    """
    kept = []
    for prim in bootstrapTarget():
        if prim.name != "length":
            kept.append(prim)

    extras = [
        Primitive("*", arrow(tint, tint, tint), _multiplication),
        Primitive("mod", arrow(tint, tint, tint), _mod),
        Primitive("gt?", arrow(tint, tint, tbool), _gt),
        Primitive("eq?", arrow(tint, tint, tbool), _eq),
        Primitive("is-prime", arrow(tint, tbool), _isPrime),
        Primitive("is-square", arrow(tint, tbool), _isSquare),
    ]
    return kept + extras
def bootstrapTarget_extra():
    """Return ``bootstrapTarget()`` plus list-domain arithmetic and predicates.

    The additions are ``*``, ``mod``, ``gt?``, ``eq?``, ``is-prime`` and
    ``is-square``, appended in that order.
    """
    base = bootstrapTarget()
    arithmetic = [
        Primitive("*", arrow(tint, tint, tint), _multiplication),
        Primitive("mod", arrow(tint, tint, tint), _mod),
    ]
    predicates = [
        Primitive("gt?", arrow(tint, tint, tbool), _gt),
        Primitive("eq?", arrow(tint, tint, tbool), _eq),
        Primitive("is-prime", arrow(tint, tbool), _isPrime),
        Primitive("is-square", arrow(tint, tbool), _isSquare),
    ]
    return base + arithmetic + predicates
def McCarthyPrimitives():
    """Return the list-oriented McCarthy (1959 Lisp) primitive set.

    Contains the list operations ``empty``, ``cons``, ``car``, ``cdr`` and
    ``empty?``; ``primitiveRecursion1``; ``gt?``, ``if``, ``eq?``, ``+`` and
    ``-``; and the integer constants 0 and 1.
    """
    list_operations = [
        Primitive("empty", tlist(t0), []),
        Primitive("cons", arrow(t0, tlist(t0), tlist(t0)), _cons),
        Primitive("car", arrow(tlist(t0), t0), _car),
        Primitive("cdr", arrow(tlist(t0), tlist(t0)), _cdr),
        Primitive("empty?", arrow(tlist(t0), tbool), _isEmpty),
    ]
    # Disabled in this variant:
    # Primitive("unfold", arrow(t0, arrow(t0,t1), arrow(t0,t0), arrow(t0,tbool), tlist(t1)), _isEmpty),
    # Primitive("1+", arrow(tint,tint),None),
    # Primitive("range", arrow(tint, tlist(tint)), range),
    # Primitive("map", arrow(arrow(t0, t1), tlist(t0), tlist(t1)), _map),
    # Primitive("index", arrow(tint,tlist(t0),t0),None),
    # Primitive("length", arrow(tlist(t0),tint),None),
    recursion = [
        primitiveRecursion1,
        # primitiveRecursion2,
    ]
    arithmetic = [
        Primitive("gt?", arrow(tint, tint, tbool), _gt),
        Primitive("if", arrow(tbool, t0, t0, t0), _if),
        Primitive("eq?", arrow(tint, tint, tbool), _eq),
        Primitive("+", arrow(tint, tint, tint), _addition),
        Primitive("-", arrow(tint, tint, tint), _subtraction),
    ]
    constants = [Primitive(str(j), tint, j) for j in range(2)]
    return list_operations + recursion + arithmetic + constants
def makeLongTasks():
    """Build one regex task for every column of ``regex_data_csv_900.p``.

    Each task is named ``"Data column no. <i>"`` and pairs every example of
    column ``i`` with an empty input tuple.

    Returns:
        The list of tasks, in column order.
    """
    path = os.path.join(get_data_dir(), "regex_data_csv_900.p")
    with open(path, 'rb') as fh:
        loaded = dill.load(fh)
    columns = loaded[0]

    tasks = []
    for idx, column in enumerate(columns):
        examples = [((), example) for example in column]
        tasks.append(
            Task("Data column no. " + str(idx),
                 arrow(tpregex, tpregex),
                 examples))
    return tasks
def make_task(self):
    """Turn this object's examples into a ``Task`` of at most 100 examples.

    Returns None when there are fewer examples than required (30 for a
    positive arity, 10 otherwise, 0 in few-shot mode), when the object is
    already solved, or when any example has None as its second element.

    When more than 100 examples exist, those whose flag in ``self.res`` is
    falsy are all kept, the remaining slots are drawn with replacement from
    the truthy-flag ones, and 100 of the combined list are then sampled.
    """
    required = 30 if (self.arity is not None and self.arity > 0) else 10
    if self.fewshot:
        required = 0
    cap = 100

    pool = self.examples
    if len(pool) < required:
        return None
    if self.solved:
        return None
    if None in [entry[1] for entry in pool]:
        return None

    signature = arrow(*([tint] * (self.arity + 1)))

    if len(pool) > cap:
        failed = [e for e, ok in zip(pool, self.res) if not ok]
        passed = [e for e, ok in zip(pool, self.res) if ok]
        # Fill the slots left over after the failed examples.
        refill = random.choices(passed, k=cap - len(failed))
        pool = random.sample(failed + refill, k=cap)

    return Task(str(self.idx), signature, pool)
def algolispPrimitives():
    """Return the AlgoLisp primitive set.

    In order: the call builders ``fn_call``, ``lambda1_call`` and
    ``lambda2_call``; the symbol converters ``symbol_constant`` and
    ``symbol_function``; the list builders ``list_init_symbol`` and
    ``list_add_symbol``; one function primitive per ``fn_lookup`` entry; and
    one constant primitive per ``const_lookup`` entry.
    """

    def tagged_call(tag):
        # Curried builder: f -> args -> [tag, [f, *args]].  An argument that
        # is not exactly a list is treated as a single argument.
        def take_function(f):
            def take_arguments(sx):
                arguments = sx if type(sx) == list else [sx]
                return [tag, [f] + arguments]
            return take_arguments
        return take_function

    def append_symbol(symbol):
        # Curried append: symbol -> symbols -> symbols + [symbol].  A
        # non-list ``symbols`` is first wrapped into a one-element list.
        def onto(symbols):
            if type(symbols) == list:
                return symbols + [symbol]
            return [symbols] + [symbol]
        return onto

    builders = [
        Primitive("fn_call", arrow(tfunction, tlist(tsymbol), tsymbol), _fn_call),
        Primitive("lambda1_call", arrow(tfunction, tlist(tsymbol), tsymbol),
                  tagged_call("lambda1")),
        Primitive("lambda2_call", arrow(tfunction, tlist(tsymbol), tsymbol),
                  tagged_call("lambda2")),
        # symbol converters:
        # SYMBOL = constant | argument | function_call | function | lambda
        Primitive("symbol_constant", arrow(tconstant, tsymbol), lambda x: x),
        Primitive("symbol_function", arrow(tfunction, tsymbol), lambda x: x),
        # list converters
        Primitive('list_init_symbol', arrow(tsymbol, tlist(tsymbol)),
                  lambda symbol: [symbol]),
        Primitive('list_add_symbol',
                  arrow(tsymbol, tlist(tsymbol), tlist(tsymbol)),
                  append_symbol),
    ]

    # functions:
    functions = []
    for algo_name, ec_name in fn_lookup.items():
        functions.append(Primitive(ec_name, tfunction, algo_name))

    # Constants
    constants = []
    for algo_name, ec_name in const_lookup.items():
        constants.append(Primitive(ec_name, tconstant, algo_name))

    return builders + functions + constants
def makeHandPickedTasks():
    """Build regex tasks for a hand-picked set of columns of ``regex_data_csv_900.p``.

    Each selected column ``i`` becomes a task named ``"Data column no. <i>"``
    whose examples are paired with an empty input tuple.

    Returns:
        The list of selected tasks, in column order.
    """
    # load new data:
    taskfile = os.path.join(get_data_dir(), "regex_data_csv_900.p")

    with open(taskfile, 'rb') as handle:
        data = dill.load(handle)

    tasklist = data[0]  # a list of indices

    full_list = list(range(199)) + \
        [209,218,222,223,224,225,226] + \
        list(range(222,233)) + \
        [235,237,238,239,243,244,245,252,253,254,255,257,258,259,260,261,264,265,269,272,274] + \
        list(range(275,291)) + \
        [295,297,300,303,304,305,306,310,311,312,314,315,316,320,321,323,327,329,330,333,334,335,337,338,339,340,341,342,343,344] + \
        list(range(348,359)) + \
        [361,369,373,379,380,382,387,403,405,407,408] + \
        list(range(409,417)) + \
        list(range(418,437)) + \
        list(range(440,444)) + \
        list(range(446,452)) + \
        list(range(456,460)) + \
        list(range(466,472)) + \
        [503,504]

    # The pieces overlap; a set gives the same selection without rescanning
    # the concatenated list for every column.
    selected = frozenset(full_list)
    regextasks = [
        Task("Data column no. " + str(i),
             arrow(tpregex, tpregex),
             [((), example) for example in task])
        for i, task in enumerate(tasklist)
        if i in selected
    ]

    #for i in train_list:
    #    regextasks[i].mustTrain = True

    return regextasks
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on manipulating sequences of numbers.

    Every option read here is popped from ``args``; whatever is left, plus
    the keys added by ``args.update`` below, is forwarded as keyword
    arguments to ``explorationCompression``.
    """
    random.seed(args.pop("random_seed"))

    dataset = args.pop("dataset")
    # Each dataset name maps to a zero-argument loader; only the chosen one runs.
    tasks = {
        "Lucas-old": lambda: retrieveJSONTasks("data/list_tasks.json") + sortBootstrap(),
        "bootstrap": make_list_bootstrap_tasks,
        "sorting": sortBootstrap,
        "Lucas-depth1": lambda: retrieveJSONTasks("data/list_tasks2.json")[:105],
        "Lucas-depth2": lambda: retrieveJSONTasks("data/list_tasks2.json")[:4928],
        "Lucas-depth3": lambda: retrieveJSONTasks("data/list_tasks2.json"),
    }[dataset]()

    maxTasks = args.pop("maxTasks")
    if maxTasks and len(tasks) > maxTasks:
        necessaryTasks = []  # maxTasks will not consider these
        # NOTE(review): no key in the dataset table above starts with
        # "Lucas2.0", so this condition cannot be true for any loadable
        # dataset -- confirm whether the "Lucas-depth*" names were intended.
        if dataset.startswith("Lucas2.0") and dataset != "Lucas2.0-depth1":
            necessaryTasks = tasks[:105]

        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        # Keep a random subset of maxTasks tasks (plus any necessary ones).
        random.shuffle(tasks)
        del tasks[maxTasks:]
        tasks = necessaryTasks + tasks

    if dataset.startswith("Lucas"):
        # extra tasks for filter
        # Each synthetic task has 15 randomly generated examples.
        tasks.extend([
            Task("remove empty lists",
                 arrow(tlist(tlist(tbool)), tlist(tlist(tbool))),
                 [((ls, ), list(filter(lambda l: len(l) > 0, ls)))
                  for _ in range(15)
                  for ls in [[[
                      random.random() < 0.5
                      for _ in range(random.randint(0, 3))
                  ] for _ in range(4)]]]),
            Task("keep squares", arrow(tlist(tint), tlist(tint)), [
                ((xs, ), list(filter(lambda x: int(math.sqrt(x))**2 == x, xs)))
                for _ in range(15)
                for xs in [[
                    random.choice([0, 1, 4, 9, 16, 25])
                    if random.random() < 0.5 else random.randint(0, 9)
                    for _ in range(7)
                ]]
            ]),
            Task("keep primes", arrow(tlist(tint), tlist(tint)), [
                ((xs, ), list(
                    filter(
                        lambda x: x in {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37},
                        xs)))
                for _ in range(15)
                for xs in [[
                    random.choice([2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37])
                    if random.random() < 0.5 else random.randint(0, 9)
                    for _ in range(7)
                ]]
            ]),
        ])
        # Keep/remove tasks parameterised by the constants 0..3.  The lambdas
        # are consumed immediately by list(filter(...)), so each sees the
        # current value of i.
        for i in range(4):
            tasks.extend([
                Task("keep eq %s" % i,
                     arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: x == i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]]),
                Task("remove eq %s" % i,
                     arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: x != i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]]),
                Task("keep gt %s" % i,
                     arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: x > i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]]),
                Task("remove gt %s" % i,
                     arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: not x > i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]])
            ])

    def isIdentityTask(t):
        # True when every example has exactly one input equal to its output.
        return all(len(xs) == 1 and xs[0] == y for xs, y in t.examples)

    eprint("Removed", sum(isIdentityTask(t) for t in tasks),
           "tasks that were just the identity function")
    tasks = [t for t in tasks if not isIdentityTask(t)]

    # The selected primitive-set factory is called immediately.
    prims = {
        "base": basePrimitives,
        "McCarthy": McCarthyPrimitives,
        "common": bootstrapTarget_extra,
        "noLength": no_length,
        "rich": primitives
    }[args.pop("primitives")]()
    haveLength = not args.pop("noLength")
    haveMap = not args.pop("noMap")
    haveUnfold = not args.pop("noUnfold")
    eprint(f"Including map as a primitive? {haveMap}")
    eprint(f"Including length as a primitive? {haveLength}")
    eprint(f"Including unfold as a primitive? {haveUnfold}")
    # Drop map / unfold / length from the grammar when disabled by a flag.
    baseGrammar = Grammar.uniform([p for p in prims
                                   if (p.name != "map" or haveMap) and \
                                   (p.name != "unfold" or haveUnfold) and \
                                   (p.name != "length" or haveLength)])

    extractor = {
        "learned": LearnedFeatureExtractor,
    }[args.pop("extractor")]
    # The hidden size is set as an attribute on the extractor class itself.
    extractor.H = args.pop("hidden")

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/list/%s" % timestamp
    os.system("mkdir -p %s" % outputDirectory)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/list" % outputDirectory,
        "evaluationTimeout": 0.0005,
    })

    eprint("Got {} list tasks".format(len(tasks)))
    split = args.pop("split")
    if split:
        # Tasks for which train_necessary returns "some" are grouped by the
        # first word of their name; any other truthy result forces training.
        train_some = defaultdict(list)
        for t in tasks:
            necessary = train_necessary(t)
            if not necessary:
                continue
            if necessary == "some":
                train_some[t.name.split()[0]].append(t)
            else:
                t.mustTrain = True
        # Exactly one randomly chosen task per group is forced into training.
        for k in sorted(train_some):
            ts = train_some[k]
            random.shuffle(ts)
            ts.pop().mustTrain = True

        test, train = testTrainSplit(tasks, split)
        # Always-on filter: tasks named in EASYLISTTASKS are not tested on.
        if True:
            test = [t for t in test if t.name not in EASYLISTTASKS]

        eprint("Alotted {} tasks for training and {} for testing".format(
            len(train), len(test)))
    else:
        train = tasks
        test = []

    explorationCompression(baseGrammar, train, testingTasks=test, **args)
def isIntFunction(tp):
    """Return True when ``tp`` unifies with ``arrow(tint, t0)``, else False.

    Unification is attempted in a fresh ``Context``; a
    ``UnificationFailure`` is reported as False.
    """
    target = arrow(tint, t0)
    try:
        Context().unify(tp, target)
    except UnificationFailure:
        return False
    else:
        return True
def robustFillPrimitives(max_len=100, max_index=5):
    """Return the RobustFill-style string-transformation primitive set.

    The result is a single list concatenated from groups for program
    concatenation, expressions, substrings, nestings, regexes, types,
    positions, indices, characters, delimiters and boundaries.

    Args:
        max_len: positions ``position<i>`` are generated for
            ``-max_len <= i <= max_len``.
        max_index: bounds the generated index-dependent primitives:
            ``GetToken*`` uses ``range(-max_index, max_index)``, ``GetFirst_*``
            uses the non-zero values in ``[-max_index, max_index]`` and
            ``index<i>`` uses ``-max_index <= i <= max_index``.

    Most ``CPrimitive`` calls pass a fourth argument; its meaning is defined
    by ``CPrimitive`` and is not visible here (the inline comments suggest it
    describes a constraint -- confirm against ``CPrimitive``).
    """
    return [
        #CPrimitive("concat2", arrow(texpression, texpression, tprogram), _concat2),
        CPrimitive("concat1", arrow(texpression, tprogram), _concat1),
        CPrimitive("concat_list", arrow(texpression, tprogram, tprogram), _concat_list),
        #expressions
        CPrimitive("Constant", arrow(tcharacter, texpression), lambda x: lambda y: x),  # add a constraint
        # apply / apply_n compose two string functions: the second argument
        # is applied first.
        CPrimitive("apply", arrow(tnesting, tsubstr, texpression), lambda n: lambda sub: lambda string: n(sub(string))),
        CPrimitive("apply_n", arrow(tnesting, tnesting, texpression), lambda n1: lambda n2: lambda string: n1(n2(string))),
        CPrimitive("expr_n", arrow(tnesting, texpression), lambda x: x),
        CPrimitive("expr_f", arrow(tsubstr, texpression), lambda x: x)
    ] + [
        #substrings
        CPrimitive("SubStr", arrow(tposition, tposition, tsubstr), _substr),  # handled
        CPrimitive("GetSpan", arrow(tregex, tindex, tboundary, tregex, tindex, tboundary, tsubstr), _getspan, _getspan_const)  #TODO constraint
    ] + [
        #nestings
        # One GetToken primitive per (type, index) pair; the upper index
        # bound max_index is exclusive here.
        CPrimitive("GetToken" + name + str(i), tnesting, _gettoken(tp, i), _gettoken_const(tp, i))
        for name, tp in types.items() for i in range(-max_index, max_index)
    ] + [
        CPrimitive("ToCase_ProperCase", tnesting, lambda x: x.title(), (defaultdict(int, {r'[A-Z][a-z]+': 1}), 1)),
        CPrimitive("ToCase_AllCapsCase", tnesting, lambda x: x.upper(), (defaultdict(int, {r'[A-Z]': 1}), 1)),
        CPrimitive("ToCase_LowerCase", tnesting, lambda x: x.lower(), (defaultdict(int, {r'[a-z]': 1}), 1))
    ] + [
        # One Replace primitive per ordered pair of delimiters; pairs are
        # skipped when both values are the same object (identity test).
        CPrimitive("Replace_" + name1 + name2, tnesting, _replace(char1, char2), (defaultdict(int, {char1: 1}), 1))
        for name1, char1 in delim_dict.items()
        for name2, char2 in delim_dict.items()
        if char1 is not char2
    ] + [
        #CPrimitive("Trim", tnesting, _trim), #TODO
    ] + [
        CPrimitive("GetUpTo" + name, tnesting, _getupto(reg), (defaultdict(int, {reg: 1}), 1))
        for name, reg in regexes.items()
    ] + [
        CPrimitive("GetFrom" + name, tnesting, _getfrom(reg), (defaultdict(int, {reg: 1}), 1))
        for name, reg in regexes.items()
    ] + [
        # Index 0 is excluded for GetFirst.
        CPrimitive("GetFirst_" + name + str(i), tnesting, _getfirst(tp, i), (defaultdict(int, {tp: i}), i + 1 if i >= 0 else abs(i)))
        for name, tp in types.items()
        for i in list(range(-max_index, 0)) + list(range(1, max_index + 1))
    ] + [
        CPrimitive("GetAll_" + name, tnesting, _getall(reg), (defaultdict(int, {reg: 1}), 1))
        for name, reg in types.items()
    ] + [
        #regexes
        CPrimitive("type_to_regex", arrow(ttype, tregex), lambda x: x),  #TODO also make disappear
        CPrimitive("delimiter_to_regex", arrow(tdelimiter, tregex), lambda x: re.escape(x))  #TODO also make disappear
    ] + [
        #types
        CPrimitive("Number", ttype, r'\d+', r'\d+'),  #TODO
        CPrimitive("Word", ttype, r'\w+', r'\w+'),  #TODO
        CPrimitive("Alphanum", ttype, r'\w', r'\w'),  #TODO
        CPrimitive("PropCase", ttype, r'[A-Z][a-z]+', r'[A-Z][a-z]+'),  #TODO
        CPrimitive("AllCaps", ttype, r'[A-Z]', r'[A-Z]'),  #TODO
        CPrimitive("Lower", ttype, r'[a-z]', r'[a-z]'),  #TODO
        CPrimitive("Digit", ttype, r'\d', r'\d'),  #TODO
        CPrimitive("Char", ttype, r'.', r'.')  #TODO
    ] + [
        #Cases
        # CPrimitive("ProperCase", tcase, .title()), #TODO
        # CPrimitive("AllCapsCase", tcase, .upper()), #TODO
        # CPrimitive("LowerCase", tcase, .lower()) #TODO
    ] + [
        #positions
        CPrimitive("position" + str(i), tposition, i, (defaultdict(int), i + 1 if i >= 0 else abs(i)))
        for i in range(-max_len, max_len + 1)  #deal with indicies
    ] + [
        #indices
        CPrimitive("index" + str(i), tindex, i, i)
        for i in range(-max_index, max_index + 1)  #deal with indicies
    ] + [
        #characters
        # Characters that are keys of `disallowed` get an explicit name in
        # the next group instead.
        CPrimitive(i, tcharacter, i, (defaultdict(int, {i: 1}), 1))
        for i in printable[:-5] if i not in disallowed
    ] + [
        CPrimitive(name, tcharacter, char, (defaultdict(int, {char: 1}), 1))
        for char, name in disallowed.items()  # NB: disallowed is reversed
    ] + [
        #delimiters
        CPrimitive("delim_" + name, tdelimiter, char, char)
        for name, char in delim_dict.items()
    ] + [
        #boundaries
        CPrimitive("End", tboundary, "End"),
        CPrimitive("Start", tboundary, "Start")
    ]
def bootstrapTarget():
    """Return the primitives the bootstrapping procedure is hoped to learn, plus built-ins.

    The "learned" group is ``map``, ``unfold``, ``range``, ``index``,
    ``fold`` and ``length``; the built-in group is ``if``, ``+``, ``-``,
    ``empty``, ``cons``, ``car``, ``cdr`` and ``empty?``; the integer
    constants 0 and 1 come last.
    """
    # learned primitives
    learned = [
        Primitive("map", arrow(arrow(t0, t1), tlist(t0), tlist(t1)), _map),
        Primitive("unfold",
                  arrow(t0, arrow(t0, tbool), arrow(t0, t1), arrow(t0, t0),
                        tlist(t1)),
                  _unfold),
        Primitive("range", arrow(tint, tlist(tint)), _range),
        Primitive("index", arrow(tint, tlist(t0), t0), _index),
        Primitive("fold", arrow(tlist(t0), t1, arrow(t0, t1, t1), t1), _fold),
        Primitive("length", arrow(tlist(t0), tint), len),
    ]

    # built-ins
    builtins = [
        Primitive("if", arrow(tbool, t0, t0, t0), _if),
        Primitive("+", arrow(tint, tint, tint), _addition),
        Primitive("-", arrow(tint, tint, tint), _subtraction),
        Primitive("empty", tlist(t0), []),
        Primitive("cons", arrow(t0, tlist(t0), tlist(t0)), _cons),
        Primitive("car", arrow(tlist(t0), t0), _car),
        Primitive("cdr", arrow(tlist(t0), tlist(t0)), _cdr),
        Primitive("empty?", arrow(tlist(t0), tbool), _isEmpty),
    ]

    constants = [Primitive(str(j), tint, j) for j in range(2)]
    return learned + builtins + constants