def makeNewTasks(include_only=None):
    """Build one regex task per background column of the filtered CSV dataset.

    Loads ``csv_filtered_all_background_novel.p`` (a dill file) from the data
    directory and converts the ``'train'`` examples of every entry stored
    under ``'background'`` into a ``Task`` named ``"Data column no. <index>"``.

    include_only: optional container of column indices. When truthy, only
        the columns whose index it contains are converted; otherwise every
        column is converted.

    Returns the tasks as a list, in column order.
    """
    path = os.path.join(get_data_dir(),
                        "csv_filtered_all_background_novel.p")
    with open(path, 'rb') as handle:
        data = dill.load(handle)

    # (index, column) pairs, optionally narrowed to the requested indices.
    columns = enumerate(data['background'])
    if include_only:
        columns = ((i, column) for i, column in columns if i in include_only)

    regextasks = []
    for i, column in columns:
        regextasks.append(
            Task("Data column no. " + str(i), arrow(tpregex, tpregex),
                 [((), example) for example in column['train']]))
    return regextasks
def regexHeldOutExamples(task, include_only=None):
    """Return the held-out (``'test'``) examples of the column named like `task`.

    The module-level ``REGEXTASKS`` cache maps task names to example lists.
    While it is ``None`` the first call fills it from
    ``csv_filtered_all_background_novel.p``; later calls only look up
    ``task.name`` in it.

    task: any object with a ``name`` attribute of the form used when the
        cache was built (``"Data column no. <index>"``).
    include_only: optional container of column indices; consulted only on
        the call that builds the cache.

    Raises KeyError when ``task.name`` is not in the cache.
    """
    global REGEXTASKS
    if REGEXTASKS is None:
        path = os.path.join(get_data_dir(),
                            "csv_filtered_all_background_novel.p")
        with open(path, 'rb') as handle:
            data = dill.load(handle)

        # (index, column) pairs, optionally narrowed to the requested indices.
        columns = enumerate(data['background'])
        if include_only:
            columns = ((i, _task) for i, _task in columns
                       if i in include_only)

        heldout = []
        for i, _task in columns:
            heldout.append(
                Task("Data column no. " + str(i), arrow(tpregex, tpregex),
                     [((), example) for example in _task['test']]))

        REGEXTASKS = {t.name: t.examples for t in heldout}
    return REGEXTASKS[task.name]
Exemple #3
0
def manualLogoTask(name,
                   expression,
                   proto=False,
                   needToTrain=False,
                   supervise=False,
                   lambdaCalculus=False):
    """Build a LOGO drawing task from a hand-written program.

    name: task name.
    expression: program source; parsed with ``Program.parse`` when
        `lambdaCalculus` is true, otherwise with ``parseLogo``.
    proto: stored on the task as ``proto`` and inside ``specialTask``.
    needToTrain: stored on the task as ``mustTrain``.
    supervise: when true, the parsed program is attached as
        ``supervisedSolution``.
    lambdaCalculus: selects the parser (see `expression`).

    Returns a ``Task`` of type turtle -> turtle whose single example output
    is the low-resolution rendering; the high-resolution rendering is
    attached as ``highresolution`` and the drawing cost (scaled by 1.05) is
    stored in ``specialTask``.
    """
    p = Program.parse(expression) if lambdaCalculus else parseLogo(expression)
    from dreamcoder.domains.logo.logoPrimitives import primitives
    from dreamcoder.grammar import Grammar
    g = Grammar.uniform(primitives, continuationType=turtle)
    gp = Grammar.uniform(primitives)
    try:
        l = g.logLikelihood(arrow(turtle, turtle), p)
        lp = gp.logLikelihood(arrow(turtle, turtle), p)
        assert l >= lp
        eprint(name, -l, "nats")
    except Exception:
        # Best-effort diagnostic only: any ordinary failure (including the
        # assertion above) is reported and ignored.  KeyboardInterrupt and
        # SystemExit are deliberately NOT swallowed.
        eprint("WARNING: could not calculate likelihood of manual logo", p)

    # Render at both resolutions, retrying for as long as either rendering
    # reports a timeout.  NOTE(review): there is no upper bound on retries.
    attempts = 0
    while True:
        [output, highresolution] = drawLogo(p,
                                            p,
                                            resolution=[28, 128],
                                            cost=True)
        if output == "timeout" or highresolution == "timeout":
            attempts += 1
        else:
            break
    if attempts > 0:
        eprint(
            f"WARNING: Took {attempts} attempts to render task {name} within timeout"
        )

    # Each rendering is indexed as [0] = pixels, [1] = cost; both
    # resolutions must agree on the cost.
    cost = output[1]
    output = output[0]
    assert highresolution[1] == cost
    highresolution = highresolution[0]

    shape = list(map(int, output))
    highresolution = list(map(float, highresolution))
    # ``([0])`` in the original is just the list ``[0]`` (no tuple); kept as is.
    t = Task(name, arrow(turtle, turtle), [([0], shape)])
    t.mustTrain = needToTrain
    t.proto = proto
    t.specialTask = ("LOGO", {"proto": proto, "cost": cost * 1.05})

    t.highresolution = highresolution

    if supervise:
        t.supervisedSolution = p

    return t
def McCarthyPrimitives():
    """Return the integer primitive set modelled on McCarthy's 1959 Lisp.

    The list holds ``if``, both primitive-recursion combinators,
    ``positive?``, ``incr``, ``decr`` and the integer constants 0 and 1.
    """
    library = [
        Primitive("if", arrow(tbool, t0, t0, t0), _if),
        primitiveRecursion1,
        primitiveRecursion2,
        Primitive("positive?", arrow(tint, tbool), _positive),
        Primitive("incr", arrow(tint, tint), _succ),
        Primitive("decr", arrow(tint, tint), _desc),
    ]
    # Integer literals come last, exactly as in the concatenated original.
    for j in range(2):
        library.append(Primitive(str(j), tint, j))
    return library
Exemple #5
0
def easyWordsPrimitives():
    """Return the reduced regex primitive set used for word-like data.

    One ``string_<c>`` constant is created for each character of
    ``printable[10:62]`` that is not in ``disallowed_list`` (presumably the
    ASCII letters of ``string.printable`` -- confirm against the import),
    followed by the character classes d/s/l/u and the kleene, plus, maybe,
    alt and concat combinators.
    """
    library = []
    for i in printable[10:62]:
        if i in disallowed_list:
            continue
        library.append(Primitive("string_" + i, tpregex, pregex.String(i)))

    character_classes = [
        Primitive("r_d", tpregex, pregex.d),
        Primitive("r_s", tpregex, pregex.s),
        Primitive("r_l", tpregex, pregex.l),
        Primitive("r_u", tpregex, pregex.u),
    ]
    combinators = [
        Primitive("r_kleene", arrow(tpregex, tpregex), _kleene),
        Primitive("r_plus", arrow(tpregex, tpregex), _plus),
        Primitive("r_maybe", arrow(tpregex, tpregex), _maybe),
        Primitive("r_alt", arrow(tpregex, tpregex, tpregex), _alt),
        Primitive("r_concat", arrow(tpregex, tpregex, tpregex), _concat),
    ]
    return library + character_classes + combinators
Exemple #6
0
def retrieveJSONTasks(filename, features=False):
    """Load tasks from a JSON file.

    The file must contain a list of objects of the form:
        {"name": str,
         "type": {"input" : bool|int|list-of-bool|list-of-int,
                  "output": bool|int|list-of-bool|list-of-int},
         "examples": [{"i": data, "o": data}]}

    filename: path of the JSON file.
    features: when truthy, each task also receives
        ``list_features(examples)`` as its ``features``; otherwise ``None``.

    Returns one ``Task`` (built with ``cache=False``) per JSON object, in
    file order.  Raises KeyError for a type name outside the four above.
    """
    with open(filename, "r") as f:
        loaded = json.load(f)

    # JSON type names -> type objects.
    TP = {
        "bool": tbool,
        "int": tint,
        "list-of-bool": tlist(tbool),
        "list-of-int": tlist(tint),
    }

    def examples_of(item):
        # Every example is a one-argument call: ((input,), output).
        return [((ex["i"], ), ex["o"]) for ex in item["examples"]]

    tasks = []
    for item in loaded:
        tasks.append(
            Task(
                item["name"],
                arrow(TP[item["type"]["input"]], TP[item["type"]["output"]]),
                examples_of(item),
                features=(list_features(examples_of(item))
                          if features else None),
                cache=False,
            ))
    return tasks
def makeTask(name, f):
    """Sample `f` on a fixed grid and wrap the usable points in a task.

    `f` is evaluated at x = -5.00, -4.99, ..., 4.99.  Points where `f`
    raises an ordinary exception, or where ``abs(f(x))`` is not below 10,
    are discarded.  If at least 50 points survive, 50 of them (evenly
    strided) become the examples of a ``DifferentiableTask`` of type
    real -> real, and `f` is attached to the task as ``t.f``.

    name: task name.
    f: callable of one float.

    Returns the task, or ``None`` when fewer than 50 points are usable.
    """
    xs = [x / 100. for x in range(-500, 500)]

    maximum = 10

    N = 50

    inputs = []
    outputs = []
    for x in xs:
        try:
            y = f(x)
        except Exception:
            # The function is undefined here (e.g. division by zero): skip
            # the point.  KeyboardInterrupt/SystemExit are not swallowed.
            continue
        if abs(y) < maximum:
            inputs.append(float(x))
            outputs.append(float(y))

    if len(inputs) < N:
        return None

    ex = list(zip(inputs, outputs))
    # Take every k-th point so the N examples span the whole sampled range.
    ex = ex[::len(ex) // N][:N]
    t = DifferentiableTask(name,
                           arrow(treal, treal),
                           [((x, ), y) for x, y in ex],
                           BIC=1.,
                           restarts=360,
                           steps=50,
                           likelihoodThreshold=-0.05,
                           temperature=0.1,
                           maxParameters=6,
                           loss=squaredErrorLoss)
    t.f = f
    return t
Exemple #8
0
def dreamFromGrammar(g, directory, N=100):
    """Render sampled (or supplied) LOGO programs into `directory`.

    g: either a ``Grammar``, in which case `N` programs of type
        turtle -> turtle are sampled from it (samples that come back as
        ``None`` are dropped), or an existing collection of programs that
        is used as is.
    directory: path prefix for all output files.
    N: number of sampling attempts when `g` is a ``Grammar``.

    For program number ``n`` three images are requested from ``drawLogo``
    (``n.png``, ``n_pretty.png``, ``n_smooth_pretty.png``) and the program
    text is written to ``n.dream``.
    """
    if isinstance(g, Grammar):
        programs = []
        for _ in range(N):
            p = g.sample(arrow(turtle, turtle), maximumDepth=20)
            if p is not None:
                programs.append(p)
    else:
        programs = g

    count = len(programs)
    plain_files = [f"{directory}/{n}.png" for n in range(count)]
    pretty_files = [f"{directory}/{n}_pretty.png" for n in range(count)]
    smooth_files = [
        f"{directory}/{n}_smooth_pretty.png"
        for n in range(count)
    ]

    # Settings shared by all three rendering passes.
    shared = dict(resolution=512, timeout=1)
    drawLogo(*programs,
             pretty=False,
             smoothPretty=False,
             filenames=plain_files,
             **shared)
    drawLogo(*programs,
             pretty=True,
             smoothPretty=False,
             filenames=pretty_files,
             **shared)
    drawLogo(*programs,
             pretty=False,
             smoothPretty=True,
             filenames=smooth_files,
             **shared)

    for n, p in enumerate(programs):
        with open(f"{directory}/{n}.dream", "w") as handle:
            handle.write(str(p))
def makeOldTasks():
    """Build one regex task per column of ``data_filtered.json``.

    The JSON file (read from the data directory) is indexed by position;
    entry ``i`` supplies the example strings of the task named
    ``"Luke data column no.<i>"``.

    Returns the list of tasks in file order.
    """
    taskfile = os.path.join(get_data_dir(), 'data_filtered.json')
    with open(taskfile) as f:
        task_list = json.load(f)

    # Every column becomes a task; nothing is filtered out.
    regextasks = []
    for i in range(len(task_list)):
        regextasks.append(
            Task("Luke data column no." + str(i), arrow(tpregex, tpregex),
                 [((), example) for example in task_list[i]]))
    return regextasks
def makeNumberTasks():
    """Build regex tasks for the hand-selected numeric data columns.

    Loads ``regex_data_csv_900.p`` (a dill file) from the data directory;
    ``data[0]`` is the sequence of columns, each an iterable of example
    strings.  Only the columns whose index appears in
    ``decimals_pos_neg_dollar`` become tasks, named
    ``"Data column no. <index>"``.

    Columns listed in ``train_list`` (currently none) are flagged with
    ``mustTrain = True``.  The flag is applied by column index; the previous
    code used those indices as positions in the already-filtered task list,
    which would have flagged the wrong tasks (or raised IndexError) as soon
    as ``train_list`` became non-empty.

    Returns the selected tasks in column order.
    """
    taskfile = os.path.join(get_data_dir(), "regex_data_csv_900.p")

    with open(taskfile, 'rb') as handle:
        data = dill.load(handle)

    tasklist = data[0]  # the data columns

    # match_col(data[0],'\d*\.\d*')
    # Kept for reference only: this selection is not currently used.
    raw_decimals = [
        121, 122, 163, 164, 165, 170, 172, 173, 175, 178, 218, 228, 230, 231,
        252, 253, 254, 258, 259, 305, 320, 330, 334, 340, 348, 350, 351, 352,
        353, 355, 357, 358, 361, 363, 364, 371, 380, 382, 409, 410, 411, 447,
        448, 449, 450, 458, 469, 471, 533, 562, 564
    ]

    decimals_pos_neg_dollar = [
        3, 4, 5, 6, 7, 13, 16, 24, 27, 28, 29, 30, 31, 32, 33, 34, 37, 38, 39,
        40, 53, 54, 55, 57, 58, 60, 61, 63, 64, 65, 66, 68, 69, 70, 71, 73, 74,
        77, 78, 80, 81, 103, 104, 105, 106, 107, 109, 110, 111, 112, 113, 114,
        115, 116, 117, 118, 119, 121, 122, 123, 124, 125, 126, 128, 129, 131,
        132, 134, 135, 139, 146, 153, 154, 155, 156, 157, 158, 159, 160, 161,
        162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        176, 177, 178, 180, 181, 182, 183, 184, 185, 186, 193, 194, 195, 204,
        205, 207, 209, 210, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221,
        223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 249, 250, 251, 252,
        253, 254, 255, 256, 258, 259, 260, 261, 263, 266, 267, 270, 271, 272,
        277, 299, 301, 302, 305, 306, 307, 309, 312, 313, 315, 319, 320, 324,
        326, 327, 330, 334, 340, 348, 350, 351, 352, 353, 354, 355, 356, 357,
        358, 361, 362, 363, 364, 368, 371, 373, 377, 380, 382, 400, 401, 402,
        403, 405, 406, 409, 410, 411, 413, 435, 439, 446, 447, 448, 449, 450,
        451, 452, 453, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466,
        469, 470, 471, 477, 498, 500, 502, 503, 507, 512, 518, 519, 520, 532,
        533, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 564, 565, 572,
        577
    ]

    # Comma-separated number columns, match_col(data[0],'(\d*,?\d*)+'), were
    # planned but never selected; the empty placeholder lists were dropped.

    #full_list = test_list + train_list
    train_list = []
    full_list = decimals_pos_neg_dollar

    # Sets give constant-time membership tests while scanning the columns.
    selected = frozenset(full_list)
    must_train = frozenset(train_list)

    regextasks = []
    for i, task in enumerate(tasklist):
        if i not in selected:
            continue
        regextask = Task("Data column no. " + str(i),
                         arrow(tpregex, tpregex),
                         [((), example) for example in task])
        if i in must_train:
            regextask.mustTrain = True
        regextasks.append(regextask)

    return regextasks
Exemple #11
0
def basePrimitives():
    """Return the full regex primitive set.

    In order: one ``string_<c>`` constant per character of
    ``printable[:-4]`` that is not in ``disallowed_list``; one
    ``string_<name>`` constant per ``(char, name)`` pair of ``disallowed``;
    the character classes dot/d/s/w/l/u; and the kleene, plus, maybe, alt
    and concat combinators.
    """
    library = []
    for i in printable[:-4]:
        if i not in disallowed_list:
            library.append(Primitive("string_" + i, tpregex, pregex.String(i)))

    # Characters that cannot appear in a primitive name get a spelled-out one.
    for char, name in disallowed:
        library.append(
            Primitive("string_" + name, tpregex, pregex.String(char)))

    library.extend([
        Primitive("r_dot", tpregex, pregex.dot),
        Primitive("r_d", tpregex, pregex.d),
        Primitive("r_s", tpregex, pregex.s),
        Primitive("r_w", tpregex, pregex.w),
        Primitive("r_l", tpregex, pregex.l),
        Primitive("r_u", tpregex, pregex.u),
        Primitive("r_kleene", arrow(tpregex, tpregex), _kleene),
        Primitive("r_plus", arrow(tpregex, tpregex), _plus),
        Primitive("r_maybe", arrow(tpregex, tpregex), _maybe),
        Primitive("r_alt", arrow(tpregex, tpregex, tpregex), _alt),
        Primitive("r_concat", arrow(tpregex, tpregex, tpregex), _concat),
    ])
    return library
Exemple #12
0
def matchEmpericalNoLetterPrimitives(corpus):
    """Return a zero-argument factory for a letter-free regex primitive set.

    Calling the returned function builds, in order: ``empty_string``; one
    ``string_<c>`` constant per character of ``printable[:-4]`` that is
    neither in ``disallowed_list`` nor in ``printable[10:62]``; one
    ``string_<name>`` constant per ``(char, name)`` pair of ``disallowed``;
    ``r_dot`` and ``r_d`` built from `corpus` via ``emp_dot_no_letter`` and
    ``emp_d``; ``r_s``; and the kleene, alt and concat combinators.

    corpus: passed through to ``emp_dot_no_letter`` and ``emp_d`` each time
        the factory is called.
    """

    def build():
        library = [Primitive("empty_string", tpregex, pregex.String(""))]

        # The exclusion list does not depend on the character being tested.
        excluded = disallowed_list + list(printable[10:62])
        for i in printable[:-4]:
            if i not in excluded:
                library.append(
                    Primitive("string_" + i, tpregex, pregex.String(i)))

        for char, name in disallowed:
            library.append(
                Primitive("string_" + name, tpregex, pregex.String(char)))

        library.extend([
            Primitive("r_dot", tpregex, emp_dot_no_letter(corpus)),
            Primitive("r_d", tpregex, emp_d(corpus)),
            Primitive("r_s", tpregex, pregex.s),
            Primitive("r_kleene", arrow(tpregex, tpregex), _kleene),
            Primitive("r_alt", arrow(tpregex, tpregex, tpregex), _alt),
            Primitive("r_concat", arrow(tpregex, tpregex, tpregex), _concat),
        ])
        return library

    return build
def McCarthyPrimitives():
    """Return a minimal integer primitive set modelled on McCarthy's 1959 Lisp.

    The list holds the constant 0, ``incr``, ``decr0``, ``if0`` and the
    second primitive-recursion combinator.
    """
    zero = Primitive("0", tint, 0)
    successor = Primitive("incr", arrow(tint, tint), _incr)
    predecessor = Primitive("decr0", arrow(tint, tint), _decr0)
    # All four arguments of if0 share one type variable (t0).
    conditional = Primitive("if0", arrow(t0, t0, t0, t0), _if0)
    return [
        zero,
        successor,
        predecessor,
        conditional,
        primitiveRecursion2,
    ]
def makeWordTasks():
    """Build regex tasks for the hand-selected word-like data columns.

    Loads ``regex_data_csv_900.p`` (a dill file) from the data directory;
    ``data[0]`` is the sequence of columns, each an iterable of example
    strings.  A column becomes a task named ``"Data column no. <index>"``
    when its index appears in any of the selection lists below (the lists
    overlap; each column is still emitted once).

    Columns listed in ``train_list`` (currently none) are flagged with
    ``mustTrain = True``.  The flag is applied by column index; the previous
    code used those indices as positions in the already-filtered task list,
    which would have flagged the wrong tasks (or raised IndexError) as soon
    as ``train_list`` became non-empty.

    Returns the selected tasks in column order.
    """
    taskfile = os.path.join(get_data_dir(), "regex_data_csv_900.p")

    with open(taskfile, 'rb') as handle:
        data = dill.load(handle)

    tasklist = data[0]  # the data columns

    all_upper = [0, 2, 8, 9, 10, 11, 12, 17, 18, 19, 20, 22]
    all_lower = [1]

    # match_col(data[0],'\\u(\l+)')
    one_capital_lower_plus = [
        144, 200, 241, 242, 247, 296, 390, 392, 444, 445, 481, 483, 485, 489,
        493, 542, 549, 550, 581
    ]

    #match_col(data[0],'(\l ?)+')
    lower_with_maybe_spaces = [
        1, 42, 47, 99, 100, 102, 201, 246, 248, 293, 294, 345, 437, 545, 590
    ]

    #match_col(data[0],'(\\u\l+ ?)+')
    capital_then_lower_maybe_spaces = [
        144, 200, 241, 242, 247, 296, 390, 392, 395, 438, 444, 445, 481, 483,
        484, 485, 487, 489, 493, 494, 542, 546, 549, 550, 578, 581, 582, 588,
        591, 624, 629
    ]

    #match_col(data[0],'(\\u+ ?)+')
    all_caps_spaces = [
        0, 2, 8, 9, 10, 11, 12, 17, 18, 19, 20, 22, 25, 26, 35, 36, 43, 45, 46,
        49, 50, 52, 56, 59, 87, 89, 95, 101, 140, 147, 148, 149, 199, 332, 336,
        397, 491, 492, 495, 580, 610
    ]

    #one_capital_and_lower = [566, 550, 549, 542, 505, 493, 494, 489, 488, 485, 483, 481, 445, 444, 438, 296, 241, 242, 200, ]
    #all_lower_with_a_space = [545]
    #all_lower_maybe_space = [534]
    #one_capital_lower_maybe_spaces = [259, 262, 263, 264]

    #full_list = test_list + train_list
    train_list = []
    full_list = (all_upper + all_lower + one_capital_lower_plus +
                 lower_with_maybe_spaces + capital_then_lower_maybe_spaces +
                 all_caps_spaces)

    # Sets give constant-time membership tests while scanning the columns.
    selected = frozenset(full_list)
    must_train = frozenset(train_list)

    regextasks = []
    for i, task in enumerate(tasklist):
        if i not in selected:
            continue
        regextask = Task("Data column no. " + str(i),
                         arrow(tpregex, tpregex),
                         [((), example) for example in task])
        if i in must_train:
            regextask.mustTrain = True
        regextasks.append(regextask)

    return regextasks
Exemple #15
0
def demoLogoTasks():
    """Write demo artefacts for the LOGO domain under ``/tmp``.

    Steps, in order:
      1. sample up to 1000 programs from a uniform grammar, save their text
         as ``/tmp/dreams_0/<n>.dream`` and request pretty renderings;
      2. build the tasks named on the command line (or ``'all'``), montage
         them, save each high-resolution image and a colour-negated copy;
      3. save and dilate the images of ``dSLDemo()``;
      4. montage a shuffled subset of the must-train tasks and the
         rotational-symmetry demo.

    NOTE(review): ``scipy.misc.imsave`` was removed in SciPy 1.2, so this
    function needs an older SciPy -- confirm the pinned version.
    """
    import scipy.misc
    import numpy as np

    def as_rows(a):
        # Cut the flat pixel list into rows of width floor(sqrt(len(a))).
        w = int(len(a)**0.5)
        return np.array([a[i:i + w] for i in range(0, len(a), w)])

    g0 = Grammar.uniform(primitives, continuationType=turtle)
    eprint("dreaming into /tmp/dreams_0...")
    N = 1000
    programs = []
    for _ in range(N):
        p = g0.sample(arrow(turtle, turtle), maximumDepth=20)
        if p is not None:
            programs.append(p)

    os.system("mkdir  -p /tmp/dreams_0")
    for n, p in enumerate(programs):
        with open(f"/tmp/dreams_0/{n}.dream", "w") as handle:
            handle.write(str(p))
    pretty_files = [
        f"/tmp/dreams_0/{n}_pretty.png" for n in range(len(programs))
    ]
    drawLogo(*programs,
             pretty=True,
             smoothPretty=False,
             resolution=512,
             filenames=pretty_files,
             timeout=1)

    # Task groups come from the command line; default to every group.
    requested = sys.argv[1:] if len(sys.argv) > 1 else ['all']
    tasks = makeTasks(requested, proto=False)
    montageTasks(tasks, columns=16, testTrain=True)

    for n, t in enumerate(tasks):
        scipy.misc.imsave('/tmp/logo%d.png' % n, as_rows(t.highresolution))
        # Replace characters that are awkward in file names.
        logo_safe_name = t.name.replace("=", "_").replace(' ', '_').replace(
            '/', '_').replace("-", "_") + ".png"
        os.system(
            f"convert /tmp/logo{n}.png -channel RGB -negate /tmp/{logo_safe_name}"
        )
    eprint(len(tasks), "tasks")
    eprint(sum(t.mustTrain for t in tasks), "need to be trained on")

    for t in dSLDemo():
        scipy.misc.imsave('/tmp/logoDemo%s.png' % t.name,
                          as_rows(t.highresolution))
        os.system(
            f"convert /tmp/logoDemo{t.name}.png -morphology Dilate Octagon /tmp/logoDemo{t.name}_dilated.png"
        )

    tasks = [t for t in tasks if t.mustTrain]
    random.shuffle(tasks)
    montageTasks(tasks[:16 * 3], "subset", columns=16)

    montageTasks(rotationalSymmetryDemo(), "rotational")
Exemple #16
0
 def tasksOfPrograms(self, ps, types):
     """Render programs and wrap each successful rendering in a task.

     ps: programs to draw at resolution 128.
     types: unused here.

     Returns a list aligned with the renderings: ``None`` where
     ``drawLogo`` produced a string instead of an image, otherwise a
     "Helm" task (no examples) carrying the image as ``highresolution``.
     """
     images = drawLogo(*ps, resolution=128)
     # A single program yields a bare result rather than a list of results.
     if len(ps) == 1:
         images = [images]

     def wrap(image):
         if isinstance(image, str):
             return None
         t = Task("Helm", arrow(turtle, turtle), [])
         t.highresolution = image
         return t

     return [wrap(image) for image in images]
Exemple #17
0
    def make_task(self):
        """Turn this entry's weighted examples into a Task, if it still needs one.

        Returns ``None`` when the entry is already solved or has no
        examples.  Otherwise every example ``e`` with weight ``p`` from
        ``self.examples`` is repeated ``round(p * n)`` times, where ``n`` is
        ``min(self.total_examples, 100)``, and the first ``n`` repetitions
        form the examples of a task over ``self.arity + 1`` ints, named
        after ``self.idx``.
        """
        if self.solved or self.total_examples == 0:
            return None

        task_type = arrow(*([tint] * (self.arity + 1)))
        n_examples = min(self.total_examples, 100)

        repeated = []
        for e, p in self.examples:
            copies = int(round(p * n_examples))
            repeated += [e] * copies

        # Rounding can overshoot the budget, so cut back to n_examples.
        return Task(str(self.idx), task_type, repeated[:n_examples])
Exemple #18
0
 def argumentChoices(t):
     """Return the candidate arguments for a parameter of type `t`.

     Turtle parameters take ``Index(0)``; turtle -> turtle parameters take
     ``subprograms``; int, angle and length parameters take the entry keyed
     by ``str(p)`` in the matching special-case table, falling back to the
     general pool.  Any other type has no candidates.  ``p`` and the pools
     come from the enclosing scope.
     """
     if t == turtle:
         return [Index(0)]
     if t == arrow(turtle,turtle):
         return subprograms
     if t == tint:
         return specialNumbers.get(str(p),numbers)
     if t == tangle:
         return specialAngles.get(str(p),angles)
     if t == tlength:
         return specialDistances.get(str(p),distances)
     return []
Exemple #19
0
 def genericType(t):
     """Map a specialised type onto its generic counterpart.

     "real" and "positive" both become ``treal``; "vector" becomes a list
     of reals; "list" and arrow types are converted recursively on their
     arguments.  Any other type fails an assertion.
     """
     kind = t.name
     if kind in ("real", "positive"):
         return treal
     if kind == "vector":
         return tlist(treal)
     if kind == "list":
         element = genericType(t.arguments[0])
         return tlist(element)
     if t.isArrow():
         source = genericType(t.arguments[0])
         target = genericType(t.arguments[1])
         return arrow(source, target)
     assert False, "could not make type generic: %s" % t
Exemple #20
0
def no_length():
    """Return the extended bootstrap primitives with ``length`` removed.

    Same contents as the bootstrap target plus the arithmetic/comparison
    extras, minus any primitive named "length" (a reviewer asked for this
    variant).
    """
    library = []
    for p in bootstrapTarget():
        if p.name == "length":
            continue
        library.append(p)

    library.extend([
        Primitive("*", arrow(tint, tint, tint), _multiplication),
        Primitive("mod", arrow(tint, tint, tint), _mod),
        Primitive("gt?", arrow(tint, tint, tbool), _gt),
        Primitive("eq?", arrow(tint, tint, tbool), _eq),
        Primitive("is-prime", arrow(tint, tbool), _isPrime),
        Primitive("is-square", arrow(tint, tbool), _isSquare),
    ])
    return library
Exemple #21
0
def bootstrapTarget_extra():
    """Return the bootstrap target extended with list-domain extras.

    Appends multiplication, mod, gt?, eq?, is-prime and is-square to the
    primitives produced by ``bootstrapTarget()``.
    """
    base = bootstrapTarget()
    extras = [
        Primitive("*", arrow(tint, tint, tint), _multiplication),
        Primitive("mod", arrow(tint, tint, tint), _mod),
        Primitive("gt?", arrow(tint, tint, tbool), _gt),
        Primitive("eq?", arrow(tint, tint, tbool), _eq),
        Primitive("is-prime", arrow(tint, tbool), _isPrime),
        Primitive("is-square", arrow(tint, tbool), _isSquare),
    ]
    return base + extras
Exemple #22
0
def McCarthyPrimitives():
    """Return the list-domain primitive set modelled on McCarthy's 1959 Lisp.

    In order: the list primitives (empty, cons, car, cdr, empty?), the first
    primitive-recursion combinator, gt?, if, eq?, + and -, and finally the
    integer constants 0 and 1.
    """
    list_operations = [
        Primitive("empty", tlist(t0), []),
        Primitive("cons", arrow(t0, tlist(t0), tlist(t0)), _cons),
        Primitive("car", arrow(tlist(t0), t0), _car),
        Primitive("cdr", arrow(tlist(t0), tlist(t0)), _cdr),
        Primitive("empty?", arrow(tlist(t0), tbool), _isEmpty),
    ]
    control_and_arithmetic = [
        primitiveRecursion1,
        Primitive("gt?", arrow(tint, tint, tbool), _gt),
        Primitive("if", arrow(tbool, t0, t0, t0), _if),
        Primitive("eq?", arrow(tint, tint, tbool), _eq),
        Primitive("+", arrow(tint, tint, tint), _addition),
        Primitive("-", arrow(tint, tint, tint), _subtraction),
    ]
    library = list_operations + control_and_arithmetic
    for j in range(2):
        library.append(Primitive(str(j), tint, j))
    return library
def makeLongTasks():
    """Build a regex task for every column of ``regex_data_csv_900.p``.

    The dill file is read from the data directory; ``data[0]`` holds the
    columns, each an iterable of example strings.  Column ``i`` becomes the
    task ``"Data column no. <i>"``.

    Returns the tasks in column order.
    """
    path = os.path.join(get_data_dir(), "regex_data_csv_900.p")
    with open(path, 'rb') as handle:
        data = dill.load(handle)

    regextasks = []
    for i, column in enumerate(data[0]):
        regextasks.append(
            Task("Data column no. " + str(i), arrow(tpregex, tpregex),
                 [((), example) for example in column]))
    return regextasks
Exemple #24
0
 def make_task(self):
     """Turn the collected examples into a Task, or return None if not ready.

     No task is produced when there are fewer examples than the minimum
     (0 in few-shot mode, else 30 for a positive arity and 10 otherwise),
     when the entry is solved, or when any example's second element is
     None.  With more than 100 examples, every example whose ``self.res``
     flag is falsy is kept, examples with a truthy flag are drawn with
     replacement to fill up to 100, and 100 of the combined examples are
     then sampled.
     """
     min_examples = 10
     if self.arity is not None and self.arity > 0:
         min_examples = 30
     if self.fewshot:
         min_examples = 0
     max_examples = 100

     examples = self.examples
     if len(examples) < min_examples:
         return None
     if self.solved:
         return None
     if None in [x[1] for x in examples]:
         return None

     task_type = arrow(*([tint] * (self.arity + 1)))
     if len(examples) > max_examples:
         # Split by result flag in one pass; relative order is preserved.
         wrong_examples = []
         right_examples = []
         for e, r in zip(examples, self.res):
             if r:
                 right_examples.append(e)
             else:
                 wrong_examples.append(e)
         missing = max_examples - len(wrong_examples)
         right_examples = random.choices(right_examples, k=missing)
         examples = random.sample(wrong_examples + right_examples,
                                  k=max_examples)
     return Task(str(self.idx), task_type, examples)
Exemple #25
0
def algolispPrimitives():
    """Return the primitive set for the AlgoLisp domain.

    In order: the three call builders (``fn_call``, ``lambda1_call``,
    ``lambda2_call``), the symbol converters, the list builders, one
    function primitive per entry of ``fn_lookup`` and one constant
    primitive per entry of ``const_lookup``.
    """

    def tagged_call(tag):
        # Curried builder f -> args -> [tag, [f, *args]]; a non-list
        # argument is treated as a single-element argument list.
        def with_function(f):
            def with_arguments(sx):
                if type(sx)==list:
                    return [tag, [f] + sx]
                return [tag, [f] + [sx]]
            return with_arguments
        return with_function

    def passthrough(x):
        return x

    def singleton(symbol):
        return [symbol]

    def append_symbol(symbol):
        # Appends `symbol`; a non-list accumulator is first wrapped in a list.
        def onto(symbols):
            if type(symbols) == list:
                return symbols + [symbol]
            return [symbols] + [symbol]
        return onto

    library = [
        Primitive("fn_call", arrow(tfunction, tlist(tsymbol), tsymbol), _fn_call),
        Primitive("lambda1_call", arrow(tfunction, tlist(tsymbol), tsymbol), tagged_call("lambda1")),
        Primitive("lambda2_call", arrow(tfunction, tlist(tsymbol), tsymbol), tagged_call("lambda2")),
        # symbol converters:
        # SYMBOL = constant | argument | function_call | function | lambda
        Primitive("symbol_constant", arrow(tconstant, tsymbol), passthrough),
        Primitive("symbol_function", arrow(tfunction, tsymbol), passthrough),
        # list converters
        Primitive('list_init_symbol', arrow(tsymbol, tlist(tsymbol)), singleton),
        Primitive('list_add_symbol', arrow(tsymbol, tlist(tsymbol), tlist(tsymbol)), append_symbol),
    ]
    # functions
    for algo_name, ec_name in fn_lookup.items():
        library.append(Primitive(ec_name, tfunction, algo_name))
    # constants
    for algo_name, ec_name in const_lookup.items():
        library.append(Primitive(ec_name, tconstant, algo_name))
    return library
def makeHandPickedTasks():
    """Build regex tasks for a hand-picked set of data columns.

    Loads ``regex_data_csv_900.p`` (a dill file) from the data directory;
    ``data[0]`` holds the columns, each an iterable of example strings.
    A column becomes the task ``"Data column no. <index>"`` when its index
    is one of the individually listed numbers or falls in one of the
    half-open ranges below.

    Returns the selected tasks in column order.
    """
    path = os.path.join(get_data_dir(), "regex_data_csv_900.p")
    with open(path, 'rb') as handle:
        data = dill.load(handle)

    tasklist = data[0]  # the data columns

    # Individually chosen column indices.
    full_list = [
        209, 218, 222, 223, 224, 225, 226,
        235, 237, 238, 239, 243, 244, 245, 252, 253, 254, 255, 257, 258, 259,
        260, 261, 264, 265, 269, 272, 274,
        295, 297, 300, 303, 304, 305, 306, 310, 311, 312, 314, 315, 316, 320,
        321, 323, 327, 329, 330, 333, 334, 335, 337, 338, 339, 340, 341, 342,
        343, 344,
        361, 369, 373, 379, 380, 382, 387, 403, 405, 407, 408,
        503, 504,
    ]
    # Contiguous runs of chosen columns, as (start, stop) with stop excluded.
    spans = [
        (0, 199), (222, 233), (275, 291), (348, 359), (409, 417), (418, 437),
        (440, 444), (446, 452), (456, 460), (466, 472),
    ]
    for start, stop in spans:
        full_list.extend(range(start, stop))

    regextasks = []
    for i, task in enumerate(tasklist):
        if i in full_list:
            regextasks.append(
                Task("Data column no. " + str(i), arrow(tpregex, tpregex),
                     [((), example) for example in task]))
    return regextasks
Exemple #27
0
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on manipulating sequences of numbers.

    `args` is a dict. The keys "random_seed", "dataset", "maxTasks",
    "primitives", "noLength", "noMap", "noUnfold", "extractor", "hidden" and
    "split" are popped here; "featureExtractor", "outputPrefix" and
    "evaluationTimeout" are added; everything left is forwarded as keyword
    arguments to `explorationCompression`.

    Side effects: seeds the global `random` module, creates a timestamped
    directory under experimentOutputs/list/ and logs progress via `eprint`.
    Raises KeyError for an unknown dataset, primitive-set or extractor name.
    """
    # Everything random below (shuffles, generated examples) depends on this
    # seed, so the order of the following statements matters.
    random.seed(args.pop("random_seed"))

    # Look up the task loader for the requested dataset and call it.
    dataset = args.pop("dataset")
    tasks = {
        "Lucas-old":
        lambda: retrieveJSONTasks("data/list_tasks.json") + sortBootstrap(),
        "bootstrap":
        make_list_bootstrap_tasks,
        "sorting":
        sortBootstrap,
        "Lucas-depth1":
        lambda: retrieveJSONTasks("data/list_tasks2.json")[:105],
        "Lucas-depth2":
        lambda: retrieveJSONTasks("data/list_tasks2.json")[:4928],
        "Lucas-depth3":
        lambda: retrieveJSONTasks("data/list_tasks2.json"),
    }[dataset]()

    # Optionally cap the number of tasks by shuffling and truncating.
    maxTasks = args.pop("maxTasks")
    if maxTasks and len(tasks) > maxTasks:
        necessaryTasks = []  # maxTasks will not consider these
        # NOTE(review): none of the dataset names accepted above starts with
        # "Lucas2.0", so this branch looks unreachable and necessaryTasks
        # stays empty -- confirm whether "Lucas-depth*" was intended.
        if dataset.startswith("Lucas2.0") and dataset != "Lucas2.0-depth1":
            necessaryTasks = tasks[:105]

        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        random.shuffle(tasks)
        del tasks[maxTasks:]
        tasks = necessaryTasks + tasks

    if dataset.startswith("Lucas"):
        # extra tasks for filter
        # Each task gets 15 randomly generated inputs paired with the
        # filtered output.
        tasks.extend([
            Task("remove empty lists",
                 arrow(tlist(tlist(tbool)), tlist(tlist(tbool))),
                 [((ls, ), list(filter(lambda l: len(l) > 0, ls)))
                  for _ in range(15) for ls in [[[
                      random.random() < 0.5
                      for _ in range(random.randint(0, 3))
                  ] for _ in range(4)]]]),
            Task("keep squares", arrow(tlist(tint), tlist(tint)), [
                ((xs, ), list(filter(lambda x: int(math.sqrt(x))**2 == x, xs)))
                for _ in range(15) for xs in [[
                    random.choice([0, 1, 4, 9, 16, 25])
                    if random.random() < 0.5 else random.randint(0, 9)
                    for _ in range(7)
                ]]
            ]),
            Task("keep primes", arrow(tlist(tint), tlist(tint)), [
                ((xs, ),
                 list(
                     filter(
                         lambda x: x in
                         {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37}, xs)))
                for _ in range(15) for xs in [[
                    random.choice([2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37])
                    if random.random() < 0.5 else random.randint(0, 9)
                    for _ in range(7)
                ]]
            ]),
        ])
        # Keep/remove tasks parameterised by the constant i = 0..3.  The
        # lambdas capture `i` late, but each filter is consumed by list()
        # immediately, within the same loop iteration.
        for i in range(4):
            tasks.extend([
                Task("keep eq %s" % i, arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: x == i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]]),
                Task("remove eq %s" % i, arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: x != i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]]),
                Task("keep gt %s" % i, arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: x > i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]]),
                Task("remove gt %s" % i, arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: not x > i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]])
            ])

    def isIdentityTask(t):
        # True when every example has exactly one input equal to its output.
        return all(len(xs) == 1 and xs[0] == y for xs, y in t.examples)

    eprint("Removed", sum(isIdentityTask(t) for t in tasks),
           "tasks that were just the identity function")
    tasks = [t for t in tasks if not isIdentityTask(t)]

    # Pick the primitive set by name, then drop map/unfold/length on request.
    prims = {
        "base": basePrimitives,
        "McCarthy": McCarthyPrimitives,
        "common": bootstrapTarget_extra,
        "noLength": no_length,
        "rich": primitives
    }[args.pop("primitives")]()
    haveLength = not args.pop("noLength")
    haveMap = not args.pop("noMap")
    haveUnfold = not args.pop("noUnfold")
    eprint(f"Including map as a primitive? {haveMap}")
    eprint(f"Including length as a primitive? {haveLength}")
    eprint(f"Including unfold as a primitive? {haveUnfold}")
    baseGrammar = Grammar.uniform([p
                                   for p in prims
                                   if (p.name != "map" or haveMap) and \
                                   (p.name != "unfold" or haveUnfold) and \
                                   (p.name != "length" or haveLength)])

    # Only the "learned" extractor is available; its hidden size is set as a
    # class attribute.
    extractor = {
        "learned": LearnedFeatureExtractor,
    }[args.pop("extractor")]
    extractor.H = args.pop("hidden")

    # One output directory per run, named after the current local time.
    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/list/%s" % timestamp
    os.system("mkdir -p %s" % outputDirectory)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/list" % outputDirectory,
        "evaluationTimeout": 0.0005,
    })

    eprint("Got {} list tasks".format(len(tasks)))
    split = args.pop("split")
    if split:
        # Mark the tasks that must land in the training set before splitting.
        # Tasks rated "some" are grouped by the first word of their name and
        # one randomly chosen task per group is marked.
        train_some = defaultdict(list)
        for t in tasks:
            necessary = train_necessary(t)
            if not necessary:
                continue
            if necessary == "some":
                train_some[t.name.split()[0]].append(t)
            else:
                t.mustTrain = True
        # Sorted keys keep the sequence of random calls reproducible.
        for k in sorted(train_some):
            ts = train_some[k]
            random.shuffle(ts)
            ts.pop().mustTrain = True

        test, train = testTrainSplit(tasks, split)
        # Tasks listed in EASYLISTTASKS are always dropped from the test set.
        if True:
            test = [t for t in test if t.name not in EASYLISTTASKS]

        eprint("Alotted {} tasks for training and {} for testing".format(
            len(train), len(test)))
    else:
        # No split requested: train on everything, test on nothing.
        train = tasks
        test = []

    explorationCompression(baseGrammar, train, testingTasks=test, **args)
Exemple #28
0
def isIntFunction(tp):
    """Tell whether the type `tp` unifies with `arrow(tint, t0)`.

    A fresh `Context` is used for the unification attempt.

    Returns:
        True when the unification succeeds, False when it raises
        `UnificationFailure`. Any other exception propagates.
    """
    try:
        Context().unify(tp, arrow(tint, t0))
    except UnificationFailure:
        unifies = False
    else:
        unifies = True
    return unifies
Exemple #29
0
def robustFillPrimitives(max_len=100, max_index=5):
    """Build the list of `CPrimitive`s for the RobustFill-style string DSL.

    The result is the concatenation of several groups, in this order:
    program/expression combinators, substring constructors, nestings
    (GetToken, ToCase, Replace, GetUpTo, GetFrom, GetFirst, GetAll),
    regex coercions, type regexes, positions, indices, characters,
    delimiters and boundaries.

    The groups are generated from the module-level tables `types`,
    `regexes`, `delim_dict`, `disallowed` and `printable`.

    Args:
        max_len: positions `position-max_len` .. `position{max_len}`
            (inclusive) are generated.
        max_index: bounds the index suffix of the `GetToken*`,
            `GetFirst_*` and `index*` primitives.

    Returns:
        A list of `CPrimitive` objects.

    NOTE(review): the optional fourth `CPrimitive` argument looks like
    constraint information (see the "constraint" TODOs below) -- confirm
    against the `CPrimitive` definition.
    """
    return [
        #CPrimitive("concat2", arrow(texpression, texpression, tprogram), _concat2),
        CPrimitive("concat1", arrow(texpression, tprogram), _concat1),
        CPrimitive("concat_list", arrow(texpression, tprogram, tprogram),
                   _concat_list),
        #expressions
        CPrimitive("Constant", arrow(tcharacter, texpression),
                   lambda x: lambda y: x),  # add a constraint
        CPrimitive("apply", arrow(tnesting, tsubstr, texpression),
                   lambda n: lambda sub: lambda string: n(sub(string))),
        CPrimitive("apply_n", arrow(tnesting, tnesting, texpression),
                   lambda n1: lambda n2: lambda string: n1(n2(string))),
        CPrimitive("expr_n", arrow(tnesting, texpression), lambda x: x),
        CPrimitive("expr_f", arrow(tsubstr, texpression), lambda x: x)
    ] + [
        #substrings
        CPrimitive("SubStr", arrow(tposition, tposition, tsubstr),
                   _substr),  # handled
        CPrimitive("GetSpan",
                   arrow(tregex, tindex, tboundary, tregex, tindex, tboundary,
                         tsubstr), _getspan, _getspan_const)  #TODO constraint
    ] + [
        #nestings
        # NOTE(review): this range stops at max_index - 1, whereas the
        # "index" primitives below include max_index -- confirm intended.
        CPrimitive("GetToken" + name + str(i), tnesting, _gettoken(tp, i),
                   _gettoken_const(tp, i)) for name, tp in types.items()
        for i in range(-max_index, max_index)
    ] + [
        CPrimitive("ToCase_ProperCase", tnesting, lambda x: x.title(),
                   (defaultdict(int, {r'[A-Z][a-z]+': 1}), 1)),
        CPrimitive("ToCase_AllCapsCase", tnesting, lambda x: x.upper(),
                   (defaultdict(int, {r'[A-Z]': 1}), 1)),
        CPrimitive("ToCase_LowerCase", tnesting, lambda x: x.lower(),
                   (defaultdict(int, {r'[a-z]': 1}), 1))
    ] + [
        # One Replace primitive per ordered pair of distinct delimiters.
        # Compare by value: identity (`is not`) on strings only works by
        # accident of interpreter string caching.
        CPrimitive("Replace_" + name1 + name2, tnesting, _replace(
            char1, char2), (defaultdict(int, {char1: 1}), 1))
        for name1, char1 in delim_dict.items()
        for name2, char2 in delim_dict.items() if char1 != char2
    ] + [
        #CPrimitive("Trim", tnesting, _trim), #TODO
    ] + [
        CPrimitive("GetUpTo" + name, tnesting, _getupto(reg),
                   (defaultdict(int, {reg: 1}), 1))
        for name, reg in regexes.items()
    ] + [
        CPrimitive("GetFrom" + name, tnesting, _getfrom(reg),
                   (defaultdict(int, {reg: 1}), 1))
        for name, reg in regexes.items()
    ] + [
        # Index 0 is deliberately skipped: only -max_index..-1 and
        # 1..max_index are generated.
        CPrimitive("GetFirst_" + name + str(i), tnesting, _getfirst(tp, i),
                   (defaultdict(int, {tp: i}), i + 1 if i >= 0 else abs(i)))
        for name, tp in types.items()
        for i in list(range(-max_index, 0)) + list(range(1, max_index + 1))
    ] + [
        CPrimitive("GetAll_" + name, tnesting, _getall(reg),
                   (defaultdict(int, {reg: 1}), 1))
        for name, reg in types.items()
    ] + [
        #regexes
        CPrimitive("type_to_regex", arrow(ttype, tregex),
                   lambda x: x),  #TODO also make disappear
        CPrimitive("delimiter_to_regex", arrow(tdelimiter, tregex),
                   lambda x: re.escape(x))  #TODO also make disappear
    ] + [
        #types
        CPrimitive("Number", ttype, r'\d+', r'\d+'),  #TODO
        CPrimitive("Word", ttype, r'\w+', r'\w+'),  #TODO
        CPrimitive("Alphanum", ttype, r'\w', r'\w'),  #TODO
        CPrimitive("PropCase", ttype, r'[A-Z][a-z]+', r'[A-Z][a-z]+'),  #TODO
        CPrimitive("AllCaps", ttype, r'[A-Z]', r'[A-Z]'),  #TODO
        CPrimitive("Lower", ttype, r'[a-z]', r'[a-z]'),  #TODO
        CPrimitive("Digit", ttype, r'\d', r'\d'),  #TODO
        CPrimitive("Char", ttype, r'.', r'.')  #TODO
    ] + [
        #Cases
        # CPrimitive("ProperCase", tcase, .title()), #TODO
        # CPrimitive("AllCapsCase", tcase, .upper()), #TODO
        # CPrimitive("LowerCase", tcase, .lower()) #TODO
    ] + [
        #positions
        CPrimitive("position" + str(i), tposition, i,
                   (defaultdict(int), i + 1 if i >= 0 else abs(i)))
        for i in range(-max_len, max_len + 1)  #deal with indices
    ] + [
        #indices
        CPrimitive("index" + str(i), tindex, i, i)
        for i in range(-max_index, max_index + 1)  #deal with indices
    ] + [
        #characters
        # presumably `printable` is string.printable and the slice drops
        # trailing whitespace control characters -- TODO confirm
        CPrimitive(i, tcharacter, i, (defaultdict(int, {i: 1}), 1))
        for i in printable[:-5] if i not in disallowed
    ] + [
        CPrimitive(name, tcharacter, char, (defaultdict(int, {char: 1}), 1))
        for char, name in disallowed.items()  # NB: disallowed is reversed
    ] + [
        #delimiters
        CPrimitive("delim_" + name, tdelimiter, char, char)
        for name, char in delim_dict.items()
    ] + [
        #boundaries
        CPrimitive("End", tboundary, "End"),
        CPrimitive("Start", tboundary, "Start")
    ]
Exemple #30
0
def bootstrapTarget():
    """These are the primitives that we hope to learn from the bootstrapping procedure

    Returns a single list made of three groups, in order: the "learned"
    list primitives (map, unfold, range, index, fold, length), the
    built-ins (if, +, -, empty, cons, car, cdr, empty?), and the integer
    constants "0" and "1".
    """
    # learned primitives
    learned = [
        Primitive("map", arrow(arrow(t0, t1), tlist(t0), tlist(t1)), _map),
        Primitive(
            "unfold",
            arrow(t0, arrow(t0, tbool), arrow(t0, t1), arrow(t0, t0),
                  tlist(t1)),
            _unfold),
        Primitive("range", arrow(tint, tlist(tint)), _range),
        Primitive("index", arrow(tint, tlist(t0), t0), _index),
        Primitive("fold", arrow(tlist(t0), t1, arrow(t0, t1, t1), t1), _fold),
        Primitive("length", arrow(tlist(t0), tint), len),
    ]

    # built-ins
    built_in = [
        Primitive("if", arrow(tbool, t0, t0, t0), _if),
        Primitive("+", arrow(tint, tint, tint), _addition),
        Primitive("-", arrow(tint, tint, tint), _subtraction),
        Primitive("empty", tlist(t0), []),
        Primitive("cons", arrow(t0, tlist(t0), tlist(t0)), _cons),
        Primitive("car", arrow(tlist(t0), t0), _car),
        Primitive("cdr", arrow(tlist(t0), tlist(t0)), _cdr),
        Primitive("empty?", arrow(tlist(t0), tbool), _isEmpty),
    ]

    # integer constants 0 and 1
    numerals = [Primitive(str(n), tint, n) for n in (0, 1)]

    return learned + built_in + numerals