Example #1
def manualLogoTask(name,
                   expression,
                   proto=False,
                   needToTrain=False,
                   supervise=False,
                   lambdaCalculus=False):
    p = Program.parse(expression) if lambdaCalculus else parseLogo(expression)
    from dreamcoder.domains.logo.logoPrimitives import primitives
    from dreamcoder.grammar import Grammar
    g = Grammar.uniform(primitives, continuationType=turtle)
    gp = Grammar.uniform(primitives)
    try:
        l = g.logLikelihood(arrow(turtle, turtle), p)
        lp = gp.logLikelihood(arrow(turtle, turtle), p)
        assert l >= lp
        eprint(name, -l, "nats")

    except Exception:
        eprint("WARNING: could not calculate likelihood of manual logo", p)

    attempts = 0
    while True:
        [output, highresolution] = drawLogo(p,
                                            p,
                                            resolution=[28, 128],
                                            cost=True)
        if output == "timeout" or highresolution == "timeout":
            attempts += 1
        else:
            break
    if attempts > 0:
        eprint(
            f"WARNING: Took {attempts} attempts to render task {name} within timeout"
        )

    cost = output[1]
    output = output[0]
    assert highresolution[1] == cost
    highresolution = highresolution[0]

    shape = list(map(int, output))
    highresolution = list(map(float, highresolution))
    t = Task(name, arrow(turtle, turtle), [(([0]), shape)])
    t.mustTrain = needToTrain
    t.proto = proto
    t.specialTask = ("LOGO", {"proto": proto})
    t.specialTask[1]["cost"] = cost * 1.05

    t.highresolution = highresolution

    if supervise:
        t.supervisedSolution = p

    return t
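
# Usage sketch: the task name "identity-demo" and the expression "(lambda $0)"
# are hypothetical placeholders to show the calling convention; any expression
# parseable as a program of type turtle -> turtle would do.
demo = manualLogoTask("identity-demo", "(lambda $0)",
                      needToTrain=True, lambdaCalculus=True)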
Example #2
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on manipulating sequences of numbers.
    """
    random.seed(args.pop("random_seed"))

    tasks = make_list_bootstrap_tasks()
    print(tasks)
    maxTasks = args.pop("maxTasks")
    if maxTasks and len(tasks) > maxTasks:

        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        random.shuffle(tasks)
        del tasks[maxTasks:]

    baseGrammar = Grammar.uniform(McCarthyPrimitives())

    extractor = {
        "learned": LearnedFeatureExtractor,
    }[args.pop("extractor")]
    extractor.H = args.pop("hidden")

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/list/%s" % timestamp
    os.system("mkdir -p %s" % outputDirectory)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/list" % outputDirectory,
        "evaluationTimeout": 0.0005,
    })

    eprint("Got {} list tasks".format(len(tasks)))
    split = args.pop("split")
    if split:
        for t in tasks:
            t.mustTrain = True

        test, train = testTrainSplit(tasks, split)

        eprint("Alotted {} tasks for training and {} for testing".format(
            len(train), len(test)))
    else:
        train = tasks
        test = []

    explorationCompression(baseGrammar, train, testingTasks=test, **args)
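
# Driver sketch: per the docstring, main() consumes the dict returned by
# commandlineArguments(); the keyword values here are illustrative, patterned
# on Example #4 below.
if __name__ == "__main__":
    main(commandlineArguments(enumerationTimeout=200,
                              iterations=1,
                              extras=list_options))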
Example #3
def demoLogoTasks():
    import scipy.misc
    import numpy as np

    g0 = Grammar.uniform(primitives, continuationType=turtle)
    eprint("dreaming into /tmp/dreams_0...")
    N = 1000
    programs = [
        p for _ in range(N)
        for p in [g0.sample(arrow(turtle, turtle), maximumDepth=20)]
        if p is not None
    ]
    os.system("mkdir  -p /tmp/dreams_0")
    for n, p in enumerate(programs):
        with open(f"/tmp/dreams_0/{n}.dream", "w") as handle:
            handle.write(str(p))
    drawLogo(*programs,
             pretty=True,
             smoothPretty=False,
             resolution=512,
             filenames=[
                 f"/tmp/dreams_0/{n}_pretty.png" for n in range(len(programs))
             ],
             timeout=1)

    if len(sys.argv) > 1:
        tasks = makeTasks(sys.argv[1:], proto=False)
    else:
        tasks = makeTasks(['all'], proto=False)
    montageTasks(tasks, columns=16, testTrain=True)
    for n, t in enumerate(tasks):
        a = t.highresolution
        w = int(len(a)**0.5)
        # Note: scipy.misc.imsave was removed in scipy >= 1.2, so this needs an older scipy.
        scipy.misc.imsave('/tmp/logo%d.png' % n,
                          np.array([a[i:i + w] for i in range(0, len(a), w)]))
        logo_safe_name = t.name.replace("=", "_").replace(' ', '_').replace(
            '/', '_').replace("-", "_") + ".png"
        #os.system(f"convert /tmp/logo{n}.png -morphology Dilate Octagon /tmp/{logo_safe_name}")
        os.system(
            f"convert /tmp/logo{n}.png -channel RGB -negate /tmp/{logo_safe_name}"
        )
    eprint(len(tasks), "tasks")
    eprint(sum(t.mustTrain for t in tasks), "need to be trained on")

    for t in dSLDemo():
        a = t.highresolution
        w = int(len(a)**0.5)
        scipy.misc.imsave('/tmp/logoDemo%s.png' % t.name,
                          np.array([a[i:i + w] for i in range(0, len(a), w)]))
        os.system(
            f"convert /tmp/logoDemo{t.name}.png -morphology Dilate Octagon /tmp/logoDemo{t.name}_dilated.png"
        )

    tasks = [t for t in tasks if t.mustTrain]
    random.shuffle(tasks)
    montageTasks(tasks[:16 * 3], "subset", columns=16)

    montageTasks(rotationalSymmetryDemo(), "rotational")
Example #4
    def __init__(self):
        args = commandlineArguments(enumerationTimeout=200,
                                    activation='tanh',
                                    iterations=1,
                                    recognitionTimeout=3600,
                                    a=3,
                                    maximumFrontier=5,
                                    topK=2,
                                    pseudoCounts=30.0,
                                    helmholtzRatio=0.5,
                                    structurePenalty=1.,
                                    CPUs=min(numberOfCPUs(), 8),
                                    extras=list_options)

        args['noConsolidation'] = True
        args.pop("random_seed")
        args['contextual'] = True
        args['biasOptimal'] = True
        args['auxiliaryLoss'] = True
        args['activation'] = "relu"
        args['useDSL'] = False

        extractor = {
            "learned": LearnedFeatureExtractor,
        }[args.pop("extractor")]
        extractor.H = args.pop("hidden")

        timestamp = datetime.datetime.now().isoformat()
        outputDirectory = "tmp/%s" % timestamp
        os.system("mkdir -p %s" % outputDirectory)

        args.update({
            "featureExtractor": extractor,
            "outputPrefix": "%s/hint" % outputDirectory,
            "evaluationTimeout": 0.0005,
        })
        args.pop("maxTasks")
        args.pop("split")

        self.primitives = McCarthyPrimitives()
        baseGrammar = Grammar.uniform(self.primitives)
        self.grammar = baseGrammar
        self.train_args = args
        self.semantics = [Semantics(i) for i in range(len(SYMBOLS))]
        self.allFrontiers = None
        self.helmholtzFrontiers = None
Example #5
    def update_grammar(self):
        programs = [
            Invented(smt.program.prog) for smt in self.semantics
            if smt.learnable and smt.solved and smt.program is not None
            and smt.program.arity > 0 and '#' not in str(smt.program)
        ]
        # If '#' appears in a program, it uses an invented primitive and is likely
        # to be expensive to compute, so we do not add it to the primitives: it
        # might slow enumeration considerably. Increasing the enumeration timeout
        # could mitigate this.
        new_grammar = Grammar.uniform(self.primitives + programs)
        # self.train_args['enumerationTimeout'] += 100 * len(programs)
        if new_grammar != self.grammar:
            self.grammar = new_grammar
            self.helmholtzFrontiers = None
            self.allFrontiers = None
            print(
                "Updated grammar with invented programs and reset frontiers to None."
            )
Example #6
def memorizeInduce(g, frontiers, **kwargs):
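    # Promote each non-empty frontier's best program to an invented primitive
    # and rewrite the frontiers in terms of those primitives, i.e. a
    # memorization-based stand-in for compression.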
    existingInventions = {p.uncurry() for p in g.primitives}
    programs = {f.bestPosterior.program for f in frontiers if not f.empty}
    newInventions = programs - existingInventions
    newGrammar = Grammar.uniform([p for p in g.primitives] + \
                                 [Invented(ni) for ni in newInventions])
    
    # rewrite in terms of new primitives
    def substitute(p):
        nonlocal newInventions
        if p in newInventions: return Invented(p).uncurry()
        return p
    newFrontiers = [Frontier([FrontierEntry(program=np,
                                            logPrior=newGrammar.logLikelihood(f.task.request, np),
                                            logLikelihood=e.logLikelihood)
                              for e in f
                              for np in [substitute(e.program)]],
                             task=f.task)
                    for f in frontiers]
    return newGrammar, newFrontiers
Example #7
        ('mapping', [
            '(map _ $x)',
            '(mapi _ $x)',
            '(flatten $x)',
            '(map _ (zip (droplast 1 $x) (drop 1 $x)))',
            '(map _ (drop 1 $x))'
        ], lambda e: e == '$l')
    ])



## features computed in 'predict()' in bin/list_routines_misc.py

Primitive.GLOBALS.clear()
Grammar.uniform(list_routines_misc.primitives())
pre_features = {
    'program_length': lambda _, __, ___, p: Program.parse(p).size(),
    'depth': lambda _, __, ___, p: Program.parse(p).depth(),
    'apps': lambda _, __, ___, p: list_routines_misc.count_applications(Program.parse(p))
}


## miscellaneous features

misc_features = {}

concept_examples = {'model': {}, 'dataset': {}}
for purpose, num in [('model', 100), ('dataset', 150)]:
    concepts = os.listdir('analysis/concept_examples/{}'.format(purpose))
    for concept in map(lambda n: 'c{:03}'.format(n), range(1, num+1)):
Example #8
        # Primitive("map", arrow(arrow(t0, t1), tlist(t0), tlist(t1)), _map),
        # Primitive("index", arrow(tint,tlist(t0),t0),None),
        # Primitive("length", arrow(tlist(t0),tint),None),
        primitiveRecursion1,
        #primitiveRecursion2,
        Primitive("gt?", arrow(tint, tint, tbool), _gt),
        Primitive("if", arrow(tbool, t0, t0, t0), _if),
        Primitive("eq?", arrow(tint, tint, tbool), _eq),
        Primitive("+", arrow(tint, tint, tint), _addition),
        Primitive("-", arrow(tint, tint, tint), _subtraction),
    ] + [Primitive(str(j), tint, j) for j in range(2)]


if __name__ == "__main__":
    bootstrapTarget()
    g = Grammar.uniform(McCarthyPrimitives())
    # with open("/home/ellisk/om/ec/experimentOutputs/list_aic=1.0_arity=3_ET=1800_expandFrontier=2.0_it=4_likelihoodModel=all-or-nothing_MF=5_baseline=False_pc=10.0_L=1.0_K=5_rec=False.pickle", "rb") as handle:
    #     b = pickle.load(handle).grammars[-1]
    # print b

    p = Program.parse(
        "(lambda (lambda (lambda (if (empty? $0) empty (cons (+ (car $1) (car $0)) ($2 (cdr $1) (cdr $0)))))))")
    t = arrow(tlist(tint), tlist(tint), tlist(tint))  # ,tlist(tbool))
    print(g.logLikelihood(arrow(t, t), p))
    assert False
    print(b.logLikelihood(arrow(t, t), p))

    # p = Program.parse("""(lambda (lambda
    # (unfold 0
    # (lambda (+ (index $0 $2) (index $0 $1)))
    # (lambda (1+ $0))
Example #9
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on manipulating sequences of numbers.
    """
    random.seed(args.pop("random_seed"))

    tasks = make_list_bootstrap_tasks()
    print(tasks)
    maxTasks = args.pop("maxTasks")
    if maxTasks and len(tasks) > maxTasks:

        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        random.shuffle(tasks)
        del tasks[maxTasks:]

    primitives = McCarthyPrimitives()
    from dreamcoder.program import Program, Invented
    # plus = Program.parse("(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 $1 (incr ($2 $1 (decr0 $0))))))))))")
    # plus = Invented(plus)
    # primitives.append(plus)
    # minus = Program.parse("(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 $1 ($2 (decr0 $1) (decr0 $0)))))))))")
    # minus = Invented(minus)
    # primitives.append(minus)
    # times = Program.parse("(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 0 (#(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 $1 (incr ($2 $1 (decr0 $0)))))))))) $1 ($2 (decr0 $0) $1)))))))))")
    # times = Invented(times)
    # primitives.append(times)
    baseGrammar = Grammar(
        0.0, [(5.0 if p.name.startswith('fix') else 0.0, p.infer(), p)
              for p in primitives])

    extractor = {
        "learned": LearnedFeatureExtractor,
    }[args.pop("extractor")]
    extractor.H = args.pop("hidden")

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/list/%s" % timestamp
    os.system("mkdir -p %s" % outputDirectory)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/list" % outputDirectory,
        "evaluationTimeout": 0.0005,
    })

    eprint("Got {} list tasks".format(len(tasks)))
    split = args.pop("split")
    if split:
        for t in tasks:
            t.mustTrain = True

        test, train = testTrainSplit(tasks, split)

        eprint("Alotted {} tasks for training and {} for testing".format(
            len(train), len(test)))
    else:
        train = tasks
        test = []

    result = explorationCompression(baseGrammar,
                                    train,
                                    testingTasks=test,
                                    **args)
    print([x.bestPosterior for x in result.taskSolutions.values()])
Example #10
        for char, name in disallowed
    ] + [
        Primitive("r_dot", tpregex, emp_dot_no_letter(corpus)),
        Primitive("r_d", tpregex, emp_d(corpus)),
        Primitive("r_s", tpregex, pregex.s),
        Primitive("r_kleene", arrow(tpregex, tpregex), _kleene),
        #Primitive("r_plus", arrow(tpregex, tpregex), _plus),
        #Primitive("r_maybe", arrow(tpregex, tpregex), _maybe),
        Primitive("r_alt", arrow(tpregex, tpregex, tpregex), _alt),
        Primitive("r_concat", arrow(tpregex, tpregex, tpregex), _concat),
    ]


if __name__ == '__main__':
    concatPrimitives()
    from dreamcoder.program import Program

    p = Program.parse(
        "(lambda (r_kleene (lambda (r_maybe (lambda (string_x $0)) $0)) $0))")
    print(p)
    print(p.runWithArguments([pregex.String("")]))

    prims = concatPrimitives()
    g = Grammar.uniform(prims)

    for i in range(100):
        prog = g.sample(arrow(tpregex, tpregex))
        preg = prog.runWithArguments([pregex.String("")])
        print("preg:", preg.__repr__())
        print("sample:", preg.sample())
Example #11
    bootstrapTarget()
    equationPrimitives = [
        #        real,
        f0,
        f1,
        fpi,
        real_power,
        real_subtraction,
        real_addition,
        real_division,
        real_multiplication
    ] + [
        Program.parse(n)
        for n in ["map", "fold", "empty", "cons", "car", "cdr", "zip"]
    ]
    baseGrammar = Grammar.uniform(equationPrimitives)

    eprint("Got %d equation discovery tasks..." % len(tasks))

    explorationCompression(baseGrammar,
                           tasks,
                           outputPrefix="experimentOutputs/scientificLaws",
                           evaluationTimeout=0.1,
                           testingTasks=[],
                           **commandlineArguments(
                               compressor="ocaml",
                               featureExtractor=DummyFeatureExtractor,
                               iterations=10,
                               CPUs=numberOfCPUs(),
                               structurePenalty=0.5,
                               helmholtzRatio=0.5,
Example #12
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on LOGO tasks.
    """

    # The below legacy global statement is required since prefix_dreams is used by LogoFeatureCNN.
    # TODO(lcary): use argument passing instead of global variables.
    global prefix_dreams

    # The below global statement is required since primitives is modified within main().
    # TODO(lcary): use a function call to retrieve and declare primitives instead.
    global primitives

    visualizeCheckpoint = args.pop("visualize")
    if visualizeCheckpoint is not None:
        with open(visualizeCheckpoint, 'rb') as handle:
            primitives = pickle.load(handle).grammars[-1].primitives
        visualizePrimitives(primitives)
        sys.exit(0)

    dreamCheckpoint = args.pop("dreamCheckpoint")
    dreamDirectory = args.pop("dreamDirectory")

    proto = args.pop("proto")

    if dreamCheckpoint is not None:
        #outputDreams(dreamCheckpoint, dreamDirectory)
        enumerateDreams(dreamCheckpoint, dreamDirectory)
        sys.exit(0)

    animateCheckpoint = args.pop("animate")
    if animateCheckpoint is not None:
        animateSolutions(loadPickle(animateCheckpoint).allFrontiers)
        sys.exit(0)

    target = args.pop("target")
    red = args.pop("reduce")
    save = args.pop("save")
    prefix = args.pop("prefix")
    prefix_dreams = prefix + "/dreams/" + ('_'.join(target)) + "/"
    prefix_pickles = prefix + "/logo." + ('.'.join(target))
    if not os.path.exists(prefix_dreams):
        os.makedirs(prefix_dreams)
    tasks = makeTasks(target, proto)
    eprint("Generated", len(tasks), "tasks")

    costMatters = args.pop("cost")
    for t in tasks:
        t.specialTask[1]["costMatters"] = costMatters
        # disgusting hack - include whether cost matters in the dummy input
        if costMatters: t.examples = [(([1]), t.examples[0][1])]

    os.chdir("prototypical-networks")
    subprocess.Popen(["python", "./protonet_server.py"])
    time.sleep(3)
    os.chdir("..")

    test, train = testTrainSplit(tasks, args.pop("split"))
    eprint("Split tasks into %d/%d test/train" % (len(test), len(train)))
    try:
        if test: montageTasks(test, "test_")
        montageTasks(train, "train_")
    except Exception:
        eprint(
            "WARNING: couldn't generate montage. Do you have an old version of scipy?"
        )

    if red:
        for reducing in red:
            try:
                with open(reducing, 'r') as f:
                    prods = json.load(f)
                    for e in prods:
                        e = Program.parse(e)
                        if e.isInvented:
                            primitives.append(e)
            except (EOFError, IOError, json.decoder.JSONDecodeError):
                eprint("Couldn't grab frontier from " + reducing)

    primitives = list(OrderedDict((x, True) for x in primitives).keys())
    baseGrammar = Grammar.uniform(primitives, continuationType=turtle)

    eprint(baseGrammar)

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/logo/%s" % timestamp
    os.system("mkdir -p %s" % outputDirectory)

    generator = ecIterator(baseGrammar,
                           train,
                           testingTasks=test,
                           outputPrefix="%s/logo" % outputDirectory,
                           evaluationTimeout=0.01,
                           **args)

    r = None
    for result in generator:
        iteration = len(result.learningCurve)
        dreamDirectory = "%s/dreams_%d" % (outputDirectory, iteration)
        os.system("mkdir  -p %s" % dreamDirectory)
        eprint("Dreaming into directory", dreamDirectory)
        dreamFromGrammar(result.grammars[-1], dreamDirectory)
        r = result

    needsExport = [
        str(z) for _, _, z in r.grammars[-1].productions if z.isInvented
    ]
    if save is not None:
        with open(save, 'w') as f:
            json.dump(needsExport, f)
Example #13
                response = json.loads(result.decode("utf-8"))
                for b, entry in enumerate(response):
                    frontiers.append(
                        Frontier([
                            FrontierEntry(program=Program.parse(p),
                                          logPrior=entry["ll"],
                                          logLikelihood=0.)
                            for p in entry["programs"]
                        ],
                                 task=Task(str(b), request, [])))
        eprint("Total number of Helmholtz frontiers:", len(frontiers))
        return frontiers

    return get


if __name__ == "__main__":
    g = Grammar.uniform([k1, k0, addition, subtraction, multiplication])
    frontiers = helmholtzEnumeration(g, arrow(tint, tint), [[0], [1], [2]],
                                     10.)
    eprint("average frontier size", mean(len(f.entries) for f in frontiers))
    f = DummyFeatureExtractor([])
    r = RecognitionModel(f, g, hidden=[], contextual=True)
    r.trainBiasOptimal(frontiers, frontiers, steps=70)
    g = r.grammarOfTask(frontiers[0].task).untorch()
    frontiers = helmholtzEnumeration(g, arrow(tint, tint), [[0], [1], [2]],
                                     10.)
    for f in frontiers:
        eprint(f.summarizeFull())
    eprint("average frontier size", mean(len(f.entries) for f in frontiers))
Example #14
# from dreamcoder.domains.draw.makeDrawTasks import drawDrawings
from dreamcoder.domains.draw.drawPrimitives import primitives, taxes, tartist, tangle, tscale, tdist

# from dreamcoder.dreamcoder import ecIterator
from dreamcoder.grammar import Grammar
# from dreamcoder.program import Program
# from dreamcoder.recognition import variable, maybe_cuda
from dreamcoder.task import Task
from dreamcoder.type import arrow
# from dreamcoder.utilities import eprint, testTrainSplit, loadPickle

g0 = Grammar.uniform(primitives)

def dreamFromGrammar(g=g0, directory="", N=50):
    # request = taxes
    # Analogous to arrow(turtle, turtle) in the LOGO domain; just for loglikelihood.
    request = arrow(taxes, taxes)
    programs = [p for _ in range(N)
                for p in [g.sample(request, maximumDepth=20)]
                if p is not None]
    return programs
    # drawDrawings(*programs, filenames)

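# Usage sketch: draw a handful of dreams from the uniform grammar g0 and print
# them; the argument values are illustrative.
if __name__ == "__main__":
    for p in dreamFromGrammar(g0, N=10):
        print(p)
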
Example #15
                                     helmholtzRatio=0.5,
                                     activation="tanh",
                                     maximumFrontier=5,
                                     a=3,
                                     topK=2,
                                     pseudoCounts=30.0,
                                     extras=rational_options)

    primitives = [
        real,
        # f1,
        real_division,
        real_addition,
        real_multiplication
    ]
    baseGrammar = Grammar.uniform(primitives)
    random.seed(42)
    tasks = makeTasks()

    smooth = arguments.pop('smooth')

    for t in tasks:
        t.features = drawFunction(200, 10., t.f)
        delattr(t, 'f')
        if smooth:
            t.likelihoodThreshold = None

    eprint("Got %d tasks..." % len(tasks))

    test, train = testTrainSplit(tasks, 100)
    random.shuffle(test)
Example #16
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("--domain", '-d', default="text")
    parser.add_argument("--taskLikelihood", default=False, action='store_true')
    parser.add_argument("--sampleLikelihood",
                        default=False,
                        action='store_true')
    parser.add_argument("--test", type=str, default=False)
    parser.add_argument("--timeout", type=float, default=600)
    arguments = parser.parse_args()

    if arguments.domain == "text":
        tasks = makeTasks()
        g = Grammar.uniform(text_primitives.primitives +
                            [p for p in bootstrapTarget()])
        input_vocabularies = [
            list(printable[:-4]) + ['EOE'],
            list(printable[:-4])
        ]
        test = loadPBETasks("PBE_Strings_Track")[0]
        fe = Text.LearnedFeatureExtractor(tasks=tasks, testingTasks=test)

        BATCHSIZE = 16

    elif arguments.domain == "regex":
        g = Grammar.uniform(reducedConcatPrimitives(),
                            continuationType=tpregex)
        tasks = makeNewTasks()
        fe = Regex.LearnedFeatureExtractor(tasks)
Example #17
def make_grammar(g):
    Primitive.GLOBALS.clear()
    return Grammar.uniform(g())
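
# Usage sketch: make_grammar expects a zero-argument primitive factory such as
# McCarthyPrimitives from the other examples; Primitive.GLOBALS is cleared
# first, presumably so the primitives can be re-registered without conflicts.
g = make_grammar(McCarthyPrimitives)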
Example #18
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on manipulating sequences of numbers.
    """
    random.seed(args.pop("random_seed"))

    dataset = args.pop("dataset")
    tasks = {
        "Lucas-old":
        lambda: retrieveJSONTasks("data/list_tasks.json") + sortBootstrap(),
        "bootstrap":
        make_list_bootstrap_tasks,
        "sorting":
        sortBootstrap,
        "Lucas-depth1":
        lambda: retrieveJSONTasks("data/list_tasks2.json")[:105],
        "Lucas-depth2":
        lambda: retrieveJSONTasks("data/list_tasks2.json")[:4928],
        "Lucas-depth3":
        lambda: retrieveJSONTasks("data/list_tasks2.json"),
    }[dataset]()

    maxTasks = args.pop("maxTasks")
    if maxTasks and len(tasks) > maxTasks:
        necessaryTasks = []  # maxTasks will not consider these
        if dataset.startswith("Lucas2.0") and dataset != "Lucas2.0-depth1":
            necessaryTasks = tasks[:105]

        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        random.shuffle(tasks)
        del tasks[maxTasks:]
        tasks = necessaryTasks + tasks

    if dataset.startswith("Lucas"):
        # extra tasks for filter
        tasks.extend([
            Task("remove empty lists",
                 arrow(tlist(tlist(tbool)), tlist(tlist(tbool))),
                 [((ls, ), list(filter(lambda l: len(l) > 0, ls)))
                  for _ in range(15) for ls in [[[
                      random.random() < 0.5
                      for _ in range(random.randint(0, 3))
                  ] for _ in range(4)]]]),
            Task("keep squares", arrow(tlist(tint), tlist(tint)), [
                ((xs, ), list(filter(lambda x: int(math.sqrt(x))**2 == x, xs)))
                for _ in range(15) for xs in [[
                    random.choice([0, 1, 4, 9, 16, 25])
                    if random.random() < 0.5 else random.randint(0, 9)
                    for _ in range(7)
                ]]
            ]),
            Task("keep primes", arrow(tlist(tint), tlist(tint)), [
                ((xs, ),
                 list(
                     filter(
                         lambda x: x in
                         {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37}, xs)))
                for _ in range(15) for xs in [[
                    random.choice([2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37])
                    if random.random() < 0.5 else random.randint(0, 9)
                    for _ in range(7)
                ]]
            ]),
        ])
        for i in range(4):
            tasks.extend([
                Task("keep eq %s" % i, arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: x == i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]]),
                Task("remove eq %s" % i, arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: x != i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]]),
                Task("keep gt %s" % i, arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: x > i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]]),
                Task("remove gt %s" % i, arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: not x > i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]])
            ])

    def isIdentityTask(t):
        return all(len(xs) == 1 and xs[0] == y for xs, y in t.examples)

    eprint("Removed", sum(isIdentityTask(t) for t in tasks),
           "tasks that were just the identity function")
    tasks = [t for t in tasks if not isIdentityTask(t)]

    prims = {
        "base": basePrimitives,
        "McCarthy": McCarthyPrimitives,
        "common": bootstrapTarget_extra,
        "noLength": no_length,
        "rich": primitives
    }[args.pop("primitives")]()
    haveLength = not args.pop("noLength")
    haveMap = not args.pop("noMap")
    haveUnfold = not args.pop("noUnfold")
    eprint(f"Including map as a primitive? {haveMap}")
    eprint(f"Including length as a primitive? {haveLength}")
    eprint(f"Including unfold as a primitive? {haveUnfold}")
    baseGrammar = Grammar.uniform([p for p in prims
                                   if (p.name != "map" or haveMap)
                                   and (p.name != "unfold" or haveUnfold)
                                   and (p.name != "length" or haveLength)])

    extractor = {
        "learned": LearnedFeatureExtractor,
    }[args.pop("extractor")]
    extractor.H = args.pop("hidden")

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/list/%s" % timestamp
    os.system("mkdir -p %s" % outputDirectory)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/list" % outputDirectory,
        "evaluationTimeout": 0.0005,
    })

    eprint("Got {} list tasks".format(len(tasks)))
    split = args.pop("split")
    if split:
        train_some = defaultdict(list)
        for t in tasks:
            necessary = train_necessary(t)
            if not necessary:
                continue
            if necessary == "some":
                train_some[t.name.split()[0]].append(t)
            else:
                t.mustTrain = True
        for k in sorted(train_some):
            ts = train_some[k]
            random.shuffle(ts)
            ts.pop().mustTrain = True

        test, train = testTrainSplit(tasks, split)
        test = [t for t in test if t.name not in EASYLISTTASKS]

        eprint("Alotted {} tasks for training and {} for testing".format(
            len(train), len(test)))
    else:
        train = tasks
        test = []

    explorationCompression(baseGrammar, train, testingTasks=test, **args)
Example #19
    def update_grammar(self):
        programs = [
            Invented(smt.program.prog_ori) for smt in self.semantics
            if smt.solved and smt.program.arity > 0
        ]
        self.grammar = Grammar.uniform(McCarthyPrimitives() + programs)