Example 1
    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        unsolvedTasks = [t for t in tasks if ec_result.allFrontiers[t].empty]

        if taskBatchSize is None:
            return unsolvedTasks
        elif taskBatchSize > len(tasks):
            eprint(
                "Task batch size is greater than total number of tasks, aborting."
            )
            assert False

        if ec_result.recognitionModel is None:
            eprint(
                "No recognition model, falling back on random %d tasks from the remaining %d"
                % (taskBatchSize, len(unsolvedTasks)))
            return random.sample(unsolvedTasks, taskBatchSize)
        else:
            lowEntropyUnsolved = entropyRandomBatch(ec_result,
                                                    unsolvedTasks,
                                                    taskBatchSize,
                                                    randomRatio=0)
            randomTask = random.choice(lowEntropyUnsolved)
            kNN = kNearestNeighbors(ec_result, tasks, taskBatchSize - 1,
                                    randomTask)
            return [randomTask] + kNN
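In this variant only tasks whose frontiers are still empty are eligible. With a recognition model available, entropyRandomBatch with randomRatio=0 returns a purely entropy-ranked pool of unsolved tasks; one of those is chosen at random to seed the batch, and the remaining taskBatchSize - 1 slots are filled with its nearest neighbors. Note that kNearestNeighbors searches over all tasks, so the neighbors need not be unsolved.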
Example 2
def enumerateDreams(checkpoint, directory):
    from dreamcoder.dreaming import backgroundHelmholtzEnumeration
    from dreamcoder.utilities import loadPickle
    result = loadPickle(checkpoint)
    eprint(" [+] Loaded checkpoint",checkpoint)
    g = result.grammars[-1]
    if directory is None: assert False, "please specify a directory"
    eprint(" Dreaming into",directory)
    os.system("mkdir  -p %s"%directory)
    frontiers = backgroundHelmholtzEnumeration(makeTasks(None,None), g, 100,
                                               evaluationTimeout=0.01,
                                               special=LogoFeatureCNN.special)()
    print(f"{len(frontiers)} total frontiers.")
    MDL = 0
    def L(f):
        return -list(f.entries)[0].logPrior
    frontiers.sort(key=lambda f: -L(f))
    while len(frontiers) > 0:
        # collect frontiers whose MDL lies in [MDL, MDL + 1)
        fs = []
        while len(frontiers) > 0 and L(frontiers[-1]) < MDL + 1:
            fs.append(frontiers.pop())
        if fs:
            random.shuffle(fs)
            print(f"{len(fs)} programs with MDL between [{MDL}, {MDL + 1})")

            fs = fs[:500]
            os.system(f"mkdir {directory}/{MDL}")
            dreamFromGrammar([list(f.entries)[0].program for f in fs],
                             f"{directory}/{MDL}")
        MDL += 1
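Because the frontiers are sorted by description length in decreasing order, popping from the end yields the cheapest programs first. Each pass of the outer loop collects the frontiers whose minimum description length falls in [MDL, MDL + 1), keeps at most 500 of them, and renders that bucket into its own subdirectory.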
Example 3
def induceGrammar(*args, **kwargs):
    if sum(not f.empty for f in args[1]) == 0:
        eprint("No nonempty frontiers, exiting grammar induction early.")
        return args[0], args[1]
    with timing("Induced a grammar"):
        backend = kwargs.pop("backend", "pypy")
        if backend == "pypy":
            g, newFrontiers = callCompiled(pypyInduce, *args, **kwargs)
        elif backend == "rust":
            g, newFrontiers = rustInduce(*args, **kwargs)
        elif backend == "vs":
            g, newFrontiers = rustInduce(*args, vs=True, **kwargs)
        elif backend == "pypy_vs":
            kwargs.pop('iteration')
            kwargs.pop('topk_use_only_likelihood')
            fn = '/tmp/vs.pickle'
            with open(fn, 'wb') as handle:
                pickle.dump((args, kwargs), handle)
            eprint(
                "For debugging purposes, the version space compression invocation has been saved to",
                fn)
            g, newFrontiers = callCompiled(induceGrammar_Beta, *args, **kwargs)
        elif backend == "ocaml":
            kwargs.pop('iteration')
            kwargs.pop('topk_use_only_likelihood')
            kwargs['topI'] = 300
            kwargs['bs'] = 1000000
            g, newFrontiers = ocamlInduce(*args, **kwargs)
        elif backend == "memorize":
            g, newFrontiers = memorizeInduce(*args, **kwargs)
        else:
            assert False, "unknown compressor"
    return g, newFrontiers
Example 4
    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        if taskBatchSize is None:
            taskBatchSize = len(tasks)
        elif taskBatchSize > len(tasks):
            eprint(
                "Task batch size is greater than total number of tasks, aborting."
            )
            assert False

        return random.sample(tasks, taskBatchSize)
Example 5
def outputDreams(checkpoint, directory):
    from dreamcoder.utilities import loadPickle
    result = loadPickle(checkpoint)
    eprint(" [+] Loaded checkpoint", checkpoint)
    g = result.grammars[-1]
    if directory is None:
        randomStr = ''.join(random.choice('0123456789') for _ in range(10))
        directory = "/tmp/" + randomStr
    eprint(" Dreaming into", directory)
    os.system("mkdir  -p %s" % directory)
    dreamFromGrammar(g, directory)
Example 6
def induceGrammar(*args, **kwargs):
    if sum(not f.empty for f in args[1]) == 0:
        eprint("No nonempty frontiers, exiting grammar induction early.")
        return args[0], args[1]
    backend = kwargs.pop("backend", "pypy")
    if 'pypy' in backend:
        # pypy might not like some of the imports needed for the primitives
        # but the primitive values are irrelevant for compression
        # therefore strip them out and then replace them once we are done
        # ditto for task data
        g0,frontiers = args[0].strip_primitive_values(), \
                       [front.strip_primitive_values() for front in args[1]]
        original_tasks = {f.task.name: f.task for f in frontiers}
        frontiers = [Frontier(f.entries, Task(f.task.name,f.task.request,[]))
                     for f in frontiers ]
        args = [g0,frontiers]

    
    with timing("Induced a grammar"):
        if backend == "pypy":
            g, newFrontiers = callCompiled(pypyInduce, *args, **kwargs)
        elif backend == "rust":
            g, newFrontiers = rustInduce(*args, **kwargs)
        elif backend == "vs":
            g, newFrontiers = rustInduce(*args, vs=True, **kwargs)
        elif backend == "pypy_vs":
            kwargs.pop('iteration')
            kwargs.pop('topk_use_only_likelihood')
            fn = '/tmp/vs.pickle'
            with open(fn, 'wb') as handle:
                pickle.dump((args, kwargs), handle)
            eprint("For debugging purposes, the version space compression invocation has been saved to", fn)
            g, newFrontiers = callCompiled(induceGrammar_Beta, *args, **kwargs)
        elif backend == "ocaml":
            kwargs.pop('iteration')
            kwargs.pop('topk_use_only_likelihood')
            kwargs['topI'] = 300
            kwargs['bs'] = 1000000
            g, newFrontiers = ocamlInduce(*args, **kwargs)
        elif backend == "memorize":
            g, newFrontiers = memorizeInduce(*args, **kwargs)
        else:
            assert False, "unknown compressor"

    if 'pypy' in backend:
        g, newFrontiers = g.unstrip_primitive_values(), \
                          [front.unstrip_primitive_values() for front in newFrontiers]
        newFrontiers = [Frontier(f.entries, original_tasks[f.task.name])
                        for f in newFrontiers] 
        

    return g, newFrontiers
Example 7
    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        if taskBatchSize is None:
            taskBatchSize = len(tasks)
        elif taskBatchSize > len(tasks):
            eprint(
                "Task batch size is greater than total number of tasks, aborting."
            )
            assert False

        start = (taskBatchSize * currIteration) % len(tasks)
        end = start + taskBatchSize
        taskBatch = (tasks + tasks)[start:end]  # Handle wraparound.
        return taskBatch
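A worked example with illustrative numbers: with 10 tasks and taskBatchSize = 4, iteration 2 gives start = (4 * 2) % 10 = 8 and end = 12, so slicing the doubled list returns tasks[8:10] followed by tasks[0:2], wrapping cleanly around the end of the list.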
Example 8
def get():
    results = [p.get() for p in promises]
    frontiers = []
    with timing("(Helmholtz enumeration) Decoded json into frontiers"):
        for request, result in zip(requests, results):
            response = json.loads(result.decode("utf-8"))
            for b, entry in enumerate(response):
                frontiers.append(
                    Frontier([
                        FrontierEntry(program=Program.parse(p),
                                      logPrior=entry["ll"],
                                      logLikelihood=0.)
                        for p in entry["programs"]
                    ],
                             task=Task(str(b), request, [])))
    eprint("Total number of Helmholtz frontiers:", len(frontiers))
    return frontiers
Example 9
    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        if taskBatchSize is None:
            taskBatchSize = len(tasks)
        elif taskBatchSize > len(tasks):
            eprint(
                "Task batch size is greater than total number of tasks, aborting."
            )
            assert False

        if ec_result.recognitionModel is None:
            eprint("No recognition model, falling back on random %d" %
                   taskBatchSize)
            return random.sample(tasks, taskBatchSize)
        else:
            randomTask = random.choice(tasks)
            kNN = kNearestNeighbors(ec_result, tasks, taskBatchSize - 1,
                                    randomTask)
            return [randomTask] + kNN
Example 10
def entropyRandomBatch(ec_result, tasks, taskBatchSize, randomRatio):
    numRandom = int(randomRatio * taskBatchSize)
    numEntropy = taskBatchSize - numRandom

    eprint(
        "Selecting %d tasks from the %d overall tasks, prioritizing lowest entropy."
        % (taskBatchSize, len(tasks)))
    eprint("Will be selecting %d by lowest entropy and %d randomly." %
           (numEntropy, numRandom))
    taskGrammarEntropies = ec_result.recognitionModel.taskGrammarEntropies(
        tasks)
    sortedEntropies = sorted(taskGrammarEntropies.items(), key=lambda x: x[1])

    entropyBatch = [task for (task, entropy) in sortedEntropies[:numEntropy]]
    randomBatch = random.sample(
        [task for (task, entropy) in sortedEntropies[numEntropy:]], numRandom)
    batch = entropyBatch + randomBatch

    return batch
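A worked example with illustrative numbers: taskBatchSize = 10 and randomRatio = 0.2 give numRandom = 2 and numEntropy = 8, so the eight lowest-entropy tasks are taken deterministically and two more are sampled uniformly from the remainder.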
Example 11
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on manipulating sequences of numbers.
    """
    random.seed(args.pop("random_seed"))

    tasks = make_list_bootstrap_tasks()
    print(tasks)
    maxTasks = args.pop("maxTasks")
    if maxTasks and len(tasks) > maxTasks:

        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        random.shuffle(tasks)
        del tasks[maxTasks:]

    baseGrammar = Grammar.uniform(McCarthyPrimitives())

    extractor = {
        "learned": LearnedFeatureExtractor,
    }[args.pop("extractor")]
    extractor.H = args.pop("hidden")

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/list/%s" % timestamp
    os.system("mkdir -p %s" % outputDirectory)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/list" % outputDirectory,
        "evaluationTimeout": 0.0005,
    })

    eprint("Got {} list tasks".format(len(tasks)))
    split = args.pop("split")
    if split:
        train_some = defaultdict(list)
        for t in tasks:
            # necessary = train_necessary(t)
            # if not necessary:
            #     continue
            # if necessary == "some":
            # train_some[t.name.split()[0]].append(t)
            # else:
            t.mustTrain = True
        # for k in sorted(train_some):
        #     ts = train_some[k]
        #     random.shuffle(ts)
        #     ts.pop().mustTrain = True

        test, train = testTrainSplit(tasks, split)

        eprint("Alotted {} tasks for training and {} for testing".format(
            len(train), len(test)))
    else:
        train = tasks
        test = []

    explorationCompression(baseGrammar, train, testingTasks=test, **args)
Example 12
def demoLogoTasks():
    import scipy.misc  # NB: imsave was removed from scipy.misc in SciPy 1.2
    import numpy as np

    g0 = Grammar.uniform(primitives, continuationType=turtle)
    eprint("dreaming into /tmp/dreams_0...")
    N = 1000
    programs = [
        p for _ in range(N)
        for p in [g0.sample(arrow(turtle, turtle), maximumDepth=20)]
        if p is not None
    ]
    os.system("mkdir  -p /tmp/dreams_0")
    for n, p in enumerate(programs):
        with open(f"/tmp/dreams_0/{n}.dream", "w") as handle:
            handle.write(str(p))
    drawLogo(*programs,
             pretty=True,
             smoothPretty=False,
             resolution=512,
             filenames=[
                 f"/tmp/dreams_0/{n}_pretty.png" for n in range(len(programs))
             ],
             timeout=1)

    if len(sys.argv) > 1:
        tasks = makeTasks(sys.argv[1:], proto=False)
    else:
        tasks = makeTasks(['all'], proto=False)
    montageTasks(tasks, columns=16, testTrain=True)
    for n, t in enumerate(tasks):
        a = t.highresolution
        w = int(len(a)**0.5)
        scipy.misc.imsave('/tmp/logo%d.png' % n,
                          np.array([a[i:i + w] for i in range(0, len(a), w)]))
        logo_safe_name = t.name.replace("=", "_").replace(' ', '_').replace(
            '/', '_').replace("-", "_") + ".png"
        #os.system(f"convert /tmp/logo{n}.png -morphology Dilate Octagon /tmp/{logo_safe_name}")
        os.system(
            f"convert /tmp/logo{n}.png -channel RGB -negate /tmp/{logo_safe_name}"
        )
    eprint(len(tasks), "tasks")
    eprint(sum(t.mustTrain for t in tasks), "need to be trained on")

    for t in dSLDemo():
        a = t.highresolution
        w = int(len(a)**0.5)
        scipy.misc.imsave('/tmp/logoDemo%s.png' % t.name,
                          np.array([a[i:i + w] for i in range(0, len(a), w)]))
        os.system(
            f"convert /tmp/logoDemo{t.name}.png -morphology Dilate Octagon /tmp/logoDemo{t.name}_dilated.png"
        )

    tasks = [t for t in tasks if t.mustTrain]
    random.shuffle(tasks)
    montageTasks(tasks[:16 * 3], "subset", columns=16)

    montageTasks(rotationalSymmetryDemo(), "rotational")
Example 13
    def _featuresOfProgram(self, program, tp):
        try:
            preg = program.evaluate([])
            # if 'left_paren' in program.show(False):
            #eprint("string_pregex:", string_pregex)
            #eprint("string_pregex:", string_pregex)

        except IndexError:
            # free variable
            return None
        except Exception as e:
            eprint("Exception during evaluation:", e)
            if "Attempt to evaluate fragment variable" in e:
                eprint("program (bc fragment error)", program)
            return None

        examples = []

        for _ in range(self.N_EXAMPLES * 5):  # oversampling factor is arbitrary

            try:
                y = preg.sample()  # TODO

                # Keeping sampled inputs short lets the Helmholtz batch be large,
                # and skipping a bad sample (rather than returning None outright)
                # lets the loop try other samples.
                # if len(y) > 20:
                #     continue
                # eprint(tp, program, x, y)
                examples.append(y)
            except BaseException:
                continue
            if len(examples) >= self.N_EXAMPLES:
                break
        else:
            return None
        return examples  # changed to list_features(examples) from examples
Example 14
    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        if taskBatchSize is None:
            taskBatchSize = len(tasks)
        elif taskBatchSize > len(tasks):
            eprint(
                "Task batch size is greater than total number of tasks, aborting."
            )
            assert False

        # Reshuffles tasks in a fixed way across epochs for reproducibility.
        currEpoch = (currIteration * taskBatchSize) // len(tasks)

        shuffledTasks = tasks.copy()  # Since shuffle works in place.
        random.Random(self.baseSeed + currEpoch).shuffle(shuffledTasks)

        shuffledTasksWrap = tasks.copy()  # Since shuffle works in place.
        random.Random(self.baseSeed + currEpoch + 1).shuffle(shuffledTasksWrap)

        start = (taskBatchSize * currIteration) % len(shuffledTasks)
        end = start + taskBatchSize
        taskBatch = (shuffledTasks +
                     shuffledTasksWrap)[start:end]  # Wraparound nicely.

        return list(set(taskBatch))
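Because each epoch's permutation is seeded with baseSeed + currEpoch, a given iteration sees the same batch across runs. The second, differently-seeded copy supplies the wraparound tail, and the final list(set(taskBatch)) removes any task that appears in both halves of the window, at the cost of batch order (and occasionally the exact batch size).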
Example 15
def manualLogoTask(name,
                   expression,
                   proto=False,
                   needToTrain=False,
                   supervise=False,
                   lambdaCalculus=False):
    p = Program.parse(expression) if lambdaCalculus else parseLogo(expression)
    from dreamcoder.domains.logo.logoPrimitives import primitives
    from dreamcoder.grammar import Grammar
    g = Grammar.uniform(primitives, continuationType=turtle)
    gp = Grammar.uniform(primitives)
    try:
        l = g.logLikelihood(arrow(turtle, turtle), p)
        lp = gp.logLikelihood(arrow(turtle, turtle), p)
        assert l >= lp
        eprint(name, -l, "nats")

    except:
        eprint("WARNING: could not calculate likelihood of manual logo", p)

    attempts = 0
    while True:
        [output, highresolution] = drawLogo(p,
                                            p,
                                            resolution=[28, 128],
                                            cost=True)
        if output == "timeout" or highresolution == "timeout":
            attempts += 1
        else:
            break
    if attempts > 0:
        eprint(
            f"WARNING: Took {attempts} attempts to render task {name} within timeout"
        )

    cost = output[1]
    output = output[0]
    assert highresolution[1] == cost
    highresolution = highresolution[0]

    shape = list(map(int, output))
    highresolution = list(map(float, highresolution))
    t = Task(name, arrow(turtle, turtle), [(([0]), shape)])
    t.mustTrain = needToTrain
    t.proto = proto
    t.specialTask = ("LOGO", {"proto": proto})
    t.specialTask[1]["cost"] = cost * 1.05

    t.highresolution = highresolution

    if supervise:
        t.supervisedSolution = p

    return t
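The likelihood check asserts that a manual program is at least as likely under the continuation-passing grammar as under the plain one, rendering is retried until drawLogo finishes within its timeout, and the permitted solution cost is padded by 5% (cost * 1.05) before being stored on the task.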
Example 16
def exportTasks():
    import sys
    import pickle

    n_examples = 15
    if len(sys.argv) > 1:
        n_examples = int(sys.argv[1])

    eprint("Downloading and generating dataset")
    tasks = sorted(make_list_tasks(n_examples), key=lambda t: t.name)
    eprint("Got {} list tasks".format(len(tasks)))

    with open("data/list_tasks.pkl", "w") as f:
        pickle.dump(tasks, f)
    eprint("Wrote list tasks to data/list_tasks.pkl")
Example 17
        #        real,
        f0,
        f1,
        fpi,
        real_power,
        real_subtraction,
        real_addition,
        real_division,
        real_multiplication
    ] + [
        Program.parse(n)
        for n in ["map", "fold", "empty", "cons", "car", "cdr", "zip"]
    ]
    baseGrammar = Grammar.uniform(equationPrimitives)

    eprint("Got %d equation discovery tasks..." % len(tasks))

    explorationCompression(baseGrammar,
                           tasks,
                           outputPrefix="experimentOutputs/scientificLaws",
                           evaluationTimeout=0.1,
                           testingTasks=[],
                           **commandlineArguments(
                               compressor="ocaml",
                               featureExtractor=DummyFeatureExtractor,
                               iterations=10,
                               CPUs=numberOfCPUs(),
                               structurePenalty=0.5,
                               helmholtzRatio=0.5,
                               a=3,
                               maximumFrontier=10000,
Example 18
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on manipulating sequences of numbers.
    """
    random.seed(args.pop("random_seed"))

    dataset = args.pop("dataset")
    tasks = {
        "Lucas-old":
        lambda: retrieveJSONTasks("data/list_tasks.json") + sortBootstrap(),
        "bootstrap":
        make_list_bootstrap_tasks,
        "sorting":
        sortBootstrap,
        "Lucas-depth1":
        lambda: retrieveJSONTasks("data/list_tasks2.json")[:105],
        "Lucas-depth2":
        lambda: retrieveJSONTasks("data/list_tasks2.json")[:4928],
        "Lucas-depth3":
        lambda: retrieveJSONTasks("data/list_tasks2.json"),
    }[dataset]()

    maxTasks = args.pop("maxTasks")
    if maxTasks and len(tasks) > maxTasks:
        necessaryTasks = []  # maxTasks will not consider these
        if dataset.startswith("Lucas2.0") and dataset != "Lucas2.0-depth1":
            necessaryTasks = tasks[:105]

        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        random.shuffle(tasks)
        del tasks[maxTasks:]
        tasks = necessaryTasks + tasks

    if dataset.startswith("Lucas"):
        # extra tasks for filter
        tasks.extend([
            Task("remove empty lists",
                 arrow(tlist(tlist(tbool)), tlist(tlist(tbool))),
                 [((ls, ), list(filter(lambda l: len(l) > 0, ls)))
                  for _ in range(15) for ls in [[[
                      random.random() < 0.5
                      for _ in range(random.randint(0, 3))
                  ] for _ in range(4)]]]),
            Task("keep squares", arrow(tlist(tint), tlist(tint)), [
                ((xs, ), list(filter(lambda x: int(math.sqrt(x))**2 == x, xs)))
                for _ in range(15) for xs in [[
                    random.choice([0, 1, 4, 9, 16, 25])
                    if random.random() < 0.5 else random.randint(0, 9)
                    for _ in range(7)
                ]]
            ]),
            Task("keep primes", arrow(tlist(tint), tlist(tint)), [
                ((xs, ),
                 list(
                     filter(
                         lambda x: x in
                         {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37}, xs)))
                for _ in range(15) for xs in [[
                    random.choice([2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37])
                    if random.random() < 0.5 else random.randint(0, 9)
                    for _ in range(7)
                ]]
            ]),
        ])
        for i in range(4):
            tasks.extend([
                Task("keep eq %s" % i, arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: x == i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]]),
                Task("remove eq %s" % i, arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: x != i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]]),
                Task("keep gt %s" % i, arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: x > i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]]),
                Task("remove gt %s" % i, arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: not x > i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]])
            ])

    def isIdentityTask(t):
        return all(len(xs) == 1 and xs[0] == y for xs, y in t.examples)

    eprint("Removed", sum(isIdentityTask(t) for t in tasks),
           "tasks that were just the identity function")
    tasks = [t for t in tasks if not isIdentityTask(t)]

    prims = {
        "base": basePrimitives,
        "McCarthy": McCarthyPrimitives,
        "common": bootstrapTarget_extra,
        "noLength": no_length,
        "rich": primitives
    }[args.pop("primitives")]()
    haveLength = not args.pop("noLength")
    haveMap = not args.pop("noMap")
    haveUnfold = not args.pop("noUnfold")
    eprint(f"Including map as a primitive? {haveMap}")
    eprint(f"Including length as a primitive? {haveLength}")
    eprint(f"Including unfold as a primitive? {haveUnfold}")
    baseGrammar = Grammar.uniform([p
                                   for p in prims
                                   if (p.name != "map" or haveMap) and \
                                   (p.name != "unfold" or haveUnfold) and \
                                   (p.name != "length" or haveLength)])

    extractor = {
        "learned": LearnedFeatureExtractor,
    }[args.pop("extractor")]
    extractor.H = args.pop("hidden")

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/list/%s" % timestamp
    os.system("mkdir -p %s" % outputDirectory)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/list" % outputDirectory,
        "evaluationTimeout": 0.0005,
    })

    eprint("Got {} list tasks".format(len(tasks)))
    split = args.pop("split")
    if split:
        train_some = defaultdict(list)
        for t in tasks:
            necessary = train_necessary(t)
            if not necessary:
                continue
            if necessary == "some":
                train_some[t.name.split()[0]].append(t)
            else:
                t.mustTrain = True
        for k in sorted(train_some):
            ts = train_some[k]
            random.shuffle(ts)
            ts.pop().mustTrain = True

        test, train = testTrainSplit(tasks, split)
        if True:
            test = [t for t in test if t.name not in EASYLISTTASKS]

        eprint("Alotted {} tasks for training and {} for testing".format(
            len(train), len(test)))
    else:
        train = tasks
        test = []

    explorationCompression(baseGrammar, train, testingTasks=test, **args)
Example 19
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on regular expressions.
    """
    # for dreaming

    # parse use_ll_cutoff
    use_ll_cutoff = args.pop('use_ll_cutoff')
    if use_ll_cutoff is not False:

        # if use_ll_cutoff is a list of strings, then train_ll_cutoff and test_ll_cutoff
        # will be tuples of that string followed by the actual model

        if len(use_ll_cutoff) == 1:
            train_ll_cutoff = use_ll_cutoff[0] # make_cutoff_model(use_ll_cutoff[0], tasks))
            test_ll_cutoff = use_ll_cutoff[0] # make_cutoff_model(use_ll_cutoff[0], tasks))
        else:
            assert len(use_ll_cutoff) == 2
            train_ll_cutoff = use_ll_cutoff[0] #make_cutoff_model(use_ll_cutoff[0], tasks))
            test_ll_cutoff = use_ll_cutoff[1] #make_cutoff_model(use_ll_cutoff[1], tasks))
    else:
        train_ll_cutoff = None
        test_ll_cutoff = None


    regexTasks = {"old": makeOldTasks,
                "short": makeShortTasks,
                "long": makeLongTasks,
                "words": makeWordTasks,
                "number": makeNumberTasks,
                "handpicked": makeHandPickedTasks,
                "new": makeNewTasks,
                "newNumber": makeNewNumberTasks
                }[args.pop("tasks")]

    tasks = regexTasks()  # TODO
    eprint("Generated", len(tasks), "tasks")

    maxTasks = args.pop("maxTasks")
    if len(tasks) > maxTasks:
        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        seed = 42 # previously this was hardcoded and never changed
        random.seed(seed)
        random.shuffle(tasks)
        del tasks[maxTasks:]

    maxExamples = args.pop("maxExamples")
   

    split = args.pop("split")
    test, train = testTrainSplit(tasks, split)
    eprint("Split tasks into %d/%d test/train" % (len(test), len(train)))


    test = add_cutoff_values(test, test_ll_cutoff)
    train = add_cutoff_values(train, train_ll_cutoff)
    eprint("added cutoff values to tasks, train: ", train_ll_cutoff, ", test:", test_ll_cutoff )


    if args.pop("use_str_const"):
        assert args["primitives"] == "strConst" or args["primitives"] == "reduced"
        ConstantInstantiateVisitor.SINGLE = \
            ConstantInstantiateVisitor()
        test = add_string_constants(test)
        train = add_string_constants(train)
        eprint("added string constants to test and train")
    
    for task in test + train:
        if len(task.examples) > maxExamples:
            task.examples = task.examples[:maxExamples]

        task.specialTask = ("regex", {"cutoff": task.ll_cutoff, "str_const": task.str_const})
        task.examples = [(xs, list(ys))
                         for xs, ys in task.examples]
        task.maxParameters = 1

    # from list stuff
    primtype = args.pop("primitives")
    prims = {"base": basePrimitives,
             "alt1": altPrimitives,
             "alt2": alt2Primitives,
             "easyWords": easyWordsPrimitives,
             "concat": concatPrimitives,
             "reduced": reducedConcatPrimitives,
             "strConst": strConstConcatPrimitives
             }[primtype]

    extractor = {
        "learned": LearnedFeatureExtractor,
        "json": MyJSONFeatureExtractor
    }[args.pop("extractor")]

    extractor.H = args.pop("hidden")

    #stardecay = args.stardecay
    #stardecay = args.pop('stardecay')
    #decaystr = 'd' + str(stardecay)
    import datetime

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/regex/%s"%timestamp
    os.system("mkdir -p %s"%outputDirectory)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/regex"%(outputDirectory),
        "evaluationTimeout": 0.005,
        "topk_use_only_likelihood": True,
        "maximumFrontier": 10,
        "compressor": "ocaml"
    })

    # prim_list = prims(stardecay)
    prim_list = prims()
    specials = ["r_kleene", "r_plus", "r_maybe", "r_alt", "r_concat"]
    n_base_prim = len(prim_list) - len(specials)

    productions = [
        (math.log(0.5 / float(n_base_prim)),
         prim) if prim.name not in specials else (
            math.log(0.10),
            prim) for prim in prim_list]


    baseGrammar = Grammar.fromProductions(productions, continuationType=tpregex)
    #baseGrammar = Grammar.uniform(prims())

    #for i in range(100):
    #    eprint(baseGrammar.sample(tpregex))

    #eprint(baseGrammar)
    #explore
    test_stuff = args.pop("debug")
    if test_stuff:
        eprint(baseGrammar)
        eprint("sampled programs from prior:")
        for i in range(100): #100
            eprint(baseGrammar.sample(test[0].request,maximumDepth=1000))
        eprint("""half the probability mass is on higher-order primitives.
Therefore half of enumerated programs should have more than one node.
However, we do not observe this.
Instead we see a very small fraction of programs have more than one node. 
So something seems to be wrong with grammar.sample.

Furthermore: observe the large print statement above. 
This prints the candidates for sampleDistribution in grammar.sample.
the first element of each tuple is the probability passed into sampleDistribution.
Half of the probability mass should be on the functions, but instead they are equally 
weighted with the constants. If you look at the grammar above, this is an error!!!!
""")
        assert False

    del args["likelihoodModel"]
    explorationCompression(baseGrammar, train,
                           testingTasks = test,
                           **args)
Example 20
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on manipulating sequences of numbers.
    """
    random.seed(args.pop("random_seed"))

    tasks = make_list_bootstrap_tasks()
    print(tasks)
    maxTasks = args.pop("maxTasks")
    if maxTasks and len(tasks) > maxTasks:

        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        random.shuffle(tasks)
        del tasks[maxTasks:]

    primitives = McCarthyPrimitives()
    from dreamcoder.program import Program, Invented
    # plus = Program.parse("(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 $1 (incr ($2 $1 (decr0 $0))))))))))")
    # plus = Invented(plus)
    # primitives.append(plus)
    # minus = Program.parse("(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 $1 ($2 (decr0 $1) (decr0 $0)))))))))")
    # minus = Invented(minus)
    # primitives.append(minus)
    # times = Program.parse("(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 0 (#(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 $1 (incr ($2 $1 (decr0 $0)))))))))) $1 ($2 (decr0 $0) $1)))))))))")
    # times = Invented(times)
    # primitives.append(times)
    # baseGrammar = Grammar.uniform(primitives)  # superseded by the weighted grammar below
    baseGrammar = Grammar(
        0.0, [(5.0 if p.name.startswith('fix') else 0.0, p.infer(), p)
              for p in primitives])

    extractor = {
        "learned": LearnedFeatureExtractor,
    }[args.pop("extractor")]
    extractor.H = args.pop("hidden")

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/list/%s" % timestamp
    os.system("mkdir -p %s" % outputDirectory)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/list" % outputDirectory,
        "evaluationTimeout": 0.0005,
    })

    eprint("Got {} list tasks".format(len(tasks)))
    split = args.pop("split")
    if split:
        train_some = defaultdict(list)
        for t in tasks:
            # necessary = train_necessary(t)
            # if not necessary:
            #     continue
            # if necessary == "some":
            # train_some[t.name.split()[0]].append(t)
            # else:
            t.mustTrain = True
        # for k in sorted(train_some):
        #     ts = train_some[k]
        #     random.shuffle(ts)
        #     ts.pop().mustTrain = True

        test, train = testTrainSplit(tasks, split)

        eprint("Alotted {} tasks for training and {} for testing".format(
            len(train), len(test)))
    else:
        train = tasks
        test = []

    result = explorationCompression(baseGrammar,
                                    train,
                                    testingTasks=test,
                                    **args)
    print([x.bestPosterior for x in result.taskSolutions.values()])
Example 21
import pickle
import sys
import time
import traceback

try:
    import binutil  # required to import from dreamcoder modules
except ModuleNotFoundError:
    import bin.binutil  # alt import if called as module

from dreamcoder.utilities import eprint

if __name__ == "__main__":
    sys.setrecursionlimit(10000)

    start = time.time()
    request = pickle.load(sys.stdin.buffer)
    dt = time.time() - start
    if dt > 1:
        eprint(
            "(compiled driver warning: SLOW) Compiled driver unpacked the message in time",
            dt)

    response = (False, None)
    try:
        start = time.time()
        f = request["function"]
        result = f(*request["arguments"], **request["keywordArguments"])
        response = (True, result)
    except Exception as e:
        eprint("Exception thrown in pypy process for %s:" % f.__name__)
        sys.stderr.write(traceback.format_exc())
        sys.stderr.flush()
    finally:
        start = time.time()
        pickle.dump(response, sys.stdout.buffer)
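The driver reads a pickled {"function", "arguments", "keywordArguments"} request from stdin and writes a pickled (success, result) pair to stdout. A minimal sketch of the parent side of this protocol, assuming the driver is launched under pypy (illustrative only; the real caller is callCompiled elsewhere in the codebase, and the interpreter and script paths here are assumptions):

import pickle
import subprocess

def call_in_pypy(f, *arguments, **keywordArguments):
    # Hypothetical helper: launch the compiled driver and speak its protocol.
    # The function is pickled by reference, so it must be importable by name
    # inside the child process.
    request = {"function": f,
               "arguments": arguments,
               "keywordArguments": keywordArguments}
    p = subprocess.Popen(["pypy3", "bin/compiledDriver.py"],  # assumed paths
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    out, _ = p.communicate(pickle.dumps(request))
    succeeded, result = pickle.loads(out)
    if not succeeded:
        raise RuntimeError("compiled driver raised an exception (see its stderr)")
    return result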
Example 22
                response = json.loads(result.decode("utf-8"))
                for b, entry in enumerate(response):
                    frontiers.append(
                        Frontier([
                            FrontierEntry(program=Program.parse(p),
                                          logPrior=entry["ll"],
                                          logLikelihood=0.)
                            for p in entry["programs"]
                        ],
                                 task=Task(str(b), request, [])))
        eprint("Total number of Helmholtz frontiers:", len(frontiers))
        return frontiers

    return get


if __name__ == "__main__":
    g = Grammar.uniform([k1, k0, addition, subtraction, multiplication])
    frontiers = helmholtzEnumeration(g, arrow(tint, tint), [[0], [1], [2]],
                                     10.)
    eprint("average frontier size", mean(len(f.entries) for f in frontiers))
    f = DummyFeatureExtractor([])
    r = RecognitionModel(f, g, hidden=[], contextual=True)
    r.trainBiasOptimal(frontiers, frontiers, steps=70)
    g = r.grammarOfTask(frontiers[0].task).untorch()
    frontiers = helmholtzEnumeration(g, arrow(tint, tint), [[0], [1], [2]],
                                     10.)
    for f in frontiers:
        eprint(f.summarizeFull())
    eprint("average frontier size", mean(len(f.entries) for f in frontiers))
Example 23
def rustInduce(g0,
               frontiers,
               _=None,
               topK=1,
               pseudoCounts=1.0,
               aic=1.0,
               structurePenalty=0.001,
               a=0,
               CPUs=1,
               iteration=-1,
               topk_use_only_likelihood=False,
               vs=False):
    def finite_logp(l):
        return l if l != float("-inf") else -1000

    message = {
        "strategy": {
            "version-spaces": {
                "top_i": 50
            }
        } if vs else {
            "fragment-grammars": {}
        },
        "params": {
            "structure_penalty": structurePenalty,
            "pseudocounts": int(pseudoCounts + 0.5),
            "topk": topK,
            "topk_use_only_likelihood": topk_use_only_likelihood,
            "aic": aic if aic != float("inf") else None,
            "arity": a,
        },
        "primitives": [{
            "name": p.name,
            "tp": str(t),
            "logp": finite_logp(l)
        } for l, t, p in g0.productions if p.isPrimitive],
        "inventions": [
            {
                "expression": str(p.body),
                "logp": finite_logp(l)
            }  # -inf is encoded as the finite value -1000
            for l, t, p in g0.productions if p.isInvented
        ],
        "variable_logprob":
        finite_logp(g0.logVariable),
        "frontiers": [{
            "task_tp":
            str(f.task.request),
            "solutions": [{
                "expression": str(e.program),
                "logprior": finite_logp(e.logPrior),
                "loglikelihood": e.logLikelihood,
            } for e in f],
        } for f in frontiers],
    }

    eprint("running rust compressor")

    messageJson = json.dumps(message)

    with open("jsonDebug", "w") as f:
        f.write(messageJson)

    # Python >= 3.6 lets Popen handle text encoding directly;
    # on 3.5 we must encode/decode ourselves.
    if sys.version_info[1] >= 6:
        p = subprocess.Popen(['./rust_compressor/rust_compressor'],
                             encoding='utf-8',
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE)
    elif sys.version_info[1] == 5:
        p = subprocess.Popen(['./rust_compressor/rust_compressor'],
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE)

        messageJson = bytearray(messageJson, encoding='utf-8')
        # convert messageJson string to bytes
    else:
        eprint("must be python 3.5 or 3.6")
        assert False

    p.stdin.write(messageJson)
    p.stdin.flush()
    p.stdin.close()

    # NB: Popen.returncode is only populated by poll()/wait()/communicate(),
    # so as written this check can never fire; p.poll() would be needed here.
    if p.returncode is not None:
        raise ValueError("rust compressor failed")

    if sys.version_info[1] >= 6:
        resp = json.load(p.stdout)
    elif sys.version_info[1] == 5:
        import codecs
        resp = json.load(codecs.getreader('utf-8')(p.stdout))

    productions = [(x["logp"], p) for p, x in
                   zip((p for (_, _, p) in g0.productions if p.isPrimitive), resp["primitives"])] + \
                  [(i["logp"], Invented(Program.parse(i["expression"])))
                   for i in resp["inventions"]]
    productions = [(l if l is not None else float("-inf"), p)
                   for l, p in productions]
    g = Grammar.fromProductions(productions,
                                resp["variable_logprob"],
                                continuationType=g0.continuationType)
    newFrontiers = [
        Frontier([
            FrontierEntry(Program.parse(s["expression"]),
                          logPrior=s["logprior"],
                          logLikelihood=s["loglikelihood"])
            for s in r["solutions"]
        ], f.task) for f, r in zip(frontiers, resp["frontiers"])
    ]
    return g, newFrontiers
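The finite_logp shim exists because JSON has no literal for negative infinity: log probabilities are clamped to the finite value -1000 on the way out, and missing ("null") log probabilities in the response are mapped back to float("-inf") when the productions are rebuilt. The aic parameter gets the same treatment (infinity is sent as null).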
Example 24
def visualizePrimitives(primitives, export='/tmp/logo_primitives.png'):
    from itertools import product
    from dreamcoder.program import Index, Abstraction, Application
    from dreamcoder.utilities import montageMatrix, makeNiceArray
    from dreamcoder.type import tint
    import scipy.misc
    from dreamcoder.domains.logo.makeLogoTasks import parseLogo

    angles = [
        Program.parse(a) for a in [
            "logo_ZA",
            "logo_epsA",
            "(logo_MULA logo_epsA 2)",
            "(logo_DIVA logo_UA 4)",
            "(logo_DIVA logo_UA 5)",
            "(logo_DIVA logo_UA 7)",
            "(logo_DIVA logo_UA 9)",
        ]
    ]
    specialAngles = {
        "#(lambda (lambda (logo_forLoop logo_IFTY (lambda (lambda (logo_FWRT (logo_MULL logo_UL 3) (logo_MULA $2 4) $0))) $1)))":
        [Program.parse("(logo_MULA logo_epsA 4)")] +
        [Program.parse("(logo_DIVA logo_UA %d)" % n) for n in [7, 9]]
    }
    numbers = [Program.parse(n) for n in ["1", "2", "5", "7", "logo_IFTY"]]
    specialNumbers = {
        "#(lambda (#(lambda (lambda (lambda (lambda (logo_forLoop $2 (lambda (lambda (logo_FWRT $5 (logo_DIVA logo_UA $3) $0))) $0))))) (logo_MULL logo_UL $0) 4 4))":
        [Program.parse(str(n)) for n in [1, 2, 3]]
    }
    distances = [
        Program.parse(l) for l in [
            "logo_ZL", "logo_epsL", "(logo_MULL logo_epsL 2)",
            "(logo_DIVL logo_UL 2)", "logo_UL"
        ]
    ]
    subprograms = [
        parseLogo(sp) for sp in [
            "(move 1d 0a)",
            "(loop i infinity (move (*l epsilonLength 4) (*a epsilonAngle 2)))",
            "(loop i infinity (move (*l epsilonLength 5) (/a epsilonAngle 2)))",
            "(loop i 4 (move 1d (/a 1a 4)))"
        ]
    ]

    entireArguments = {
        "#(lambda (lambda (#(#(lambda (lambda (lambda (logo_forLoop $2 (lambda (lambda (logo_FWRT $2 $3 $0))))))) logo_IFTY) (logo_MULA (#(logo_DIVA logo_UA) $1) $0) (#(logo_MULL logo_UL) 3))))":
        [[Program.parse(str(x)) for x in xs]
         for xs in [("3", "1", "$0"), ("4", "1",
                                       "$0"), ("5", "1",
                                               "$0"), ("5", "3",
                                                       "$0"), ("7", "3", "$0")]
         ]
    }
    specialDistances = {
        "#(lambda (lambda (logo_forLoop 7 (lambda (lambda (#(lambda (lambda (lambda (#(lambda (lambda (lambda (logo_forLoop $2 (lambda (lambda (logo_FWRT $2 $3 $0))))))) 7 $1 $2 $0)))) $3 logo_epsA $0))) $0)))":
        [Program.parse("(logo_MULL logo_epsL %d)" % n) for n in range(5)]
    }

    matrix = []
    for p in primitives:
        if not p.isInvented: continue
        t = p.tp
        eprint(p, ":", p.tp)
        if t.returns() != turtle:
            eprint("\t(does not return a turtle)")
            continue

        def argumentChoices(t):
            if t == turtle:
                return [Index(0)]
            elif t == arrow(turtle, turtle):
                return subprograms
            elif t == tint:
                return specialNumbers.get(str(p), numbers)
            elif t == tangle:
                return specialAngles.get(str(p), angles)
            elif t == tlength:
                return specialDistances.get(str(p), distances)
            else:
                return []

        ts = []
        for arguments in entireArguments.get(
                str(p),
                product(*[argumentChoices(t) for t in t.functionArguments()])):
            eprint(arguments)
            pp = p
            for a in arguments:
                pp = Application(pp, a)
            pp = Abstraction(pp)
            i = np.reshape(np.array(drawLogo(pp, resolution=128)), (128, 128))
            if i is not None:
                ts.append(i)

        if ts == []: continue

        matrix.append(ts)
        if len(ts) < 6: ts = [ts]
        else: ts = makeNiceArray(ts)
        r = montageMatrix(ts)
        fn = "/tmp/logo_primitive_%d.png" % len(matrix)
        eprint("\tExported to", fn)
        scipy.misc.imsave(fn, r)

    matrix = montageMatrix(matrix)
    scipy.misc.imsave(export, matrix)
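For each invented primitive whose type returns a turtle, the visualizer enumerates candidate argument tuples (with per-primitive overrides such as specialAngles, specialNumbers, and entireArguments), applies them, renders each resulting program at 128x128, and montages the drawings into one image per primitive as well as a final matrix over all primitives.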
Example 25
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on LOGO tasks.
    """

    # The below legacy global statement is required since prefix_dreams is used by LogoFeatureCNN.
    # TODO(lcary): use argument passing instead of global variables.
    global prefix_dreams

    # The below global statement is required since primitives is modified within main().
    # TODO(lcary): use a function call to retrieve and declare primitives instead.
    global primitives

    visualizeCheckpoint = args.pop("visualize")
    if visualizeCheckpoint is not None:
        with open(visualizeCheckpoint, 'rb') as handle:
            primitives = pickle.load(handle).grammars[-1].primitives
        visualizePrimitives(primitives)
        sys.exit(0)

    dreamCheckpoint = args.pop("dreamCheckpoint")
    dreamDirectory = args.pop("dreamDirectory")

    proto = args.pop("proto")

    if dreamCheckpoint is not None:
        #outputDreams(dreamCheckpoint, dreamDirectory)
        enumerateDreams(dreamCheckpoint, dreamDirectory)
        sys.exit(0)

    animateCheckpoint = args.pop("animate")
    if animateCheckpoint is not None:
        animateSolutions(loadPickle(animateCheckpoint).allFrontiers)
        sys.exit(0)

    target = args.pop("target")
    red = args.pop("reduce")
    save = args.pop("save")
    prefix = args.pop("prefix")
    prefix_dreams = prefix + "/dreams/" + ('_'.join(target)) + "/"
    prefix_pickles = prefix + "/logo." + ('.'.join(target))
    if not os.path.exists(prefix_dreams):
        os.makedirs(prefix_dreams)
    tasks = makeTasks(target, proto)
    eprint("Generated", len(tasks), "tasks")

    costMatters = args.pop("cost")
    for t in tasks:
        t.specialTask[1]["costMatters"] = costMatters
        # disgusting hack - include whether cost matters in the dummy input
        if costMatters: t.examples = [(([1]), t.examples[0][1])]

    os.chdir("prototypical-networks")
    subprocess.Popen(["python", "./protonet_server.py"])
    time.sleep(3)
    os.chdir("..")

    test, train = testTrainSplit(tasks, args.pop("split"))
    eprint("Split tasks into %d/%d test/train" % (len(test), len(train)))
    try:
        if test: montageTasks(test, "test_")
        montageTasks(train, "train_")
    except:
        eprint(
            "WARNING: couldn't generate montage. Do you have an old version of scipy?"
        )

    if red:  # previously "if red is not []", which was always true
        for reducing in red:
            try:
                with open(reducing, 'r') as f:
                    prods = json.load(f)
                    for e in prods:
                        e = Program.parse(e)
                        if e.isInvented:
                            primitives.append(e)
            except (EOFError, IOError, json.decoder.JSONDecodeError):
                eprint("Couldn't grab frontier from " + reducing)

    primitives = list(OrderedDict((x, True) for x in primitives).keys())
    baseGrammar = Grammar.uniform(primitives, continuationType=turtle)

    eprint(baseGrammar)

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/logo/%s" % timestamp
    os.system("mkdir -p %s" % outputDirectory)

    generator = ecIterator(baseGrammar,
                           train,
                           testingTasks=test,
                           outputPrefix="%s/logo" % outputDirectory,
                           evaluationTimeout=0.01,
                           **args)

    r = None
    for result in generator:
        iteration = len(result.learningCurve)
        dreamDirectory = "%s/dreams_%d" % (outputDirectory, iteration)
        os.system("mkdir  -p %s" % dreamDirectory)
        eprint("Dreaming into directory", dreamDirectory)
        dreamFromGrammar(result.grammars[-1], dreamDirectory)
        r = result

    needsExport = [
        str(z) for _, _, z in r.grammars[-1].productions if z.isInvented
    ]
    if save is not None:
        with open(save, 'w') as f:
            json.dump(needsExport, f)
Example 26
def ocamlInduce(g,
                frontiers,
                _=None,
                topK=1,
                pseudoCounts=1.0,
                aic=1.0,
                structurePenalty=0.001,
                a=0,
                CPUs=1,
                bs=1000000,
                topI=300):
    # This is a dirty hack!
    # Memory consumption increases with the number of CPUs
    # And early on we have a lot of stuff to compress
    # If this is the first iteration, only use a fraction of the available CPUs
    topK = 5  # NB: overrides the topK argument above
    topI = 600  # NB: overrides the topI argument above
    if all(not p.isInvented for p in g.primitives):
        if a > 3:
            CPUs = max(1, int(CPUs / 6))
        else:
            CPUs = max(1, int(CPUs / 3))
    else:
        CPUs = max(1, int(CPUs / 2))
    CPUs = 2  # NB: overrides the CPU budget computed above

    # X X X FIXME X X X
    # for unknown reasons doing compression all in one go works correctly and doing it with Python and the outer loop causes problems
    iterations = 99  # maximum number of components to add at once

    while True:
        g0 = g

        originalFrontiers = frontiers
        t2f = {f.task: f for f in frontiers}
        frontiers = [f for f in frontiers if not f.empty]
        message = {
            "arity": a,
            "topK": topK,
            "pseudoCounts": float(pseudoCounts),
            "aic": aic,
            "bs": bs,
            "topI": topI,
            "structurePenalty": float(structurePenalty),
            "CPUs": CPUs,
            "DSL": g.json(),
            "iterations": iterations,
            "frontiers": [f.json() for f in frontiers]
        }

        message = json.dumps(message)
        if True:
            timestamp = datetime.datetime.now().isoformat()
            os.system("mkdir  -p compressionMessages")
            fn = "compressionMessages/%s" % timestamp
            with open(fn, "w") as f:
                f.write(message)
            eprint("Compression message saved to:", fn)

        try:
            # Get relative path
            compressor_file = os.path.join(get_root_dir(), 'compression')
            process = subprocess.Popen(compressor_file,
                                       stdin=subprocess.PIPE,
                                       stdout=subprocess.PIPE)
            response, error = process.communicate(
                bytes(message, encoding="utf-8"))
            response = json.loads(response.decode("utf-8"))
        except OSError as exc:
            raise exc

        g = response["DSL"]
        g = Grammar(g["logVariable"],
                    [(l, p.infer(), p) for production in g["productions"]
                     for l in [production["logProbability"]]
                     for p in [Program.parse(production["expression"])]],
                    continuationType=g0.continuationType)

        frontiers = {
            original.task: Frontier([
                FrontierEntry(p,
                              logLikelihood=e["logLikelihood"],
                              logPrior=g.logLikelihood(original.task.request,
                                                       p))
                for e in new["programs"]
                for p in [Program.parse(e["program"])]
            ],
                                    task=original.task)
            for original, new in zip(frontiers, response["frontiers"])
        }
        frontiers = [
            frontiers.get(f.task, t2f[f.task]) for f in originalFrontiers
        ]
        if iterations == 1 and len(g) > len(g0):
            eprint("Grammar changed - running another round of consolidation.")
            continue
        else:
            eprint("Finished consolidation.")
            return g, frontiers
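Note that with iterations = 99 the iterations == 1 test at the bottom can never be true, so the Python-side loop always returns after a single pass and the OCaml compressor is asked for up to 99 new components in one call, consistent with the FIXME above about compression only behaving correctly when done all in one go.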
Example 27
        real_addition,
        real_multiplication
    ]
    baseGrammar = Grammar.uniform(primitives)
    random.seed(42)
    tasks = makeTasks()

    smooth = arguments.pop('smooth')

    for t in tasks:
        t.features = drawFunction(200, 10., t.f)
        delattr(t, 'f')
        if smooth:
            t.likelihoodThreshold = None

    eprint("Got %d tasks..." % len(tasks))

    test, train = testTrainSplit(tasks, 100)
    random.shuffle(test)
    test = test[:100]
    eprint("Training on", len(train), "tasks")

    if False:
        hardTasks = [t for t in train if '/' in t.name and '[' in t.name]
        for clamp in [True, False]:
            for lr in [0.1, 0.05, 0.5, 1.]:
                for steps in [50, 100, 200]:
                    for attempts in [10, 50, 100, 200]:
                        for s in [0.1, 0.5, 1, 3]:
                            start = time.time()
                            losses = callCompiled(debugMany, hardTasks, clamp,