Exemple #1
0
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on manipulating sequences of numbers.
    """
    random.seed(args.pop("random_seed"))

    tasks = make_list_bootstrap_tasks()
    print(tasks)
    maxTasks = args.pop("maxTasks")
    if maxTasks and len(tasks) > maxTasks:

        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        random.shuffle(tasks)
        del tasks[maxTasks:]

    baseGrammar = Grammar.uniform(McCarthyPrimitives())

    extractor = {
        "learned": LearnedFeatureExtractor,
    }[args.pop("extractor")]
    extractor.H = args.pop("hidden")

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/list/%s" % timestamp
    os.system("mkdir -p %s" % outputDirectory)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/list" % outputDirectory,
        "evaluationTimeout": 0.0005,
    })

    eprint("Got {} list tasks".format(len(tasks)))
    split = args.pop("split")
    if split:
        train_some = defaultdict(list)
        for t in tasks:
            # necessary = train_necessary(t)
            # if not necessary:
            #     continue
            # if necessary == "some":
            # train_some[t.name.split()[0]].append(t)
            # else:
            t.mustTrain = True
        # for k in sorted(train_some):
        #     ts = train_some[k]
        #     random.shuffle(ts)
        #     ts.pop().mustTrain = True

        test, train = testTrainSplit(tasks, split)

        eprint("Alotted {} tasks for training and {} for testing".format(
            len(train), len(test)))
    else:
        train = tasks
        test = []

    explorationCompression(baseGrammar, train, testingTasks=test, **args)
Exemple #2
0
    def learn(self, dataset):
        tasks = []
        max_arity = 0
        for smt, exps in zip(self.semantics, dataset):
            if smt.solved: continue
            smt.update_examples(exps)
            t = smt.make_task()
            if t is not None:
                tasks.append(t)
                max_arity = max(smt.arity, max_arity)
        self.train_args['enumerationTimeout'] = 10 if max_arity == 0 else 200
        self.train_args['iterations'] = 1 if max_arity == 0 else 3
        n_solved = len(['' for t in self.semantics if t.solved])
        print("Semantics: %d/%d/%d (total/solved/learn)." %
              (len(self.semantics), n_solved, len(tasks)))
        if len(tasks) == 0:
            self._print_semantics()
            return
        self._print_tasks(tasks)
        self.update_grammar()
        print(self.grammar)
        result = explorationCompression(self.grammar, tasks, **self.train_args)

        for frontier in result.taskSolutions.values():
            if not frontier.entries: continue
            symbol_idx = int(frontier.task.name)
            self.semantics[symbol_idx].update_program(frontier.bestPosterior)
        examples = [xs for t in tasks for xs, y in t.examples]
        self._removeEquivalentSemantics(examples)
        self._print_semantics()
Exemple #3
0
    def learn(self, dataset):
        tasks = []
        max_arity = 0
        for smt, exps in zip(self.semantics, dataset):
            if not smt.learnable:
                continue
            smt.update_examples(exps)
            t = smt.make_task()
            if t is not None:
                tasks.append(t)
                max_arity = max(smt.arity, max_arity)
        self.train_args['enumerationTimeout'] = 5 if max_arity == 0 else 300
        # self.train_args['iterations'] = 1 if max_arity == 0 else 3
        n_solved = len(['' for t in self.semantics if t.solved])
        print("Semantics: %d/%d/%d (total/solved/learn)." %
              (len(self.semantics), n_solved, len(tasks)))
        if len(tasks) == 0:
            self._print_semantics()
            return
        self._print_tasks(tasks)
        self.update_grammar()
        print(self.grammar)
        # print(self.allFrontiers)
        self.rescore_frontiers(tasks)
        # if self.allFrontiers is not None:
        #     print(self.allFrontiers.values())

        if self.helmholtzFrontiers is not None:
            requests_old = {x.task.request for x in self.helmholtzFrontiers()}
            requests = {t.request for t in tasks}
            # if new requests, discard old helmholtz frontiers
            if requests != requests_old:
                self.helmholtzFrontiers = None

        result = explorationCompression(
            self.grammar,
            tasks,
            allFrontiers=self.allFrontiers,
            helmholtzFrontiers=self.helmholtzFrontiers,
            **self.train_args)
        self.allFrontiers = list(result.allFrontiers.values())
        self.helmholtzFrontiers = result.helmholtzFrontiers

        for frontier in result.taskSolutions.values():
            if not frontier.entries: continue
            symbol_idx = int(frontier.task.name)
            # print(frontier)
            self.semantics[symbol_idx].update_program(frontier.bestPosterior)
        # examples = [xs for t in tasks for xs, y in t.examples]
        # self._removeEquivalentSemantics(examples)
        self._removeEquivalentSemantics()
        self._print_semantics()
Exemple #4
0
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on manipulating sequences of numbers.
    """
    random.seed(args.pop("random_seed"))

    dataset = args.pop("dataset")
    tasks = {
        "Lucas-old":
        lambda: retrieveJSONTasks("data/list_tasks.json") + sortBootstrap(),
        "bootstrap":
        make_list_bootstrap_tasks,
        "sorting":
        sortBootstrap,
        "Lucas-depth1":
        lambda: retrieveJSONTasks("data/list_tasks2.json")[:105],
        "Lucas-depth2":
        lambda: retrieveJSONTasks("data/list_tasks2.json")[:4928],
        "Lucas-depth3":
        lambda: retrieveJSONTasks("data/list_tasks2.json"),
    }[dataset]()

    maxTasks = args.pop("maxTasks")
    if maxTasks and len(tasks) > maxTasks:
        necessaryTasks = []  # maxTasks will not consider these
        if dataset.startswith("Lucas2.0") and dataset != "Lucas2.0-depth1":
            necessaryTasks = tasks[:105]

        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        random.shuffle(tasks)
        del tasks[maxTasks:]
        tasks = necessaryTasks + tasks

    if dataset.startswith("Lucas"):
        # extra tasks for filter
        tasks.extend([
            Task("remove empty lists",
                 arrow(tlist(tlist(tbool)), tlist(tlist(tbool))),
                 [((ls, ), list(filter(lambda l: len(l) > 0, ls)))
                  for _ in range(15) for ls in [[[
                      random.random() < 0.5
                      for _ in range(random.randint(0, 3))
                  ] for _ in range(4)]]]),
            Task("keep squares", arrow(tlist(tint), tlist(tint)), [
                ((xs, ), list(filter(lambda x: int(math.sqrt(x))**2 == x, xs)))
                for _ in range(15) for xs in [[
                    random.choice([0, 1, 4, 9, 16, 25])
                    if random.random() < 0.5 else random.randint(0, 9)
                    for _ in range(7)
                ]]
            ]),
            Task("keep primes", arrow(tlist(tint), tlist(tint)), [
                ((xs, ),
                 list(
                     filter(
                         lambda x: x in
                         {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37}, xs)))
                for _ in range(15) for xs in [[
                    random.choice([2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37])
                    if random.random() < 0.5 else random.randint(0, 9)
                    for _ in range(7)
                ]]
            ]),
        ])
        for i in range(4):
            tasks.extend([
                Task("keep eq %s" % i, arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: x == i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]]),
                Task("remove eq %s" % i, arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: x != i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]]),
                Task("keep gt %s" % i, arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: x > i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]]),
                Task("remove gt %s" % i, arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: not x > i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]])
            ])

    def isIdentityTask(t):
        return all(len(xs) == 1 and xs[0] == y for xs, y in t.examples)

    eprint("Removed", sum(isIdentityTask(t) for t in tasks),
           "tasks that were just the identity function")
    tasks = [t for t in tasks if not isIdentityTask(t)]

    prims = {
        "base": basePrimitives,
        "McCarthy": McCarthyPrimitives,
        "common": bootstrapTarget_extra,
        "noLength": no_length,
        "rich": primitives
    }[args.pop("primitives")]()
    haveLength = not args.pop("noLength")
    haveMap = not args.pop("noMap")
    haveUnfold = not args.pop("noUnfold")
    eprint(f"Including map as a primitive? {haveMap}")
    eprint(f"Including length as a primitive? {haveLength}")
    eprint(f"Including unfold as a primitive? {haveUnfold}")
    baseGrammar = Grammar.uniform([p
                                   for p in prims
                                   if (p.name != "map" or haveMap) and \
                                   (p.name != "unfold" or haveUnfold) and \
                                   (p.name != "length" or haveLength)])

    extractor = {
        "learned": LearnedFeatureExtractor,
    }[args.pop("extractor")]
    extractor.H = args.pop("hidden")

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/list/%s" % timestamp
    os.system("mkdir -p %s" % outputDirectory)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/list" % outputDirectory,
        "evaluationTimeout": 0.0005,
    })

    eprint("Got {} list tasks".format(len(tasks)))
    split = args.pop("split")
    if split:
        train_some = defaultdict(list)
        for t in tasks:
            necessary = train_necessary(t)
            if not necessary:
                continue
            if necessary == "some":
                train_some[t.name.split()[0]].append(t)
            else:
                t.mustTrain = True
        for k in sorted(train_some):
            ts = train_some[k]
            random.shuffle(ts)
            ts.pop().mustTrain = True

        test, train = testTrainSplit(tasks, split)
        if True:
            test = [t for t in test if t.name not in EASYLISTTASKS]

        eprint("Alotted {} tasks for training and {} for testing".format(
            len(train), len(test)))
    else:
        train = tasks
        test = []

    explorationCompression(baseGrammar, train, testingTasks=test, **args)
Exemple #5
0
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on regular expressions.
    """
    #for dreaming

    #parse use_ll_cutoff
    use_ll_cutoff = args.pop('use_ll_cutoff')
    if not use_ll_cutoff is False:

        #if use_ll_cutoff is a list of strings, then train_ll_cutoff and train_ll_cutoff 
        #will be tuples of that string followed by the actual model

        if len(use_ll_cutoff) == 1:
            train_ll_cutoff = use_ll_cutoff[0] # make_cutoff_model(use_ll_cutoff[0], tasks))
            test_ll_cutoff = use_ll_cutoff[0] # make_cutoff_model(use_ll_cutoff[0], tasks))
        else:
            assert len(use_ll_cutoff) == 2
            train_ll_cutoff = use_ll_cutoff[0] #make_cutoff_model(use_ll_cutoff[0], tasks))
            test_ll_cutoff = use_ll_cutoff[1] #make_cutoff_model(use_ll_cutoff[1], tasks))
    else:
        train_ll_cutoff = None
        test_ll_cutoff = None


    regexTasks = {"old": makeOldTasks,
                "short": makeShortTasks,
                "long": makeLongTasks,
                "words": makeWordTasks,
                "number": makeNumberTasks,
                "handpicked": makeHandPickedTasks,
                "new": makeNewTasks,
                "newNumber": makeNewNumberTasks
                }[args.pop("tasks")]

    tasks = regexTasks()  # TODO
    eprint("Generated", len(tasks), "tasks")

    maxTasks = args.pop("maxTasks")
    if len(tasks) > maxTasks:
        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        seed = 42 # previously this was hardcoded and never changed
        random.seed(seed)
        random.shuffle(tasks)
        del tasks[maxTasks:]

    maxExamples = args.pop("maxExamples")
   

    split = args.pop("split")
    test, train = testTrainSplit(tasks, split)
    eprint("Split tasks into %d/%d test/train" % (len(test), len(train)))


    test = add_cutoff_values(test, test_ll_cutoff)
    train = add_cutoff_values(train, train_ll_cutoff)
    eprint("added cutoff values to tasks, train: ", train_ll_cutoff, ", test:", test_ll_cutoff )


    if args.pop("use_str_const"):
        assert args["primitives"] == "strConst" or args["primitives"] == "reduced"
        ConstantInstantiateVisitor.SINGLE = \
            ConstantInstantiateVisitor()
        test = add_string_constants(test)
        train = add_string_constants(train)
        eprint("added string constants to test and train")
    
    for task in test + train:
        if len(task.examples) > maxExamples:
            task.examples = task.examples[:maxExamples]

        task.specialTask = ("regex", {"cutoff": task.ll_cutoff, "str_const": task.str_const})
        task.examples = [(xs, [y for y in ys ])
                         for xs,ys in task.examples ]
        task.maxParameters = 1

    # from list stuff
    primtype = args.pop("primitives")
    prims = {"base": basePrimitives,
             "alt1": altPrimitives,
             "alt2": alt2Primitives,
             "easyWords": easyWordsPrimitives,
             "concat": concatPrimitives,
             "reduced": reducedConcatPrimitives,
             "strConst": strConstConcatPrimitives
             }[primtype]

    extractor = {
        "learned": LearnedFeatureExtractor,
        "json": MyJSONFeatureExtractor
    }[args.pop("extractor")]

    extractor.H = args.pop("hidden")

    #stardecay = args.stardecay
    #stardecay = args.pop('stardecay')
    #decaystr = 'd' + str(stardecay)
    import datetime

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/regex/%s"%timestamp
    os.system("mkdir -p %s"%outputDirectory)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/regex"%(outputDirectory),
        "evaluationTimeout": 0.005,
        "topk_use_only_likelihood": True,
        "maximumFrontier": 10,
        "compressor": "ocaml"
    })
    ####


        # use the
    #prim_list = prims(stardecay)
    prim_list = prims()
    specials = ["r_kleene", "r_plus", "r_maybe", "r_alt", "r_concat"]
    n_base_prim = len(prim_list) - len(specials)

    productions = [
        (math.log(0.5 / float(n_base_prim)),
         prim) if prim.name not in specials else (
            math.log(0.10),
            prim) for prim in prim_list]


    baseGrammar = Grammar.fromProductions(productions, continuationType=tpregex)
    #baseGrammar = Grammar.uniform(prims())

    #for i in range(100):
    #    eprint(baseGrammar.sample(tpregex))

    #eprint(baseGrammar)
    #explore
    test_stuff = args.pop("debug")
    if test_stuff:
        eprint(baseGrammar)
        eprint("sampled programs from prior:")
        for i in range(100): #100
            eprint(baseGrammar.sample(test[0].request,maximumDepth=1000))
        eprint("""half the probability mass is on higher-order primitives.
Therefore half of enumerated programs should have more than one node.
However, we do not observe this.
Instead we see a very small fraction of programs have more than one node. 
So something seems to be wrong with grammar.sample.

Furthermore: observe the large print statement above. 
This prints the candidates for sampleDistribution in grammar.sample.
the first element of each tuple is the probability passed into sampleDistribution.
Half of the probability mass should be on the functions, but instead they are equally 
weighted with the constants. If you look at the grammar above, this is an error!!!!
""")
        assert False

    del args["likelihoodModel"]
    explorationCompression(baseGrammar, train,
                           testingTasks = test,
                           **args)
Exemple #6
0
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on manipulating sequences of numbers.
    """
    random.seed(args.pop("random_seed"))

    tasks = make_list_bootstrap_tasks()
    print(tasks)
    maxTasks = args.pop("maxTasks")
    if maxTasks and len(tasks) > maxTasks:

        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        random.shuffle(tasks)
        del tasks[maxTasks:]

    primitives = McCarthyPrimitives()
    from dreamcoder.program import Program, Invented
    # plus = Program.parse("(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 $1 (incr ($2 $1 (decr0 $0))))))))))")
    # plus = Invented(plus)
    # primitives.append(plus)
    # minus = Program.parse("(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 $1 ($2 (decr0 $1) (decr0 $0)))))))))")
    # minus = Invented(minus)
    # primitives.append(minus)
    # times = Program.parse("(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 0 (#(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 $1 (incr ($2 $1 (decr0 $0)))))))))) $1 ($2 (decr0 $0) $1)))))))))")
    # times = Invented(times)
    # primitives.append(times)
    baseGrammar = Grammar.uniform(primitives)
    baseGrammar = Grammar(
        0.0, [(5.0 if p.name.startswith('fix') else 0.0, p.infer(), p)
              for p in primitives])

    extractor = {
        "learned": LearnedFeatureExtractor,
    }[args.pop("extractor")]
    extractor.H = args.pop("hidden")

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/list/%s" % timestamp
    os.system("mkdir -p %s" % outputDirectory)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/list" % outputDirectory,
        "evaluationTimeout": 0.0005,
    })

    eprint("Got {} list tasks".format(len(tasks)))
    split = args.pop("split")
    if split:
        train_some = defaultdict(list)
        for t in tasks:
            # necessary = train_necessary(t)
            # if not necessary:
            #     continue
            # if necessary == "some":
            # train_some[t.name.split()[0]].append(t)
            # else:
            t.mustTrain = True
        # for k in sorted(train_some):
        #     ts = train_some[k]
        #     random.shuffle(ts)
        #     ts.pop().mustTrain = True

        test, train = testTrainSplit(tasks, split)

        eprint("Alotted {} tasks for training and {} for testing".format(
            len(train), len(test)))
    else:
        train = tasks
        test = []

    result = explorationCompression(baseGrammar,
                                    train,
                                    testingTasks=test,
                                    **args)
    print([x.bestPosterior for x in result.taskSolutions.values()])
                        for s in [0.1, 0.5, 1, 3]:
                            start = time.time()
                            losses = callCompiled(debugMany, hardTasks, clamp,
                                                  lr, steps, attempts, s)
                            losses = dict(zip(hardTasks, losses))
                            failures = 0
                            for t, l in sorted(losses.items(),
                                               key=lambda t_l: t_l[1]):
                                # print t,l
                                if l > -t.likelihoodThreshold:
                                    failures += 1
                            eprint("clamp,lr,steps, attempts,std", clamp, lr,
                                   steps, attempts, s)
                            eprint("%d/%d failures" %
                                   (failures, len(hardTasks)))
                            eprint("dt=", time.time() - start)
                            eprint()
                            eprint()

        assert False
    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/rational/%s" % timestamp
    os.system("mkdir -p %s" % outputDirectory)

    explorationCompression(baseGrammar,
                           train,
                           outputPrefix="%s/rational" % outputDirectory,
                           evaluationTimeout=0.1,
                           testingTasks=test,
                           **arguments)
Exemple #8
0
        fpi,
        real_power,
        real_subtraction,
        real_addition,
        real_division,
        real_multiplication
    ] + [
        Program.parse(n)
        for n in ["map", "fold", "empty", "cons", "car", "cdr", "zip"]
    ]
    baseGrammar = Grammar.uniform(equationPrimitives)

    eprint("Got %d equation discovery tasks..." % len(tasks))

    explorationCompression(baseGrammar,
                           tasks,
                           outputPrefix="experimentOutputs/scientificLaws",
                           evaluationTimeout=0.1,
                           testingTasks=[],
                           **commandlineArguments(
                               compressor="ocaml",
                               featureExtractor=DummyFeatureExtractor,
                               iterations=10,
                               CPUs=numberOfCPUs(),
                               structurePenalty=0.5,
                               helmholtzRatio=0.5,
                               a=3,
                               maximumFrontier=10000,
                               topK=2,
                               pseudoCounts=10.0))