Example #1
def manualLogoTask(name,
                   expression,
                   proto=False,
                   needToTrain=False,
                   supervise=False,
                   lambdaCalculus=False):
    p = Program.parse(expression) if lambdaCalculus else parseLogo(expression)
    from dreamcoder.domains.logo.logoPrimitives import primitives
    from dreamcoder.grammar import Grammar
    g = Grammar.uniform(primitives, continuationType=turtle)
    gp = Grammar.uniform(primitives)
    try:
        l = g.logLikelihood(arrow(turtle, turtle), p)
        lp = gp.logLikelihood(arrow(turtle, turtle), p)
        assert l >= lp
        eprint(name, -l, "nats")

    except Exception:
        eprint("WARNING: could not calculate likelihood of manual logo", p)

    attempts = 0
    while True:
        [output, highresolution] = drawLogo(p,
                                            p,
                                            resolution=[28, 128],
                                            cost=True)
        if output == "timeout" or highresolution == "timeout":
            attempts += 1
        else:
            break
    if attempts > 0:
        eprint(
            f"WARNING: Took {attempts} attempts to render task {name} within timeout"
        )

    cost = output[1]
    output = output[0]
    assert highresolution[1] == cost
    highresolution = highresolution[0]

    shape = list(map(int, output))
    highresolution = list(map(float, highresolution))
    t = Task(name, arrow(turtle, turtle), [(([0]), shape)])
    t.mustTrain = needToTrain
    t.proto = proto
    t.specialTask = ("LOGO", {"proto": proto})
    t.specialTask[1]["cost"] = cost * 1.05

    t.highresolution = highresolution

    if supervise:
        t.supervisedSolution = p

    return t
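
A hypothetical call. The LOGO s-expression syntax accepted by parseLogo is
not shown in this snippet, so this sketch takes the lambdaCalculus=True path
with the identity program, which type-checks as arrow(turtle, turtle):

# Hypothetical usage: the identity continuation draws a blank canvas.
t = manualLogoTask("blank canvas", "(lambda $0)",
                   needToTrain=True, lambdaCalculus=True)
assert t.mustTrain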
Example #2
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on manipulating sequences of numbers.
    """
    random.seed(args.pop("random_seed"))

    tasks = make_list_bootstrap_tasks()
    print(tasks)
    maxTasks = args.pop("maxTasks")
    if maxTasks and len(tasks) > maxTasks:

        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        random.shuffle(tasks)
        del tasks[maxTasks:]

    baseGrammar = Grammar.uniform(McCarthyPrimitives())

    extractor = {
        "learned": LearnedFeatureExtractor,
    }[args.pop("extractor")]
    extractor.H = args.pop("hidden")

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/list/%s" % timestamp
    os.system("mkdir -p %s" % outputDirectory)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/list" % outputDirectory,
        "evaluationTimeout": 0.0005,
    })

    eprint("Got {} list tasks".format(len(tasks)))
    split = args.pop("split")
    if split:
        train_some = defaultdict(list)
        for t in tasks:
            # necessary = train_necessary(t)
            # if not necessary:
            #     continue
            # if necessary == "some":
            # train_some[t.name.split()[0]].append(t)
            # else:
            t.mustTrain = True
        # for k in sorted(train_some):
        #     ts = train_some[k]
        #     random.shuffle(ts)
        #     ts.pop().mustTrain = True

        test, train = testTrainSplit(tasks, split)

        eprint("Alotted {} tasks for training and {} for testing".format(
            len(train), len(test)))
    else:
        train = tasks
        test = []

    explorationCompression(baseGrammar, train, testingTasks=test, **args)
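
A minimal sketch of how a main() like this is typically driven; the flag
values are illustrative (borrowed from the commandlineArguments call in
Example #4), not this file's actual defaults:

if __name__ == "__main__":
    # commandlineArguments() builds the options dict that main() consumes
    # via args.pop(...) / args.update(...), as shown above.
    main(commandlineArguments(enumerationTimeout=10,
                              iterations=2,
                              CPUs=numberOfCPUs(),
                              extras=list_options))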
Example #3
def demoLogoTasks():
    import scipy.misc
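    # NOTE: scipy.misc.imsave was deprecated in SciPy 1.0 and removed in 1.2;
    # newer environments can use imageio.imwrite instead.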
    import numpy as np

    g0 = Grammar.uniform(primitives, continuationType=turtle)
    eprint("dreaming into /tmp/dreams_0...")
    N = 1000
    programs = [
        p for _ in range(N)
        for p in [g0.sample(arrow(turtle, turtle), maximumDepth=20)]
        if p is not None
    ]
    os.system("mkdir  -p /tmp/dreams_0")
    for n, p in enumerate(programs):
        with open(f"/tmp/dreams_0/{n}.dream", "w") as handle:
            handle.write(str(p))
    drawLogo(*programs,
             pretty=True,
             smoothPretty=False,
             resolution=512,
             filenames=[
                 f"/tmp/dreams_0/{n}_pretty.png" for n in range(len(programs))
             ],
             timeout=1)

    if len(sys.argv) > 1:
        tasks = makeTasks(sys.argv[1:], proto=False)
    else:
        tasks = makeTasks(['all'], proto=False)
    montageTasks(tasks, columns=16, testTrain=True)
    for n, t in enumerate(tasks):
        a = t.highresolution
        w = int(len(a)**0.5)
        scipy.misc.imsave('/tmp/logo%d.png' % n,
                          np.array([a[i:i + w] for i in range(0, len(a), w)]))
        logo_safe_name = t.name.replace("=", "_").replace(' ', '_').replace(
            '/', '_').replace("-", "_") + ".png"
        #os.system(f"convert /tmp/logo{n}.png -morphology Dilate Octagon /tmp/{logo_safe_name}")
        os.system(
            f"convert /tmp/logo{n}.png -channel RGB -negate /tmp/{logo_safe_name}"
        )
    eprint(len(tasks), "tasks")
    eprint(sum(t.mustTrain for t in tasks), "need to be trained on")

    for t in dSLDemo():
        a = t.highresolution
        w = int(len(a)**0.5)
        scipy.misc.imsave('/tmp/logoDemo%s.png' % t.name,
                          np.array([a[i:i + w] for i in range(0, len(a), w)]))
        os.system(
            f"convert /tmp/logoDemo{t.name}.png -morphology Dilate Octagon /tmp/logoDemo{t.name}_dilated.png"
        )

    tasks = [t for t in tasks if t.mustTrain]
    random.shuffle(tasks)
    montageTasks(tasks[:16 * 3], "subset", columns=16)

    montageTasks(rotationalSymmetryDemo(), "rotational")
Example #4
    def __init__(self):
        args = commandlineArguments(enumerationTimeout=200,
                                    activation='tanh',
                                    iterations=1,
                                    recognitionTimeout=3600,
                                    a=3,
                                    maximumFrontier=5,
                                    topK=2,
                                    pseudoCounts=30.0,
                                    helmholtzRatio=0.5,
                                    structurePenalty=1.,
                                    CPUs=min(numberOfCPUs(), 8),
                                    extras=list_options)

        args['noConsolidation'] = True
        args.pop("random_seed")
        args['contextual'] = True
        args['biasOptimal'] = True
        args['auxiliaryLoss'] = True
        args['activation'] = "relu"
        args['useDSL'] = False

        extractor = {
            "learned": LearnedFeatureExtractor,
        }[args.pop("extractor")]
        extractor.H = args.pop("hidden")

        timestamp = datetime.datetime.now().isoformat()
        outputDirectory = "tmp/%s" % timestamp
        os.system("mkdir -p %s" % outputDirectory)

        args.update({
            "featureExtractor": extractor,
            "outputPrefix": "%s/hint" % outputDirectory,
            "evaluationTimeout": 0.0005,
        })
        args.pop("maxTasks")
        args.pop("split")

        self.primitives = McCarthyPrimitives()
        baseGrammar = Grammar.uniform(self.primitives)
        self.grammar = baseGrammar
        self.train_args = args
        self.semantics = [Semantics(i) for i in range(len(SYMBOLS))]
        self.allFrontiers = None
        self.helmholtzFrontiers = None
Example #5
    def update_grammar(self):
        programs = [
            Invented(smt.program.prog) for smt in self.semantics
            if smt.learnable and smt.solved and smt.program is not None
            and smt.program.arity > 0 and '#' not in str(smt.program)
        ]
        # If '#' appears in a program, the program uses an invented primitive
        # and is likely to be expensive to evaluate. We therefore don't add it
        # to the primitives, since it could slow enumeration considerably;
        # increasing the enumeration timeout might make such programs viable.
        new_grammar = Grammar.uniform(self.primitives + programs)
        # self.train_args['enumerationTimeout'] += 100 * len(programs)
        if new_grammar != self.grammar:
            self.grammar = new_grammar
            self.helmholtzFrontiers = None
            self.allFrontiers = None
            print("Updated grammar with invented programs and reset frontiers to None.")
Example #6
def memorizeInduce(g, frontiers, **kwargs):
    existingInventions = {p.uncurry() for p in g.primitives}
    programs = {f.bestPosterior.program for f in frontiers if not f.empty}
    newInventions = programs - existingInventions
    newGrammar = Grammar.uniform([p for p in g.primitives] +
                                 [Invented(ni) for ni in newInventions])

    # rewrite the old frontiers in terms of the new invented primitives
    def substitute(p):
        if p in newInventions:
            return Invented(p).uncurry()
        return p
    newFrontiers = [Frontier([FrontierEntry(program=np,
                                            logPrior=newGrammar.logLikelihood(f.task.request, np),
                                            logLikelihood=e.logLikelihood)
                              for e in f
                              for np in [substitute(e.program)]],
                             task=f.task)
                    for f in frontiers]
    return newGrammar, newFrontiers
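
A hedged usage sketch: g and frontiers stand in for whatever grammar and
solved frontiers the caller already has from an earlier enumeration step.

# Memorization as a cheap stand-in for real compression: every distinct
# best-posterior program becomes its own invented primitive.
newGrammar, newFrontiers = memorizeInduce(g, frontiers)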
Example #7

def deepcoderProductions():
    return [(0.0, prim) for prim in deepcoderPrimitives()]


def flatten_program(p):
    string = p.show(False)
    num_inputs = string.count('lambda')
    string = string.replace('lambda', '')
    string = string.replace('(', '')
    string = string.replace(')', '')
    # remove '_fn' (optional)
    for i in range(num_inputs):
        string = string.replace('$' + str(num_inputs - i - 1), 'input_' + str(i))
    string = string.split(' ')
    string = list(filter(lambda x: x != '', string))
    return string

if __name__ == "__main__":
    #g = Grammar.uniform(deepcoderPrimitives())
    g = Grammar.fromProductions(deepcoderProductions(), logVariable=.9)
    request = arrow(tlist(tint), tint, tint)
    p = g.sample(request)
    print("request:", request)
    print("program:")
    print(prettyProgram(p))
    print("flattened_program:")
    flat = flatten_program(p)
    print(flat)
Example #8
                                     helmholtzRatio=0.5,
                                     activation="tanh",
                                     maximumFrontier=5,
                                     a=3,
                                     topK=2,
                                     pseudoCounts=30.0,
                                     extras=rational_options)

    primitives = [
        real,
        # f1,
        real_division,
        real_addition,
        real_multiplication
    ]
    baseGrammar = Grammar.uniform(primitives)
    random.seed(42)
    tasks = makeTasks()

    smooth = arguments.pop('smooth')

    for t in tasks:
        t.features = drawFunction(200, 10., t.f)
        delattr(t, 'f')
        if smooth:
            t.likelihoodThreshold = None

    eprint("Got %d tasks..." % len(tasks))

    test, train = testTrainSplit(tasks, 100)
    random.shuffle(test)
Example #9
        ('mapping', [
            '(map _ $x)',
            '(mapi _ $x)',
            '(flatten $x)',
            '(map _ (zip (droplast 1 $x) (drop 1 $x)))',
            '(map _ (drop 1 $x))'
        ], lambda e: e == '$l')
    ])



## features computed in 'predict()' in bin/list_routines_misc.py

Primitive.GLOBALS.clear()
Grammar.uniform(list_routines_misc.primitives())
pre_features = {
    'program_length': lambda _, __, ___, p: Program.parse(p).size(),
    'depth': lambda _, __, ___, p: Program.parse(p).depth(),
    'apps': lambda _, __, ___, p: list_routines_misc.count_applications(Program.parse(p))
}


## miscellaneous features

misc_features = {}

concept_examples = {'model': {}, 'dataset': {}}
for purpose, num in [('model', 100), ('dataset', 150)]:
    concepts = os.listdir('analysis/concept_examples/{}'.format(purpose))
    for concept in map(lambda n: 'c{:03}'.format(n), range(1, num+1)):
Example #10
        # Primitive("map", arrow(arrow(t0, t1), tlist(t0), tlist(t1)), _map),
        # Primitive("index", arrow(tint,tlist(t0),t0),None),
        # Primitive("length", arrow(tlist(t0),tint),None),
        primitiveRecursion1,
        #primitiveRecursion2,
        Primitive("gt?", arrow(tint, tint, tbool), _gt),
        Primitive("if", arrow(tbool, t0, t0, t0), _if),
        Primitive("eq?", arrow(tint, tint, tbool), _eq),
        Primitive("+", arrow(tint, tint, tint), _addition),
        Primitive("-", arrow(tint, tint, tint), _subtraction),
    ] + [Primitive(str(j), tint, j) for j in range(2)]


if __name__ == "__main__":
    bootstrapTarget()
    g = Grammar.uniform(McCarthyPrimitives())
    # with open("/home/ellisk/om/ec/experimentOutputs/list_aic=1.0_arity=3_ET=1800_expandFrontier=2.0_it=4_likelihoodModel=all-or-nothing_MF=5_baseline=False_pc=10.0_L=1.0_K=5_rec=False.pickle", "rb") as handle:
    #     b = pickle.load(handle).grammars[-1]
    # print b

    p = Program.parse(
        "(lambda (lambda (lambda (if (empty? $0) empty (cons (+ (car $1) (car $0)) ($2 (cdr $1) (cdr $0)))))))")
    t = arrow(tlist(tint), tlist(tint), tlist(tint))  # ,tlist(tbool))
    print(g.logLikelihood(arrow(t, t), p))
    assert False
    # print(b.logLikelihood(arrow(t, t), p))  # needs the pickled grammar `b` commented out above

    # p = Program.parse("""(lambda (lambda
    # (unfold 0
    # (lambda (+ (index $0 $2) (index $0 $1)))
    # (lambda (1+ $0))
Example #11
    bootstrapTarget()
    equationPrimitives = [
        #        real,
        f0,
        f1,
        fpi,
        real_power,
        real_subtraction,
        real_addition,
        real_division,
        real_multiplication
    ] + [
        Program.parse(n)
        for n in ["map", "fold", "empty", "cons", "car", "cdr", "zip"]
    ]
    baseGrammar = Grammar.uniform(equationPrimitives)

    eprint("Got %d equation discovery tasks..." % len(tasks))

    explorationCompression(baseGrammar,
                           tasks,
                           outputPrefix="experimentOutputs/scientificLaws",
                           evaluationTimeout=0.1,
                           testingTasks=[],
                           **commandlineArguments(
                               compressor="ocaml",
                               featureExtractor=DummyFeatureExtractor,
                               iterations=10,
                               CPUs=numberOfCPUs(),
                               structurePenalty=0.5,
                               helmholtzRatio=0.5,
Example #12
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on manipulating sequences of numbers.
    """
    random.seed(args.pop("random_seed"))

    tasks = make_list_bootstrap_tasks()
    print(tasks)
    maxTasks = args.pop("maxTasks")
    if maxTasks and len(tasks) > maxTasks:

        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        random.shuffle(tasks)
        del tasks[maxTasks:]

    primitives = McCarthyPrimitives()
    from dreamcoder.program import Program, Invented
    # plus = Program.parse("(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 $1 (incr ($2 $1 (decr0 $0))))))))))")
    # plus = Invented(plus)
    # primitives.append(plus)
    # minus = Program.parse("(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 $1 ($2 (decr0 $1) (decr0 $0)))))))))")
    # minus = Invented(minus)
    # primitives.append(minus)
    # times = Program.parse("(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 0 (#(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 $1 (incr ($2 $1 (decr0 $0)))))))))) $1 ($2 (decr0 $0) $1)))))))))")
    # times = Invented(times)
    # primitives.append(times)
    baseGrammar = Grammar.uniform(primitives)
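    # NOTE: the uniform grammar above is immediately replaced below by a
    # weighted grammar that gives `fix*` primitives log-weight 5.0 instead of 0.0.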
    baseGrammar = Grammar(
        0.0, [(5.0 if p.name.startswith('fix') else 0.0, p.infer(), p)
              for p in primitives])

    extractor = {
        "learned": LearnedFeatureExtractor,
    }[args.pop("extractor")]
    extractor.H = args.pop("hidden")

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/list/%s" % timestamp
    os.system("mkdir -p %s" % outputDirectory)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/list" % outputDirectory,
        "evaluationTimeout": 0.0005,
    })

    eprint("Got {} list tasks".format(len(tasks)))
    split = args.pop("split")
    if split:
        train_some = defaultdict(list)
        for t in tasks:
            # necessary = train_necessary(t)
            # if not necessary:
            #     continue
            # if necessary == "some":
            # train_some[t.name.split()[0]].append(t)
            # else:
            t.mustTrain = True
        # for k in sorted(train_some):
        #     ts = train_some[k]
        #     random.shuffle(ts)
        #     ts.pop().mustTrain = True

        test, train = testTrainSplit(tasks, split)

        eprint("Alotted {} tasks for training and {} for testing".format(
            len(train), len(test)))
    else:
        train = tasks
        test = []

    result = explorationCompression(baseGrammar,
                                    train,
                                    testingTasks=test,
                                    **args)
    print([x.bestPosterior for x in result.taskSolutions.values()])
Example #13
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on regular expressions.
    """
    #for dreaming

    #parse use_ll_cutoff
    use_ll_cutoff = args.pop('use_ll_cutoff')
    if use_ll_cutoff is not False:

        #if use_ll_cutoff is a list of strings, then train_ll_cutoff and test_ll_cutoff
        #will be tuples of that string followed by the actual model

        if len(use_ll_cutoff) == 1:
            train_ll_cutoff = use_ll_cutoff[0] # make_cutoff_model(use_ll_cutoff[0], tasks))
            test_ll_cutoff = use_ll_cutoff[0] # make_cutoff_model(use_ll_cutoff[0], tasks))
        else:
            assert len(use_ll_cutoff) == 2
            train_ll_cutoff = use_ll_cutoff[0] #make_cutoff_model(use_ll_cutoff[0], tasks))
            test_ll_cutoff = use_ll_cutoff[1] #make_cutoff_model(use_ll_cutoff[1], tasks))
    else:
        train_ll_cutoff = None
        test_ll_cutoff = None


    regexTasks = {"old": makeOldTasks,
                "short": makeShortTasks,
                "long": makeLongTasks,
                "words": makeWordTasks,
                "number": makeNumberTasks,
                "handpicked": makeHandPickedTasks,
                "new": makeNewTasks,
                "newNumber": makeNewNumberTasks
                }[args.pop("tasks")]

    tasks = regexTasks()  # TODO
    eprint("Generated", len(tasks), "tasks")

    maxTasks = args.pop("maxTasks")
    if len(tasks) > maxTasks:
        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        seed = 42 # previously this was hardcoded and never changed
        random.seed(seed)
        random.shuffle(tasks)
        del tasks[maxTasks:]

    maxExamples = args.pop("maxExamples")

    split = args.pop("split")
    test, train = testTrainSplit(tasks, split)
    eprint("Split tasks into %d/%d test/train" % (len(test), len(train)))


    test = add_cutoff_values(test, test_ll_cutoff)
    train = add_cutoff_values(train, train_ll_cutoff)
    eprint("added cutoff values to tasks, train: ", train_ll_cutoff, ", test:", test_ll_cutoff )


    if args.pop("use_str_const"):
        assert args["primitives"] == "strConst" or args["primitives"] == "reduced"
        ConstantInstantiateVisitor.SINGLE = \
            ConstantInstantiateVisitor()
        test = add_string_constants(test)
        train = add_string_constants(train)
        eprint("added string constants to test and train")
    
    for task in test + train:
        if len(task.examples) > maxExamples:
            task.examples = task.examples[:maxExamples]

        task.specialTask = ("regex", {"cutoff": task.ll_cutoff, "str_const": task.str_const})
        task.examples = [(xs, [y for y in ys])
                         for xs, ys in task.examples]
        task.maxParameters = 1

    # from list stuff
    primtype = args.pop("primitives")
    prims = {"base": basePrimitives,
             "alt1": altPrimitives,
             "alt2": alt2Primitives,
             "easyWords": easyWordsPrimitives,
             "concat": concatPrimitives,
             "reduced": reducedConcatPrimitives,
             "strConst": strConstConcatPrimitives
             }[primtype]

    extractor = {
        "learned": LearnedFeatureExtractor,
        "json": MyJSONFeatureExtractor
    }[args.pop("extractor")]

    extractor.H = args.pop("hidden")

    #stardecay = args.stardecay
    #stardecay = args.pop('stardecay')
    #decaystr = 'd' + str(stardecay)
    import datetime

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/regex/%s"%timestamp
    os.system("mkdir -p %s"%outputDirectory)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/regex"%(outputDirectory),
        "evaluationTimeout": 0.005,
        "topk_use_only_likelihood": True,
        "maximumFrontier": 10,
        "compressor": "ocaml"
    })
    ####


    #prim_list = prims(stardecay)
    prim_list = prims()
    specials = ["r_kleene", "r_plus", "r_maybe", "r_alt", "r_concat"]
    n_base_prim = len(prim_list) - len(specials)

    productions = [
        (math.log(0.5 / float(n_base_prim)),
         prim) if prim.name not in specials else (
            math.log(0.10),
            prim) for prim in prim_list]


    baseGrammar = Grammar.fromProductions(productions, continuationType=tpregex)
    #baseGrammar = Grammar.uniform(prims())

    #for i in range(100):
    #    eprint(baseGrammar.sample(tpregex))

    #eprint(baseGrammar)
    #explore
    test_stuff = args.pop("debug")
    if test_stuff:
        eprint(baseGrammar)
        eprint("sampled programs from prior:")
        for i in range(100): #100
            eprint(baseGrammar.sample(test[0].request,maximumDepth=1000))
        eprint("""half the probability mass is on higher-order primitives.
Therefore half of enumerated programs should have more than one node.
However, we do not observe this.
Instead we see a very small fraction of programs have more than one node. 
So something seems to be wrong with grammar.sample.

Furthermore: observe the large print statement above. 
This prints the candidates for sampleDistribution in grammar.sample.
the first element of each tuple is the probability passed into sampleDistribution.
Half of the probability mass should be on the functions, but instead they are equally 
weighted with the constants. If you look at the grammar above, this is an error!!!!
""")
        assert False

    del args["likelihoodModel"]
    explorationCompression(baseGrammar, train,
                           testingTasks = test,
                           **args)
Example #14
def rustInduce(g0,
               frontiers,
               _=None,
               topK=1,
               pseudoCounts=1.0,
               aic=1.0,
               structurePenalty=0.001,
               a=0,
               CPUs=1,
               iteration=-1,
               topk_use_only_likelihood=False,
               vs=False):
    def finite_logp(l):
        return l if l != float("-inf") else -1000

    message = {
        "strategy": {
            "version-spaces": {
                "top_i": 50
            }
        } if vs else {
            "fragment-grammars": {}
        },
        "params": {
            "structure_penalty": structurePenalty,
            "pseudocounts": int(pseudoCounts + 0.5),
            "topk": topK,
            "topk_use_only_likelihood": topk_use_only_likelihood,
            "aic": aic if aic != float("inf") else None,
            "arity": a,
        },
        "primitives": [{
            "name": p.name,
            "tp": str(t),
            "logp": finite_logp(l)
        } for l, t, p in g0.productions if p.isPrimitive],
        "inventions": [
            {
                "expression": str(p.body),
                "logp": finite_logp(l)
            }  # -inf=-100
            for l, t, p in g0.productions if p.isInvented
        ],
        "variable_logprob":
        finite_logp(g0.logVariable),
        "frontiers": [{
            "task_tp":
            str(f.task.request),
            "solutions": [{
                "expression": str(e.program),
                "logprior": finite_logp(e.logPrior),
                "loglikelihood": e.logLikelihood,
            } for e in f],
        } for f in frontiers],
    }

    eprint("running rust compressor")

    messageJson = json.dumps(message)

    with open("jsonDebug", "w") as f:
        f.write(messageJson)

    # Python >= 3.6 lets Popen handle text encoding directly;
    # Python 3.5 requires encoding the message to bytes by hand.
    if sys.version_info >= (3, 6):
        p = subprocess.Popen(['./rust_compressor/rust_compressor'],
                             encoding='utf-8',
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE)
    elif sys.version_info >= (3, 5):
        p = subprocess.Popen(['./rust_compressor/rust_compressor'],
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE)
        # convert the messageJson string to bytes
        messageJson = bytearray(messageJson, encoding='utf-8')
    else:
        eprint("must be python 3.5 or newer")
        assert False

    p.stdin.write(messageJson)
    p.stdin.flush()
    p.stdin.close()

    if p.returncode is not None:
        raise ValueError("rust compressor failed")

    if sys.version_info >= (3, 6):
        resp = json.load(p.stdout)
    else:
        import codecs
        resp = json.load(codecs.getreader('utf-8')(p.stdout))

    productions = [(x["logp"], p) for p, x in
                   zip((p for (_, _, p) in g0.productions if p.isPrimitive), resp["primitives"])] + \
                  [(i["logp"], Invented(Program.parse(i["expression"])))
                   for i in resp["inventions"]]
    productions = [(l if l is not None else float("-inf"), p)
                   for l, p in productions]
    g = Grammar.fromProductions(productions,
                                resp["variable_logprob"],
                                continuationType=g0.continuationType)
    newFrontiers = [
        Frontier([
            FrontierEntry(Program.parse(s["expression"]),
                          logPrior=s["logprior"],
                          logLikelihood=s["loglikelihood"])
            for s in r["solutions"]
        ], f.task) for f, r in zip(frontiers, resp["frontiers"])
    ]
    return g, newFrontiers
Example #15
        #print("self.name", self.name)
        return self.name

    def __setstate__(self, state):
        #for backwards compatibility:
        if isinstance(state, dict):
            pass  # do nothing; old-style states don't need to be loaded
        else:
            p = Primitive.GLOBALS[state]
            self.__init__(p.name, p.tp, p.value, p.constraint)


if __name__ == '__main__':
    import time
    CPrimitive("testCPrim", tint, lambda x: x, 17)
    g = Grammar.fromProductions(RobustFillProductions())
    print(len(g))
    request = tprogram
    p = g.sample(request)
    print("request:", request)
    print("program:")
    print(prettyProgram(p))
    s = 'abcdefg'
    e = p.evaluate([])
    #print("prog applied to", s)
    #print(e(s))
    print("flattened_program:")
    flat = flatten_program(p)
    print(flat)
    t = time.time()
    constraints = Constraint_prop().execute(p)
Example #16
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on LOGO tasks.
    """

    # The below legacy global statement is required since prefix_dreams is used by LogoFeatureCNN.
    # TODO(lcary): use argument passing instead of global variables.
    global prefix_dreams

    # The below global statement is required since primitives is modified within main().
    # TODO(lcary): use a function call to retrieve and declare primitives instead.
    global primitives

    visualizeCheckpoint = args.pop("visualize")
    if visualizeCheckpoint is not None:
        with open(visualizeCheckpoint, 'rb') as handle:
            primitives = pickle.load(handle).grammars[-1].primitives
        visualizePrimitives(primitives)
        sys.exit(0)

    dreamCheckpoint = args.pop("dreamCheckpoint")
    dreamDirectory = args.pop("dreamDirectory")

    proto = args.pop("proto")

    if dreamCheckpoint is not None:
        #outputDreams(dreamCheckpoint, dreamDirectory)
        enumerateDreams(dreamCheckpoint, dreamDirectory)
        sys.exit(0)

    animateCheckpoint = args.pop("animate")
    if animateCheckpoint is not None:
        animateSolutions(loadPickle(animateCheckpoint).allFrontiers)
        sys.exit(0)

    target = args.pop("target")
    red = args.pop("reduce")
    save = args.pop("save")
    prefix = args.pop("prefix")
    prefix_dreams = prefix + "/dreams/" + ('_'.join(target)) + "/"
    prefix_pickles = prefix + "/logo." + ('.'.join(target))
    if not os.path.exists(prefix_dreams):
        os.makedirs(prefix_dreams)
    tasks = makeTasks(target, proto)
    eprint("Generated", len(tasks), "tasks")

    costMatters = args.pop("cost")
    for t in tasks:
        t.specialTask[1]["costMatters"] = costMatters
        # disgusting hack - include whether cost matters in the dummy input
        if costMatters: t.examples = [(([1]), t.examples[0][1])]

    os.chdir("prototypical-networks")
    subprocess.Popen(["python", "./protonet_server.py"])
    time.sleep(3)
    os.chdir("..")

    test, train = testTrainSplit(tasks, args.pop("split"))
    eprint("Split tasks into %d/%d test/train" % (len(test), len(train)))
    try:
        if test: montageTasks(test, "test_")
        montageTasks(train, "train_")
    except Exception:
        eprint(
            "WARNING: couldn't generate montage. Do you have an old version of scipy?"
        )

    if red:
        for reducing in red:
            try:
                with open(reducing, 'r') as f:
                    prods = json.load(f)
                    for e in prods:
                        e = Program.parse(e)
                        if e.isInvented:
                            primitives.append(e)
            except (EOFError, IOError, json.decoder.JSONDecodeError):
                eprint("Couldn't grab frontier from " + reducing)

    primitives = list(OrderedDict((x, True) for x in primitives).keys())
    baseGrammar = Grammar.uniform(primitives, continuationType=turtle)

    eprint(baseGrammar)

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/logo/%s" % timestamp
    os.system("mkdir -p %s" % outputDirectory)

    generator = ecIterator(baseGrammar,
                           train,
                           testingTasks=test,
                           outputPrefix="%s/logo" % outputDirectory,
                           evaluationTimeout=0.01,
                           **args)

    r = None
    for result in generator:
        iteration = len(result.learningCurve)
        dreamDirectory = "%s/dreams_%d" % (outputDirectory, iteration)
        os.system("mkdir  -p %s" % dreamDirectory)
        eprint("Dreaming into directory", dreamDirectory)
        dreamFromGrammar(result.grammars[-1], dreamDirectory)
        r = result

    needsExport = [
        str(z) for _, _, z in r.grammars[-1].productions if z.isInvented
    ]
    if save is not None:
        with open(save, 'w') as f:
            json.dump(needsExport, f)
Example #17
                response = json.loads(result.decode("utf-8"))
                for b, entry in enumerate(response):
                    frontiers.append(
                        Frontier([
                            FrontierEntry(program=Program.parse(p),
                                          logPrior=entry["ll"],
                                          logLikelihood=0.)
                            for p in entry["programs"]
                        ],
                                 task=Task(str(b), request, [])))
        eprint("Total number of Helmholtz frontiers:", len(frontiers))
        return frontiers

    return get


if __name__ == "__main__":
    g = Grammar.uniform([k1, k0, addition, subtraction, multiplication])
    frontiers = helmholtzEnumeration(g, arrow(tint, tint), [[0], [1], [2]],
                                     10.)
    eprint("average frontier size", mean(len(f.entries) for f in frontiers))
    f = DummyFeatureExtractor([])
    r = RecognitionModel(f, g, hidden=[], contextual=True)
    r.trainBiasOptimal(frontiers, frontiers, steps=70)
    g = r.grammarOfTask(frontiers[0].task).untorch()
    frontiers = helmholtzEnumeration(g, arrow(tint, tint), [[0], [1], [2]],
                                     10.)
    for f in frontiers:
        eprint(f.summarizeFull())
    eprint("average frontier size", mean(len(f.entries) for f in frontiers))
Example #18
# from dreamcoder.domains.draw.makeDrawTasks import drawDrawings
from dreamcoder.domains.draw.drawPrimitives import primitives, taxes, tartist, tangle, tscale, tdist

# from dreamcoder.dreamcoder import ecIterator
from dreamcoder.grammar import Grammar
# from dreamcoder.program import Program
# from dreamcoder.recognition import variable, maybe_cuda
from dreamcoder.task import Task
from dreamcoder.type import arrow
# from dreamcoder.utilities import eprint, testTrainSplit, loadPickle

g0 = Grammar.uniform(primitives)

def dreamFromGrammar(g=g0, directory="", N=50):
    # request = taxes  # arrow(turtle, turtle), just for log-likelihood
    request = arrow(taxes, taxes)
    programs = [p for _ in range(N)
                for p in [g.sample(request, maximumDepth=20)]
                if p is not None]
    return programs
    # drawDrawings(*programs, filenames)

Example #19
"non",
"l",
"erase",
"m",
"comes",
"up",
"comparison",
"during",
"'s value is the largest inclusive, which is strictly less than maximum element in numbers from 1 to the element in `a` which'",
"'s value is the biggest (inclusive), which is strictly less than maximum element of range from 1 to the element in `a` which'",
"'s value is the highest, which is strictly less than maximum element among sequence of digits of the element in `a` which'"]


if __name__ == "__main__":
    #g = Grammar.uniform(deepcoderPrimitives())

    g = Grammar.fromProductions(algolispProductions(), logVariable=.9)

    #p=Program.parse("(lambda (fn_call filter (list_add_symbol (lambda1_call == (list_add_symbol 1 (list_init_symbol (fn_call mod ( list_add_symbol 2 (list_init_symbol arg1)) ))) ) (list_init_symbol $0)) )")
    p=Program.parse("(lambda (fn_call filter (list_add_symbol (lambda1_call eq (list_add_symbol (symbol_constant 1) (list_init_symbol (fn_call mod ( list_add_symbol (symbol_constant 2) (list_init_symbol (symbol_constant arg1))) ))) ) (list_init_symbol (symbol_constant $0)))))")

    print(p)

    #tree = p.evaluate(["a"])
    tree = p.evaluate([])
    print(tree("a"))

Example #20
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("--domain", '-d', default="text")
    parser.add_argument("--taskLikelihood", default=False, action='store_true')
    parser.add_argument("--sampleLikelihood",
                        default=False,
                        action='store_true')
    parser.add_argument("--test", type=str, default=False)
    parser.add_argument("--timeout", type=float, default=600)
    arguments = parser.parse_args()

    if arguments.domain == "text":
        tasks = makeTasks()
        g = Grammar.uniform(text_primitives.primitives +
                            [p for p in bootstrapTarget()])
        input_vocabularies = [
            list(printable[:-4]) + ['EOE'],
            list(printable[:-4])
        ]
        test = loadPBETasks("PBE_Strings_Track")[0]
        fe = Text.LearnedFeatureExtractor(tasks=tasks, testingTasks=test)

        BATCHSIZE = 16

    elif arguments.domain == "regex":
        g = Grammar.uniform(reducedConcatPrimitives(),
                            continuationType=tpregex)
        tasks = makeNewTasks()
        fe = Regex.LearnedFeatureExtractor(tasks)
Example #21
def make_grammar(g):
    Primitive.GLOBALS.clear()
    return Grammar.uniform(g())
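
Clearing Primitive.GLOBALS before rebuilding avoids stale name collisions
when primitives from another domain were registered earlier in the same
process (compare Example #9, which does the same before Grammar.uniform).
A hypothetical call; note that the primitive factory itself is passed, not
an already-built list:

g = make_grammar(McCarthyPrimitives)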
Example #22
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on manipulating sequences of numbers.
    """
    random.seed(args.pop("random_seed"))

    dataset = args.pop("dataset")
    tasks = {
        "Lucas-old":
        lambda: retrieveJSONTasks("data/list_tasks.json") + sortBootstrap(),
        "bootstrap":
        make_list_bootstrap_tasks,
        "sorting":
        sortBootstrap,
        "Lucas-depth1":
        lambda: retrieveJSONTasks("data/list_tasks2.json")[:105],
        "Lucas-depth2":
        lambda: retrieveJSONTasks("data/list_tasks2.json")[:4928],
        "Lucas-depth3":
        lambda: retrieveJSONTasks("data/list_tasks2.json"),
    }[dataset]()

    maxTasks = args.pop("maxTasks")
    if maxTasks and len(tasks) > maxTasks:
        necessaryTasks = []  # maxTasks will not consider these
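        # NOTE: none of the dataset keys above begin with "Lucas2.0", so this
        # branch never fires as written.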
        if dataset.startswith("Lucas2.0") and dataset != "Lucas2.0-depth1":
            necessaryTasks = tasks[:105]

        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        random.shuffle(tasks)
        del tasks[maxTasks:]
        tasks = necessaryTasks + tasks

    if dataset.startswith("Lucas"):
        # extra tasks for filter
        tasks.extend([
            Task("remove empty lists",
                 arrow(tlist(tlist(tbool)), tlist(tlist(tbool))),
                 [((ls, ), list(filter(lambda l: len(l) > 0, ls)))
                  for _ in range(15) for ls in [[[
                      random.random() < 0.5
                      for _ in range(random.randint(0, 3))
                  ] for _ in range(4)]]]),
            Task("keep squares", arrow(tlist(tint), tlist(tint)), [
                ((xs, ), list(filter(lambda x: int(math.sqrt(x))**2 == x, xs)))
                for _ in range(15) for xs in [[
                    random.choice([0, 1, 4, 9, 16, 25])
                    if random.random() < 0.5 else random.randint(0, 9)
                    for _ in range(7)
                ]]
            ]),
            Task("keep primes", arrow(tlist(tint), tlist(tint)), [
                ((xs, ),
                 list(
                     filter(
                         lambda x: x in
                         {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37}, xs)))
                for _ in range(15) for xs in [[
                    random.choice([2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37])
                    if random.random() < 0.5 else random.randint(0, 9)
                    for _ in range(7)
                ]]
            ]),
        ])
        for i in range(4):
            tasks.extend([
                Task("keep eq %s" % i, arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: x == i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]]),
                Task("remove eq %s" % i, arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: x != i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]]),
                Task("keep gt %s" % i, arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: x > i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]]),
                Task("remove gt %s" % i, arrow(tlist(tint), tlist(tint)),
                     [((xs, ), list(filter(lambda x: not x > i, xs)))
                      for _ in range(15)
                      for xs in [[random.randint(0, 6) for _ in range(5)]]])
            ])

    def isIdentityTask(t):
        return all(len(xs) == 1 and xs[0] == y for xs, y in t.examples)

    eprint("Removed", sum(isIdentityTask(t) for t in tasks),
           "tasks that were just the identity function")
    tasks = [t for t in tasks if not isIdentityTask(t)]

    prims = {
        "base": basePrimitives,
        "McCarthy": McCarthyPrimitives,
        "common": bootstrapTarget_extra,
        "noLength": no_length,
        "rich": primitives
    }[args.pop("primitives")]()
    haveLength = not args.pop("noLength")
    haveMap = not args.pop("noMap")
    haveUnfold = not args.pop("noUnfold")
    eprint(f"Including map as a primitive? {haveMap}")
    eprint(f"Including length as a primitive? {haveLength}")
    eprint(f"Including unfold as a primitive? {haveUnfold}")
    baseGrammar = Grammar.uniform([p
                                   for p in prims
                                   if (p.name != "map" or haveMap) and \
                                   (p.name != "unfold" or haveUnfold) and \
                                   (p.name != "length" or haveLength)])

    extractor = {
        "learned": LearnedFeatureExtractor,
    }[args.pop("extractor")]
    extractor.H = args.pop("hidden")

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/list/%s" % timestamp
    os.system("mkdir -p %s" % outputDirectory)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/list" % outputDirectory,
        "evaluationTimeout": 0.0005,
    })

    eprint("Got {} list tasks".format(len(tasks)))
    split = args.pop("split")
    if split:
        train_some = defaultdict(list)
        for t in tasks:
            necessary = train_necessary(t)
            if not necessary:
                continue
            if necessary == "some":
                train_some[t.name.split()[0]].append(t)
            else:
                t.mustTrain = True
        for k in sorted(train_some):
            ts = train_some[k]
            random.shuffle(ts)
            ts.pop().mustTrain = True

        test, train = testTrainSplit(tasks, split)
        test = [t for t in test if t.name not in EASYLISTTASKS]

        eprint("Alotted {} tasks for training and {} for testing".format(
            len(train), len(test)))
    else:
        train = tasks
        test = []

    explorationCompression(baseGrammar, train, testingTasks=test, **args)
Example #23
        for char, name in disallowed
    ] + [
        Primitive("r_dot", tpregex, emp_dot_no_letter(corpus)),
        Primitive("r_d", tpregex, emp_d(corpus)),
        Primitive("r_s", tpregex, pregex.s),
        Primitive("r_kleene", arrow(tpregex, tpregex), _kleene),
        #Primitive("r_plus", arrow(tpregex, tpregex), _plus),
        #Primitive("r_maybe", arrow(tpregex, tpregex), _maybe),
        Primitive("r_alt", arrow(tpregex, tpregex, tpregex), _alt),
        Primitive("r_concat", arrow(tpregex, tpregex, tpregex), _concat),
    ]


if __name__ == '__main__':
    concatPrimitives()
    from dreamcoder.program import Program

    p = Program.parse(
        "(lambda (r_kleene (lambda (r_maybe (lambda (string_x $0)) $0)) $0))")
    print(p)
    print(p.runWithArguments([pregex.String("")]))

    prims = concatPrimitives()
    g = Grammar.uniform(prims)

    for i in range(100):
        prog = g.sample(arrow(tpregex, tpregex))
        preg = prog.runWithArguments([pregex.String("")])
        print("preg:", preg.__repr__())
        print("sample:", preg.sample())
Example #24
def ocamlInduce(g,
                frontiers,
                _=None,
                topK=1,
                pseudoCounts=1.0,
                aic=1.0,
                structurePenalty=0.001,
                a=0,
                CPUs=1,
                bs=1000000,
                topI=300):
    # This is a dirty hack!
    # Memory consumption increases with the number of CPUs
    # And early on we have a lot of stuff to compress
    # If this is the first iteration, only use a fraction of the available CPUs
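    # NOTE: the hard-coded topK/topI below override the function arguments.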
    topK = 5
    topI = 600
    if all(not p.isInvented for p in g.primitives):
        if a > 3:
            CPUs = max(1, int(CPUs / 6))
        else:
            CPUs = max(1, int(CPUs / 3))
    else:
        CPUs = max(1, int(CPUs / 2))
    CPUs = 2
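    # NOTE: this hard-coded value overrides the CPU-scaling logic above.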

    # X X X FIXME X X X
    # for unknown reasons doing compression all in one go works correctly and doing it with Python and the outer loop causes problems
    iterations = 99  # maximum number of components to add at once

    while True:
        g0 = g

        originalFrontiers = frontiers
        t2f = {f.task: f for f in frontiers}
        frontiers = [f for f in frontiers if not f.empty]
        message = {
            "arity": a,
            "topK": topK,
            "pseudoCounts": float(pseudoCounts),
            "aic": aic,
            "bs": bs,
            "topI": topI,
            "structurePenalty": float(structurePenalty),
            "CPUs": CPUs,
            "DSL": g.json(),
            "iterations": iterations,
            "frontiers": [f.json() for f in frontiers]
        }

        message = json.dumps(message)
        if True:
            timestamp = datetime.datetime.now().isoformat()
            os.system("mkdir  -p compressionMessages")
            fn = "compressionMessages/%s" % timestamp
            with open(fn, "w") as f:
                f.write(message)
            eprint("Compression message saved to:", fn)

        try:
            # Get relative path
            compressor_file = os.path.join(get_root_dir(), 'compression')
            process = subprocess.Popen(compressor_file,
                                       stdin=subprocess.PIPE,
                                       stdout=subprocess.PIPE)
            response, error = process.communicate(
                bytes(message, encoding="utf-8"))
            response = json.loads(response.decode("utf-8"))
        except OSError:
            raise

        g = response["DSL"]
        g = Grammar(g["logVariable"],
                    [(l, p.infer(), p) for production in g["productions"]
                     for l in [production["logProbability"]]
                     for p in [Program.parse(production["expression"])]],
                    continuationType=g0.continuationType)

        frontiers = {
            original.task: Frontier([
                FrontierEntry(p,
                              logLikelihood=e["logLikelihood"],
                              logPrior=g.logLikelihood(original.task.request,
                                                       p))
                for e in new["programs"]
                for p in [Program.parse(e["program"])]
            ],
                                    task=original.task)
            for original, new in zip(frontiers, response["frontiers"])
        }
        frontiers = [
            frontiers.get(f.task, t2f[f.task]) for f in originalFrontiers
        ]
        if iterations == 1 and len(g) > len(g0):
            eprint("Grammar changed - running another round of consolidation.")
            continue
        else:
            eprint("Finished consolidation.")
            return g, frontiers
Example #25
    def update_grammar(self):
        programs = [
            Invented(smt.program.prog_ori) for smt in self.semantics
            if smt.solved and smt.program.arity > 0
        ]
        self.grammar = Grammar.uniform(McCarthyPrimitives() + programs)