Example 1
def rustInduce(g0,
               frontiers,
               _=None,
               topK=1,
               pseudoCounts=1.0,
               aic=1.0,
               structurePenalty=0.001,
               a=0,
               CPUs=1,
               iteration=-1,
               topk_use_only_likelihood=False,
               vs=False):
    def finite_logp(l):
        return l if l != float("-inf") else -1000

    message = {
        "strategy": {
            "version-spaces": {
                "top_i": 50
            }
        } if vs else {
            "fragment-grammars": {}
        },
        "params": {
            "structure_penalty": structurePenalty,
            "pseudocounts": int(pseudoCounts + 0.5),
            "topk": topK,
            "topk_use_only_likelihood": topk_use_only_likelihood,
            "aic": aic if aic != float("inf") else None,
            "arity": a,
        },
        "primitives": [{
            "name": p.name,
            "tp": str(t),
            "logp": finite_logp(l)
        } for l, t, p in g0.productions if p.isPrimitive],
        "inventions": [
            {
                "expression": str(p.body),
                "logp": finite_logp(l)
            }  # finite_logp maps -inf to -1000
            for l, t, p in g0.productions if p.isInvented
        ],
        "variable_logprob":
        finite_logp(g0.logVariable),
        "frontiers": [{
            "task_tp":
            str(f.task.request),
            "solutions": [{
                "expression": str(e.program),
                "logprior": finite_logp(e.logPrior),
                "loglikelihood": e.logLikelihood,
            } for e in f],
        } for f in frontiers],
    }

    eprint("running rust compressor")

    messageJson = json.dumps(message)

    with open("jsonDebug", "w") as f:
        f.write(messageJson)

    # Popen's `encoding` argument requires Python >= 3.6; on 3.5 we must
    # encode the payload to bytes ourselves.
    if sys.version_info[1] >= 6:
        p = subprocess.Popen(['./rust_compressor/rust_compressor'],
                             encoding='utf-8',
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE)
    elif sys.version_info[1] == 5:
        p = subprocess.Popen(['./rust_compressor/rust_compressor'],
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE)

        # Popen on 3.5 reads bytes, so encode the JSON payload manually
        messageJson = bytearray(messageJson, encoding='utf-8')
    else:
        eprint("must be python 3.5 or 3.6")
        assert False

    p.stdin.write(messageJson)
    p.stdin.flush()
    p.stdin.close()

    # a non-None returncode at this point means the compressor exited early
    if p.returncode is not None:
        raise ValueError("rust compressor failed")

    if sys.version_info[1] >= 6:
        resp = json.load(p.stdout)
    elif sys.version_info[1] == 5:
        import codecs
        resp = json.load(codecs.getreader('utf-8')(p.stdout))

    productions = [(x["logp"], p) for p, x in
                   zip((p for (_, _, p) in g0.productions if p.isPrimitive), resp["primitives"])] + \
                  [(i["logp"], Invented(Program.parse(i["expression"])))
                   for i in resp["inventions"]]
    productions = [(l if l is not None else float("-inf"), p)
                   for l, p in productions]
    g = Grammar.fromProductions(productions,
                                resp["variable_logprob"],
                                continuationType=g0.continuationType)
    newFrontiers = [
        Frontier([
            FrontierEntry(Program.parse(s["expression"]),
                          logPrior=s["logprior"],
                          logLikelihood=s["loglikelihood"])
            for s in r["solutions"]
        ], f.task) for f, r in zip(frontiers, resp["frontiers"])
    ]
    return g, newFrontiers
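
For context, a minimal usage sketch of the function above (the values are hypothetical; `g0` and `frontiers` come from the surrounding DreamCoder pipeline, so this is illustrative rather than standalone-runnable):

# sketch: one compression step over the current frontiers
g1, compressedFrontiers = rustInduce(
    g0, frontiers,
    topK=2,                # keep the 2 highest-scoring programs per frontier
    pseudoCounts=1.0,
    aic=1.0,
    structurePenalty=1.5,  # illustrative; the default above is 0.001
    a=3,                   # maximum arity of invented primitives
    vs=True,               # version-space strategy instead of fragment grammars
)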
Example 2
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on regular expressions.
    """
    #for dreaming

    #parse use_ll_cutoff
    use_ll_cutoff = args.pop('use_ll_cutoff')
    if use_ll_cutoff is not False:

        # use_ll_cutoff holds one or two cutoff names: with one entry the same
        # cutoff is used for both train and test; with two, train and test
        # cutoffs are set separately

        if len(use_ll_cutoff) == 1:
            train_ll_cutoff = use_ll_cutoff[0] # make_cutoff_model(use_ll_cutoff[0], tasks))
            test_ll_cutoff = use_ll_cutoff[0] # make_cutoff_model(use_ll_cutoff[0], tasks))
        else:
            assert len(use_ll_cutoff) == 2
            train_ll_cutoff = use_ll_cutoff[0] #make_cutoff_model(use_ll_cutoff[0], tasks))
            test_ll_cutoff = use_ll_cutoff[1] #make_cutoff_model(use_ll_cutoff[1], tasks))
    else:
        train_ll_cutoff = None
        test_ll_cutoff = None


    regexTasks = {"old": makeOldTasks,
                "short": makeShortTasks,
                "long": makeLongTasks,
                "words": makeWordTasks,
                "number": makeNumberTasks,
                "handpicked": makeHandPickedTasks,
                "new": makeNewTasks,
                "newNumber": makeNewNumberTasks
                }[args.pop("tasks")]

    tasks = regexTasks()  # TODO
    eprint("Generated", len(tasks), "tasks")

    maxTasks = args.pop("maxTasks")
    if len(tasks) > maxTasks:
        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        seed = 42 # previously this was hardcoded and never changed
        random.seed(seed)
        random.shuffle(tasks)
        del tasks[maxTasks:]

    maxExamples = args.pop("maxExamples")
   

    split = args.pop("split")
    test, train = testTrainSplit(tasks, split)
    eprint("Split tasks into %d/%d test/train" % (len(test), len(train)))


    test = add_cutoff_values(test, test_ll_cutoff)
    train = add_cutoff_values(train, train_ll_cutoff)
    eprint("added cutoff values to tasks, train: ", train_ll_cutoff, ", test:", test_ll_cutoff )


    if args.pop("use_str_const"):
        assert args["primitives"] == "strConst" or args["primitives"] == "reduced"
        ConstantInstantiateVisitor.SINGLE = \
            ConstantInstantiateVisitor()
        test = add_string_constants(test)
        train = add_string_constants(train)
        eprint("added string constants to test and train")
    
    for task in test + train:
        if len(task.examples) > maxExamples:
            task.examples = task.examples[:maxExamples]

        task.specialTask = ("regex", {"cutoff": task.ll_cutoff, "str_const": task.str_const})
        task.examples = [(xs, [y for y in ys])
                         for xs, ys in task.examples]
        task.maxParameters = 1

    # from list stuff
    primtype = args.pop("primitives")
    prims = {"base": basePrimitives,
             "alt1": altPrimitives,
             "alt2": alt2Primitives,
             "easyWords": easyWordsPrimitives,
             "concat": concatPrimitives,
             "reduced": reducedConcatPrimitives,
             "strConst": strConstConcatPrimitives
             }[primtype]

    extractor = {
        "learned": LearnedFeatureExtractor,
        "json": MyJSONFeatureExtractor
    }[args.pop("extractor")]

    extractor.H = args.pop("hidden")

    #stardecay = args.stardecay
    #stardecay = args.pop('stardecay')
    #decaystr = 'd' + str(stardecay)
    import datetime

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/regex/%s"%timestamp
    os.system("mkdir -p %s"%outputDirectory)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/regex"%(outputDirectory),
        "evaluationTimeout": 0.005,
        "topk_use_only_likelihood": True,
        "maximumFrontier": 10,
        "compressor": "ocaml"
    })
    ####


    #prim_list = prims(stardecay)
    prim_list = prims()
    specials = ["r_kleene", "r_plus", "r_maybe", "r_alt", "r_concat"]
    n_base_prim = len(prim_list) - len(specials)

    # the five special combinators get probability 0.10 each (half the total
    # mass); the base primitives share the remaining half uniformly
    productions = [
        (math.log(0.10), prim) if prim.name in specials
        else (math.log(0.5 / float(n_base_prim)), prim)
        for prim in prim_list]


    baseGrammar = Grammar.fromProductions(productions, continuationType=tpregex)
    #baseGrammar = Grammar.uniform(prims())

    #for i in range(100):
    #    eprint(baseGrammar.sample(tpregex))

    #eprint(baseGrammar)
    #explore
    test_stuff = args.pop("debug")
    if test_stuff:
        eprint(baseGrammar)
        eprint("sampled programs from prior:")
        for i in range(100): #100
            eprint(baseGrammar.sample(test[0].request,maximumDepth=1000))
        eprint("""half the probability mass is on higher-order primitives.
Therefore half of enumerated programs should have more than one node.
However, we do not observe this.
Instead we see a very small fraction of programs have more than one node. 
So something seems to be wrong with grammar.sample.

Furthermore: observe the large print statement above. 
This prints the candidates for sampleDistribution in grammar.sample.
the first element of each tuple is the probability passed into sampleDistribution.
Half of the probability mass should be on the functions, but instead they are equally 
weighted with the constants. If you look at the grammar above, this is an error!!!!
""")
        assert False

    del args["likelihoodModel"]
    explorationCompression(baseGrammar, train,
                           testingTasks = test,
                           **args)
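
As a quick sanity check on the production weights built above (a sketch, assuming `productions`, `specials`, and `math` from this example are in scope): the five special combinators receive probability 0.10 each, i.e. half the total mass, and the base primitives share the remaining half, so the probabilities should sum to 1.

special_mass = sum(math.exp(logp) for logp, prim in productions
                   if prim.name in specials)
total_mass = sum(math.exp(logp) for logp, _ in productions)
assert abs(special_mass - 0.5) < 1e-9
assert abs(total_mass - 1.0) < 1e-9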
Example 3

def deepcoderProductions():
    return [(0.0, prim) for prim in deepcoderPrimitives()]


def flatten_program(p):
    # Render the program as a flat token list: drop lambdas and parentheses,
    # and rename de Bruijn indices $k to input_i.
    string = p.show(False)
    num_inputs = string.count('lambda')
    string = string.replace('lambda', '')
    string = string.replace('(', '')
    string = string.replace(')', '')
    # (optionally, '_fn' suffixes could also be stripped here)
    for i in range(num_inputs):
        string = string.replace('$' + str(num_inputs - i - 1), 'input_' + str(i))
    return [token for token in string.split(' ') if token != '']

if __name__ == "__main__":
    #g = Grammar.uniform(deepcoderPrimitives())
    g = Grammar.fromProductions(deepcoderProductions(), logVariable=.9)
    request = arrow(tlist(tint), tint, tint)
    p = g.sample(request)
    print("request:", request)
    print("program:")
    print(prettyProgram(p))
    print("flattened_program:")
    flat = flatten_program(p)
    print(flat)
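
A hand-worked trace of flatten_program (the program string is hypothetical): for a sampled program whose p.show(False) renders as "(lambda (lambda (head $1)))", num_inputs is 2, the lambdas and parentheses are stripped, and the de Bruijn indices are renamed ($1 -> input_0, $0 -> input_1), so the call returns ['head', 'input_0'].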
Example 4
        #print("self.name", self.name)
        return self.name

    def __setstate__(self, state):
        # backwards compatibility: legacy pickles stored a dict and carry
        # nothing to restore; newer pickles store just the primitive name
        if isinstance(state, dict):
            pass
        else:
            p = Primitive.GLOBALS[state]
            self.__init__(p.name, p.tp, p.value, p.constraint)


if __name__ == '__main__':
    import time
    CPrimitive("testCPrim", tint, lambda x: x, 17)
    g = Grammar.fromProductions(RobustFillProductions())
    print(len(g))
    request = tprogram
    p = g.sample(request)
    print("request:", request)
    print("program:")
    print(prettyProgram(p))
    s = 'abcdefg'
    e = p.evaluate([])
    #print("prog applied to", s)
    #print(e(s))
    print("flattened_program:")
    flat = flatten_program(p)
    print(flat)
    t = time.time()
    constraints = Constraint_prop().execute(p)
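
A sketch of the round trip that __setstate__ above enables (assuming CPrimitive registers itself in Primitive.GLOBALS on construction, as the lookup in __setstate__ suggests; the primitive name here is hypothetical): pickling stores only the name, and unpickling re-initializes from the global registry, so the lambda value itself is never serialized.

import pickle
prim = CPrimitive("roundTripPrim", tint, lambda x: x, 17)  # hypothetical primitive
restored = pickle.loads(pickle.dumps(prim))
assert restored.name == prim.name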
Example 5
"non",
"l",
"erase",
"m",
"comes",
"up",
"comparison",
"during",
"'s value is the largest inclusive, which is strictly less than maximum element in numbers from 1 to the element in `a` which'",
"'s value is the biggest (inclusive), which is strictly less than maximum element of range from 1 to the element in `a` which'",
"'s value is the highest, which is strictly less than maximum element among sequence of digits of the element in `a` which'"]


if __name__ == "__main__":
    #g = Grammar.uniform(deepcoderPrimitives())

    g = Grammar.fromProductions(algolispProductions(), logVariable=.9)

    #p=Program.parse("(lambda (fn_call filter (list_add_symbol (lambda1_call == (list_add_symbol 1 (list_init_symbol (fn_call mod ( list_add_symbol 2 (list_init_symbol arg1)) ))) ) (list_init_symbol $0)) )")
    p=Program.parse("(lambda (fn_call filter (list_add_symbol (lambda1_call eq (list_add_symbol (symbol_constant 1) (list_init_symbol (fn_call mod ( list_add_symbol (symbol_constant 2) (list_init_symbol (symbol_constant arg1))) ))) ) (list_init_symbol (symbol_constant $0)))))")

    print(p)

    #tree = p.evaluate(["a"])
    tree = p.evaluate([])
    print(tree("a"))
