Example no. 1
0
def helmholtzEnumeration(g,
                         request,
                         inputs,
                         timeout,
                         _=None,
                         special=None,
                         evaluationTimeout=None):
    """Enumerate Helmholtz ("dream") samples by invoking the OCaml backend.

    Serializes the grammar, requested type, and extra inputs into a JSON
    message, pipes it to the external ``helmholtz`` binary on stdin, and
    returns the binary's raw stdout (JSON, as bytes).

    Arguments:
        g: grammar object exposing ``.json()``.
        request: requested type, exposing ``.json()``.
        inputs: extra inputs forwarded to the solver under ``"extras"``.
        timeout: enumeration timeout forwarded to the binary.
        _: ignored; kept for call-site compatibility.
        special: optional name of a special enumeration mode.
        evaluationTimeout: optional per-program evaluation timeout.

    Returns:
        The solver's raw stdout as produced by ``communicate`` (JSON text).

    Raises:
        OSError: if the ``helmholtz`` binary cannot be launched.
    """
    message = {
        "request": request.json(),
        "timeout": timeout,
        "DSL": g.json(),
        "extras": inputs
    }
    if evaluationTimeout:
        message["evaluationTimeout"] = evaluationTimeout
    if special:
        message["special"] = special
    message = json.dumps(message)
    # Debug aid: keep a copy of the last message sent to the solver.
    # Best-effort only — a read-only or missing /tmp must not abort
    # enumeration (previously this write was unguarded).
    try:
        with open('/tmp/hm', 'w') as handle:
            handle.write(message)
    except OSError:
        pass
    # A launch failure (OSError) simply propagates; the previous
    # ``except OSError as exc: raise exc`` wrapper was a no-op.
    binary = os.path.join(get_root_dir(), 'helmholtz')
    process = subprocess.Popen(binary,
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE)
    response, error = process.communicate(message.encode("utf-8"))
    return response
Example no. 2
0
def ocamlInduce(g,
                frontiers,
                _=None,
                topK=1,
                pseudoCounts=1.0,
                aic=1.0,
                structurePenalty=0.001,
                a=0,
                CPUs=1,
                bs=1000000,
                topI=300):
    """Run grammar induction (compression) via the external OCaml backend.

    Serializes the grammar and the non-empty frontiers into a JSON message,
    pipes it to the ``compression`` binary, and rebuilds the returned
    grammar and rescored frontiers from its JSON reply.

    Arguments:
        g: current grammar (exposes ``.json()``, ``.primitives``,
           ``.continuationType``, ``.logLikelihood``).
        frontiers: list of Frontier objects; empty ones are passed through
            unchanged.
        _: ignored; kept for call-site compatibility.
        topK, pseudoCounts, aic, structurePenalty, a, CPUs, bs, topI:
            compression hyperparameters forwarded to the binary.
            NOTE(review): several of these are clobbered below — see the
            hard-coded overrides.

    Returns:
        A ``(grammar, frontiers)`` pair, with frontiers in the original
        order (empty frontiers restored from the input).
    """
    # This is a dirty hack!
    # Memory consumption increases with the number of CPUs
    # And early on we have a lot of stuff to compress
    # If this is the first iteration, only use a fraction of the available CPUs
    # NOTE(review): the two lines below clobber the topK/topI parameters
    # unconditionally, making the caller's values dead — presumably a
    # debugging override left in place; confirm before removing.
    topK = 5
    topI = 600
    # Throttle CPUs more aggressively when the grammar has no invented
    # primitives yet (i.e. the very first compression rounds).
    if all(not p.isInvented for p in g.primitives):
        if a > 3:
            CPUs = max(1, int(CPUs / 6))
        else:
            CPUs = max(1, int(CPUs / 3))
    else:
        CPUs = max(1, int(CPUs / 2))
    # NOTE(review): this override makes the whole CPU computation above
    # dead code — every run uses exactly 2 CPUs. Confirm intent.
    CPUs = 2

    # X X X FIXME X X X
    # for unknown reasons doing compression all in one go works correctly and doing it with Python and the outer loop causes problems
    iterations = 99  # maximum number of components to add at once

    while True:
        # Keep the pre-compression grammar so we can detect growth below.
        g0 = g

        # Remember original order; the backend only sees non-empty frontiers.
        originalFrontiers = frontiers
        t2f = {f.task: f for f in frontiers}
        frontiers = [f for f in frontiers if not f.empty]
        message = {
            "arity": a,
            "topK": topK,
            "pseudoCounts": float(pseudoCounts),
            "aic": aic,
            "bs": bs,
            "topI": topI,
            "structurePenalty": float(structurePenalty),
            "CPUs": CPUs,
            "DSL": g.json(),
            "iterations": iterations,
            "frontiers": [f.json() for f in frontiers]
        }

        message = json.dumps(message)
        # NOTE(review): ``if True`` means every compression message is
        # always archived to disk — presumably a debugging switch.
        if True:
            timestamp = datetime.datetime.now().isoformat()
            os.system("mkdir  -p compressionMessages")
            fn = "compressionMessages/%s" % timestamp
            with open(fn, "w") as f:
                f.write(message)
            eprint("Compression message saved to:", fn)

        try:
            # Get relative path
            compressor_file = os.path.join(get_root_dir(), 'compression')
            process = subprocess.Popen(compressor_file,
                                       stdin=subprocess.PIPE,
                                       stdout=subprocess.PIPE)
            response, error = process.communicate(
                bytes(message, encoding="utf-8"))
            response = json.loads(response.decode("utf-8"))
        except OSError as exc:
            raise exc

        # Rebuild the grammar from the backend's JSON; the nested
        # ``for l in [...] for p in [...]`` is a let-binding idiom that
        # names each production's log-probability and parsed program.
        g = response["DSL"]
        g = Grammar(g["logVariable"],
                    [(l, p.infer(), p) for production in g["productions"]
                     for l in [production["logProbability"]]
                     for p in [Program.parse(production["expression"])]],
                    continuationType=g0.continuationType)

        # Rescore the returned frontiers under the new grammar, keyed by task.
        frontiers = {
            original.task: Frontier([
                FrontierEntry(p,
                              logLikelihood=e["logLikelihood"],
                              logPrior=g.logLikelihood(original.task.request,
                                                       p))
                for e in new["programs"]
                for p in [Program.parse(e["program"])]
            ],
                                    task=original.task)
            for original, new in zip(frontiers, response["frontiers"])
        }
        # Restore the original frontier order; tasks the backend did not
        # return fall back to their pre-compression frontier.
        frontiers = [
            frontiers.get(f.task, t2f[f.task]) for f in originalFrontiers
        ]
        # With iterations=99 this condition is never true, so the loop
        # always exits after one pass (see the FIXME above).
        if iterations == 1 and len(g) > len(g0):
            eprint("Grammar changed - running another round of consolidation.")
            continue
        else:
            eprint("Finished consolidation.")
            return g, frontiers
Example no. 3
0
def solveForTask_ocaml(
        _=None,
        elapsedTime=0.,
        CPUs=1,
        g=None,
        tasks=None,
        lowerBound=None,
        upperBound=None,
        budgetIncrement=None,
        timeout=None,
        testing=None,  # FIXME: unused
        likelihoodModel=None,
        evaluationTimeout=None,
        maximumFrontiers=None):
    """Enumerate programs for *tasks* by invoking the external OCaml solver.

    Serializes the grammar and tasks to JSON, pipes the message to the
    ``solver`` binary, and parses its JSON reply into per-task frontiers
    and search times.

    Arguments:
        _: ignored; kept for call-site compatibility.
        elapsedTime: offset (seconds) added to every reported search time.
        CPUs: worker count forwarded to the solver as ``"nc"``.
        g: grammar object (exposes ``.json()``, ``.logLikelihood``).
        tasks: list of task objects to solve.
        lowerBound / upperBound / budgetIncrement / timeout: enumeration
            budget parameters forwarded verbatim.
        testing: unused (see FIXME).
        likelihoodModel: unused by this backend.
        evaluationTimeout: per-program evaluation timeout
            (sent as ``"programTimeout"``).
        maximumFrontiers: dict mapping task -> maximum frontier size.

    Returns:
        ``(frontiers, searchTimes, pc)`` where ``frontiers`` maps each task
        to a Frontier, ``searchTimes`` maps each task to the time at which
        its MAP solution was found (or None if unsolved), and ``pc`` is the
        solver's reported number of enumerated programs.
    """

    import json

    def taskMessage(t):
        # JSON encoding of a single task: its I/O examples, name, type,
        # frontier budget, and any backend-specific "special task" payload.
        m = {
            "examples": [{
                "inputs": list(xs),
                "output": y
            } for xs, y in t.examples],
            "name": t.name,
            "request": t.request.json(),
            "maximumFrontier": maximumFrontiers[t]
        }
        if hasattr(t, "specialTask"):
            special, extra = t.specialTask
            m["specialTask"] = special
            m["extras"] = extra
        return m

    message = {
        "DSL":
        g.json(),
        "tasks": [taskMessage(t) for t in tasks],
        "programTimeout":
        evaluationTimeout,
        "nc":
        CPUs,
        "timeout":
        timeout,
        "lowerBound":
        lowerBound,
        "upperBound":
        upperBound,
        "budgetIncrement":
        budgetIncrement,
        "verbose":
        False,
        "shatter":
        5 if len(tasks) == 1 and "turtle" in str(tasks[0].request) else 10
    }

    if hasattr(g, "unrolled"):
        message["PCFG"] = g.unrolled

    if hasattr(tasks[0],
               'maxParameters') and tasks[0].maxParameters is not None:
        message["maxParameters"] = tasks[0].maxParameters

    message = json.dumps(message)
    # uncomment this if you want to save the messages being sent to the solver

    # Pre-initialize so the debug handler below can't hit a NameError if
    # communicate() itself fails before these are assigned.
    response, error = None, None
    try:
        solver_file = os.path.join(get_root_dir(), 'solver')
        process = subprocess.Popen(solver_file,
                                   stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE)
        response, error = process.communicate(bytes(message, encoding="utf-8"))
        response = json.loads(response.decode("utf-8"))
    except OSError:
        # Launch failure: propagate unchanged (bare raise keeps traceback).
        raise

    except Exception:
        # Anything else (e.g. malformed solver output): dump the exchange
        # for post-mortem debugging, then fail hard.  Narrowed from a bare
        # ``except:`` so KeyboardInterrupt/SystemExit still propagate.
        eprint("response:", response)
        eprint("error:", error)
        with open("message", "w") as f:
            f.write(message)
        eprint("message,", message)
        assert False, "MAX RAISE"

    pc = response.get("number_enumerated", 0)  # TODO
    frontiers = {}
    searchTimes = {}
    for t in tasks:
        solutions = response[t.name]
        # Sanity check: every returned program must typecheck under the
        # grammar; failures are reported but do not abort the run.
        for e in solutions:
            p = Program.parse(e["program"])
            try:
                g.logLikelihood(t.request, p)
            except Exception:
                eprint(t, p, "TYPING ERROR")
        frontier = Frontier([
            FrontierEntry(program=p,
                          logLikelihood=e["logLikelihood"],
                          logPrior=g.logLikelihood(t.request, p))
            for e in solutions for p in [Program.parse(e["program"])]
        ],
                            task=t)
        frontiers[t] = frontier
        if frontier.empty:
            searchTimes[t] = None
        # This is subtle:
        # The search time we report is actually not be minimum time to find any solution
        # Rather it is the time to find the MAP solution
        # This is important for regression problems,
        # where we might find something with a good prior but bad likelihood early on,
        # and only later discover the good high likelihood program
        else:
            searchTimes[t] = min(
                (e["logLikelihood"] + e["logPrior"], e["time"])
                for e in solutions)[1] + elapsedTime

    return frontiers, searchTimes, pc