Example 1
    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        unsolvedTasks = [t for t in tasks if ec_result.allFrontiers[t].empty]

        if taskBatchSize is None:
            return unsolvedTasks
        elif taskBatchSize > len(tasks):
            eprint(
                "Task batch size is greater than total number of tasks, aborting."
            )
            assert False

        if ec_result.recognitionModel is None:
            eprint(
                "No recognition model, falling back on random %d tasks from the remaining %d"
                % (taskBatchSize, len(unsolvedTasks)))
            return random.sample(unsolvedTasks, taskBatchSize)
        else:
            lowEntropyUnsolved = entropyRandomBatch(ec_result,
                                                    unsolvedTasks,
                                                    taskBatchSize,
                                                    randomRatio=0)
            randomTask = random.choice(lowEntropyUnsolved)
            kNN = kNearestNeighbors(ec_result, tasks, taskBatchSize - 1,
                                    randomTask)
            return [randomTask] + kNN
Example 2
def benchmarkSynthesisTime(result, task, timeout):
    grammar = result.grammars[-1]

    from likelihoodModel import AllOrNothingLikelihoodModel
    from time import time
    import signal

    startTime = time()
    if result.parameters['useRecognitionModel']:
        # Because grammar induction is the last step of EC, the
        # recognition model is actually trained for the second to last
        # grammar
        grammar = result.grammars[-2]
        features = result.recognitionModel.featureExtractor.featuresOfTask(
            task)
        variables, productions = result.recognitionModel(features)
        grammar = Grammar(variables.data[0],
                          [(productions.data[k], t, p)
                           for k, (_, t, p) in enumerate(grammar.productions)])

    elapsed = time() - startTime
    frontier = callCompiled(enumerateForTask,
                            grammar,
                            task,
                            AllOrNothingLikelihoodModel,
                            maximumFrontier=1,
                            timeout=timeout - elapsed)
    dt = time() - startTime
    if dt > timeout or len(frontier) == 0: return None
    l = frontier.entries[0].logLikelihood
    p = frontier.entries[0].program
    eprint("Solved", task, "w/", p, "(log likelihood of task given program:",
           l, ").", "in time", dt)
    return dt, l
Example 3
def synthesize_with_best_model():
    model_path = '../trained_models/2018-06-12-2205-e10.pkl'
    if exists(model_path):
        book = dt.load_goblet_of_fire()
        net = RNNet.import_model(model_path)
        np.random.seed(50)
        print(net.synthesize(1000, book.char_to_one_hot, book.index_to_char))
    else:
        eprint('No trained model found!')
Example 4
    def sampleHelmholtz(self, requests, statusUpdate=None):
        request = random.choice(requests)
        program = self.grammar.sample(request, maximumDepth=6)
        features = self.featureExtractor.featuresOfProgram(program, request)
        if statusUpdate is not None:
            eprint(statusUpdate, end='')
            flushEverything()
        # Feature extractor failure
        if features is None: return None
        else: return program, request, features
Example 5
    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        if taskBatchSize is None:
            taskBatchSize = len(tasks)
        elif taskBatchSize > len(tasks):
            eprint(
                "Task batch size is greater than total number of tasks, aborting."
            )
            assert False

        return random.sample(tasks, taskBatchSize)
Example 6
def outputDreams(checkpoint, directory):
    from utilities import loadPickle
    result = loadPickle(checkpoint)
    eprint(" [+] Loaded checkpoint", checkpoint)
    g = result.grammars[-1]
    if directory is None:
        randomStr = ''.join(random.choice('0123456789') for _ in range(10))
        directory = "/tmp/" + randomStr
    eprint(" Dreaming into", directory)
    os.system("mkdir  -p %s" % directory)
    dreamFromGrammar(g, directory)
Example 7
    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        if taskBatchSize is None:
            taskBatchSize = len(tasks)
        elif taskBatchSize > len(tasks):
            eprint(
                "Task batch size is greater than total number of tasks, aborting."
            )
            assert False

        start = (taskBatchSize * currIteration) % len(tasks)
        end = start + taskBatchSize
        taskBatch = (tasks + tasks)[start:end]  # Handle wraparound.
        return taskBatch
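
The `(tasks + tasks)[start:end]` idiom above is what lets a batch run past the end of the task list and wrap back to the front. A minimal standalone sketch of the same slice, with integers standing in for task objects:

tasks = list(range(10))                               # stand-ins for task objects
taskBatchSize, currIteration = 4, 3
start = (taskBatchSize * currIteration) % len(tasks)  # 12 % 10 = 2
end = start + taskBatchSize                           # 6
print((tasks + tasks)[start:end])                     # [2, 3, 4, 5]
# With currIteration = 2, start = 8 and the batch wraps around: [8, 9, 0, 1]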
Example 8
def benchmarkSynthesisTimes(result, tasks, _=None, timeout=None, CPUs=None):
    if result.parameters['useRecognitionModel']:
        assert hasattr(result, 'recognitionModel') and result.recognitionModel is not None, \
            "Checkpoint was trained using a recognition model but it does not have a saved recognition model."

    times = parallelMap(
        CPUs, lambda task: benchmarkSynthesisTime(result, task, timeout),
        tasks)
    timeouts = sum(t is None for t in times)
    successes = sum(t is not None for t in times)
    if successes > 0:
        average = sum(t[0] for t in times if t is not None) / float(successes)
        deviation = (sum((t[0] - average)**2
                         for t in times if t is not None) / float(successes))**0.5
        standardError = deviation / (float(successes)**0.5)
    eprint("BENCHMARK:")
    eprint("Solves %d/%d = %d%%" %
           (successes, len(tasks), int(100. * successes / len(tasks))))
    if successes > 0:
        eprint("Synthesis time %f +/- %f sec" % (average, standardError))
        average = sum(t[1] for t in times if t is not None) / float(successes)
        deviation = (sum((t[1] - average)**2
                         for t in times if t is not None) / float(successes))**0.5
        standardError = deviation / (float(successes)**0.5)
        eprint("Expected log P[t|p] =", average, "+/-", standardError)
Example 9
def log(severity, message, error=None):
    try:
        client = MongoClient("mongodb://*****:*****@ds042138.mlab.com:42138/whatsappsync")
        entry = {
                "service": "backend",
                "severity": severity,
                "message": message,
                "timestamp": datetime.datetime.utcnow()}
        if error:
            entry['error'] = error

        client.whatsappsync.logs.insert_one(entry)
    except Exception:
        eprint(traceback.format_exc())
Example 10
    def score(self, program, task):
        # need a try/except here for problems and for timeouts;
        # can copy task.py for the timeout structure
        try:

            def timeoutCallBack(_1, _2):
                raise EvaluationTimeout()

            signal.signal(signal.SIGVTALRM, timeoutCallBack)
            signal.setitimer(signal.ITIMER_VIRTUAL, self.timeout)

            try:
                string_pregex = program.evaluate([])
                # if 'left_paren' in program.show(False):
                #     eprint("string_pregex:", string_pregex)
                preg = string_pregex  #pregex.create(string_pregex)
            except IndexError:
                # free variable
                return False, NEGATIVEINFINITY
            except Exception as e:
                eprint("Exception during evaluation:", e)
                if "Attempt to evaluate fragment variable" in e:
                    eprint("program (bc fragment error)", program)
                return False, NEGATIVEINFINITY

            # TODO: incorporate the prior; right now we just sum up the
            # per-example log likelihoods, which may not be correct.
            cum_ll = 0
            for example in task.examples:
                # might want a try/except around the following line:
                try:
                    #eprint("about to match", program)
                    ll = preg.match(example[1])
                    #eprint("completed match", ll, program)
                except ValueError as e:
                    eprint("ValueError:", e)
                    ll = float('-inf')
                #eprint("pregex:", string_pregex)
                #eprint("example[1]", example[1])

                if ll == float('-inf'):
                    return False, NEGATIVEINFINITY
                else:
                    #eprint("ll", ll)
                    cum_ll += ll
            #eprint("cum_ll", cum_ll)
            return True, cum_ll

        except EvaluationTimeout:
            eprint("Timed out while evaluating", program)
            return False, NEGATIVEINFINITY
        finally:
            signal.signal(signal.SIGVTALRM, lambda *_: None)
            signal.setitimer(signal.ITIMER_VIRTUAL, 0)
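
The timeout machinery in `score` is the standard Unix virtual-timer pattern: arm `ITIMER_VIRTUAL`, let `SIGVTALRM` raise an exception, and always disarm the timer in `finally`. A self-contained sketch of just that pattern (Unix-only; this `EvaluationTimeout` is a stand-in for the project's own exception class):

import signal

class EvaluationTimeout(Exception):
    pass

def timeoutCallBack(_1, _2):
    raise EvaluationTimeout()

signal.signal(signal.SIGVTALRM, timeoutCallBack)
signal.setitimer(signal.ITIMER_VIRTUAL, 0.1)  # fire after 0.1s of CPU time
try:
    while True:
        pass                                  # burn CPU until the timer fires
except EvaluationTimeout:
    print("timed out, as expected")
finally:
    signal.setitimer(signal.ITIMER_VIRTUAL, 0)  # always disarm the timer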
Example 11
    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        if taskBatchSize is None:
            taskBatchSize = len(tasks)
        elif taskBatchSize > len(tasks):
            eprint(
                "Task batch size is greater than total number of tasks, aborting."
            )
            assert False

        if ec_result.recognitionModel is None:
            eprint("No recognition model, falling back on random %d" %
                   taskBatchSize)
            return random.sample(tasks, taskBatchSize)
        else:
            randomTask = random.choice(tasks)
            kNN = kNearestNeighbors(ec_result, tasks, taskBatchSize - 1,
                                    randomTask)
            return [randomTask] + kNN
Example 12
def enumerateNetwork(network,
                     tasks_features,
                     likelihoodModel,
                     solver=None,
                     frontierSize=None,
                     enumerationTimeout=None,
                     CPUs=1,
                     maximumFrontier=None,
                     verbose=True,
                     evaluationTimeout=None):
    from time import time

    start = time()

    chunk_size = max(1, int(math.ceil(len(tasks_features) / CPUs)))
    eprint("enumerateNetwork with", chunk_size, "tasks per cpu")

    chunked_tasks_features = [
        tasks_features[i:i + chunk_size]
        for i in range(0, len(tasks_features), chunk_size)
    ]

    # TODO: enumerateNetworkForTasks
    frontierss = parallelMap(
        CPUs,
        lambda indexed_chunk: enumerateNetworkForTasks(
            indexed_chunk[0],
            network,
            indexed_chunk[1],
            frontierSize=frontierSize,
            timeout=enumerationTimeout,
            evaluationTimeout=evaluationTimeout,
            verbose=verbose,
            maximumFrontier=maximumFrontier),
        list(enumerate(chunked_tasks_features)),
        chunk=1)
    # Flatten the per-CPU lists of frontiers into a single list.
    frontiers = [
        frontier for frontiers in frontierss for frontier in frontiers
    ]
    # if verbose:
    #     eprint("Enumerated %d frontiers in time %f" % (len(frontiers), time() - start))
    return frontiers
Example 13
def entropyRandomBatch(ec_result, tasks, taskBatchSize, randomRatio):
    numRandom = int(randomRatio * taskBatchSize)
    numEntropy = taskBatchSize - numRandom

    eprint(
        "Selecting top %d tasks from the %d overall tasks given lowest entropy."
        % (taskBatchSize, len(tasks)))
    eprint("Will be selecting %d by lowest entropy and %d randomly." %
           (numEntropy, numRandom))
    taskGrammarEntropies = ec_result.recognitionModel.taskGrammarEntropies(
        tasks)
    sortedEntropies = sorted(taskGrammarEntropies.items(), key=lambda x: x[1])

    entropyBatch = [task for (task, entropy) in sortedEntropies[:numEntropy]]
    randomBatch = random.sample(
        [task for (task, entropy) in sortedEntropies[numEntropy:]], numRandom)
    batch = entropyBatch + randomBatch

    return batch
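
The selection logic reduces to: sort by entropy, keep the lowest-entropy prefix, then sample the remainder randomly. A toy illustration with made-up entropy values, so no recognition model is needed:

import random

entropies = {"taskA": 0.3, "taskB": 1.7, "taskC": 0.9, "taskD": 2.4}
numEntropy, numRandom = 2, 1
sortedEntropies = sorted(entropies.items(), key=lambda x: x[1])
entropyBatch = [t for t, _ in sortedEntropies[:numEntropy]]   # ['taskA', 'taskC']
randomBatch = random.sample([t for t, _ in sortedEntropies[numEntropy:]], numRandom)
print(entropyBatch + randomBatch)                             # e.g. ['taskA', 'taskC', 'taskD']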
Example 14
File: ec.py Project: insperatum/ec
def showHitMatrix(top, bottom, tasks):
    tasks = set(tasks)

    total = bottom | top
    eprint(len(total), "/", len(tasks), "total hit tasks")
    bottomMiss = tasks - bottom
    topMiss = tasks - top

    eprint("{: <13s}{: ^13s}{: ^13s}".format("", "bottom miss", "bottom hit"))
    eprint("{: <13s}{: ^13d}{: ^13d}".format("top miss",
                                             len(bottomMiss & topMiss),
                                             len(bottom & topMiss)))
    eprint("{: <13s}{: ^13d}{: ^13d}".format("top hit", len(top & bottomMiss),
                                             len(top & bottom)))
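
With toy sets the table reads as a 2x2 contingency matrix of which search direction solved which tasks. A runnable sketch (using `print` where the original uses `eprint`):

top = {"t1", "t2"}                 # tasks hit by top-down enumeration
bottom = {"t2", "t3"}              # tasks hit by bottom-up enumeration
tasks = {"t1", "t2", "t3", "t4"}
topMiss, bottomMiss = tasks - top, tasks - bottom
print("{: <13s}{: ^13s}{: ^13s}".format("", "bottom miss", "bottom hit"))
print("{: <13s}{: ^13d}{: ^13d}".format("top miss", len(bottomMiss & topMiss), len(bottom & topMiss)))
print("{: <13s}{: ^13d}{: ^13d}".format("top hit", len(top & bottomMiss), len(top & bottom)))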
Example 15
        def train(self, tasks, steps=400):
            # list of list of features for each example in each task
            optimizer = torch.optim.Adam(self.parameters())
            with timing("Trained discriminator"):
                losses = []
                for i in range(steps):
                    self.zero_grad()
                    if random.random() <= self.trainingSuccessRatio:
                        # success
                        t = random.choice(tasks)
                        features = [
                            self.extract.featuresOfTask(
                                Task(t.name, t.request, [ex], t.features))
                            for ex in t.examples
                        ]
                        loss = (self(features) - 1.0)**2
                    else:
                        # fail
                        t1, t2 = random.sample(tasks, 2)
                        features1 = [
                            self.extract.featuresOfTask(
                                Task(t1.name, t1.request, [ex], t1.features))
                            for ex in t1.examples[:len(t1.examples) // 2]
                        ]
                        features2 = [
                            self.extract.featuresOfTask(
                                Task(t2.name, t2.request, [ex], t2.features))
                            for ex in t2.examples[len(t2.examples) // 2:]
                        ]
                        features = features1 + features2
                        loss = self(features)**2

                    loss.backward()
                    optimizer.step()
                    losses.append(loss.item())
                    if not i % 50:
                        eprint("Discriminator Epoch", i, "Loss",
                               sum(losses) / len(losses))
                        gc.collect()
Example 16
def enumerateDreams(checkpoint, directory):
    from recognition import backgroundHelmholtzEnumeration
    from utilities import loadPickle, standardDeviation, mean
    result = loadPickle(checkpoint)
    eprint(" [+] Loaded checkpoint", checkpoint)
    g = result.grammars[-1]
    if directory is None: assert False, "please specify a directory"
    eprint(" Dreaming into", directory)
    os.system("mkdir  -p %s" % directory)
    frontiers = backgroundHelmholtzEnumeration(
        makeTasks(None, None),
        g,
        100,
        evaluationTimeout=0.01,
        special=LogoFeatureCNN.special)()
    print(f"{len(frontiers)} total frontiers.")
    MDL = 0

    def L(f):
        return -list(f.entries)[0].logPrior

    frontiers.sort(key=lambda f: -L(f))
    while len(frontiers) > 0:
        # get frontiers whose MDL is between [MDL,MDL + 1)
        fs = []
        while len(frontiers) > 0 and L(frontiers[-1]) < MDL + 1:
            fs.append(frontiers.pop())
        if fs:
            random.shuffle(fs)
            print(f"{len(fs)} programs with MDL between [{MDL}, {MDL + 1})")

            fs = fs[:500]
            os.system(f"mkdir {directory}/{MDL}")
            dreamFromGrammar([list(f.entries)[0].program for f in fs],
                             f"{directory}/{MDL}")
        MDL += 1
Example 17
    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
        if taskBatchSize is None:
            taskBatchSize = len(tasks)
        elif taskBatchSize > len(tasks):
            eprint(
                "Task batch size is greater than total number of tasks, aborting."
            )
            assert False

        # Reshuffles tasks in a fixed way across epochs for reproducibility.
        currEpoch = (currIteration * taskBatchSize) // len(tasks)

        shuffledTasks = tasks.copy()  # Since shuffle works in place.
        random.Random(self.baseSeed + currEpoch).shuffle(shuffledTasks)

        shuffledTasksWrap = tasks.copy()  # Since shuffle works in place.
        random.Random(self.baseSeed + currEpoch + 1).shuffle(shuffledTasksWrap)

        start = (taskBatchSize * currIteration) % len(shuffledTasks)
        end = start + taskBatchSize
        taskBatch = (shuffledTasks +
                     shuffledTasksWrap)[start:end]  # Wraparound nicely.

        return list(set(taskBatch))
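
Seeding `random.Random` with the epoch number is what makes the shuffle reproducible across runs while still changing from epoch to epoch. A minimal demonstration:

import random

tasks = list(range(8))
baseSeed, currEpoch = 0, 3
a = tasks.copy()
random.Random(baseSeed + currEpoch).shuffle(a)
b = tasks.copy()
random.Random(baseSeed + currEpoch).shuffle(b)
assert a == b                      # same seed gives the same permutation
print(a)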
Example 18
def exportTasks():
    import sys
    import pickle

    n_examples = 15
    if len(sys.argv) > 1:
        n_examples = int(sys.argv[1])

    eprint("Downloading and generating dataset")
    tasks = sorted(make_list_tasks(n_examples), key=lambda t: t.name)
    eprint("Got {} list tasks".format(len(tasks)))

    with open("data/list_tasks.pkl", "w") as f:
        pickle.dump(tasks, f)
    eprint("Wrote list tasks to data/list_tasks.pkl")
Example 19
    def sampleManyHelmholtz(self, requests, N, CPUs):
        eprint("Sampling %d programs from the prior on %d CPUs..." % (N, CPUs))
        flushEverything()
        frequency = max(1, N // 50)  # emit a status dot roughly 50 times in total
        samples = parallelMap(CPUs,
                              lambda n: self.sampleHelmholtz(requests,
                                                             statusUpdate='.' if n % frequency == 0 else None),
                              range(N))
        eprint()
        flushEverything()
        try:
            self.featureExtractor.finish()
        except AttributeError:
            pass
        eprint()
        flushEverything()
        return samples
Example 20
    animateCheckpoint = args.pop("animate")
    if animateCheckpoint is not None:
        animateSolutions(loadPickle(animateCheckpoint).allFrontiers)
        sys.exit(0)

    target = args.pop("target")
    red = args.pop("reduce")
    save = args.pop("save")
    prefix = args.pop("prefix")
    prefix_dreams = prefix + "/dreams/" + ('_'.join(target)) + "/"
    prefix_pickles = prefix + "/logo." + ('.'.join(target))
    if not os.path.exists(prefix_dreams):
        os.makedirs(prefix_dreams)
    tasks = makeTasks(target, proto)
    eprint("Generated", len(tasks), "tasks")

    os.chdir("prototypical-networks")
    subprocess.Popen(["python", "./protonet_server.py"])
    time.sleep(3)
    os.chdir("..")

    test, train = testTrainSplit(tasks, args.pop("split"))
    eprint("Split tasks into %d/%d test/train" % (len(test), len(train)))
    if test: montageTasks(test, "test_")
    montageTasks(train, "train_")

    if red:
        for reducing in red:
            try:
                with open(reducing, 'r') as f:
Example 21
        CPUs=numberOfCPUs(),
        extras=list_options)

    tasks = retrieveTasks(args.pop("dataset"))

    # Optionally filter out malformed tasks here, e.g.:
    # tasks = [t for t in tasks if t.request == 1]

    maxTasks = args.pop("maxTasks")
    if len(tasks) > maxTasks:
        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        random.seed(42)
        random.shuffle(tasks)
        del tasks[maxTasks:]

    # Remove degenerate tasks: either the identity or a constant
    tasks = [ t for t in tasks
              if any( xs[0] != y for xs, y in t.examples )]
    tasks = [ t for t in tasks
              if not all( t.examples[0][1] == y for xs, y in t.examples )]


    eprint("Got {} list tasks".format(len(tasks)))

    for task in tasks:
        task.features = list_features(task.examples)
Example 22
        ys = [program.runWithArguments(x) for x in xs]
        return Circuit.extractFeatures(ys)


if __name__ == "__main__":
    circuits = []
    import random
    random.seed(0)
    while len(circuits) < NUMBEROFTASKS * 2:
        inputs = sampleDistribution(inputDistribution)
        gates = sampleDistribution(gateDistribution)
        newTask = Circuit(numberOfInputs=inputs, numberOfGates=gates)
        if newTask not in circuits:
            circuits.append(newTask)
    eprint("Sampled %d circuits with %d unique functions" %
           (len(circuits), len({t.signature
                                for t in circuits})))
    tasks = [t.task() for t in circuits[:NUMBEROFTASKS]]
    testing = [t.task() for t in circuits[NUMBEROFTASKS:]]

    baseGrammar = Grammar.uniform(primitives)
    explorationCompression(baseGrammar,
                           tasks,
                           testingTasks=testing,
                           outputPrefix="experimentOutputs/circuit",
                           evaluationTimeout=None,
                           **commandlineArguments(
                               iterations=10,
                               aic=1.,
                               structurePenalty=1,
                               CPUs=numberOfCPUs(),
Example 23
        outputDirectory = "experimentOutputs/puddleworld/%s"%timestamp
        os.system("mkdir -p %s"%outputDirectory)

        # Convert pyccg ontology -> Dreamcoder.
        puddleworldTypes, puddleworldPrimitives = convertOntology(ec_ontology)
        input_type, output_type = puddleworldTypes['model'], puddleworldTypes['action']

        # Convert sentences-scenes -> Dreamcoder style tasks.
        doLocal, doGlobal, doTiny = args.pop('local'), args.pop('global'), args.pop('tiny')
        num_tiny, tiny_size = args.pop('num_tiny'), args.pop('tiny_scene_size')

        (localTrain, localTest) = makeLocalTasks(input_type, output_type) if doLocal else ([], [])
        (globalTrain, globalTest) = makeGlobalTasks(input_type, output_type) if doGlobal else ([], [])
        (tinyTrain, tinyTest) = makeTinyTasks(input_type, output_type, num_tiny, tiny_size) if doTiny else ([], [])
        allTrain, allTest = localTrain + globalTrain + tinyTrain, localTest + globalTest + tinyTest
        eprint("Using local tasks: %d train, %d test" % (len(localTrain), len(localTest)))
        eprint("Using global tasks: %d train, %d test" % (len(globalTrain), len(globalTest)))
        eprint("Using tiny tasks of size %d: %d train, %d test" % (tiny_size, len(tinyTrain), len(tinyTest)))
        eprint("Using total tasks: %d train, %d test" % (len(allTrain), len(allTest)))

        # Make Dreamcoder grammar.
        baseGrammar = Grammar.uniform(puddleworldPrimitives)
        print(baseGrammar.json())

        # Initialize the language learner driver.
        use_pyccg_enum, use_blind_enum = args.pop('use_pyccg_enum'), args.pop('use_blind_enum')
        print("Using PyCCG enumeration: %s, using blind enumeration: %s" % (str(use_pyccg_enum), str(use_blind_enum)))
        
        if args.pop('use_initial_lexicon'):
            print("Using initial lexicon for Puddleworld PyCCG learner.")
            pyccg_learner = WordLearner(initial_puddleworld_lex)
Example 24
if __name__ == "__main__":
    args = commandlineArguments(frontierSize=None,
                                activation='sigmoid',
                                iterations=10,
                                a=3,
                                maximumFrontier=10,
                                topK=2,
                                pseudoCounts=10.0,
                                helmholtzRatio=0.5,
                                structurePenalty=1.,
                                CPUs=numberOfCPUs(),
                                extras=regex_options)

    tasks = makeTasks()  #TODO
    eprint("Generated", len(tasks), "tasks")

    maxTasks = args.pop("maxTasks")
    if len(tasks) > maxTasks:
        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        random.seed(42)
        random.shuffle(tasks)
        del tasks[maxTasks:]

    maxExamples = args.pop("maxExamples")
    for task in tasks:
        if len(task.examples) > maxExamples:
            task.examples = task.examples[:maxExamples]

    split = args.pop("split")
    test, train = testTrainSplit(tasks, split)
Example 25
    def train(self, frontiers, _=None, steps=250, lr=0.001, topK=1, CPUs=1,
              helmholtzRatio = 0.):
        """
        helmholtzRatio: What fraction of the training data should be forward samples from the generative model?
        """
        requests = [ frontier.task.request for frontier in frontiers ]

        frontiers = [ frontier.topK(topK).normalize() for frontier in frontiers if not frontier.empty ]

        # Not sure why this ever happens
        if helmholtzRatio is None: helmholtzRatio = 0.

        eprint("Training recognition model from %d frontiers, %d%% Helmholtz."%(
            len(frontiers),
            int(helmholtzRatio*100)))
        
        HELMHOLTZBATCH = 250

        with timing("Trained recognition model"):
            avgLoss = None
            avgPermutedLoss = None

            for i in range(1,steps + 1): 
                eprint("step", i, "out of", steps + 1)   
                if helmholtzRatio < 1.:
                    permutedFrontiers = list(frontiers)
                    random.shuffle(permutedFrontiers)
                    
                    eprint("frontiers:")
                    eprint(frontiers)
                    eprint("permutedFrontiers:")
                    eprint(permutedFrontiers)
                
                else: permutedFrontiers = [None]
                frontier_num = 0
                for frontier in permutedFrontiers:
                    eprint("frontier num", frontier_num, "out of", len(permutedFrontiers))
                    frontier_num += 1
                    # Randomly decide whether to sample from the generative model
                    doingHelmholtz = random.random() < helmholtzRatio
                    if doingHelmholtz:
                        networkInputs = self.helmholtzNetworkInputs(requests, HELMHOLTZBATCH, CPUs)
                        loss = self.step(*networkInputs)
                    if not doingHelmholtz:
                        if helmholtzRatio < 1.:
                            #placeholder for now
                            # self.zero_grad()
                            # loss = self.frontierKL(frontier)
                            #fix this later
                            loss = 0
                            eprint("helmholtz is messed up. Fix it.")
                            pass
                        else:
                            # Refuse to train on the frontiers
                            pass

                if (i==1 or i%5==0):
                    # networkInputs = self.helmholtzNetworkInputs(requests, HELMHOLTZBATCH, CPUs)
                    # loss, permutedLoss = self.getCurrentLoss(*networkInputs)
                    avgLoss = (0.9*avgLoss + 0.1*loss) if avgLoss is not None else loss
                    # avgPermutedLoss = (0.9*avgPermutedLoss + 0.1*permutedLoss) if avgPermutedLoss is not None else permutedLoss

                    # inputInformation = avgPermutedLoss - avgLoss
                    eprint("Epoch %3d Loss %2.2f" % (i, avgLoss))
                    gc.collect()
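
The `avgLoss = 0.9*avgLoss + 0.1*loss` update above is an exponential moving average, which smooths the noisy per-step losses before reporting. A standalone illustration with a fake, noisy loss curve:

import random

random.seed(0)
avgLoss = None
for step in range(1, 101):
    loss = 1.0 / step + random.uniform(-0.05, 0.05)   # fake, noisy loss
    avgLoss = (0.9 * avgLoss + 0.1 * loss) if avgLoss is not None else loss
    if step % 25 == 0:
        print("Epoch %3d Loss %2.2f" % (step, avgLoss))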
Example 26
def visualizePrimitives(primitives, export='/tmp/logo_primitives.png'):
    from itertools import product
    from pylab import imshow, show
    from program import Index, Abstraction, Application, Primitive
    from utilities import montageMatrix, makeNiceArray
    from type import tint
    import scipy.misc
    from makeLogoTasks import parseLogo

    angles = [
        Program.parse(a) for a in [
            "logo_ZA",
            "logo_epsA",
            "(logo_MULA logo_epsA 2)",
            "(logo_DIVA logo_UA 4)",
            "(logo_DIVA logo_UA 5)",
            "(logo_DIVA logo_UA 7)",
            "(logo_DIVA logo_UA 9)",
        ]
    ]
    specialAngles = {
        "#(lambda (lambda (logo_forLoop logo_IFTY (lambda (lambda (logo_FWRT (logo_MULL logo_UL 3) (logo_MULA $2 4) $0))) $1)))":
        [Program.parse("(logo_MULA logo_epsA 4)")] +
        [Program.parse("(logo_DIVA logo_UA %d)" % n) for n in [7, 9]]
    }
    numbers = [Program.parse(n) for n in ["1", "2", "5", "7", "logo_IFTY"]]
    specialNumbers = {
        "#(lambda (#(lambda (lambda (lambda (lambda (logo_forLoop $2 (lambda (lambda (logo_FWRT $5 (logo_DIVA logo_UA $3) $0))) $0))))) (logo_MULL logo_UL $0) 4 4))":
        [Program.parse(str(n)) for n in [1, 2, 3]]
    }
    distances = [
        Program.parse(l) for l in [
            "logo_ZL", "logo_epsL", "(logo_MULL logo_epsL 2)",
            "(logo_DIVL logo_UL 2)", "logo_UL"
        ]
    ]
    subprograms = [
        parseLogo(sp) for sp in [
            "(move 1d 0a)",
            "(loop i infinity (move (*l epsilonLength 4) (*a epsilonAngle 2)))",
            "(loop i infinity (move (*l epsilonLength 5) (/a epsilonAngle 2)))",
            "(loop i 4 (move 1d (/a 1a 4)))"
        ]
    ]

    entireArguments = {
        "#(lambda (lambda (#(#(lambda (lambda (lambda (logo_forLoop $2 (lambda (lambda (logo_FWRT $2 $3 $0))))))) logo_IFTY) (logo_MULA (#(logo_DIVA logo_UA) $1) $0) (#(logo_MULL logo_UL) 3))))":
        [[Program.parse(str(x)) for x in xs]
         for xs in [("3", "1", "$0"), ("4", "1",
                                       "$0"), ("5", "1",
                                               "$0"), ("5", "3",
                                                       "$0"), ("7", "3", "$0")]
         ]
    }
    specialDistances = {
        "#(lambda (lambda (logo_forLoop 7 (lambda (lambda (#(lambda (lambda (lambda (#(lambda (lambda (lambda (logo_forLoop $2 (lambda (lambda (logo_FWRT $2 $3 $0))))))) 7 $1 $2 $0)))) $3 logo_epsA $0))) $0)))":
        [Program.parse("(logo_MULL logo_epsL %d)" % n) for n in range(5)]
    }

    matrix = []
    for p in primitives:
        if not p.isInvented: continue
        t = p.tp
        eprint(p, ":", p.tp)
        if t.returns() != turtle:
            eprint("\t(does not return a turtle)")
            continue

        def argumentChoices(t):
            if t == turtle:
                return [Index(0)]
            elif t == arrow(turtle, turtle):
                return subprograms
            elif t == tint:
                return specialNumbers.get(str(p), numbers)
            elif t == tangle:
                return specialAngles.get(str(p), angles)
            elif t == tlength:
                return specialDistances.get(str(p), distances)
            else:
                return []

        ts = []
        for arguments in entireArguments.get(
                str(p),
                product(*[argumentChoices(t) for t in t.functionArguments()])):
            eprint(arguments)
            pp = p
            for a in arguments:
                pp = Application(pp, a)
            pp = Abstraction(pp)
            i = drawLogo(pp, resolution=128)
            if i is not None:
                ts.append(np.reshape(np.array(i), (128, 128)))

        if ts == []: continue

        matrix.append(ts)
        if len(ts) < 6: ts = [ts]
        else: ts = makeNiceArray(ts)
        r = montageMatrix(ts)
        fn = "/tmp/logo_primitive_%d.png" % len(matrix)
        eprint("\tExported to", fn)
        scipy.misc.imsave(fn, r)

    matrix = montageMatrix(matrix)
    scipy.misc.imsave(export, matrix)
Example 27
import sys
import time
import traceback
import pickle

from utilities import eprint

if __name__ == "__main__":
    sys.setrecursionlimit(10000)

    start = time.time()
    request = pickle.load(sys.stdin.buffer)
    dt = time.time() - start
    if dt > 1:
        eprint(
            "(compiled driver warning: SLOW) Compiled driver unpacked the message in time",
            dt)

    response = (False, None)
    try:
        start = time.time()
        f = request["function"]
        result = f(*request["arguments"], **request["keywordArguments"])
        response = (True, result)
    except Exception as e:
        eprint("Exception thrown in pypy process for %s:" % f.__name__)
        sys.stderr.write(traceback.format_exc())
        sys.stderr.flush()
    finally:
        start = time.time()
        pickle.dump(response, sys.stdout.buffer)
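
The driver above implements one half of a simple pickle-over-pipes protocol: read a single request from stdin, reply with a single (success, result) pair on stdout. A hedged sketch of what the calling side might look like, assuming the driver is saved as compiledDriver.py and that the function being called is importable by name in the child process (the real callCompiled in utilities may differ in its transport details):

import pickle
import subprocess

def callCompiledSketch(f, *arguments, **keywordArguments):
    # Spawn the driver and exchange one pickled request/response pair.
    p = subprocess.Popen(["pypy", "compiledDriver.py"],
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    request = {"function": f,
               "arguments": arguments,
               "keywordArguments": keywordArguments}
    out, _ = p.communicate(pickle.dumps(request))
    ok, result = pickle.loads(out)
    assert ok, "compiled driver reported an exception"
    return result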
Example 28
#test callCompiled
import sys
import os
sys.path.append(os.path.abspath('./'))
sys.path.append(os.path.abspath('./ec'))

from utilities import callCompiled, eprint

from fun import f

x = 6
ans = callCompiled(f, x)

eprint(ans)
Example 29
    def __init__(self, tasks):
        lexicon = {
            c
            for t in tasks for (x, ), y in self.tokenize(t.examples)
            for c in x + y
        }

        super(LearnedFeatureExtractor, self).__init__(lexicon=list(lexicon),
                                                      H=64,
                                                      tasks=tasks,
                                                      bidirectional=True)


if __name__ == "__main__":
    tasks = makeTasks()
    eprint("Generated", len(tasks), "tasks")

    test, train = testTrainSplit(tasks, 0.2)
    eprint("Split tasks into %d/%d test/train" % (len(test), len(train)))

    baseGrammar = Grammar.uniform(primitives)

    explorationCompression(baseGrammar,
                           train,
                           testingTasks=test,
                           outputPrefix="experimentOutputs/text",
                           evaluationTimeout=0.0005,
                           **commandlineArguments(
                               steps=500,
                               iterations=10,
                               helmholtzRatio=0.5,
Example 30
    def train(self, frontiers, _=None, steps=250, lr=0.001, topK=1, CPUs=1,
              helmholtzRatio=0., helmholtzBatch=5000):
        """
        helmholtzRatio: What fraction of the training data should be forward samples from the generative model?
        """
        requests = [ frontier.task.request for frontier in frontiers ]
        frontiers = [ frontier.topK(topK).normalize() for frontier in frontiers if not frontier.empty ]

        # Not sure why this ever happens
        if helmholtzRatio is None:
            helmholtzRatio = 0.

        eprint("Training a recognition model from %d frontiers, %d%% Helmholtz, feature extractor %s."%(
            len(frontiers),
            int(helmholtzRatio*100),
            self.featureExtractor.__class__.__name__))

        # The number of Helmholtz samples that we generate at once
        # Should only affect performance and shouldn't affect anything else
        HELMHOLTZBATCH = helmholtzBatch
        helmholtzSamples = []

        optimizer = torch.optim.Adam(self.parameters(), lr=lr)

        with timing("Trained recognition model"):
            for i in range(1,steps + 1):
                losses = []

                if helmholtzRatio < 1.:
                    permutedFrontiers = list(frontiers)
                    random.shuffle(permutedFrontiers)
                else:
                    permutedFrontiers = [None]
                for frontier in permutedFrontiers:
                    # Randomly decide whether to sample from the generative model
                    doingHelmholtz = random.random() < helmholtzRatio
                    if doingHelmholtz:
                        if helmholtzSamples == []:
                            helmholtzSamples = \
                            self.sampleManyHelmholtz(requests,
                                                     HELMHOLTZBATCH,
                                                     1) # TODO THIS IS A HACK
                        attempt = helmholtzSamples.pop()
                        if attempt is not None:
                            program, request, features = attempt
                            self.zero_grad()
                            loss = self.HelmholtzKL(features, program, request)
                        else: doingHelmholtz = False
                    if not doingHelmholtz:
                        if helmholtzRatio < 1.:
                            self.zero_grad()
                            loss = self.frontierKL(frontier)
                        else:
                            # Refuse to train on the frontiers
                            continue

                    loss.backward()
                    optimizer.step()
                    losses.append(loss.item())
                if i%50 == 0 and losses:
                    eprint("Epoch",i,"Loss",sum(losses)/len(losses))
                    gc.collect()