def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
    unsolvedTasks = [t for t in tasks if ec_result.allFrontiers[t].empty]

    if taskBatchSize is None:
        return unsolvedTasks
    elif taskBatchSize > len(tasks):
        eprint("Task batch size is greater than total number of tasks, aborting.")
        assert False

    if ec_result.recognitionModel is None:
        eprint("No recognition model, falling back on random %d tasks from the remaining %d" %
               (taskBatchSize, len(unsolvedTasks)))
        return random.sample(unsolvedTasks, taskBatchSize)
    else:
        lowEntropyUnsolved = entropyRandomBatch(ec_result, unsolvedTasks, taskBatchSize,
                                                randomRatio=0)
        randomTask = random.choice(lowEntropyUnsolved)
        kNN = kNearestNeighbors(ec_result, tasks, taskBatchSize - 1, randomTask)
        return [randomTask] + kNN
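# Hypothetical usage sketch (the batcher object name is an assumption for
# illustration; only getTaskBatch's signature comes from the code above):
#
#   batcher = UnsolvedEntropyTaskBatcher()
#   batch = batcher.getTaskBatch(ec_result, tasks, taskBatchSize=40, currIteration=3)
#
# With a trained recognition model, this seeds the batch with one low-entropy
# unsolved task and fills the remainder with its nearest neighbors.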
def benchmarkSynthesisTime(result, task, timeout):
    grammar = result.grammars[-1]
    from likelihoodModel import AllOrNothingLikelihoodModel
    from time import time

    startTime = time()
    if result.parameters['useRecognitionModel']:
        # Because grammar induction is the last step of EC, the
        # recognition model is actually trained for the second-to-last
        # grammar.
        grammar = result.grammars[-2]
        features = result.recognitionModel.featureExtractor.featuresOfTask(task)
        variables, productions = result.recognitionModel(features)
        grammar = Grammar(variables.data[0],
                          [(productions.data[k], t, p)
                           for k, (_, t, p) in enumerate(grammar.productions)])
    elapsed = time() - startTime
    frontier = callCompiled(enumerateForTask,
                            grammar, task,
                            AllOrNothingLikelihoodModel,
                            maximumFrontier=1,
                            timeout=timeout - elapsed)
    dt = time() - startTime
    if dt > timeout or len(frontier) == 0:
        return None
    # The enumerated frontier holds the solution; `solution` was undefined here.
    l = frontier.entries[0].logLikelihood
    p = frontier.entries[0].program
    eprint("Solved", task, "w/", p,
           "(log likelihood of task given program:", l, ").",
           "in time", dt)
    return dt, l
def synthesize_with_best_model():
    model_path = '../trained_models/2018-06-12-2205-e10.pkl'
    if exists(model_path):
        book = dt.load_goblet_of_fire()
        net = RNNet.import_model(model_path)
        np.random.seed(50)
        print(net.synthesize(1000, book.char_to_one_hot, book.index_to_char))
    else:
        eprint('No best trained model found!')
def sampleHelmholtz(self, requests, statusUpdate=None):
    request = random.choice(requests)
    program = self.grammar.sample(request, maximumDepth=6)
    features = self.featureExtractor.featuresOfProgram(program, request)
    if statusUpdate is not None:
        eprint(statusUpdate, end='')
        flushEverything()
    # Feature extractor failure
    if features is None:
        return None
    else:
        return program, request, features
def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
    if taskBatchSize is None:
        taskBatchSize = len(tasks)
    elif taskBatchSize > len(tasks):
        eprint("Task batch size is greater than total number of tasks, aborting.")
        assert False

    return random.sample(tasks, taskBatchSize)
def outputDreams(checkpoint, directory):
    from utilities import loadPickle
    result = loadPickle(checkpoint)
    eprint(" [+] Loaded checkpoint", checkpoint)
    g = result.grammars[-1]
    if directory is None:
        randomStr = ''.join(random.choice('0123456789') for _ in range(10))
        directory = "/tmp/" + randomStr
    eprint(" Dreaming into", directory)
    os.system("mkdir -p %s" % directory)
    dreamFromGrammar(g, directory)
def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
    if taskBatchSize is None:
        taskBatchSize = len(tasks)
    elif taskBatchSize > len(tasks):
        eprint("Task batch size is greater than total number of tasks, aborting.")
        assert False

    start = (taskBatchSize * currIteration) % len(tasks)
    end = start + taskBatchSize
    taskBatch = (tasks + tasks)[start:end]  # Handle wraparound.
    return taskBatch
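# Worked example of the wraparound indexing above (values are illustrative):
# with len(tasks) == 10, taskBatchSize == 4, currIteration == 3,
#   start = (4 * 3) % 10 = 2,  end = 6  ->  tasks[2:6]
# and with currIteration == 2,
#   start = 8, end = 12  ->  the doubled list (tasks + tasks)[8:12] wraps
#   around to yield tasks[8], tasks[9], tasks[0], tasks[1].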
def benchmarkSynthesisTimes(result, tasks, _=None, timeout=None, CPUs=None):
    if result.parameters['useRecognitionModel']:
        assert hasattr(result, 'recognitionModel') and result.recognitionModel is not None, \
            "Checkpoint was trained using a recognition model but it does not have a saved recognition model."

    times = parallelMap(CPUs,
                        lambda task: benchmarkSynthesisTime(result, task, timeout),
                        tasks)
    timeouts = sum(t is None for t in times)
    successes = sum(t is not None for t in times)
    if successes > 0:
        average = sum(t[0] for t in times if t is not None) / float(successes)
        deviation = (sum((t[0] - average)**2 for t in times if t is not None) / float(successes))**0.5
        standardError = deviation / (float(successes)**0.5)
    eprint("BENCHMARK:")
    eprint("Solves %d/%d = %d%%" %
           (successes, len(tasks), int(100. * successes / len(tasks))))
    if successes > 0:
        eprint("Synthesis time %f +/- %f sec" % (average, standardError))
        average = sum(t[1] for t in times if t is not None) / float(successes)
        deviation = (sum((t[1] - average)**2 for t in times if t is not None) / float(successes))**0.5
        standardError = deviation / (float(successes)**0.5)
        eprint("Expected log P[t|p] =", average, "+/-", standardError)
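# Hypothetical invocation (the checkpoint path is an assumption; loadPickle
# and numberOfCPUs appear elsewhere in this codebase):
#
#   result = loadPickle("experimentOutputs/list.pkl")
#   benchmarkSynthesisTimes(result, tasks, timeout=60, CPUs=numberOfCPUs())
#
# Per-task timing runs in parallel; tasks that time out contribute None and
# are excluded from the mean/standard-error statistics.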
def log(severity, message, error=None):
    try:
        client = MongoClient("mongodb://*****:*****@ds042138.mlab.com:42138/whatsappsync")
        entry = {"service": "backend",
                 "severity": severity,
                 "message": message,
                 "timestamp": datetime.datetime.utcnow()}
        if error:
            entry['error'] = error
        client.whatsappsync.logs.insert_one(entry)
    except Exception:
        eprint(traceback.format_exc())
def score(self, program, task):
    # Evaluate the program to obtain a pregex, then sum the log likelihoods of
    # matching every example output. Evaluation is guarded by a timeout,
    # following the structure in task.py.
    try:
        def timeoutCallBack(_1, _2):
            raise EvaluationTimeout()
        signal.signal(signal.SIGVTALRM, timeoutCallBack)
        signal.setitimer(signal.ITIMER_VIRTUAL, self.timeout)

        try:
            preg = program.evaluate([])
        except IndexError:
            # free variable
            return False, NEGATIVEINFINITY
        except Exception as e:
            eprint("Exception during evaluation:", e)
            if "Attempt to evaluate fragment variable" in str(e):
                eprint("program (bc fragment error)", program)
            return False, NEGATIVEINFINITY

        # Right now we just sum per-example log likelihoods; the prior is not
        # used at all. Whether that is the right scoring rule is an open question.
        cum_ll = 0
        for example in task.examples:
            try:
                ll = preg.match(example[1])
            except ValueError as e:
                eprint("ValueError:", e)
                ll = float('-inf')
            if ll == float('-inf'):
                return False, NEGATIVEINFINITY
            else:
                cum_ll += ll
        return True, cum_ll
    except EvaluationTimeout:
        eprint("Timed out while evaluating", program)
        return False, NEGATIVEINFINITY
    finally:
        signal.signal(signal.SIGVTALRM, lambda *_: None)
        signal.setitimer(signal.ITIMER_VIRTUAL, 0)
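# Behavioral sketch (numbers are illustrative): for a task whose examples are
# [((), "abc"), ((), "abd")], a program evaluating to a pregex that assigns
# log likelihoods -1.2 and -1.5 to the two outputs scores (True, -2.7);
# any output matched with -inf likelihood makes the whole task score
# (False, NEGATIVEINFINITY).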
def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
    if taskBatchSize is None:
        taskBatchSize = len(tasks)
    elif taskBatchSize > len(tasks):
        eprint("Task batch size is greater than total number of tasks, aborting.")
        assert False

    if ec_result.recognitionModel is None:
        eprint("No recognition model, falling back on random %d" % taskBatchSize)
        return random.sample(tasks, taskBatchSize)
    else:
        randomTask = random.choice(tasks)
        kNN = kNearestNeighbors(ec_result, tasks, taskBatchSize - 1, randomTask)
        return [randomTask] + kNN
def enumerateNetwork(network, tasks_features, likelihoodModel, solver=None,
                     frontierSize=None, enumerationTimeout=None, CPUs=1,
                     maximumFrontier=None, verbose=True, evaluationTimeout=None):
    from time import time

    start = time()
    chunk_size = max(int(math.ceil(len(tasks_features) / CPUs)), 1)
    eprint("enumerateNetwork with", chunk_size, "tasks per cpu")
    chunked_tasks_features = [tasks_features[i:i + chunk_size]
                              for i in xrange(0, len(tasks_features), chunk_size)]
    # TODO: enumerateNetworkForTasks
    frontierss = parallelMap(
        CPUs,
        lambda (cpu_idx, tasks_features): enumerateNetworkForTasks(
            cpu_idx, network, tasks_features,
            frontierSize=frontierSize,
            timeout=enumerationTimeout,
            evaluationTimeout=evaluationTimeout,
            verbose=verbose,
            maximumFrontier=maximumFrontier),
        zip(range(len(chunked_tasks_features)), chunked_tasks_features),
        chunk=1)
    # Flatten the per-CPU lists of frontiers into a single list.
    frontiers = [frontier for frontiers in frontierss for frontier in frontiers]
    if verbose:
        eprint("Enumerated %d frontiers in time %f" % (len(frontiers), time() - start))
    return frontiers
def entropyRandomBatch(ec_result, tasks, taskBatchSize, randomRatio):
    numRandom = int(randomRatio * taskBatchSize)
    numEntropy = taskBatchSize - numRandom
    eprint("Selecting top %d tasks from the %d overall tasks given lowest entropy." %
           (taskBatchSize, len(tasks)))
    eprint("Will be selecting %d by lowest entropy and %d randomly." %
           (numEntropy, numRandom))

    taskGrammarEntropies = ec_result.recognitionModel.taskGrammarEntropies(tasks)
    sortedEntropies = sorted(taskGrammarEntropies.items(), key=lambda x: x[1])

    entropyBatch = [task for (task, entropy) in sortedEntropies[:numEntropy]]
    randomBatch = random.sample([task for (task, entropy) in sortedEntropies[numEntropy:]],
                                numRandom)
    batch = entropyBatch + randomBatch
    return batch
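# Worked example (numbers are illustrative): with taskBatchSize == 10 and
# randomRatio == 0.2, numRandom == 2 and numEntropy == 8, so the batch is the
# 8 tasks whose predicted grammars have the lowest entropy under the
# recognition model, plus 2 tasks sampled uniformly from the remainder.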
def showHitMatrix(top, bottom, tasks):
    tasks = set(tasks)

    total = bottom | top
    eprint(len(total), "/", len(tasks), "total hit tasks")

    bottomMiss = tasks - bottom
    topMiss = tasks - top

    eprint("{: <13s}{: ^13s}{: ^13s}".format("", "bottom miss", "bottom hit"))
    eprint("{: <13s}{: ^13d}{: ^13d}".format("top miss",
                                             len(bottomMiss & topMiss),
                                             len(bottom & topMiss)))
    eprint("{: <13s}{: ^13d}{: ^13d}".format("top hit",
                                             len(top & bottomMiss),
                                             len(top & bottom)))
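# The format strings above render a 2x2 contingency table on stderr, e.g.
# (counts are illustrative, and sum to len(tasks) == 100):
#
#   49 / 100 total hit tasks
#                 bottom miss   bottom hit
#   top miss          51            7
#   top hit            9           33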
def train(self, tasks, steps=400):
    # list of list of features for each example in each task
    optimizer = torch.optim.Adam(self.parameters())
    with timing("Trained discriminator"):
        losses = []
        for i in xrange(steps):
            self.zero_grad()
            if random.random() <= self.trainingSuccessRatio:
                # success: all examples drawn from the same task
                t = random.choice(tasks)
                features = [self.extract.featuresOfTask(
                                Task(t.name, t.request, [ex], t.features))
                            for ex in t.examples]
                loss = (self(features) - 1.0)**2
            else:
                # fail: examples spliced together from two different tasks
                t1, t2 = random.sample(tasks, 2)
                features1 = [self.extract.featuresOfTask(
                                 Task(t1.name, t1.request, [ex], t1.features))
                             for ex in t1.examples[:len(t1.examples) / 2]]
                features2 = [self.extract.featuresOfTask(
                                 Task(t2.name, t2.request, [ex], t2.features))
                             for ex in t2.examples[len(t2.examples) / 2:]]
                features = features1 + features2
                loss = self(features)**2
            loss.backward()
            optimizer.step()
            losses.append(loss.data[0])
            if not i % 50:
                eprint("Discriminator Epoch", i, "Loss", sum(losses) / len(losses))
                gc.collect()
def enumerateDreams(checkpoint, directory):
    from recognition import backgroundHelmholtzEnumeration
    from utilities import loadPickle

    result = loadPickle(checkpoint)
    eprint(" [+] Loaded checkpoint", checkpoint)
    g = result.grammars[-1]
    if directory is None:
        assert False, "please specify a directory"
    eprint(" Dreaming into", directory)
    os.system("mkdir -p %s" % directory)

    frontiers = backgroundHelmholtzEnumeration(makeTasks(None, None), g, 100,
                                               evaluationTimeout=0.01,
                                               special=LogoFeatureCNN.special)()
    print(f"{len(frontiers)} total frontiers.")
    MDL = 0

    def L(f):
        return -list(f.entries)[0].logPrior

    frontiers.sort(key=lambda f: -L(f))
    while len(frontiers) > 0:
        # Get frontiers whose MDL is between [MDL, MDL + 1).
        fs = []
        while len(frontiers) > 0 and L(frontiers[-1]) < MDL + 1:
            fs.append(frontiers.pop())
        if fs:
            random.shuffle(fs)
            print(f"{len(fs)} programs with MDL between [{MDL}, {MDL + 1})")
            fs = fs[:500]
            os.system(f"mkdir {directory}/{MDL}")
            dreamFromGrammar([list(f.entries)[0].program for f in fs],
                             f"{directory}/{MDL}")
        MDL += 1
def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
    if taskBatchSize is None:
        taskBatchSize = len(tasks)
    elif taskBatchSize > len(tasks):
        eprint("Task batch size is greater than total number of tasks, aborting.")
        assert False

    # Reshuffles tasks in a fixed way across epochs for reproducibility.
    currEpoch = (currIteration * taskBatchSize) // len(tasks)

    shuffledTasks = tasks.copy()  # Since shuffle works in place.
    random.Random(self.baseSeed + currEpoch).shuffle(shuffledTasks)

    shuffledTasksWrap = tasks.copy()  # Since shuffle works in place.
    random.Random(self.baseSeed + currEpoch + 1).shuffle(shuffledTasksWrap)

    start = (taskBatchSize * currIteration) % len(shuffledTasks)
    end = start + taskBatchSize
    taskBatch = (shuffledTasks + shuffledTasksWrap)[start:end]  # Wraparound nicely.
    return list(set(taskBatch))
def exportTasks():
    import sys
    import cPickle as pickle

    n_examples = 15
    if len(sys.argv) > 1:
        n_examples = int(sys.argv[1])

    eprint("Downloading and generating dataset")
    tasks = sorted(make_list_tasks(n_examples), key=lambda t: t.name)
    eprint("Got {} list tasks".format(len(tasks)))

    # Pickle files should be opened in binary mode.
    with open("data/list_tasks.pkl", "wb") as f:
        pickle.dump(tasks, f)
    eprint("Wrote list tasks to data/list_tasks.pkl")
def sampleManyHelmholtz(self, requests, N, CPUs):
    eprint("Sampling %d programs from the prior on %d CPUs..." % (N, CPUs))
    flushEverything()
    frequency = max(1, N / 50)  # Guard against N < 50, which would divide by zero below.
    samples = parallelMap(CPUs,
                          lambda n: self.sampleHelmholtz(requests,
                                                         statusUpdate='.' if n % frequency == 0 else None),
                          range(N))
    eprint()
    flushEverything()
    try:
        self.featureExtractor.finish()
    except AttributeError:
        pass
    eprint()
    flushEverything()
    return samples
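# Hypothetical usage sketch (the recognizer object and request type are
# assumptions for illustration; sampleManyHelmholtz is defined above):
#
#   samples = recognizer.sampleManyHelmholtz([arrow(tint, tint)], N=5000,
#                                            CPUs=numberOfCPUs())
#   samples = [s for s in samples if s is not None]  # drop extractor failures
#
# Each surviving sample is a (program, request, features) triple drawn from
# the generative prior, used as Helmholtz training data by train() below.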
animateCheckpoint = args.pop("animate")
if animateCheckpoint is not None:
    animateSolutions(loadPickle(animateCheckpoint).allFrontiers)
    sys.exit(0)

target = args.pop("target")
red = args.pop("reduce")
save = args.pop("save")
prefix = args.pop("prefix")
prefix_dreams = prefix + "/dreams/" + ('_'.join(target)) + "/"
prefix_pickles = prefix + "/logo." + ('.'.join(target))
if not os.path.exists(prefix_dreams):
    os.makedirs(prefix_dreams)
tasks = makeTasks(target, proto)
eprint("Generated", len(tasks), "tasks")

os.chdir("prototypical-networks")
subprocess.Popen(["python", "./protonet_server.py"])
time.sleep(3)
os.chdir("..")

test, train = testTrainSplit(tasks, args.pop("split"))
eprint("Split tasks into %d/%d test/train" % (len(test), len(train)))
if test:
    montageTasks(test, "test_")
montageTasks(train, "train_")

if red:  # `red is not []` was always true; test for a non-empty list instead.
    for reducing in red:
        try:
            with open(reducing, 'r') as f:
                                CPUs=numberOfCPUs(),
                                extras=list_options)

    tasks = retrieveTasks(args.pop("dataset"))

    # Removing malformed tasks
    # tasks = [t for t in tasks
    #          if tasks.request == 1]

    maxTasks = args.pop("maxTasks")
    if len(tasks) > maxTasks:
        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        random.seed(42)
        random.shuffle(tasks)
        del tasks[maxTasks:]

    # Remove degenerate tasks: either the identity or a constant
    tasks = [t for t in tasks
             if any(xs[0] != y for xs, y in t.examples)]
    tasks = [t for t in tasks
             if not all(t.examples[0][1] == y for xs, y in t.examples)]

    eprint("Got {} list tasks".format(len(tasks)))
    for task in tasks:
        task.features = list_features(task.examples)
    ys = [program.runWithArguments(x) for x in xs]
    return Circuit.extractFeatures(ys)


if __name__ == "__main__":
    circuits = []
    import random
    random.seed(0)
    while len(circuits) < NUMBEROFTASKS * 2:
        inputs = sampleDistribution(inputDistribution)
        gates = sampleDistribution(gateDistribution)
        newTask = Circuit(numberOfInputs=inputs, numberOfGates=gates)
        if newTask not in circuits:
            circuits.append(newTask)
    eprint("Sampled %d circuits with %d unique functions" %
           (len(circuits), len({t.signature for t in circuits})))

    tasks = [t.task() for t in circuits[:NUMBEROFTASKS]]
    testing = [t.task() for t in circuits[NUMBEROFTASKS:]]

    baseGrammar = Grammar.uniform(primitives)
    explorationCompression(baseGrammar, tasks,
                           testingTasks=testing,
                           outputPrefix="experimentOutputs/circuit",
                           evaluationTimeout=None,
                           **commandlineArguments(iterations=10,
                                                  aic=1.,
                                                  structurePenalty=1,
                                                  CPUs=numberOfCPUs(),
outputDirectory = "experimentOutputs/puddleworld/%s" % timestamp
os.system("mkdir -p %s" % outputDirectory)

# Convert pyccg ontology -> Dreamcoder.
puddleworldTypes, puddleworldPrimitives = convertOntology(ec_ontology)
input_type, output_type = puddleworldTypes['model'], puddleworldTypes['action']

# Convert sentences-scenes -> Dreamcoder style tasks.
doLocal, doGlobal, doTiny = args.pop('local'), args.pop('global'), args.pop('tiny')
num_tiny, tiny_size = args.pop('num_tiny'), args.pop('tiny_scene_size')
(localTrain, localTest) = makeLocalTasks(input_type, output_type) if doLocal else ([], [])
(globalTrain, globalTest) = makeGlobalTasks(input_type, output_type) if doGlobal else ([], [])
(tinyTrain, tinyTest) = makeTinyTasks(input_type, output_type, num_tiny, tiny_size) if doTiny else ([], [])
allTrain, allTest = localTrain + globalTrain + tinyTrain, localTest + globalTest + tinyTest
eprint("Using local tasks: %d train, %d test" % (len(localTrain), len(localTest)))
eprint("Using global tasks: %d train, %d test" % (len(globalTrain), len(globalTest)))
eprint("Using tiny tasks of size %d: %d train, %d test" % (tiny_size, len(tinyTrain), len(tinyTest)))
eprint("Using total tasks: %d train, %d test" % (len(allTrain), len(allTest)))

# Make Dreamcoder grammar.
baseGrammar = Grammar.uniform(puddleworldPrimitives)
print(baseGrammar.json())

# Initialize the language learner driver.
use_pyccg_enum, use_blind_enum = args.pop('use_pyccg_enum'), args.pop('use_blind_enum')
print("Using PyCCG enumeration: %s, using blind enumeration: %s" % (str(use_pyccg_enum), str(use_blind_enum)))
if args.pop('use_initial_lexicon'):
    print("Using initial lexicon for Puddleworld PyCCG learner.")
    pyccg_learner = WordLearner(initial_puddleworld_lex)
if __name__ == "__main__":
    args = commandlineArguments(frontierSize=None,
                                activation='sigmoid',
                                iterations=10,
                                a=3,
                                maximumFrontier=10,
                                topK=2,
                                pseudoCounts=10.0,
                                helmholtzRatio=0.5,
                                structurePenalty=1.,
                                CPUs=numberOfCPUs(),
                                extras=regex_options)

    tasks = makeTasks()  # TODO
    eprint("Generated", len(tasks), "tasks")

    maxTasks = args.pop("maxTasks")
    if len(tasks) > maxTasks:
        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        random.seed(42)
        random.shuffle(tasks)
        del tasks[maxTasks:]

    maxExamples = args.pop("maxExamples")
    for task in tasks:
        if len(task.examples) > maxExamples:
            task.examples = task.examples[:maxExamples]

    split = args.pop("split")
    test, train = testTrainSplit(tasks, split)
def train(self, frontiers, _=None, steps=250, lr=0.001, topK=1, CPUs=1,
          helmholtzRatio=0.):
    """
    helmholtzRatio: What fraction of the training data should be forward
    samples from the generative model?
    """
    requests = [frontier.task.request for frontier in frontiers]
    frontiers = [frontier.topK(topK).normalize()
                 for frontier in frontiers if not frontier.empty]

    # Not sure why this ever happens
    if helmholtzRatio is None:
        helmholtzRatio = 0.

    eprint("Training recognition model from %d frontiers, %d%% Helmholtz." % (
        len(frontiers), int(helmholtzRatio * 100)))

    HELMHOLTZBATCH = 250

    with timing("Trained recognition model"):
        avgLoss = None
        for i in range(1, steps + 1):
            eprint("step", i, "out of", steps)
            if helmholtzRatio < 1.:
                permutedFrontiers = list(frontiers)
                random.shuffle(permutedFrontiers)
            else:
                permutedFrontiers = [None]
            for frontier_num, frontier in enumerate(permutedFrontiers):
                eprint("frontier num", frontier_num, "out of", len(permutedFrontiers))
                # Randomly decide whether to sample from the generative model
                doingHelmholtz = random.random() < helmholtzRatio
                if doingHelmholtz:
                    networkInputs = self.helmholtzNetworkInputs(requests, HELMHOLTZBATCH, CPUs)
                    loss = self.step(*networkInputs)
                if not doingHelmholtz:
                    if helmholtzRatio < 1.:
                        # Placeholder: supervised training on the frontier
                        # (self.frontierKL) is not wired up yet.
                        loss = 0
                        eprint("Supervised frontier training is not implemented yet.")
                    else:
                        # Refuse to train on the frontiers
                        pass
            if i == 1 or i % 5 == 0:
                avgLoss = (0.9 * avgLoss + 0.1 * loss) if avgLoss is not None else loss
                eprint("Epoch %3d Loss %2.2f" % (i, avgLoss))
                gc.collect()
def visualizePrimitives(primitives, export='/tmp/logo_primitives.png'):
    from itertools import product
    from program import Index, Abstraction, Application, Primitive
    from utilities import montageMatrix, makeNiceArray
    from type import tint
    import scipy.misc
    from makeLogoTasks import parseLogo

    angles = [Program.parse(a)
              for a in ["logo_ZA",
                        "logo_epsA",
                        "(logo_MULA logo_epsA 2)",
                        "(logo_DIVA logo_UA 4)",
                        "(logo_DIVA logo_UA 5)",
                        "(logo_DIVA logo_UA 7)",
                        "(logo_DIVA logo_UA 9)"]]
    specialAngles = {
        "#(lambda (lambda (logo_forLoop logo_IFTY (lambda (lambda (logo_FWRT (logo_MULL logo_UL 3) (logo_MULA $2 4) $0))) $1)))":
        [Program.parse("(logo_MULA logo_epsA 4)")] +
        [Program.parse("(logo_DIVA logo_UA %d)" % n) for n in [7, 9]]}
    numbers = [Program.parse(n) for n in ["1", "2", "5", "7", "logo_IFTY"]]
    specialNumbers = {
        "#(lambda (#(lambda (lambda (lambda (lambda (logo_forLoop $2 (lambda (lambda (logo_FWRT $5 (logo_DIVA logo_UA $3) $0))) $0))))) (logo_MULL logo_UL $0) 4 4))":
        [Program.parse(str(n)) for n in [1, 2, 3]]}
    distances = [Program.parse(l)
                 for l in ["logo_ZL",
                           "logo_epsL",
                           "(logo_MULL logo_epsL 2)",
                           "(logo_DIVL logo_UL 2)",
                           "logo_UL"]]
    subprograms = [parseLogo(sp)
                   for sp in ["(move 1d 0a)",
                              "(loop i infinity (move (*l epsilonLength 4) (*a epsilonAngle 2)))",
                              "(loop i infinity (move (*l epsilonLength 5) (/a epsilonAngle 2)))",
                              "(loop i 4 (move 1d (/a 1a 4)))"]]
    entireArguments = {
        "#(lambda (lambda (#(#(lambda (lambda (lambda (logo_forLoop $2 (lambda (lambda (logo_FWRT $2 $3 $0))))))) logo_IFTY) (logo_MULA (#(logo_DIVA logo_UA) $1) $0) (#(logo_MULL logo_UL) 3))))":
        [[Program.parse(str(x)) for x in xs]
         for xs in [("3", "1", "$0"),
                    ("4", "1", "$0"),
                    ("5", "1", "$0"),
                    ("5", "3", "$0"),
                    ("7", "3", "$0")]]}
    specialDistances = {
        "#(lambda (lambda (logo_forLoop 7 (lambda (lambda (#(lambda (lambda (lambda (#(lambda (lambda (lambda (logo_forLoop $2 (lambda (lambda (logo_FWRT $2 $3 $0))))))) 7 $1 $2 $0)))) $3 logo_epsA $0))) $0)))":
        [Program.parse("(logo_MULL logo_epsL %d)" % n) for n in range(5)]}

    matrix = []
    for p in primitives:
        if not p.isInvented:
            continue
        t = p.tp
        eprint(p, ":", p.tp)
        if t.returns() != turtle:
            eprint("\t(does not return a turtle)")
            continue

        def argumentChoices(t):
            if t == turtle:
                return [Index(0)]
            elif t == arrow(turtle, turtle):
                return subprograms
            elif t == tint:
                return specialNumbers.get(str(p), numbers)
            elif t == tangle:
                return specialAngles.get(str(p), angles)
            elif t == tlength:
                return specialDistances.get(str(p), distances)
            else:
                return []

        ts = []
        for arguments in entireArguments.get(
                str(p),
                product(*[argumentChoices(t) for t in t.functionArguments()])):
            eprint(arguments)
            pp = p
            for a in arguments:
                pp = Application(pp, a)
            pp = Abstraction(pp)
            i = np.reshape(np.array(drawLogo(pp, resolution=128)), (128, 128))
            if i is not None:
                ts.append(i)

        if ts == []:
            continue

        matrix.append(ts)
        if len(ts) < 6:
            ts = [ts]
        else:
            ts = makeNiceArray(ts)
        r = montageMatrix(ts)
        fn = "/tmp/logo_primitive_%d.png" % len(matrix)
        eprint("\tExported to", fn)
        scipy.misc.imsave(fn, r)

    matrix = montageMatrix(matrix)
    scipy.misc.imsave(export, matrix)
import sys
import time
import traceback
import cPickle as pickle

from utilities import eprint

if __name__ == "__main__":
    sys.setrecursionlimit(10000)

    start = time.time()
    request = pickle.load(sys.stdin)
    dt = time.time() - start
    if dt > 1:
        eprint("(compiled driver warning: SLOW) Compiled driver unpacked the message in time", dt)

    response = (False, None)
    try:
        start = time.time()
        f = request["function"]
        result = f(*request["arguments"], **request["keywordArguments"])
        response = (True, result)
    except Exception as e:
        eprint("Exception thrown in pypy process for %s:" % f.__name__)
        sys.stderr.write(traceback.format_exc())
        sys.stderr.flush()
    finally:
        start = time.time()
        pickle.dump(response, sys.stdout)
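# Wire protocol implied by the driver above: the parent pickles a request
# dict onto the child's stdin and unpickles a (success, result) pair from its
# stdout. A minimal sketch of the parent side (the helper name sendToDriver
# is an assumption for illustration; callCompiled is the real entry point,
# demonstrated in the next snippet):
#
#   request = {"function": f,
#              "arguments": args,
#              "keywordArguments": kwargs}
#   ok, result = sendToDriver(request)   # pickle out, unpickle back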
# test callCompiled
import sys
import os

sys.path.append(os.path.abspath('./'))
sys.path.append(os.path.abspath('./ec'))

from utilities import callCompiled, eprint
from fun import f

x = 6
ans = callCompiled(f, x)
eprint(ans)
def __init__(self, tasks):
    lexicon = {c
               for t in tasks
               for (x, ), y in self.tokenize(t.examples)
               for c in x + y}
    super(LearnedFeatureExtractor, self).__init__(lexicon=list(lexicon),
                                                  H=64,
                                                  tasks=tasks,
                                                  bidirectional=True)


if __name__ == "__main__":
    tasks = makeTasks()
    eprint("Generated", len(tasks), "tasks")

    test, train = testTrainSplit(tasks, 0.2)
    eprint("Split tasks into %d/%d test/train" % (len(test), len(train)))

    baseGrammar = Grammar.uniform(primitives)
    explorationCompression(baseGrammar, train,
                           testingTasks=test,
                           outputPrefix="experimentOutputs/text",
                           evaluationTimeout=0.0005,
                           **commandlineArguments(steps=500,
                                                  iterations=10,
                                                  helmholtzRatio=0.5,
def train(self, frontiers, _=None, steps=250, lr=0.001, topK=1, CPUs=1,
          helmholtzRatio=0., helmholtzBatch=5000):
    """
    helmholtzRatio: What fraction of the training data should be forward
    samples from the generative model?
    """
    requests = [frontier.task.request for frontier in frontiers]
    frontiers = [frontier.topK(topK).normalize()
                 for frontier in frontiers if not frontier.empty]

    # Not sure why this ever happens
    if helmholtzRatio is None:
        helmholtzRatio = 0.

    eprint("Training a recognition model from %d frontiers, %d%% Helmholtz, feature extractor %s." % (
        len(frontiers), int(helmholtzRatio * 100), self.featureExtractor.__class__.__name__))

    # The number of Helmholtz samples that we generate at once.
    # Should only affect performance and shouldn't affect anything else.
    HELMHOLTZBATCH = helmholtzBatch
    helmholtzSamples = []

    optimizer = torch.optim.Adam(self.parameters(), lr=lr)
    with timing("Trained recognition model"):
        for i in range(1, steps + 1):
            losses = []

            if helmholtzRatio < 1.:
                permutedFrontiers = list(frontiers)
                random.shuffle(permutedFrontiers)
            else:
                permutedFrontiers = [None]
            for frontier in permutedFrontiers:
                # Randomly decide whether to sample from the generative model
                doingHelmholtz = random.random() < helmholtzRatio
                if doingHelmholtz:
                    if helmholtzSamples == []:
                        helmholtzSamples = \
                            self.sampleManyHelmholtz(requests, HELMHOLTZBATCH, 1)  # TODO THIS IS A HACK
                    attempt = helmholtzSamples.pop()
                    if attempt is not None:
                        program, request, features = attempt
                        self.zero_grad()
                        loss = self.HelmholtzKL(features, program, request)
                    else:
                        doingHelmholtz = False
                if not doingHelmholtz:
                    if helmholtzRatio < 1.:
                        self.zero_grad()
                        loss = self.frontierKL(frontier)
                    else:
                        # Refuse to train on the frontiers
                        continue

                loss.backward()
                optimizer.step()
                losses.append(loss.data[0])
            if i % 50 == 0 and losses:
                eprint("Epoch", i, "Loss", sum(losses) / len(losses))
                gc.collect()