def dfshelper(p_base, t):
    """Expand one node of the depth-first program search.

    Reads ``ns``, ``examples``, ``T``, ``ctx`` and ``input_type_to_inputs``
    from the enclosing scope.

    Args:
        p_base: the program at this node.
        t: current depth; each recursive call passes ``t + 1`` together with
            a program that has one more statement.

    Returns:
        True when the search must stop (a solution was stored in
        ``ns['solution']`` or ``ns['gas']`` ran out); None otherwise.
    """
    ns['nb_steps'] += 1
    ns['gas'] -= 1
    try:
        if is_solution(p_base, examples):
            ns['solution'] = p_base
            return True
    except (NullInputError, OutputOutOfRangeError):
        # throw out programs that have null inputs or any out of range
        # output; null outputs are ok if unused
        return

    if ns['gas'] <= 0:
        return True

    if t == T:
        return

    # type -> list of input indices / Functions
    type_to_inputs = collections.defaultdict(list)
    for k, v in input_type_to_inputs.items():
        type_to_inputs[k] += v

    nb_inputs = len(p_base.input_types)
    for i, stmt in enumerate(p_base.stmts):
        # favor more recent statements: the newest result goes to the front
        type_to_inputs[stmt[0].output_type].insert(0, nb_inputs + i)

    for k, v in ctx.typemap.items():
        type_to_inputs[k] += v

    # statements already in the program are never appended a second time
    used = set(p_base.stmts)
    for f in ctx.functions:
        for args in iterate_inputs(f, type_to_inputs):
            stmt = (f, args)
            if stmt in used:
                continue
            program = Program(p_base.input_types,
                              list(p_base.stmts) + [stmt])
            try:
                # a null result is only acceptable on the final statement
                if t + 1 < T and has_null(program, examples):
                    continue
            except OutputOutOfRangeError:
                continue
            if dfshelper(program, t + 1):
                return True
def reward(self):
    """Score the token sequence in ``self.f_arg_list``.

    Returns:
        1 if the program assembled from ``self.f_arg_list`` satisfies
        ``State.EXAMPLES``; 0 otherwise, including when ``self.turn`` is
        negative or when evaluating the program raises.
    """
    if self.turn < 0:
        return 0

    # construct program
    input_types = [x.type for x in State.EXAMPLES[0][0]]
    stmts = []
    for s in self.f_arg_list:
        # NOTE(review): tokens below 15 are treated as function tokens;
        # presumably 15 is the size of the function vocabulary - confirm.
        if s < 15:
            # a function token opens a new [function, [args]] statement
            stmts.append([impl.FUNCTIONS[s], []])
        else:
            # any other token is an argument of the most recent statement
            stmts[-1][1].append(impl.ACT_SPACE[s])
    program = Program(input_types, stmts)

    try:
        return 1 if is_solution(program, State.EXAMPLES) else 0
    except Exception:
        # Deliberately broad: any failure while running the candidate
        # (null inputs, out of range output, malformed program, ...) scores
        # zero. Unlike a bare ``except:`` this lets KeyboardInterrupt and
        # SystemExit propagate.
        return 0
def main():
    """Generate examples for every program in --infile and write JSON lines.

    Each output line is a JSON object with the keys ``program``,
    ``examples`` (pairs of raw inputs and raw output) and ``attribute``.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--infile', type=str)
    arg_parser.add_argument('--outfile', type=str)
    arg_parser.add_argument('--nb_examples', type=int, default=5)
    opts = arg_parser.parse_args()

    # one program prefix per input line
    with open(opts.infile) as in_fh:
        programs = [Program.parse(line.rstrip()) for line in in_fh]

    records = []
    for program in programs:
        examples = constraint.get_input_output_examples(
            program, opts.nb_examples)
        # unwrap the value objects so the examples are JSON serialisable
        raw_examples = [
            ([value.val for value in inputs], output.val)
            for inputs, output in examples
        ]
        records.append({
            'program': program.prefix,
            'examples': raw_examples,
            'attribute': converter.get_attribute_vec(program),
        })

    with open(opts.outfile, 'w') as out_fh:
        for record in records:
            out_fh.write(json.dumps(record) + '\n')
def test_leader_election(solution, examples):
    """Check a synthesised program against monarchical_leader_election.

    Args:
        solution: object whose ``str()`` is a program prefix.
        examples: iterable of ``(inputs, output)`` pairs; only the first
            input of each pair is used.

    Returns:
        True if the parsed program agrees with the ground truth on every
        example, False (after printing a message) on the first mismatch.
    """
    program = Program.parse(str(solution))
    for inputs, _output in examples:
        first_input = inputs[0]
        expected = IntValue(monarchical_leader_election(first_input.val))
        actual = program(first_input)
        if actual != expected:
            print("DeepCoder program inconsistent with ground truth program")
            return False
    return True
def test_out_of_range(self):
    """Running the program on these three lists must raise OutputOutOfRangeError."""
    first = ListValue([8, 155, -231, -115, -178, 115, -246, -93, 42, 237,
                       -104, -92, -208, -15, -116, -144, -58, -66, -120])
    second = ListValue([-2, -16, -8, -4, 5, 6, 4, 4, 7, -5, 0, 8, 2, 10,
                        12, 10, -4, 14])
    third = ListValue([-7, -9, 3, -11, -6, 7, -3, -12, -7, -5, 10, 11, -8,
                       -8, 10])

    program = Program.parse('LIST|LIST|LIST|SCAN1L,*,0|SCAN1L,*,3|MAP,/2,4')
    with self.assertRaises(OutputOutOfRangeError):
        program(first, second, third)
def test_basic(self):
    """Parse, run and round-trip a four-statement program."""
    # takes the second highest negative number
    source = 'LIST|INT|FILTER,<0,0|SORT,2|REVERSE,3|ACCESS,1,4'
    program = Program.parse(source)

    want = IntValue(-2)
    got = program(ListValue([1, -5, -3, -4, -2, -1, 2, 3]), IntValue(1))

    self.assertEqual(got, want)
    # serialising the parsed program must give back the original prefix
    self.assertEqual(program.toprefix(), source)
def test_maxrank(self):
    """A single MAXIMUM statement yields 3 for the sample list and round-trips."""
    source = 'LIST|MAXIMUM,0'
    program = Program.parse(source)

    want = IntValue(3)
    got = program(ListValue([1, -5, -3, -4, -2, -1, 2, 3]))

    self.assertEqual(got, want)
    # serialising the parsed program must give back the original prefix
    self.assertEqual(program.toprefix(), source)
def dfshelper(p_base, t):
    """Expand one node of the depth-first program search.

    Reads ``ns``, ``T``, ``ctx``, ``input_type_to_inputs``, ``is_solution``
    and ``has_null`` from the enclosing scope.

    Returns:
        True when the search must stop (a solution was stored in
        ``ns['solution']`` or ``ns['gas']`` ran out); None otherwise.
    """
    ns['nb_steps'] += 1
    ns['gas'] -= 1

    if is_solution(p_base):
        ns['solution'] = p_base
        return True
    if ns['gas'] <= 0:
        return True
    if t == T:
        return

    # type -> list of input indices / Functions
    type_to_inputs = collections.defaultdict(list)
    for typ, indices in input_type_to_inputs.items():
        type_to_inputs[typ] += indices

    used = set()
    for pos, stmt in enumerate(p_base.stmts):
        prefix_program = Program(p_base.input_types, p_base.stmts[:pos + 1])
        used.add(stmt)
        # don't consider NULL for input iteration
        # NOTE(review): a statement's result is offered as an input only
        # when has_null(...) is truthy, so has_null presumably returns True
        # when no example yields NULL - verify against its definition.
        if not has_null(prefix_program):
            continue
        # favor more recent statements
        out_type = stmt[0].output_type
        type_to_inputs[out_type].insert(0, len(p_base.input_types) + pos)

    for typ, extras in ctx.typemap.items():
        type_to_inputs[typ] += extras

    for func in ctx.functions:
        for args in iterate_inputs(func, type_to_inputs):
            candidate = (func, args)
            if candidate in used:
                continue
            child = Program(p_base.input_types,
                            list(p_base.stmts) + [candidate])
            if dfshelper(child, t + 1):
                return True
def test_null_allowed(self):
    """Check the constraints propagated through 'LIST|TAIL,0|ACCESS,1,0'.

    Starting from an unconstrained int output, propagate_constraints is
    expected to return one constraint per program variable.
    """
    program = Program.parse('LIST|TAIL,0|ACCESS,1,0')
    unconstrained_output = constraint.IntConstraint()

    per_length = [
        constraint.IntConstraint(0, length - 1)
        for length in range(constraint.L + 1)
    ]
    want = [
        constraint.ListConstraint(lmin=1, int_constraints=per_length),
        constraint.IntConstraint(0, 256),
        constraint.IntConstraint(),
    ]

    got = constraint.propagate_constraints(program, unconstrained_output)
    self.assertEqual(want, got)
def test_propagate(self):
    """Check the input constraint derived for a MAP/FILTER/MAP pipeline."""
    pipeline = Program([LIST], [
        (impl.MAP, (impl.TIMES2, 0)),
        (impl.FILTER, (impl.GT0, 1)),
        (impl.MAP, (impl.MINUS1, 2)),
    ])
    output_constraint = constraint.ListConstraint(
        1, 4, [constraint.IntConstraint(-5, 3)] * 5)

    # element 0 of the propagated constraints is compared with the
    # constraint expected for the program's single LIST input
    got = constraint.propagate_constraints(pipeline, output_constraint)[0]
    want = constraint.ListConstraint(
        1, 4, [constraint.IntConstraint(-2, 2)] * 5)

    self.assertEqual(want, got)
def enumerate_helper(input_types, T, ctx, result_queue, stop_queue):
    """Enumerate programs with exactly T statements and report their prefixes.

    Every program of depth T for which get_unused_indices is empty has its
    ``prefix`` put on ``result_queue``; a final ``None`` marks completion.

    Args:
        input_types: input types of the programs to enumerate.
        T: number of statements per program.
        ctx: search context providing ``functions`` and ``typemap``.
        result_queue: queue receiving program prefixes, then ``None``.
        stop_queue: queue polled for a stop token. If no token was consumed
            during the search, one is awaited (blocking) before returning.
    """
    state = {'stopped': False}

    def expand(current, depth):
        if state['stopped']:
            return
        # poll for a stop request without blocking
        try:
            stop_queue.get_nowait()
        except queue.Empty:
            pass
        else:
            state['stopped'] = True
            return

        if depth == T:
            if not get_unused_indices(current):
                result_queue.put(current.prefix)
            # don't keep searching if pruned
            # has < T stmts. will get picked up
            # on another enumeration
            return

        # type -> indices of program variables, followed by ctx.typemap entries
        type_to_inputs = collections.defaultdict(list)
        for index, typ in enumerate(current.types):
            type_to_inputs[typ].append(index)
        for typ, extras in ctx.typemap.items():
            type_to_inputs[typ] += extras

        seen = set(current.stmts)
        for func in ctx.functions:
            for args in iterate_inputs(func, type_to_inputs):
                candidate = (func, args)
                if candidate in seen:
                    continue
                child = Program(current.input_types,
                                list(current.stmts) + [candidate])
                expand(child, depth + 1)

    expand(Program(input_types, []), 0)
    result_queue.put(None)  # done
    if not state['stopped']:
        stop_queue.get()
def next_step(self):
    """Return one child Beamhelper per admissible one-statement extension.

    For every index ``i`` enabled in ``impl.FUNCTION_MASK`` the function
    ``impl.FUNCTIONS[i]`` is combined with every argument tuple drawn from
    ``impl.ACT_SPACE`` under the mask for each of its input types.

    Reads ``predictions`` from the enclosing scope (only for its width).

    Returns:
        list of Beamhelper, each holding ``self.p_base`` plus one statement
        at depth ``self.t + 1``.
    """
    children = []
    base_stmts = list(self.p_base.stmts)
    for i in range(len(predictions[0])):
        if not impl.FUNCTION_MASK[i]:
            continue
        next_f = impl.FUNCTIONS[i]
        next_input_types = next_f.input_type

        # one pool of candidate tokens per argument position
        choice_list = []
        if isinstance(next_input_types, tuple):
            for next_input_type in next_input_types:
                if next_input_type in self.TYPE_MASK:
                    mask = self.TYPE_MASK[next_input_type]
                else:
                    # LAMBDA F
                    mask = impl.INPUT_TYPE2MASK[next_input_type]
                choice_list.append(
                    list(itertools.compress(impl.ACT_SPACE, mask)))
        else:
            choice_list.append(
                list(itertools.compress(impl.ACT_SPACE,
                                        self.TYPE_MASK[next_input_types])))

        # product() over k pools always yields k-tuples, so every args tuple
        # already has the arity of next_f; no length check is needed.
        for args in itertools.product(*choice_list):
            stmt = (next_f, args)
            program = Program(self.p_base.input_types, base_stmts + [stmt])
            children.append(Beamhelper(program, self.t + 1))
    return children
def check(self):
    """Build this node's program and test it against the examples.

    Reads ``ns`` and ``examples`` from the enclosing scope and charges one
    step and one unit of gas per call.

    Returns:
        True if the program is a solution (stored in ``ns['solution']``) or
        the gas budget is exhausted; None otherwise.
    """
    ns['nb_steps'] += 1
    ns['gas'] -= 1

    stmts = self.construct_stmts()
    input_types = [x.type for x in examples[0][0]]
    candidate = Program(input_types, stmts)

    try:
        solved = is_solution(candidate, examples)
    except (NullInputError, OutputOutOfRangeError):
        # throw out programs that have null inputs or any out of range output
        # null outputs ok if unused
        return

    if solved:
        ns['solution'] = candidate
        return True
    return True if ns['gas'] <= 0 else None
def main():
    """Generate example problems for every program in --infile.

    Writes a JSON-style array to --outfile, one entry per program for which
    input/output examples could be generated. Each entry holds the keys
    ``program``, ``examples`` (dicts with ``inputs`` and ``output``) and
    ``attribute``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--infile', type=str)
    parser.add_argument('--outfile', type=str)
    parser.add_argument('--nb_examples', type=int, default=5)
    args = parser.parse_args()

    with open(args.infile) as in_fh:
        programs = [Program.parse(line.rstrip()) for line in in_fh]

    problems = []
    for program in tqdm.tqdm(programs, total=len(programs)):
        examples = None
        # only try twice for speed
        for _ in range(2):
            try:
                examples = constraint.get_input_output_examples(
                    program, args.nb_examples)
            # TODO: figure out why OutputOutOfRange happened here in T=4 generation
            except (NullInputError, constraint.InvalidConstraintError,
                    constraint.OutputOutOfRangeError):
                continue
            if examples:
                # first successful attempt is enough; without this break the
                # generation always ran twice
                break
        if not examples:
            # both attempts failed: skip this program
            continue

        raw_examples = [
            dict(inputs=[x.val for x in inputs], output=output.val)
            for inputs, output in examples
        ]
        problems.append(dict(program=program.prefix,
                             examples=raw_examples,
                             attribute=util.get_attribute_vec(program)))

    with open(args.outfile, 'w') as out_fh:
        print('[', file=out_fh)
        last = len(problems) - 1
        for i, problem in enumerate(problems):
            # every entry but the last is followed by a comma
            util.pretty_print_problem(problem, out_fh, i < last)
        print(']', file=out_fh)
def main():
    """Convert the programs in --infile into arrays saved with np.savez.

    For every program one feature row (``get_program_row``) and one
    attribute vector (``get_attribute_vec``) is computed. The rows are
    regrouped by key and saved together with the attribute vectors (under
    ``y``) to --outfile.

    Raises:
        Re-raises any exception from row or attribute computation after
        printing the offending program and its propagated constraints.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--infile', type=str)
    parser.add_argument('--outfile', type=str)
    parser.add_argument('--nb_examples', type=int, default=5)
    parser.add_argument('--nb_inputs', type=int, default=3)
    args = parser.parse_args()

    # first pass only counts lines so the progress bar has a total
    with open(args.infile) as in_fh:
        line_count = sum(1 for _ in in_fh)

    xdata = []
    ydata = []
    # the context manager closes the progress bar even if a program fails
    with open(args.infile) as in_fh, tqdm.tqdm(total=line_count) as pbar:
        for line in in_fh:
            pbar.update(1)
            program = Program.parse(line.rstrip())
            try:
                xdata.append(
                    get_program_row(program, args.nb_examples, args.nb_inputs))
                ydata.append(get_attribute_vec(program))
            except Exception:
                # dump context for debugging, then let the error propagate
                print('prog:', program)
                print('constraint:')
                for x in constraint.propagate_constraints(program):
                    print(x)
                raise

    # list of row dicts -> dict of column lists
    x = collections.defaultdict(list)
    for row in xdata:
        for k, v in row.items():
            x[k].append(v)

    np.savez(args.outfile, y=ydata, **x)
def enumerate_programs(input_type_combinations, T, ctx, max_nb_programs):
    """Enumerates programs with T statements that have the same input types.

    One worker process per input type combination runs ``enumerate_helper``
    and reports program prefixes through a shared queue.

    Arguments:
        input_type_combinations (list): list of list of INT or LIST
            specifying all input types to search over
        T (int): number of statements in each outputted program
        ctx (Context): search context
        max_nb_programs (int): max number of programs to enumerate.

    Returns:
        list of at most max_nb_programs programs with exactly T stmts.
    """
    programs = []
    workers = []
    result_queue = multiprocessing.Queue()
    stop_queue = multiprocessing.Queue()

    for input_types in input_type_combinations:
        worker = multiprocessing.Process(
            target=enumerate_helper,
            args=(input_types, T, ctx, result_queue, stop_queue))
        worker.start()
        workers.append(worker)

    def stop_workers():
        # every worker consumes exactly one token from stop_queue
        for _ in range(len(workers)):
            stop_queue.put(1)

    finished_cnt = 0
    with tqdm.tqdm(total=max_nb_programs) as pbar:
        stopped = False
        # each worker announces completion with a None result
        while finished_cnt < len(workers):
            if not stopped and len(programs) >= max_nb_programs:
                stopped = True
                stop_workers()
            try:
                # block briefly instead of spinning on get_nowait()
                result = result_queue.get(timeout=0.1)
            except queue.Empty:
                continue
            if result is None:
                finished_cnt += 1
                continue
            # results arriving after the limit are drained but not kept
            if len(programs) < max_nb_programs:
                programs.append(Program.parse(result))
                pbar.update(1)

    if not stopped:
        # workers that finished on their own still wait for a stop token
        stop_workers()
    for worker in workers:
        worker.join()

    return programs
def test_is_same(self):
    """constraint.is_same must report these two programs as the same."""
    direct = Program.parse('LIST|MAXIMUM,0')
    via_scan = Program.parse('LIST|SCAN1L,max,0|MAXIMUM,1')

    same = constraint.is_same(direct, via_scan)

    self.assertTrue(same)
def test_get_unused_indices(self):
    """Indices 1 and 3 are expected to be reported as unused."""
    program = Program.parse('LIST|INT|MAP,*2,0|FILTER,>0,0|FILTER,<0,2')

    unused = get_unused_indices(program)

    # index 1 is the INT input and index 3 is the first FILTER statement
    self.assertEqual(unused, {1, 3})
def dfs(examples, T, ctx, gas=np.inf):
    """Runs dfs search up to depth T or until a program is found that matches output.

    Args:
        examples: list of tuples of (inputs, output)
        T: max depth
        ctx: Context. used to restrict/order the set of functions that dfs
            searches over.
        gas (int): limit on number of node expansions. default to np.inf
            (unlimited)

    Returns:
        tuple of solution program, number of steps
    """
    ns = {'nb_steps': 0, 'solution': None, 'gas': gas}

    # init: the first example's inputs fix the program's input types
    input_types = [x.type for x in examples[0][0]]
    input_type_to_inputs = collections.defaultdict(list)
    for i, input_type in enumerate(input_types):
        input_type_to_inputs[input_type].append(i)

    p_base = Program(input_types, tuple())

    def dfshelper(p_base, t):
        """Expand one node; returns True when the whole search must stop."""
        ns['nb_steps'] += 1
        ns['gas'] -= 1
        try:
            if is_solution(p_base, examples):
                ns['solution'] = p_base
                return True
        except (NullInputError, OutputOutOfRangeError):
            # throw out programs that have null inputs or any out of range
            # output; null outputs are ok if unused
            return

        if ns['gas'] <= 0:
            return True

        if t == T:
            return

        # type -> list of input indices / Functions
        type_to_inputs = collections.defaultdict(list)
        for k, v in input_type_to_inputs.items():
            type_to_inputs[k] += v

        nb_inputs = len(p_base.input_types)
        for i, stmt in enumerate(p_base.stmts):
            # favor more recent statements: newest result goes to the front
            type_to_inputs[stmt[0].output_type].insert(0, nb_inputs + i)

        for k, v in ctx.typemap.items():
            type_to_inputs[k] += v

        # statements already in the program are never appended a second time
        used = set(p_base.stmts)
        for f in ctx.functions:
            for args in iterate_inputs(f, type_to_inputs):
                stmt = (f, args)
                if stmt in used:
                    continue
                program = Program(p_base.input_types,
                                  list(p_base.stmts) + [stmt])
                try:
                    # a null result is only acceptable on the final statement
                    if t + 1 < T and has_null(program, examples):
                        continue
                except OutputOutOfRangeError:
                    continue
                if dfshelper(program, t + 1):
                    return True

    dfshelper(p_base, 0)

    return ns['solution'], ns['nb_steps']
def test_prune(self):
    """Pruning drops the unused INT input and the unused FILTER statement."""
    original = Program.parse('LIST|INT|MAP,*2,0|FILTER,>0,0|FILTER,<0,2')

    pruned = prune(original)

    # the remaining FILTER now refers to index 1 instead of 2
    self.assertEqual(pruned.toprefix(), 'LIST|MAP,*2,0|FILTER,<0,1')
def dfs(examples, T, ctx, gas=np.inf):
    """Depth-first search for a program consistent with all examples.

    Args:
        examples: list of tuples of (inputs, output)
        T: max depth
        ctx: Context. used to restrict/order the set of functions that dfs
            searches over.
        gas (int): limit on number of node expansions. default to np.inf
            (unlimited)

    Returns:
        tuple of solution program (None if not found), number of steps
    """
    ns = {'nb_steps': 0, 'solution': None, 'gas': gas}

    # init: the first example's inputs fix the program's input types
    input_types = [x.type for x in examples[0][0]]
    input_type_to_inputs = collections.defaultdict(list)
    for idx, input_type in enumerate(input_types):
        input_type_to_inputs[input_type].append(idx)

    root = Program(input_types, tuple())

    def is_solution(program):
        """True when the program reproduces the output of every example."""
        return not any(program(*inputs) != output
                       for inputs, output in examples)

    def outputs_never_null(program):
        """True when the program yields IntValue(NULL) on no example."""
        return not any(program(*inputs) == IntValue(NULL)
                       for inputs, _ in examples)

    def dfshelper(p_base, t):
        """Expand one node; returns True when the whole search must stop."""
        ns['nb_steps'] += 1
        ns['gas'] -= 1

        if is_solution(p_base):
            ns['solution'] = p_base
            return True
        if ns['gas'] <= 0:
            return True
        if t == T:
            return

        # type -> list of input indices / Functions
        type_to_inputs = collections.defaultdict(list)
        for typ, indices in input_type_to_inputs.items():
            type_to_inputs[typ] += indices

        used = set()
        for pos, stmt in enumerate(p_base.stmts):
            prefix_program = Program(p_base.input_types,
                                     p_base.stmts[:pos + 1])
            used.add(stmt)
            # don't consider NULL for input iteration
            if not outputs_never_null(prefix_program):
                continue
            # favor more recent statements
            out_type = stmt[0].output_type
            type_to_inputs[out_type].insert(
                0, len(p_base.input_types) + pos)

        for typ, extras in ctx.typemap.items():
            type_to_inputs[typ] += extras

        for func in ctx.functions:
            for args in iterate_inputs(func, type_to_inputs):
                candidate = (func, args)
                if candidate in used:
                    continue
                child = Program(p_base.input_types,
                                list(p_base.stmts) + [candidate])
                if dfshelper(child, t + 1):
                    return True

    dfshelper(root, 0)

    return ns['solution'], ns['nb_steps']
def beam_search(examples, T, predictions, gas):
    """Beam search over programs, ranked by per-position token scores.

    At each of T rounds every program in the beam is extended by one
    statement, the extensions are sorted by score, the best
    ``int(gas / T)`` are kept and each kept program is checked against the
    examples.

    Args:
        examples: list of tuples of (inputs, output); the first example's
            inputs fix the program's input types.
        T: number of rounds (statements per final program).
        predictions: indexed first by token position, then by token index.
            NOTE(review): presumably probabilities, since np.log is applied
            to their products - confirm with the caller.
        gas: budget of program checks; also determines the beam width.

    Returns:
        tuple of solution program (None if not found), number of checks
    """
    ns = {'nb_steps': 0, 'solution': None, 'gas': gas}
    nb_beam = int(gas / T)

    # init
    input_types = [x.type for x in examples[0][0]]
    p_base = Program(input_types, tuple())

    class Beamhelper:
        """A candidate program together with its depth and score."""

        def __init__(self, p_base, t):
            self.p_base = p_base
            self.t = t
            self.p = self.calculate_p()
            # per type: mask over the token space marking which program
            # variables (offset by the number of functions) have that type
            self.TYPE_MASK = {}
            self.TYPE_MASK['INT'] = np.zeros_like(predictions[0])
            self.TYPE_MASK['LIST'] = np.zeros_like(predictions[0])
            self.TYPE_MASK['BOOL'] = np.zeros_like(predictions[0])
            for i, typ in enumerate(p_base.types):
                self.TYPE_MASK[typ][i + len(impl.FUNCTIONS)] = 1

        def next_step(self):
            """Return one child per admissible one-statement extension."""
            children = []
            base_stmts = list(self.p_base.stmts)
            for i in range(len(predictions[0])):
                if not impl.FUNCTION_MASK[i]:
                    continue
                next_f = impl.FUNCTIONS[i]
                next_input_types = next_f.input_type

                # one pool of candidate tokens per argument position
                choice_list = []
                if isinstance(next_input_types, tuple):
                    for next_input_type in next_input_types:
                        if next_input_type in self.TYPE_MASK:
                            mask = self.TYPE_MASK[next_input_type]
                        else:
                            # LAMBDA F
                            mask = impl.INPUT_TYPE2MASK[next_input_type]
                        choice_list.append(
                            list(itertools.compress(impl.ACT_SPACE, mask)))
                else:
                    choice_list.append(
                        list(itertools.compress(
                            impl.ACT_SPACE,
                            self.TYPE_MASK[next_input_types])))

                # product() over k pools always yields k-tuples, so every
                # args tuple already has the arity of next_f
                for args in itertools.product(*choice_list):
                    stmt = (next_f, args)
                    program = Program(self.p_base.input_types,
                                      base_stmts + [stmt])
                    children.append(Beamhelper(program, self.t + 1))
            return children

        def check(self):
            """Test this program; True means the search must stop."""
            ns['nb_steps'] += 1
            ns['gas'] -= 1
            try:
                if is_solution(self.p_base, examples):
                    ns['solution'] = self.p_base
                    return True
            except (NullInputError, OutputOutOfRangeError):
                # throw out programs that have null inputs or any out of
                # range output; null outputs are ok if unused
                return
            if ns['gas'] <= 0:
                return True

        def calculate_p(self):
            """Compute, store in ``self.p`` and return the program's score.

            Each statement contributes the log of the geometric mean of the
            prediction values of its function token and argument tokens.
            """
            p = 0.0
            count = 0  # position of the next token in ``predictions``
            for func, args in self.p_base.stmts:
                p_f = predictions[count][impl.TOKEN2INDEX[func]]
                count += 1
                for arg in args:
                    p_f *= predictions[count][impl.TOKEN2INDEX[arg]]
                    count += 1
                p += np.log(p_f**(1.0 / (len(args) + 1)))
            self.p = p
            return p

        # helpers compare by score so that sorted() can rank them
        def __eq__(self, other):
            return self.p == other.p

        def __lt__(self, other):
            return self.p < other.p

    helpers = [Beamhelper(p_base, 0)]
    for _ in range(T):
        new_helpers = []
        for h in helpers:
            new_helpers += h.next_step()
        # scores were already computed in Beamhelper.__init__
        new_helpers = sorted(new_helpers, reverse=True)
        helpers = new_helpers[:nb_beam]
        for h in helpers:
            if h.check():
                return ns['solution'], ns['nb_steps']

    return ns['solution'], ns['nb_steps']