def fix(self):
    """Splice the saved subprogram into a fixed base program and re-save it.

    Loads the full program stored at ``self.real_base`` and the subprogram
    stored under ``self.base_program_name``, logs the score of the base
    program on ``self.testset``, replaces node ``self.hole_node_ind`` with
    the subprogram through ``change_key``, logs the score again together
    with the extra metrics returned by ``label_correctness``, and finally
    pickles the patched program to ``self.full_path``.
    """
    # NOTE(review): hard-coded results directory -- looks like a one-off
    # repair path; confirm before reusing this method for other runs.
    self.real_base = 'results/mars_an_astar-near_1_1605057595/fullprogram'

    def _load(path):
        # Read a pickled program and close the handle afterwards (the
        # previous code left every handle open).
        # NOTE: pickle executes arbitrary code; only load trusted files.
        with open(path, "rb") as handle:
            if self.device == 'cpu':
                return CPU_Unpickler(handle).load()
            return pickle.load(handle)

    def _log_score(program):
        # Evaluate `program` on the test set, log the score and hand back
        # the additional metrics from label_correctness.
        with torch.no_grad():
            test_input, test_output = map(list, zip(*self.testset))
            true_vals = torch.flatten(torch.stack(test_output)).float().to(self.device)
            predicted_vals = self.process_batch(program, test_input, self.output_type, self.output_size, self.device)
            metric, additional_params = label_correctness(predicted_vals, true_vals, num_labels=self.num_labels)
        log_and_print("F1 score achieved is {:.4f}".format(1 - metric))
        return additional_params

    base_program = _load("%s.p" % self.real_base)
    best_program = _load("%s/subprogram.p" % self.base_program_name)
    self.full_path = os.path.join(self.base_program_name, "fullprogram.p")  # fix this

    # Score before the replacement.
    _log_score(base_program)

    # The node list is not used here; the call is retained in case traverse
    # touches the program -- TODO confirm and drop.
    traverse(base_program.submodules, [])
    change_key(base_program.submodules, [], self.hole_node_ind, best_program.submodules["program"])

    # Score after the replacement.
    additional_params = _log_score(base_program)
    log_and_print(str(additional_params))

    with open(self.full_path, "wb") as handle:
        pickle.dump(base_program, handle)
def run_init(self, timestamp, base_program_name, hole_node_ind, graph, trainset, validset, train_config, device, verbose=False):
    """Train the root program of ``graph`` inside the base program.

    A deep copy of ``graph.root_node`` is trained with
    ``execute_and_train_with_full``. Afterwards the base program is
    re-loaded from ``"<base_program_name>.p"`` and node ``hole_node_ind``
    is replaced with the trained root program.

    ``timestamp`` and ``verbose`` are accepted but not used in this method.

    Returns:
        A tuple ``(1 - initial_score, patched_base_program, losses)``.
    """
    assert isinstance(graph, ProgramGraph)

    log_and_print("Training root program ...")
    current = copy.deepcopy(graph.root_node)
    initial_score, losses, _ = execute_and_train_with_full(
        base_program_name, hole_node_ind, current.program, validset, trainset,
        train_config, graph.output_type, graph.output_size, neural=True, device=device)
    log_and_print("Initial training complete. Score from program is {:.4f} \n".format(1 - initial_score))

    # Close the pickle handle once loading is done (it was leaked before).
    # NOTE: pickle executes arbitrary code; only load trusted files.
    with open("%s.p" % base_program_name, "rb") as handle:
        if device == 'cpu':
            base_program = CPU_Unpickler(handle).load()
        else:
            base_program = pickle.load(handle)

    # The node list is not used here; the call is retained in case traverse
    # touches the program -- TODO confirm and drop.
    traverse(base_program.submodules, [])
    change_key(base_program.submodules, [], hole_node_ind, current.program.submodules["program"])

    return 1 - initial_score, base_program, losses
def load_base_program(self):
    """Load ``"<self.base_program_name>.p"`` into ``self.base_program``.

    Returns:
        The list filled by ``traverse`` for the program's submodules; it is
        also logged.
    """
    print("Loading %s" % self.base_program_name)

    # Close the pickle handle once loading is done (it was leaked before).
    # NOTE: pickle executes arbitrary code; only load trusted files.
    with open("%s.p" % self.base_program_name, "rb") as handle:
        if self.device == 'cpu':
            self.base_program = CPU_Unpickler(handle).load()
        else:
            self.base_program = pickle.load(handle)

    nodes = []
    traverse(self.base_program.submodules, nodes)
    log_and_print(nodes)
    return nodes
def evaluate_final(self):
    """Load the program saved at ``self.full_path`` and log its test score.

    The program structure is logged first (constants omitted), then the
    program is run on ``self.testset`` under ``torch.no_grad()`` and
    ``1 - metric`` from ``label_correctness`` is logged.
    """
    # NOTE: pickle executes arbitrary code; only load trusted files.
    with open(self.full_path, "rb") as handle:
        if self.device == 'cpu':
            # Fix: ``.load()`` must be called on the unpickler. The previous
            # code called it on the file object, which raises AttributeError.
            program = CPU_Unpickler(handle).load()
        else:
            program = pickle.load(handle)

    log_and_print(print_program(program, ignore_constants=True))

    # The node list is not used here; the call is retained in case traverse
    # touches the program -- TODO confirm and drop.
    traverse(program.submodules, [])

    with torch.no_grad():
        test_input, test_output = map(list, zip(*self.testset))
        true_vals = torch.flatten(torch.stack(test_output)).float().to(self.device)
        predicted_vals = self.process_batch(program, test_input, self.output_type, self.output_size, self.device)
        metric, additional_params = label_correctness(predicted_vals, true_vals, num_labels=self.num_labels)
    log_and_print("F1 score achieved is {:.4f}".format(1 - metric))
def neural_h(self):
    """Pick the node of the base program with the best neural heuristic score.

    Every node after the first one in the ``traverse`` listing is treated as
    a candidate hole. Candidates whose printed form contains more than
    ``self.max_depth`` opening parentheses are skipped. For the rest, a
    depth-0 ``ProgramGraph`` is built from the node's input/output types and
    sizes and trained with ``ASTAR_NEAR.run_init``.

    Returns:
        Index of the highest-scoring node, or 0 when no score exceeded 0.
    """
    nodes = []
    traverse(self.base_program.submodules, nodes)  # populate AST listing

    train_config = dict(
        lr=self.learning_rate,
        neural_epochs=15,
        symbolic_epochs=self.symbolic_epochs,
        optimizer=optim.Adam,
        lossfxn=nn.CrossEntropyLoss(weight=self.loss_weight),  # todo
        evalfxn=label_correctness,
        num_labels=self.num_labels,
    )

    top_index, top_score = 0, 0
    for node_index, candidate in enumerate(nodes[1:], start=1):
        target = candidate[0]
        if print_program(target).count('(') > self.max_depth:
            continue

        # Program graph rooted at a neural module shaped like the target
        # node; the literal 0 is the graph's max depth.
        graph = ProgramGraph(
            DSL_DICT, CUSTOM_EDGE_COSTS,
            target.input_type, target.output_type,
            target.input_size, target.output_size,
            self.max_num_units, self.min_num_units, self.max_num_children,
            0, self.penalty, ite_beta=self.ite_beta)

        searcher = ASTAR_NEAR(frontier_capacity=0)
        score, _, _ = searcher.run_init(
            self.timestamp, self.base_program_name, node_index, graph,
            self.batched_trainset, self.validset, train_config, self.device)

        log_and_print("Subprogram to replace: %s"% print_program(target))
        if score > top_score:
            top_index, top_score = node_index, score
            log_and_print("New best: RNN Heuristic score at Node %d: %f\n" %( node_index, score))
        else:
            log_and_print("RNN Heuristic score at Node %d: %f\n" %( node_index, score))

    return top_index
def evaluate_neurosymb(self, program):
    """Score ``program`` on the test set and return ``1 - metric``.

    The program structure is printed (constants omitted), the program is run
    on ``self.testset`` under ``torch.no_grad()``, and both the score and
    the additional metrics from ``label_correctness`` are logged.
    """
    print(print_program(program, ignore_constants=True))

    # Node listing is collected but not consumed in this method.
    collected = []
    traverse(program.submodules, collected)

    with torch.no_grad():
        inputs, targets = (list(column) for column in zip(*self.testset))
        expected = torch.stack(targets).flatten().float().to(self.device)
        predicted = self.process_batch(program, inputs, self.output_type, self.output_size, self.device)
        metric, extras = label_correctness(predicted, expected, num_labels=self.num_labels)

    score = 1 - metric
    log_and_print("Test F1 score achieved is {:.4f}\n".format(score))
    log_and_print(str(extras))
    return score
def neural_h(self):
    """Compute the neural heuristic for every node and save it as CSV.

    For each node in the ``traverse`` listing of the base program, a depth-0
    ``ProgramGraph`` is built from the node's input/output types and sizes,
    trained with ``ASTAR_NEAR.run_init`` and evaluated on the test set with
    ``self.evaluate_neurosymb``. The results are written to
    ``<self.save_path>/neurh.csv``: the first row holds the base program
    name, and each following row holds the printed subprogram, the second
    element of the node entry, the training score, the test score and then
    the losses returned by ``run_init``.
    """
    nodes = []
    traverse(self.base_program.submodules, nodes)  # populate AST listing

    train_config = {
        'lr': self.learning_rate,
        'neural_epochs': self.neural_epochs,
        'symbolic_epochs': self.symbolic_epochs,
        'optimizer': optim.Adam,
        'lossfxn': nn.CrossEntropyLoss(weight=self.loss_weight),  # todo
        'evalfxn': label_correctness,
        'num_labels': self.num_labels,
    }

    # Fix: the header must be a one-element row. A bare string is iterated
    # by csv.writer and ends up as one character per column.
    scores = [[self.base_program_name]]

    for hole_node_ind, hole_node in enumerate(nodes):
        target = hole_node[0]

        # Program graph rooted at a neural module shaped like the target
        # node; the literal 0 is the graph's max depth.
        program_graph = ProgramGraph(
            DSL_DICT, CUSTOM_EDGE_COSTS,
            target.input_type, target.output_type,
            target.input_size, target.output_size,
            self.max_num_units, self.min_num_units, self.max_num_children,
            0, self.penalty, ite_beta=self.ite_beta)

        algorithm = ASTAR_NEAR(frontier_capacity=0)
        score, new_prog, losses = algorithm.run_init(
            self.timestamp, self.base_program_name, hole_node_ind, program_graph,
            self.batched_trainset, self.validset, train_config, self.device)

        subprogram_str = print_program(target)
        test_score = self.evaluate_neurosymb(new_prog)
        scores.append([subprogram_str, hole_node[1], score, test_score, *losses])

    h_file = os.path.join(self.save_path, "neurh.csv")
    with open(h_file, "w", newline="") as f:
        csv.writer(f).writerows(scores)
def __init__(self, **kwargs):
    """Load data and the base program, then run ``self.evaluate()``.

    All keyword arguments are copied onto the instance. ``train_data``,
    ``test_data``, ``train_labels`` and ``test_labels`` are read as paths
    and replaced by the arrays loaded with ``np.load``. The base program is
    unpickled from ``"<self.base_program_name>.p"`` and the node at
    ``self.hole_node_ind`` of its ``traverse`` listing is stored in
    ``self.hole_node``.
    """
    self.__dict__.update(kwargs)

    self.device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

    # load input data
    self.train_data = np.load(self.train_data)
    self.test_data = np.load(self.test_data)
    self.valid_data = None
    self.train_labels = np.load(self.train_labels)
    self.test_labels = np.load(self.test_labels)
    self.valid_labels = None
    # NOTE(review): valid_data / valid_labels are forced to None just above,
    # so this branch never runs and any values passed in kwargs are
    # discarded. Behaviour kept as-is; confirm whether that is intended.
    if self.valid_data is not None and self.valid_labels is not None:
        self.valid_data = np.load(self.valid_data)
        self.valid_labels = np.load(self.valid_labels)

    self.batched_trainset, self.validset, self.testset = prepare_datasets(
        self.train_data, self.valid_data, self.test_data,
        self.train_labels, self.valid_labels, self.test_labels,
        normalize=self.normalize, train_valid_split=self.train_valid_split,
        batch_size=self.batch_size)

    # Close the pickle handle once loading is done (it was leaked before).
    # NOTE: pickle executes arbitrary code; only load trusted files.
    with open("%s.p" % self.base_program_name, "rb") as handle:
        if self.device == 'cpu':
            self.base_program = CPU_Unpickler(handle).load()
        else:
            self.base_program = pickle.load(handle)

    nodes = []
    traverse(self.base_program.submodules, nodes)
    self.hole_node = nodes[self.hole_node_ind]

    # for near on subtree
    self.curr_iter = 0
    self.program_path = None

    now = datetime.now()
    self.timestamp = str(datetime.timestamp(now)).split('.')[0]
    log_and_print(self.timestamp)

    self.evaluate()
def execute_and_train_with_full(base_program_name, hole_node_ind, program, validset, trainset, train_config, output_type, output_size, neural=False, device='cpu', use_valid_score=False, print_every=60):
    """Train ``program`` while it is spliced into the full base program.

    The base program is unpickled from ``"<base_program_name>.p"``, node
    ``hole_node_ind`` is replaced with ``program.submodules["program"]``
    through ``change_key``, the resulting program is logged, and training is
    delegated to ``execute_and_train``.

    ``use_valid_score`` and ``print_every`` are accepted but not forwarded.

    Returns:
        Whatever ``execute_and_train`` returns.
    """
    # Close the pickle handle once loading is done (it was leaked before).
    # NOTE: pickle executes arbitrary code; only load trusted files.
    with open("%s.p" % base_program_name, "rb") as handle:
        if device == 'cpu':
            base_program = CPU_Unpickler(handle).load()
        else:
            base_program = pickle.load(handle)

    # The node list is not used here; the call is retained in case traverse
    # touches the program -- TODO confirm and drop.
    traverse(base_program.submodules, [])

    # should we just replace with program?
    change_key(base_program.submodules, [], hole_node_ind, program.submodules["program"])
    log_and_print(print_program(base_program))

    return execute_and_train(base_program, program, validset, trainset, train_config, output_type, output_size, neural, device)
def evaluate(self):
    """Splice the "baby" program into the base program and log its test score.

    Both programs are unpickled from ``"<name>.p"`` files. The node at
    ``self.hole_node_ind`` of the base program's ``traverse`` listing is
    handed to ``change_key`` together with the baby program. The patched
    base program is then run on ``self.testset`` under ``torch.no_grad()``
    and ``1 - metric`` from ``label_correctness`` is logged.
    """
    # NOTE(review): both files are always read with CPU_Unpickler, even when
    # self.device is a CUDA device -- confirm this is intended.
    # NOTE: pickle executes arbitrary code; only load trusted files.
    with open("%s.p" % self.base_program_name, "rb") as handle:
        base_program = CPU_Unpickler(handle).load()
    with open("%s.p" % self.baby_program_name, "rb") as handle:
        program_baby = CPU_Unpickler(handle).load()

    nodes = []
    traverse(base_program.submodules, nodes)
    hole_node = nodes[self.hole_node_ind]  # condition node
    change_key(base_program.submodules, hole_node[0], program_baby, hole_node[1])

    base_output_type = base_program.program.output_type
    base_output_size = base_program.program.output_size

    with torch.no_grad():
        test_input, test_output = map(list, zip(*self.testset))
        true_vals = torch.flatten(torch.stack(test_output)).float().to(self.device)
        predicted_vals = self.process_batch(base_program, test_input, base_output_type, base_output_size, self.device)
        metric, additional_params = label_correctness(predicted_vals, true_vals, num_labels=self.num_labels)
    log_and_print("F1 score achieved is {:.4f}".format(1 - metric))
def run_near(self):
    """Run the ASTAR_NEAR search on the hole node and save the results.

    A ``ProgramGraph`` is built from the input/output types and sizes of
    ``self.hole_node[0]`` and searched with ``ASTAR_NEAR.run``. The
    following files are written under ``self.save_path``:

    * ``best_programs.txt`` -- string form of the (structure, info) pairs of
      the programs found (an empty list when ``self.algorithm == "rnn"``);
    * ``subprogram.p`` -- pickle of the best program;
    * ``fullprogram.p`` -- pickle of the base program with node
      ``self.hole_node_ind`` replaced by the best program;
    * ``parameters.txt`` -- one ``name: value`` line per run parameter.

    Sets ``self.program_path`` and ``self.full_path``.

    Raises:
        KeyError: if one of the listed parameters is not an instance
            attribute.
    """
    train_config = {
        'lr': self.learning_rate,
        'neural_epochs': self.neural_epochs,
        'symbolic_epochs': self.symbolic_epochs,
        'optimizer': optim.Adam,
        'lossfxn': nn.CrossEntropyLoss(weight=self.loss_weight),  # todo
        'evalfxn': label_correctness,
        'num_labels': self.num_labels,
    }

    target = self.hole_node[0]

    # Initialize program graph starting from trained NN
    program_graph = ProgramGraph(
        DSL_DICT, CUSTOM_EDGE_COSTS,
        target.input_type, target.output_type,
        target.input_size, target.output_size,
        self.max_num_units, self.min_num_units, self.max_num_children,
        self.max_depth, self.penalty, ite_beta=self.ite_beta)

    # Initialize algorithm
    algorithm = ASTAR_NEAR(frontier_capacity=self.frontier_capacity)
    best_programs = algorithm.run(
        self.timestamp, self.base_program_name, self.hole_node_ind, program_graph,
        self.batched_trainset, self.validset, train_config, self.device)

    best_program_str = []
    if self.algorithm == "rnn":
        # special case for RNN baseline
        best_program = best_programs
    else:
        # Print all best programs found
        log_and_print("\n")
        log_and_print("BEST programs found:")
        for item in best_programs:
            program_struct = print_program(item["program"], ignore_constants=True)
            program_info = "struct_cost {:.4f} | score {:.4f} | path_cost {:.4f} | time {:.4f}".format(
                item["struct_cost"], item["score"], item["path_cost"], item["time"])
            best_program_str.append((program_struct, program_info))
            print_program_dict(item)
        best_program = best_programs[-1]["program"]

    # Save best programs. Context managers make sure each handle is flushed
    # and closed even when a later step raises.
    with open(os.path.join(self.save_path, "best_programs.txt"), "w") as f:
        f.write(str(best_program_str))

    self.program_path = os.path.join(self.save_path, "subprogram.p")
    with open(self.program_path, "wb") as f:
        pickle.dump(best_program, f)

    self.full_path = os.path.join(self.save_path, "fullprogram.p")

    # Reload the untouched base program and splice the best program in.
    # NOTE: pickle executes arbitrary code; only load trusted files.
    with open("%s.p" % self.base_program_name, "rb") as f:
        if self.device == 'cpu':
            base_program = CPU_Unpickler(f).load()
        else:
            base_program = pickle.load(f)

    # The node list is not used here; the call is retained in case traverse
    # touches the program -- TODO confirm and drop.
    traverse(base_program.submodules, [])
    change_key(base_program.submodules, [], self.hole_node_ind, best_program.submodules["program"])
    with open(self.full_path, "wb") as f:
        pickle.dump(base_program, f)

    # Save parameters
    parameters = ['input_type', 'output_type', 'input_size', 'output_size', 'num_labels',
                  'neural_units', 'max_num_units', 'min_num_units', 'max_num_children',
                  'max_depth', 'penalty', 'ite_beta', 'train_valid_split', 'normalize',
                  'batch_size', 'learning_rate', 'neural_epochs', 'symbolic_epochs',
                  'lossfxn', 'class_weights', 'num_iter', 'num_f_epochs', 'algorithm',
                  'frontier_capacity', 'initial_depth', 'performance_multiplier',
                  'depth_bias', 'exponent_bias', 'num_mc_samples', 'max_num_programs',
                  'population_size', 'selection_size', 'num_gens', 'total_eval',
                  'mutation_prob', 'max_enum_depth', 'exp_id', 'base_program_name',
                  'hole_node_ind']
    with open(os.path.join(self.save_path, "parameters.txt"), "w") as f:
        for p in parameters:
            f.write(p + ': ' + str(self.__dict__[p]) + '\n')
def __init__(self, **kwargs):
    """Set up an experiment and immediately run it.

    All keyword arguments are copied onto the instance. Depending on
    ``self.exp_name`` the datasets are prepared from ``.npy`` arrays
    (``'crim13'``) or from archives holding ``"features"`` and
    ``"annotations"`` entries (``'mars_an'``); any other name is logged as
    an error and construction stops early. The base program is unpickled
    from ``"<self.base_program_name>.p"`` and ``self.hole_node_ind`` is
    wrapped into the range of its ``traverse`` listing.

    If ``self.eval`` is truthy only ``self.evaluate()`` runs. Otherwise a
    timestamped output directory is created and logging is initialised;
    then either ``self.neural_h()`` runs (when ``self.neurh`` is truthy) or
    ``self.run_near()`` followed by ``self.evaluate_final()``.
    """
    self.__dict__.update(kwargs)

    if torch.cuda.is_available():
        self.device = 'cuda:0'
        print(self.device)
    else:
        self.device = 'cpu'

    self.loss_weight = torch.tensor([float(w) for w in self.class_weights.split(',')]).to(self.device)

    if self.exp_name == 'crim13':
        # load input data
        self.train_data = np.load(self.train_data)
        self.test_data = np.load(self.test_data)
        self.valid_data = None
        self.train_labels = np.load(self.train_labels)
        self.test_labels = np.load(self.test_labels)
        self.valid_labels = None
        assert self.train_data.shape[-1] == self.test_data.shape[-1] == self.input_size
        # NOTE(review): valid_data / valid_labels are forced to None just
        # above, so this branch never runs and any values passed in kwargs
        # are discarded. Behaviour kept as-is; confirm whether intended.
        if self.valid_data is not None and self.valid_labels is not None:
            self.valid_data = np.load(self.valid_data)
            self.valid_labels = np.load(self.valid_labels)
            # Fix: the assert referenced an undefined local `valid_data`.
            assert self.valid_data.shape[-1] == self.input_size

        self.batched_trainset, self.validset, self.testset = prepare_datasets(
            self.train_data, self.valid_data, self.test_data,
            self.train_labels, self.valid_labels, self.test_labels,
            normalize=self.normalize, train_valid_split=self.train_valid_split,
            batch_size=self.batch_size)

    elif self.exp_name == 'mars_an':
        # self.train_data is a comma-separated list of archive paths.
        train_raw_features = []
        train_raw_annotations = []
        for fname in self.train_data.split(","):
            archive = np.load(fname, allow_pickle=True)
            train_raw_features.extend(archive["features"])
            train_raw_annotations.extend(archive["annotations"])

        test_data = np.load(self.test_data, allow_pickle=True)
        test_raw_features = test_data["features"]
        test_raw_annotations = test_data["annotations"]

        valid_raw_features = None
        valid_raw_annotations = None
        # Fix: valid_features used to be unbound when no validation file
        # was supplied, so prepare_datasets() below raised NameError.
        valid_features = None
        valid_labels = None

        # Check the # of features of the first frame of the first video
        assert len(train_raw_features[0][0]) == len(test_raw_features[0][0]) == self.input_size

        if self.valid_data is not None:
            valid_data = np.load(self.valid_data, allow_pickle=True)
            valid_raw_features = valid_data["features"]
            valid_raw_annotations = valid_data["annotations"]
            assert len(valid_raw_features[0][0]) == self.input_size

        behave_dict = read_into_dict('../near_code_7keypoints/data/MARS_data/behavior_assignments_3class.txt')

        # Reshape the data to trajectories of length 100
        # NOTE(review): self.train_labels is passed to preprocess for all
        # three splits -- confirm it is a label selector, not per-split data.
        train_features, train_labels = preprocess(train_raw_features, train_raw_annotations, self.train_labels, behave_dict)
        test_features, test_labels = preprocess(test_raw_features, test_raw_annotations, self.train_labels, behave_dict)
        if valid_raw_features is not None and valid_raw_annotations is not None:
            valid_features, valid_labels = preprocess(valid_raw_features, valid_raw_annotations, self.train_labels, behave_dict)

        self.batched_trainset, self.validset, self.testset = prepare_datasets(
            train_features, valid_features, test_features,
            train_labels, valid_labels, test_labels,
            normalize=self.normalize, train_valid_split=self.train_valid_split,
            batch_size=self.batch_size)

    else:
        log_and_print('bad experiment name')
        return

    # Close the pickle handle once loading is done (it was leaked before).
    # NOTE: pickle executes arbitrary code; only load trusted files.
    with open("%s.p" % self.base_program_name, "rb") as handle:
        if self.device == 'cpu':
            self.base_program = CPU_Unpickler(handle).load()
        else:
            self.base_program = pickle.load(handle)

    nodes = []
    traverse(self.base_program.submodules, nodes)
    log_and_print(nodes)

    # Modulo maps a negative index onto the matching position from the end.
    self.hole_node_ind %= len(nodes)
    self.hole_node = nodes[self.hole_node_ind]

    # for near on subtree
    self.curr_iter = 0
    self.program_path = None

    if self.exp_id is not None:
        self.trial = self.exp_id

    if self.eval:
        self.evaluate()
    else:
        now = datetime.now()
        self.timestamp = str(datetime.timestamp(now)).split('.')[0][4:]
        log_and_print(self.timestamp)

        # unique timestamp for each near run
        full_exp_name = "{}_{}_{}_{}".format(
            self.exp_name, self.algorithm, self.trial, self.timestamp)
        self.save_path = os.path.join(self.save_dir, full_exp_name)
        if not os.path.exists(self.save_path):
            os.makedirs(self.save_path)
        init_logging(self.save_path)

        if self.neurh:
            log_and_print(self.base_program_name)
            self.neural_h()
        else:
            self.run_near()
            self.evaluate_final()