コード例 #1
0
    def fix(self):
        """Splice the saved subprogram into a fixed base program, score it, save it.

        Loads the pickled base program from the hard-coded ``self.real_base``
        path and the subprogram from ``<self.base_program_name>/subprogram.p``.
        The base program is scored on ``self.testset`` before and after
        ``change_key`` replaces the node at ``self.hole_node_ind``; both scores
        are logged, and the patched program is pickled to ``self.full_path``.

        Side effects: sets ``self.real_base`` and ``self.full_path`` and writes
        ``fullprogram.p`` inside ``self.base_program_name``.
        """
        # NOTE(review): hard-coded path to one specific run's results.
        self.real_base = 'results/mars_an_astar-near_1_1605057595/fullprogram'

        def load_program(path):
            # 'cpu' goes through CPU_Unpickler, any other device through
            # plain pickle; the handle is closed as soon as loading finishes.
            with open(path, "rb") as handle:
                if self.device == 'cpu':
                    return CPU_Unpickler(handle).load()
                return pickle.load(handle)

        def score_on_testset():
            # Returns label_correctness' (metric, additional_params) for the
            # current state of base_program.
            with torch.no_grad():
                test_input, test_output = map(list, zip(*self.testset))
                true_vals = torch.flatten(torch.stack(test_output)).float().to(self.device)
                predicted_vals = self.process_batch(base_program, test_input, self.output_type, self.output_size, self.device)
                return label_correctness(predicted_vals, true_vals, num_labels=self.num_labels)

        base_program = load_program("%s.p" % self.real_base)
        best_program = load_program("%s/subprogram.p" % self.base_program_name)

        # TODO: original author flagged this path with "fix this".
        self.full_path = os.path.join(self.base_program_name, "fullprogram.p")

        # Score before the splice.
        metric, additional_params = score_on_testset()
        log_and_print("F1 score achieved is {:.4f}".format(1 - metric))

        # Result unused; call kept in case traverse has side effects on the
        # program tree -- TODO confirm.
        traverse(base_program.submodules, [])
        change_key(base_program.submodules, [], self.hole_node_ind, best_program.submodules["program"])

        # Score after the splice.
        metric, additional_params = score_on_testset()
        log_and_print("F1 score achieved is {:.4f}".format(1 - metric))
        log_and_print(str(additional_params))

        with open(self.full_path, "wb") as out_file:
            pickle.dump(base_program, out_file)
コード例 #2
0
ファイル: astar_near.py プロジェクト: myracheng/pronear
    def run_init(self, timestamp, base_program_name, hole_node_ind, graph, trainset, validset, train_config, device, verbose=False):
        """Train the graph's root program in place of one node of a saved program.

        Args:
            timestamp: not used by this method.
            base_program_name: path of the pickled base program without the
                ``.p`` suffix.
            hole_node_ind: index of the node to replace, forwarded to
                ``change_key`` and ``execute_and_train_with_full``.
            graph: ``ProgramGraph`` whose ``root_node`` is deep-copied and trained.
            trainset: training data forwarded to ``execute_and_train_with_full``.
            validset: validation data forwarded to ``execute_and_train_with_full``.
            train_config: training configuration, forwarded unchanged.
            device: device string; ``'cpu'`` selects ``CPU_Unpickler`` for loading.
            verbose: not used by this method.

        Returns:
            ``(1 - initial_score, new_prog, losses)`` where ``new_prog`` is a
            freshly loaded base program with the trained root program spliced
            in at ``hole_node_ind``.
        """
        assert isinstance(graph, ProgramGraph)

        log_and_print("Training root program ...")
        current = copy.deepcopy(graph.root_node)
        # The third returned value is not used here.
        initial_score, losses, _ = execute_and_train_with_full(base_program_name, hole_node_ind, current.program, validset, trainset, train_config,
            graph.output_type, graph.output_size, neural=True, device=device)

        log_and_print("Initial training complete. Score from program is {:.4f} \n".format(1 - initial_score))

        # Reload the base program from disk so the trained program can be
        # spliced into a fresh copy; the handle is closed once loading is done.
        with open("%s.p" % base_program_name, "rb") as program_file:
            if device == 'cpu':
                base_program = CPU_Unpickler(program_file).load()
            else:
                base_program = pickle.load(program_file)

        # Result unused; call kept in case traverse has side effects on the
        # program tree -- TODO confirm.
        traverse(base_program.submodules, [])

        change_key(base_program.submodules, [], hole_node_ind, current.program.submodules["program"])

        return 1 - initial_score, base_program, losses
コード例 #3
0
 def load_base_program(self):
     """Load the pickled base program and return its traversed node list.

     Reads ``<self.base_program_name>.p`` (through ``CPU_Unpickler`` when
     ``self.device`` is ``'cpu'``, plain ``pickle`` otherwise), stores the
     result in ``self.base_program``, and logs and returns the list that
     ``traverse`` fills from the program's ``submodules``.
     """
     print("Loading %s" % self.base_program_name)
     # Context manager so the pickle file is closed even if loading fails.
     with open("%s.p" % self.base_program_name, "rb") as program_file:
         if self.device == 'cpu':
             self.base_program = CPU_Unpickler(program_file).load()
         else:
             self.base_program = pickle.load(program_file)

     nodes = []
     traverse(self.base_program.submodules, nodes)
     log_and_print(nodes)
     return nodes
コード例 #4
0
 def evaluate_final(self):
     """Load the program saved at ``self.full_path`` and log its test-set score.

     The program is unpickled (through ``CPU_Unpickler`` when ``self.device``
     is ``'cpu'``), printed with constants ignored, run on ``self.testset``
     via ``self.process_batch``, and scored with ``label_correctness``. The
     logged value is ``1 - metric``.
     """
     with open(self.full_path, "rb") as program_file:
         if self.device == 'cpu':
             # .load() must be called on the unpickler; the previous code
             # called it on the file object, which has no such method.
             program = CPU_Unpickler(program_file).load()
         else:
             program = pickle.load(program_file)
     log_and_print(print_program(program, ignore_constants=True))
     # Result unused; call kept in case traverse has side effects on the
     # program tree -- TODO confirm.
     traverse(program.submodules, [])
     with torch.no_grad():
         test_input, test_output = map(list, zip(*self.testset))
         true_vals = torch.flatten(torch.stack(test_output)).float().to(self.device)
         predicted_vals = self.process_batch(program, test_input, self.output_type, self.output_size, self.device)

         # Only the metric is reported here; the extra parameters are ignored.
         metric, _ = label_correctness(predicted_vals, true_vals, num_labels=self.num_labels)
     log_and_print("F1 score achieved is {:.4f}".format(1 - metric))
コード例 #5
0
    def neural_h(self):
        data = self.base_program.submodules
        l = [] #populate AST
        traverse(data,l)
        train_config = {
            'lr' : self.learning_rate,
            'neural_epochs' : 15,
            'symbolic_epochs' : self.symbolic_epochs,
            'optimizer' : optim.Adam,
            'lossfxn' : nn.CrossEntropyLoss(weight=self.loss_weight), #todo
            'evalfxn' : label_correctness,
            'num_labels' : self.num_labels
        }

        best_node_ind = 0
        best_score = 0
        for hole_node_ind in range(1,len(l)):

            hole_node = l[hole_node_ind]
            subprogram_str = print_program(hole_node[0])
            if subprogram_str.count('(') > self.max_depth:
                continue

            near_input_type = hole_node[0].input_type
            near_output_type = hole_node[0].output_type
            near_input_size = hole_node[0].input_size
            near_output_size = hole_node[0].output_size

            # Initialize program graph starting from trained NN
            program_graph = ProgramGraph(DSL_DICT, CUSTOM_EDGE_COSTS, near_input_type, near_output_type, near_input_size, near_output_size,
                self.max_num_units, self.min_num_units, self.max_num_children, 0, self.penalty, ite_beta=self.ite_beta) ## max_depth 0

            # Initialize algorithm
            algorithm = ASTAR_NEAR(frontier_capacity=0)
            score, new_prog, losses = algorithm.run_init(self.timestamp, self.base_program_name, hole_node_ind,
                program_graph, self.batched_trainset, self.validset, train_config, self.device)
            subprogram_str = print_program(hole_node[0])
            log_and_print("Subprogram to replace: %s"% subprogram_str)
            if score > best_score:
                best_node_ind = hole_node_ind
                best_score = score
                log_and_print("New best: RNN Heuristic score at Node %d: %f\n" %( hole_node_ind, score))
            else: 
                log_and_print("RNN Heuristic score at Node %d: %f\n" %( hole_node_ind, score))
        return best_node_ind
コード例 #6
0
 def evaluate_neurosymb(self, program):
     """Score ``program`` on ``self.testset`` and return ``1 - metric``.

     The program is printed (constants ignored), run through
     ``self.process_batch`` on the test inputs, and compared with the
     flattened test targets by ``label_correctness``. The score and the
     additional parameters returned by ``label_correctness`` are logged.
     """
     print(print_program(program, ignore_constants=True))
     visited = []
     traverse(program.submodules, visited)

     with torch.no_grad():
         inputs, targets = (list(column) for column in zip(*self.testset))
         stacked_targets = torch.stack(targets)
         expected = torch.flatten(stacked_targets).float().to(self.device)
         predicted = self.process_batch(
             program, inputs, self.output_type, self.output_size, self.device)

         metric, additional_params = label_correctness(
             predicted, expected, num_labels=self.num_labels)

     test_score = 1 - metric
     log_and_print("Test F1 score achieved is {:.4f}\n".format(test_score))
     log_and_print(str(additional_params))
     return test_score
コード例 #7
0
    def neural_h(self):
        """Score every node of the base program as a hole and write a CSV report.

        For each node returned by ``traverse``, a depth-0 ``ProgramGraph``
        typed like that node is trained in its place via
        ``ASTAR_NEAR.run_init``, and the resulting program is scored on the
        test set with ``self.evaluate_neurosymb``. One row per node
        (printed subprogram, ``hole_node[1]``, score, test score, then the
        losses) is written to ``<self.save_path>/neurh.csv`` after a first
        row holding ``self.base_program_name``.
        """
        ast_nodes = []  # filled by traverse from the base program's submodules
        traverse(self.base_program.submodules, ast_nodes)
        train_config = {
            'lr' : self.learning_rate,
            'neural_epochs' : self.neural_epochs,
            'symbolic_epochs' : self.symbolic_epochs,
            'optimizer' : optim.Adam,
            'lossfxn' : nn.CrossEntropyLoss(weight=self.loss_weight), #todo
            'evalfxn' : label_correctness,
            'num_labels' : self.num_labels
        }

        # The first row must itself be a sequence: csv.writer iterates each
        # row, so a bare string would be split into one character per column.
        scores = [[self.base_program_name]]
        for hole_node_ind, hole_node in enumerate(ast_nodes):
            near_input_type = hole_node[0].input_type
            near_output_type = hole_node[0].output_type
            near_input_size = hole_node[0].input_size
            near_output_size = hole_node[0].output_size

            # Initialize program graph starting from trained NN (max_depth 0)
            program_graph = ProgramGraph(DSL_DICT, CUSTOM_EDGE_COSTS, near_input_type, near_output_type, near_input_size, near_output_size,
                self.max_num_units, self.min_num_units, self.max_num_children, 0, self.penalty, ite_beta=self.ite_beta)

            # Initialize algorithm
            algorithm = ASTAR_NEAR(frontier_capacity=0)
            score, new_prog, losses = algorithm.run_init(self.timestamp, self.base_program_name, hole_node_ind,
                program_graph, self.batched_trainset, self.validset, train_config, self.device)
            subprogram_str = print_program(hole_node[0])
            test_score = self.evaluate_neurosymb(new_prog)
            stats_arr = [subprogram_str, hole_node[1], score, test_score]
            stats_arr.extend(losses)
            scores.append(stats_arr)

        h_file = os.path.join(self.save_path, "neurh.csv")
        with open(h_file, "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerows(scores)
コード例 #8
0
ファイル: evaluate.py プロジェクト: myracheng/pronear
    def __init__(self, **kwargs):
        """Load datasets and the base program, then run ``self.evaluate()``.

        Every keyword argument becomes an instance attribute. The attributes
        ``train_data``, ``test_data``, ``train_labels`` and ``test_labels``
        are replaced by the result of ``np.load`` on their initial values.
        ``device`` is ``'cuda:0'`` when CUDA is available, else ``'cpu'``.
        """
        self.__dict__.update(kwargs)

        if torch.cuda.is_available():
            self.device = 'cuda:0'
        else:
            self.device = 'cpu'

        # load input data
        self.train_data = np.load(self.train_data)
        self.test_data = np.load(self.test_data)
        self.train_labels = np.load(self.train_labels)
        self.test_labels = np.load(self.test_labels)
        # NOTE(review): validation inputs are always forced to None here, so
        # any valid_data / valid_labels passed via kwargs are ignored and
        # prepare_datasets receives None for both. The previous "load if not
        # None" branch could never run and has been removed.
        self.valid_data = None
        self.valid_labels = None

        self.batched_trainset, self.validset, self.testset = prepare_datasets(self.train_data, self.valid_data, self.test_data, self.train_labels, self.valid_labels,
        self.test_labels, normalize=self.normalize, train_valid_split=self.train_valid_split, batch_size=self.batch_size)

        # Context manager so the pickle file is closed once loading is done.
        with open("%s.p" % self.base_program_name, "rb") as program_file:
            if self.device == 'cpu':
                self.base_program = CPU_Unpickler(program_file).load()
            else:
                self.base_program = pickle.load(program_file)

        nodes = []
        traverse(self.base_program.submodules, nodes)
        self.hole_node = nodes[self.hole_node_ind]

        #for near on subtree
        self.curr_iter = 0
        self.program_path = None

        # Whole-second Unix timestamp used as a run identifier.
        now = datetime.now()
        self.timestamp = str(datetime.timestamp(now)).split('.')[0]
        log_and_print(self.timestamp)

        self.evaluate()
コード例 #9
0
def execute_and_train_with_full(base_program_name,
                                hole_node_ind,
                                program,
                                validset,
                                trainset,
                                train_config,
                                output_type,
                                output_size,
                                neural=False,
                                device='cpu',
                                use_valid_score=False,
                                print_every=60):
    """Splice ``program`` into a saved base program and train the result.

    Loads ``<base_program_name>.p`` (through ``CPU_Unpickler`` when ``device``
    is ``'cpu'``, plain ``pickle`` otherwise), replaces the node at
    ``hole_node_ind`` with ``program.submodules["program"]`` via
    ``change_key``, logs the patched program, and delegates to
    ``execute_and_train``.

    Args:
        base_program_name: path of the pickled base program without ``.p``.
        hole_node_ind: index of the node to replace, forwarded to ``change_key``.
        program: program whose ``submodules["program"]`` fills the hole.
        validset, trainset, train_config, output_type, output_size, neural,
            device: forwarded to ``execute_and_train``.
        use_valid_score: accepted but not used by this function.
        print_every: accepted but not used by this function.

    Returns:
        Whatever ``execute_and_train`` returns.
    """
    # Context manager so the pickle file is closed once loading is done.
    with open("%s.p" % base_program_name, "rb") as program_file:
        if device == 'cpu':
            base_program = CPU_Unpickler(program_file).load()
        else:
            base_program = pickle.load(program_file)

    # Result unused; call kept in case traverse has side effects on the
    # program tree -- TODO confirm.
    traverse(base_program.submodules, [])

    change_key(
        base_program.submodules, [], hole_node_ind,
        program.submodules["program"])  #should we just replace with program?
    log_and_print(print_program(base_program))

    return execute_and_train(base_program, program, validset, trainset,
                             train_config, output_type, output_size, neural,
                             device)
コード例 #10
0
ファイル: evaluate.py プロジェクト: myracheng/pronear
    def evaluate(self):
        """Patch the base program with the "baby" program and log its test score.

        Both ``<self.base_program_name>.p`` and ``<self.baby_program_name>.p``
        are loaded with ``CPU_Unpickler`` regardless of ``self.device``. The
        node at ``self.hole_node_ind`` is replaced through ``change_key``, the
        patched program is run on ``self.testset``, and ``1 - metric`` from
        ``label_correctness`` is logged.
        """
        # Context managers so both pickle files are closed after loading.
        with open("%s.p" % self.base_program_name, "rb") as base_file:
            base_program = CPU_Unpickler(base_file).load()
        with open("%s.p" % self.baby_program_name, "rb") as baby_file:
            program_baby = CPU_Unpickler(baby_file).load()

        nodes = []
        traverse(base_program.submodules, nodes)
        hole_node = nodes[self.hole_node_ind]  # original note: condition node
        change_key(base_program.submodules, hole_node[0], program_baby, hole_node[1])

        base_output_type = base_program.program.output_type
        base_output_size = base_program.program.output_size
        with torch.no_grad():
            test_input, test_output = map(list, zip(*self.testset))
            true_vals = torch.flatten(torch.stack(test_output)).float().to(self.device)
            predicted_vals = self.process_batch(base_program, test_input, base_output_type, base_output_size, self.device)

            # Only the metric is reported; the extra parameters are ignored.
            metric, _ = label_correctness(predicted_vals, true_vals, num_labels=self.num_labels)
        log_and_print("F1 score achieved is {:.4f}".format(1 - metric))
コード例 #11
0
    def run_near(self):
        """Run the NEAR search for the hole node and persist the results.

        Builds a ``ProgramGraph`` typed like ``self.hole_node[0]``, runs
        ``ASTAR_NEAR.run`` and then writes, under ``self.save_path``:

        * ``best_programs.txt`` -- string form of the (structure, info) list;
        * ``subprogram.p`` -- the best program found;
        * ``fullprogram.p`` -- the base program with the best program spliced
          in at ``self.hole_node_ind``;
        * ``parameters.txt`` -- selected instance attributes, one per line.

        Side effects: sets ``self.program_path`` and ``self.full_path``.

        Raises:
            KeyError: if one of the listed parameters is not an instance
                attribute.
        """
        train_config = {
            'lr' : self.learning_rate,
            'neural_epochs' : self.neural_epochs,
            'symbolic_epochs' : self.symbolic_epochs,
            'optimizer' : optim.Adam,
            'lossfxn' : nn.CrossEntropyLoss(weight=self.loss_weight), #todo
            'evalfxn' : label_correctness,
            'num_labels' : self.num_labels
        }

        near_input_type = self.hole_node[0].input_type
        near_output_type = self.hole_node[0].output_type
        near_input_size = self.hole_node[0].input_size
        near_output_size = self.hole_node[0].output_size

        # Initialize program graph starting from trained NN
        program_graph = ProgramGraph(DSL_DICT, CUSTOM_EDGE_COSTS, near_input_type, near_output_type, near_input_size, near_output_size,
            self.max_num_units, self.min_num_units, self.max_num_children, self.max_depth, self.penalty, ite_beta=self.ite_beta)

        # Initialize algorithm
        algorithm = ASTAR_NEAR(frontier_capacity=self.frontier_capacity)
        best_programs = algorithm.run(self.timestamp, self.base_program_name, self.hole_node_ind,
            program_graph, self.batched_trainset, self.validset, train_config, self.device)
        best_program_str = []
        if self.algorithm == "rnn":
            # special case for RNN baseline: the result is used as the program itself
            best_program = best_programs
        else:
            # Print all best programs found
            log_and_print("\n")
            log_and_print("BEST programs found:")
            for item in best_programs:
                program_struct = print_program(item["program"], ignore_constants=True)
                program_info = "struct_cost {:.4f} | score {:.4f} | path_cost {:.4f} | time {:.4f}".format(
                    item["struct_cost"], item["score"], item["path_cost"], item["time"])
                best_program_str.append((program_struct, program_info))
                print_program_dict(item)
            # The last entry is taken as the best program.
            best_program = best_programs[-1]["program"]

        # Save best programs
        with open(os.path.join(self.save_path, "best_programs.txt"), "w") as f:
            f.write(str(best_program_str))

        self.program_path = os.path.join(self.save_path, "subprogram.p")
        with open(self.program_path, "wb") as subprogram_file:
            pickle.dump(best_program, subprogram_file)

        self.full_path = os.path.join(self.save_path, "fullprogram.p")

        # Reload the base program and splice the best program into the hole.
        with open("%s.p" % self.base_program_name, "rb") as base_file:
            if self.device == 'cpu':
                base_program = CPU_Unpickler(base_file).load()
            else:
                base_program = pickle.load(base_file)

        # Result unused; call kept in case traverse has side effects on the
        # program tree -- TODO confirm.
        traverse(base_program.submodules, [])
        change_key(base_program.submodules, [], self.hole_node_ind, best_program.submodules["program"])
        with open(self.full_path, "wb") as full_file:
            pickle.dump(base_program, full_file)

        # Save parameters
        parameters = ['input_type', 'output_type', 'input_size', 'output_size', 'num_labels', 'neural_units', 'max_num_units',
            'min_num_units', 'max_num_children', 'max_depth', 'penalty', 'ite_beta', 'train_valid_split', 'normalize', 'batch_size',
            'learning_rate', 'neural_epochs', 'symbolic_epochs', 'lossfxn', 'class_weights', 'num_iter', 'num_f_epochs', 'algorithm',
            'frontier_capacity', 'initial_depth', 'performance_multiplier', 'depth_bias', 'exponent_bias', 'num_mc_samples', 'max_num_programs',
            'population_size', 'selection_size', 'num_gens', 'total_eval', 'mutation_prob', 'max_enum_depth', 'exp_id', 'base_program_name', 'hole_node_ind']
        # The with-block closes the file even if a parameter is missing.
        with open(os.path.join(self.save_path, "parameters.txt"), "w") as f:
            for p in parameters:
                f.write(p + ': ' + str(self.__dict__[p]) + '\n')
コード例 #12
0
    def __init__(self, **kwargs):
        """Load data and the base program, then evaluate or run a search.

        Every keyword argument becomes an instance attribute. Depending on
        ``self.exp_name`` (``'crim13'`` or ``'mars_an'``) the datasets are
        loaded and passed to ``prepare_datasets``; any other name logs
        ``'bad experiment name'`` and returns early, leaving the instance
        only partially initialised. The base program is then unpickled, the
        hole node is selected, and one of ``self.evaluate()``,
        ``self.neural_h()`` or ``self.run_near()`` + ``self.evaluate_final()``
        is run depending on ``self.eval`` and ``self.neurh``.
        """
        self.__dict__.update(kwargs)
        if torch.cuda.is_available():
            self.device = 'cuda:0'
            print(self.device)
        else:
            self.device = 'cpu'

        # class_weights is a comma-separated string of floats.
        self.loss_weight = torch.tensor([float(w) for w in self.class_weights.split(',')]).to(self.device)
        if self.exp_name == 'crim13':
            # load input data
            self.train_data = np.load(self.train_data)
            self.test_data = np.load(self.test_data)
            self.train_labels = np.load(self.train_labels)
            self.test_labels = np.load(self.test_labels)
            # NOTE(review): validation inputs are always forced to None in
            # this branch, so any valid_data / valid_labels passed via kwargs
            # are ignored. The previous "load if not None" branch could never
            # run (and referenced an undefined name) and has been removed.
            self.valid_data = None
            self.valid_labels = None
            assert self.train_data.shape[-1] == self.test_data.shape[-1] == self.input_size

            self.batched_trainset, self.validset, self.testset = prepare_datasets(self.train_data, self.valid_data, self.test_data, self.train_labels, self.valid_labels,
            self.test_labels, normalize=self.normalize, train_valid_split=self.train_valid_split, batch_size=self.batch_size)
        elif self.exp_name == 'mars_an':
            # train_data is a comma-separated list of files, each providing
            # "features" and "annotations" entries.
            train_datasets = self.train_data.split(",")
            train_raw_features = []
            train_raw_annotations = []
            for fname in train_datasets:
                data = np.load(fname, allow_pickle=True)
                train_raw_features.extend(data["features"])
                train_raw_annotations.extend(data["annotations"])
            test_data = np.load(self.test_data, allow_pickle=True)

            test_raw_features = test_data["features"]
            test_raw_annotations = test_data["annotations"]
            valid_raw_features = None
            valid_raw_annotations = None
            # Both default to None so prepare_datasets can still be called
            # when no validation file is given (valid_features was previously
            # left undefined in that case, raising NameError).
            valid_features = None
            valid_labels = None
            # Check the # of features of the first frame of the first video
            assert len(train_raw_features[0][0]) == len(test_raw_features[0][0]) == self.input_size

            if self.valid_data is not None:
                valid_data = np.load(self.valid_data, allow_pickle=True)
                valid_raw_features = valid_data["features"]
                valid_raw_annotations = valid_data["annotations"]
                assert len(valid_raw_features[0][0]) == self.input_size

            behave_dict = read_into_dict('../near_code_7keypoints/data/MARS_data/behavior_assignments_3class.txt')
            # Reshape the data to trajectories of length 100
            # NOTE(review): self.train_labels is passed for the test and
            # validation sets as well -- confirm this is intended.
            train_features, train_labels = preprocess(train_raw_features, train_raw_annotations, self.train_labels, behave_dict)
            test_features, test_labels = preprocess(test_raw_features, test_raw_annotations, self.train_labels, behave_dict)
            if valid_raw_features is not None and valid_raw_annotations is not None:
                valid_features, valid_labels = preprocess(valid_raw_features, valid_raw_annotations, self.train_labels, behave_dict)
            self.batched_trainset, self.validset, self.testset  = prepare_datasets(train_features, valid_features, test_features,
                                            train_labels, valid_labels, test_labels,
                                    normalize=self.normalize, train_valid_split=self.train_valid_split, batch_size=self.batch_size)
        else:
            log_and_print('bad experiment name')
            return

        # Context manager so the pickle file is closed once loading is done.
        with open("%s.p" % self.base_program_name, "rb") as program_file:
            if self.device == 'cpu':
                self.base_program = CPU_Unpickler(program_file).load()
            else:
                self.base_program = pickle.load(program_file)

        nodes = []
        traverse(self.base_program.submodules, nodes)
        log_and_print(nodes)
        # Modulo maps a negative index onto its positive equivalent.
        self.hole_node_ind %= len(nodes)

        self.hole_node = nodes[self.hole_node_ind]

        #for near on subtree
        self.curr_iter = 0
        self.program_path = None

        if self.exp_id is not None:
            self.trial = self.exp_id
        if self.eval:
            self.evaluate()
        else:
            now = datetime.now()
            # Whole-second Unix timestamp with its first four digits dropped.
            self.timestamp = str(datetime.timestamp(now)).split('.')[0][4:]
            log_and_print(self.timestamp)
            full_exp_name = "{}_{}_{}_{}".format(
            self.exp_name, self.algorithm, self.trial, self.timestamp) #unique timestamp for each near run
            self.save_path = os.path.join(self.save_dir, full_exp_name)
            if not os.path.exists(self.save_path):
                os.makedirs(self.save_path)
            init_logging(self.save_path)
            if self.neurh:
                log_and_print(self.base_program_name)
                self.neural_h()
            else:
                self.run_near()
                self.evaluate_final()