예제 #1
0
    def calc_feature_importance(self, model, x_train, x_test, y_test, feature_names):
        """Permutation feature importance.

        For each feature column, shuffle that column in ``x_test``,
        re-predict, and record the accuracy drop relative to the
        unpermuted baseline: larger drop means the feature matters more.

        Parameters
        ----------
        model : fitted estimator exposing ``.predict(X)``
        x_train : unused; kept for interface compatibility
        x_test : 2-D array of test features (indexable as ``x_test[:, i]``)
        y_test : test targets (anything ``pd.DataFrame`` accepts)
        feature_names : column names aligned with ``x_test``'s columns

        Returns
        -------
        pd.DataFrame
            Columns ``features`` and ``score``, sorted by score descending.
        """
        test_df = pd.DataFrame(y_test)
        cols = test_df.columns.values.tolist()
        # Single-target frames pass a scalar column name; multi-target a list.
        target_col = cols[0] if len(cols) == 1 else cols

        y_hat = model.predict(x_test)
        pred_df = Model.gen_pred_df(test_df, y_hat, target_col)
        base_score = Evaluator.eval_acc(pred_df)  # accuracy before permuting

        num_samples = x_test.shape[0]

        scores = []
        for i in range(len(feature_names)):
            x_perm = x_test.copy()
            # Shuffle only column i; all other features stay intact.
            perm = np.random.permutation(num_samples)
            x_perm[:, i] = x_test[perm, i]

            y_hat_perm = model.predict(x_perm)
            pred_df = Model.gen_pred_df(test_df, y_hat_perm, target_col)
            col_score = Evaluator.eval_acc(pred_df)
            scores.append(base_score - col_score)

        feature_df = pd.DataFrame({'features': feature_names, 'score': scores})
        return feature_df.sort_values('score', ascending=False)
예제 #2
0
    def calc_feature_importance(self, model, x_train, x_test, y_test,
                                feature_names):
        """Compute permutation importance for every feature.

        Each feature column of ``x_test`` is shuffled in turn; the score
        recorded for that feature is ``baseline accuracy - permuted
        accuracy``, so higher means more important.

        Parameters
        ----------
        model : fitted estimator exposing ``.predict(X)``
        x_train : unused; kept for interface compatibility
        x_test : 2-D feature array supporting ``x_test[:, i]`` indexing
        y_test : test targets (consumed via ``pd.DataFrame``)
        feature_names : names aligned with ``x_test`` columns

        Returns
        -------
        pd.DataFrame
            ``features``/``score`` columns, sorted by ``score`` descending.
        """
        test_df = pd.DataFrame(y_test)
        cols = test_df.columns.values.tolist()
        if len(cols) == 1:
            # Single target: pass the scalar column name, not a 1-list.
            target_col = cols[0]
        else:
            target_col = cols

        y_hat = model.predict(x_test)
        pred_df = Model.gen_pred_df(test_df, y_hat, target_col)
        # Accuracy on unpermuted data — the reference point for all deltas.
        base_score = Evaluator.eval_acc(pred_df)

        num_samples = x_test.shape[0]

        scores = []
        for i in range(len(feature_names)):
            x_perm = x_test.copy()
            # Permute row order within column i only.
            perm = np.random.permutation(num_samples)
            x_perm[:, i] = x_test[perm, i]

            y_hat_perm = model.predict(x_perm)
            pred_df = Model.gen_pred_df(test_df, y_hat_perm, target_col)
            col_score = Evaluator.eval_acc(pred_df)
            scores.append(base_score - col_score)

        feature_df = pd.DataFrame({'features': feature_names, 'score': scores})
        feature_df = feature_df.sort_values('score', ascending=False)

        return feature_df
예제 #3
0
    def get_evaluator(self):
        """Build an Evaluator wired with Euclidean pairwise distances and
        CMC scoring, using this object's ``k_threshold``."""
        return Evaluator(EuclideanPairwiseDistance(),
                         CMCScore(),
                         k_threshold=self.k_threshold)
예제 #4
0
def setup_experiment():
    """Assemble the XOR experiment: MLP model, Adam-driven TF trainer, evaluator.

    Only wires up configuration objects — no training happens here.

    Returns
    -------
    Experiment
        Fully configured experiment ready to be run by the caller.
    """
    # The original used `if True:` blocks purely as visual grouping; plain
    # comments do that job without the fake control flow.
    xor_experiment = Experiment()

    # Model: small ReLU MLP, 2 inputs -> 1 output.
    xor_model = TF_MLP_Model()
    xor_model.in_size = 2
    xor_model.hidden_sizes = [20, 20, 20, 20]
    xor_model.out_size = 1
    xor_model.activation = "relu"

    # Optimizer hyperparameters.
    xor_optimizer = TF_Adam_Optimizer()
    xor_optimizer.learning_rate = 0.01
    xor_optimizer.epsilon = 0.1

    # Trainer wiring. (The original also built an unused TF_MSE_Loss and
    # XOR_Example_Parser that were immediately shadowed by fresh instances;
    # those dead allocations are dropped.)
    xor_trainer = TF_Trainer()
    xor_trainer.random_seed = 0

    xor_trainer.data_prefix = "data/xor"
    xor_trainer.example_parser = XOR_Example_Parser()
    xor_trainer.dataset_shuffle_buffer_size = 4

    xor_trainer.loss = TF_MSE_Loss()
    xor_trainer.optimizer = xor_optimizer

    xor_trainer.load_checkpoint_dir = None
    xor_trainer.start_epoch = 0

    xor_trainer.n_epochs = 50
    xor_trainer.batch_size = 4
    xor_trainer.log_period = 1
    xor_trainer.save_period = 50

    xor_experiment.model = xor_model
    xor_experiment.trainer = xor_trainer
    xor_experiment.evaluator = Evaluator()

    return xor_experiment
예제 #5
0
    def class_pipeline(self):
        """End-to-end classification pipeline: ETL -> train -> evaluate.

        Returns
        -------
        tuple(pd.DataFrame, pd.DataFrame)
            ``(pred_df, shap_df)`` — test-set predictions and SHAP values.
        """
        # ETL
        etl = self.get_etl_pipeline()
        # One-hot encode the target only for NN models on categorical targets.
        # NOTE(review): original used bitwise `&`; `and` is equivalent here
        # assuming target_is_categorical is a plain bool — confirm.
        one_hot_target = etl.target_is_categorical and self.model_type == 'nn'
        train_df, test_df = etl.run(one_hot_target=one_hot_target)

        # Normalise the target spec to a list so column filtering is uniform.
        if isinstance(etl.target, list):
            target_arr = etl.target
        else:
            target_arr = [etl.target]

        num_features = len([x for x in train_df.columns if x not in target_arr])
        num_targets = len(target_arr)

        # Model Training
        model = self.get_model(num_features, num_targets)
        pred_df, shap_df = model.run(train_df, test_df, etl.target,
                                     calc_shap=True)

        # Model Evaluation
        acc = Evaluator.eval_acc(pred_df)
        print('Model Has Test Accuracy Of {}%'.format(acc * 100))

        return pred_df, shap_df
예제 #6
0
 def test_elite(self, evaluator: Evaluator,
                seed: Union[None, List[Any]]) -> float:
     """Run the evaluator's test on the current elite and return the mean
     of the averaged scores it reports."""
     _, avg_scores, _ = evaluator.test(self.get_elite(), seed)
     return sum(avg_scores) / len(avg_scores)
예제 #7
0
    
    
    # NOTE(review): fragment of a larger function — `sess`, `observer_dim`,
    # `action_dim`, `state_dim`, and `dtype` are defined upstream and not
    # visible here. TF1-style session API (tf.get_collection / GraphKeys).

    # Agent pieces: linear controller, pass-through observer/designer.
    controller = LinearRegressionController(observer_dim, action_dim)
    observer = IdentityObserver(state_dim)
    designer = IdentityDesigner(observer_dim, 1.0) #TODO: should this also take in an action?
    
    # One particle agent with a 3-axis force actuator in standard gravity.
    agent = ParticleAgent([observer], controller, designer)
    actuators = {agent: [ForceActuator(i, 3) for i in range(3)]}
    environment = GravityEnvironment(tf.constant([0.0, 0.0, -9.81], dtype=dtype))
    
    dt = 0.05
    simulator = ParticleSimulator(environment, dt, [agent], actuators)

    problem_class = ProblemClass(simulator)
    
    # Reward is negative distance to the target state [1,1,1, 0,0,0]
    # (presumably position then velocity — confirm against ParticleSimulator).
    evaluator = Evaluator(lambda x: -1.0 * tf.linalg.norm(x - tf.constant([1.0, 1.0, 1.0, 0.0, 0.0, 0.0], dtype=dtype)))
    problem_class.add_evaluator(evaluator)
    problem_class.add_agent(agent)
    
    # Optimize all trainable variables with plain gradient descent.
    variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    
  
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    optimizer = GradientDescentOptimizer(problem_class, sess, variables)
    
    optimizer.optimize(lr=1.0, max_iters=100, num_steps=100)

    
    
    def class_pipeline(self, acc_percent_threshold, verbose):
        """Classification pipeline with iterative backward feature elimination.

        Repeatedly trains a model, drops the least important feature (per the
        model's feature-importance output), and retrains, continuing while the
        validation accuracy stays at or above
        ``best_val_acc * acc_percent_threshold``.

        Parameters
        ----------
        acc_percent_threshold : float
            Fraction of the best validation accuracy that each round must
            reach for elimination to continue (e.g. 0.95).
        verbose : bool
            Print per-round progress when true.

        Returns
        -------
        pd.DataFrame
            One row per elimination round: features used, counts,
            val/test accuracy, and whether selection continued.
        """
        # ETL
        etl = self.get_etl_pipeline()
        if verbose:
            print("Using The " + etl.name + " Data Set")
        # Neural-net models get a one-hot target; others keep raw labels.
        one_hot_target = (self.model_type == 'nn')
        train_df, val_df, test_df = etl.run(one_hot_target=one_hot_target,
                                            val_set=True)

        # Normalise target spec to a list so column filtering is uniform.
        if isinstance(etl.target, list):
            target_arr = etl.target
        else:
            target_arr = [etl.target]

        fimp_object = self.get_fimp_object()

        features = [x for x in train_df.columns if x not in target_arr]
        new_features = features
        best_test_acc = 0
        best_val_acc = 0
        prev_test_acc = 0
        val_acc = 0

        feature_df = pd.DataFrame()
        # First iteration always runs (0 >= 0); subsequent iterations run only
        # while val accuracy stays within threshold of the best seen so far.
        while val_acc >= best_val_acc * acc_percent_threshold:
            num_features = len(new_features)
            num_targets = len(target_arr)

            # Construct Train & Test DFs
            # Narrow all splits to the surviving features (+ target columns).
            cols = new_features + target_arr
            train_df = train_df[cols]
            val_df = val_df[cols]
            test_df = test_df[cols]

            # Model Training
            model = self.get_model(num_features, num_targets)
            if verbose:
                print("Training On " + model.name)
            pred_test_df, pred_val_df, feature_import_df = model.run(
                train_df,
                test_df,
                etl.target,
                val_df=val_df,
                fimp_object=fimp_object,
                verbose=verbose)

            # Model Evaluation
            val_acc = Evaluator.eval_acc(pred_val_df)
            test_acc = Evaluator.eval_acc(pred_test_df)
            best_val_acc = max(val_acc, best_val_acc)
            best_test_acc = max(test_acc, best_test_acc)

            # Selection stops when this round's val accuracy fell too far
            # below the best round's.
            cont_feature_selection = val_acc >= best_val_acc * acc_percent_threshold

            if verbose:
                print(
                    'Model With Features {} \nHas Test Accuracy Of {}% (Best={}%) - Continue={}\n'
                    .format(new_features, test_acc * 100, best_test_acc * 100,
                            cont_feature_selection))

            # Record this round's outcome (features stored as a single cell).
            feature_iter_df = pd.DataFrame({
                'features': [new_features],
                'num_features': num_features,
                'val_acc': val_acc,
                'test_acc': test_acc,
                'continue': cont_feature_selection
            })
            feature_df = pd.concat([feature_df, feature_iter_df], sort=False)
            if cont_feature_selection:
                features = new_features
                prev_test_acc = test_acc
                # Drop the last (least important) feature; feature_import_df
                # is presumably sorted most->least important — confirm.
                new_features = feature_import_df[:-1][
                    'features'].values.tolist()
                if len(new_features) == 0:
                    break
        return feature_df
예제 #9
0
 def __init__(self):
     """Wire up the shared service objects this instance depends on."""
     log_factory = Logger(name='doppel')
     self.logger = log_factory.get_logger()
     self.vectorizer = Vectorizer()
     self.evaluator = Evaluator()