def calc_feature_importance(self, model, x_train, x_test, y_test, feature_names):
    """Permutation feature importance: how much accuracy drops when a feature is shuffled."""
    # x_train is kept for interface compatibility; it is unused here
    test_df = pd.DataFrame(y_test)
    cols = test_df.columns.values.tolist()
    target_col = cols[0] if len(cols) == 1 else cols

    # Baseline accuracy on the unperturbed test set
    y_hat = model.predict(x_test)
    pred_df = Model.gen_pred_df(test_df, y_hat, target_col)
    base_score = Evaluator.eval_acc(pred_df)

    num_samples = x_test.shape[0]
    scores = []
    for i in range(len(feature_names)):
        # Shuffle one feature column and measure the resulting accuracy drop
        x_perm = x_test.copy()
        perm = np.random.permutation(num_samples)
        x_perm[:, i] = x_test[perm, i]
        y_hat_perm = model.predict(x_perm)
        pred_df = Model.gen_pred_df(test_df, y_hat_perm, target_col)
        col_score = Evaluator.eval_acc(pred_df)
        scores.append(base_score - col_score)

    feature_df = pd.DataFrame({'features': feature_names, 'score': scores})
    feature_df = feature_df.sort_values('score', ascending=False)
    return feature_df
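# A minimal self-contained sketch of the same permutation-importance idea,
# using scikit-learn and numpy rather than the repo's Model/Evaluator classes
# (the dataset and classifier choices here are illustrative assumptions):
import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
x_tr, x_te, y_tr, y_te = train_test_split(X, y, random_state=0)
clf = RandomForestClassifier(random_state=0).fit(x_tr, y_tr)

base = clf.score(x_te, y_te)
drops = []
for i in range(x_te.shape[1]):
    x_perm = x_te.copy()
    x_perm[:, i] = np.random.permutation(x_te[:, i])  # break the feature/target link
    drops.append(base - clf.score(x_perm, y_te))      # accuracy lost == importance
print(sorted(zip(load_iris().feature_names, drops), key=lambda t: -t[1]))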
def get_evaluator(self):
    distance_metric = EuclideanPairwiseDistance()
    scorer = CMCScore()
    evaluator = Evaluator(distance_metric, scorer, k_threshold=self.k_threshold)
    return evaluator
def setup_experiment():
    # Set up modules & hyperparameters for the XOR experiment
    XOR_experiment = Experiment()

    XOR_model = TF_MLP_Model()
    XOR_model.in_size = 2
    XOR_model.hidden_sizes = [20, 20, 20, 20]
    XOR_model.out_size = 1
    XOR_model.activation = "relu"

    XOR_optimizer = TF_Adam_Optimizer()
    XOR_optimizer.learning_rate = 0.01
    XOR_optimizer.epsilon = 0.1

    XOR_trainer = TF_Trainer()
    XOR_trainer.random_seed = 0
    XOR_trainer.data_prefix = "data/xor"
    XOR_trainer.example_parser = XOR_Example_Parser()
    XOR_trainer.dataset_shuffle_buffer_size = 4
    XOR_trainer.loss = TF_MSE_Loss()
    XOR_trainer.optimizer = XOR_optimizer
    XOR_trainer.load_checkpoint_dir = None
    XOR_trainer.start_epoch = 0
    XOR_trainer.n_epochs = 50
    XOR_trainer.batch_size = 4
    XOR_trainer.log_period = 1
    XOR_trainer.save_period = 50

    XOR_evaluator = Evaluator()

    XOR_experiment.model = XOR_model
    XOR_experiment.trainer = XOR_trainer
    XOR_experiment.evaluator = XOR_evaluator
    return XOR_experiment
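# For reference, the target function here is XOR; a minimal sketch of the four
# training examples (numpy assumed; the trainer above presumably reads them
# from files under "data/xor" via XOR_Example_Parser instead). Note that
# batch_size and the shuffle buffer are both 4, so each batch is one full
# pass over this dataset:
import numpy as np

xor_x = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
xor_y = np.array([[0], [1], [1], [0]], dtype=np.float32)  # XOR truth table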
def class_pipeline(self):
    # ETL
    etl = self.get_etl_pipeline()
    one_hot_target = etl.target_is_categorical and (self.model_type == 'nn')
    train_df, test_df = etl.run(one_hot_target=one_hot_target)
    target_arr = etl.target if isinstance(etl.target, list) else [etl.target]
    num_features = len([x for x in train_df.columns if x not in target_arr])
    num_targets = len(target_arr)

    # Model Training
    model = self.get_model(num_features, num_targets)
    pred_df, shap_df = model.run(train_df, test_df, etl.target, calc_shap=True)

    # Model Evaluation
    acc = Evaluator.eval_acc(pred_df)
    print('Model Has Test Accuracy Of {}%'.format(acc * 100))
    return pred_df, shap_df
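# Hypothetical invocation; the class name and constructor argument below are
# assumptions for illustration, not the repo's actual API. It assumes the
# enclosing class provides model_type and get_etl_pipeline():
pipeline = ClassificationPipeline(model_type='nn')  # name is an assumption
pred_df, shap_df = pipeline.class_pipeline()
print(shap_df.head())  # per-feature SHAP values for the test predictions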
def test_elite(self, evaluator: Evaluator, seed: Union[None, List[Any]]) -> float:
    # Evaluate the elite individual and return its mean score across runs
    _, avg, _ = evaluator.test(self.get_elite(), seed)
    return sum(avg) / len(avg)
# Wire up a particle-control problem: a linear controller acting on a particle
# under gravity, optimized by gradient descent. Uses the TF1-style graph/session
# API; observer_dim, action_dim, state_dim, and dtype are assumed to be defined
# earlier in the script.
controller = LinearRegressionController(observer_dim, action_dim)
observer = IdentityObserver(state_dim)
designer = IdentityDesigner(observer_dim, 1.0)  # TODO: should this also take in an action?
agent = ParticleAgent([observer], controller, designer)
actuators = {agent: [ForceActuator(i, 3) for i in range(3)]}
environment = GravityEnvironment(tf.constant([0.0, 0.0, -9.81], dtype=dtype))
dt = 0.05
simulator = ParticleSimulator(environment, dt, [agent], actuators)

problem_class = ProblemClass(simulator)
# Reward: negative distance to the target state (position (1, 1, 1), zero velocity)
evaluator = Evaluator(lambda x: -1.0 * tf.linalg.norm(
    x - tf.constant([1.0, 1.0, 1.0, 0.0, 0.0, 0.0], dtype=dtype)))
problem_class.add_evaluator(evaluator)
problem_class.add_agent(agent)

variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
sess = tf.Session()  # assumed: the original used `sess` without creating it
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())
optimizer = GradientDescentOptimizer(problem_class, sess, variables)
optimizer.optimize(lr=1.0, max_iters=100, num_steps=100)
def class_pipeline(self, acc_percent_threshold, verbose):
    # ETL
    etl = self.get_etl_pipeline()
    if verbose:
        print("Using The " + etl.name + " Data Set")
    one_hot_target = (self.model_type == 'nn')
    train_df, val_df, test_df = etl.run(one_hot_target=one_hot_target, val_set=True)
    target_arr = etl.target if isinstance(etl.target, list) else [etl.target]

    # Recursive feature elimination: drop the least important feature each
    # round until validation accuracy falls below the threshold fraction of
    # the best validation accuracy seen so far.
    fimp_object = self.get_fimp_object()
    features = [x for x in train_df.columns if x not in target_arr]
    new_features = features
    best_test_acc = 0
    best_val_acc = 0
    val_acc = 0
    feature_df = pd.DataFrame()
    while val_acc >= best_val_acc * acc_percent_threshold:
        num_features = len(new_features)
        num_targets = len(target_arr)

        # Restrict all splits to the surviving feature set
        cols = new_features + target_arr
        train_df = train_df[cols]
        val_df = val_df[cols]
        test_df = test_df[cols]

        # Model Training
        model = self.get_model(num_features, num_targets)
        if verbose:
            print("Training On " + model.name)
        pred_test_df, pred_val_df, feature_import_df = model.run(
            train_df, test_df, etl.target, val_df=val_df,
            fimp_object=fimp_object, verbose=verbose)

        # Model Evaluation
        val_acc = Evaluator.eval_acc(pred_val_df)
        test_acc = Evaluator.eval_acc(pred_test_df)
        best_val_acc = max(val_acc, best_val_acc)
        best_test_acc = max(test_acc, best_test_acc)
        cont_feature_selection = val_acc >= best_val_acc * acc_percent_threshold
        if verbose:
            print('Model With Features {} \nHas Test Accuracy Of {}% (Best={}%) - Continue={}\n'
                  .format(new_features, test_acc * 100, best_test_acc * 100,
                          cont_feature_selection))

        # Record this round's results
        feature_iter_df = pd.DataFrame({
            'features': [new_features],
            'num_features': num_features,
            'val_acc': val_acc,
            'test_acc': test_acc,
            'continue': cont_feature_selection
        })
        feature_df = pd.concat([feature_df, feature_iter_df], sort=False)

        if cont_feature_selection:
            features = new_features
            # Drop the least important feature for the next round
            new_features = feature_import_df[:-1]['features'].values.tolist()
            if len(new_features) == 0:
                break
    return feature_df
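# Hypothetical invocation, assuming an instance of the enclosing pipeline
# class (as in the earlier sketch); a threshold of 0.98 keeps pruning features
# while validation accuracy stays within 98% of the best seen:
feature_df = pipeline.class_pipeline(acc_percent_threshold=0.98, verbose=True)
print(feature_df[['num_features', 'val_acc', 'test_acc', 'continue']])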
def __init__(self):
    self.logger = Logger(name='doppel').get_logger()
    self.vectorizer = Vectorizer()
    self.evaluator = Evaluator()