예제 #1
0
 def get_validation_results(self):
     """Return the model's recorded evaluation results as one table.

     The entries stored under ``'validation_0'`` and ``'validation_1'`` in
     ``self.model.evals_result()`` are converted to data frames, prefixed
     with ``train_`` and ``test_`` respectively, and placed side by side.
     The row index is exposed as an ``epoch`` column.

     Returns:
         The combined data frame, with ``model`` and ``ts`` columns copied
         from ``self.atomic_metrics``.
     """
     evals = self.model.evals_result()
     # This code treats validation_0 as the training set and validation_1
     # as the test set.
     train_part = gu.create_data_frame(
         evals['validation_0']).add_prefix('train_')
     test_part = gu.create_data_frame(
         evals['validation_1']).add_prefix('test_')

     combined = pd.concat([train_part, test_part], axis=1)
     # Turn the positional row index into an explicit 'epoch' column.
     combined = combined.rename_axis('epoch').reset_index(level=0)

     # Tag every row with the identifiers of the current run.
     for key in ('model', 'ts'):
         combined[key] = self.atomic_metrics[key]
     return combined
예제 #2
0
 def get_roc_values(self):
     """Compute ROC curve points and store them in ``self.plots['roc']``.

     The false/true positive rates come from ``metrics.roc_curve`` applied
     to ``self.y_actual`` and ``self.y_predicted_prob_one``; the thresholds
     it also returns are discarded. The stored frame has ``index``, ``fpr``,
     ``tpr``, ``model`` and ``ts`` columns. Nothing is returned.
     """
     false_pos_rate, true_pos_rate, _thresholds = metrics.roc_curve(
         self.y_actual, self.y_predicted_prob_one)

     curve = gu.create_data_frame(
         {'fpr': false_pos_rate, 'tpr': true_pos_rate})
     # Expose the row position as an explicit 'index' column.
     curve = curve.rename_axis('index').reset_index(level=0)

     # Tag every row with the identifiers of the current run.
     for key in ('model', 'ts'):
         curve[key] = self.atomic_metrics[key]
     self.plots['roc'] = curve
예제 #3
0
 def confusion_matrix(self):
     """Count how often each (actual, predicted) label pair occurs.

     Returns:
         A data frame with one row per observed ``(actual, predicted)``
         combination, the number of occurrences in the ``count`` column,
         and ``model`` / ``ts`` columns copied from ``self.atomic_metrics``.
     """
     conf_df = gu.create_data_frame({
         'actual': self.y_actual,
         'predicted': self.y_predicted
     })
     # With as_index=False, size() yields a frame whose tally column is
     # named 'size' (assumes a pandas version with that behavior).
     conf_df = conf_df.groupby(['actual', 'predicted'],
                               as_index=False).size()
     # rename() returns a new frame rather than modifying in place, so the
     # result must be re-bound; otherwise the column stays named 'size'.
     conf_df = conf_df.rename(columns={'size': 'count'})
     conf_df['model'] = self.atomic_metrics['model']
     conf_df['ts'] = self.atomic_metrics['ts']
     return conf_df
예제 #4
0
 def get_pr_values(self):
     """Compute precision-recall points and store them in ``self.plots['pr']``.

     Precision and recall come from ``metrics.precision_recall_curve``
     applied to ``self.y_actual`` and ``self.y_predicted_prob_one``; the
     thresholds it also returns are discarded. The stored frame has
     ``index``, ``precision``, ``recall``, ``model`` and ``ts`` columns.
     Nothing is returned.
     """
     precision_vals, recall_vals, _thresholds = (
         metrics.precision_recall_curve(
             self.y_actual, self.y_predicted_prob_one))

     curve = gu.create_data_frame(
         {'precision': precision_vals, 'recall': recall_vals})
     # Expose the row position as an explicit 'index' column.
     curve = curve.rename_axis('index').reset_index(level=0)

     # Tag every row with the identifiers of the current run.
     for key in ('model', 'ts'):
         curve[key] = self.atomic_metrics[key]
     self.plots['pr'] = curve
예제 #5
0
 def get_prob_values(self):
     """Tabulate per-sample predictions and store them in ``self.plots['prob']``.

     Each row holds the predicted label, the actual label and the value
     from ``self.y_predicted_prob_one`` (column ``prob_class_1``), plus a
     ``classification`` column, an ``index`` column and the ``model`` /
     ``ts`` identifiers from ``self.atomic_metrics``. Nothing is returned.
     """
     # column_stack builds a single 2-D array, so the three inputs end up
     # sharing one dtype.
     stacked = np.column_stack(
         [self.y_predicted, self.y_actual, self.y_predicted_prob_one])
     frame = gu.create_data_frame(
         data=stacked, columns=['predicted', 'actual', 'prob_class_1'])

     # NOTE(review): 'Positives' marks rows where the prediction matches the
     # actual label and 'Negatives' the mismatches, i.e. correct vs.
     # incorrect predictions rather than positive/negative classes.
     is_match = frame['predicted'] == frame['actual']
     frame['classification'] = np.where(is_match, 'Positives', 'Negatives')

     # Expose the row position as an explicit 'index' column.
     frame = frame.rename_axis('index').reset_index(level=0)

     # Tag every row with the identifiers of the current run.
     for key in ('model', 'ts'):
         frame[key] = self.atomic_metrics[key]
     self.plots['prob'] = frame
예제 #6
0
 def get_features_corr_matrix(self, matrix):
     """Compute feature correlations and store them in long format.

     ``matrix`` is converted to a data frame, its column-wise correlation
     matrix is computed, and the result is melted to one row per pair of
     features. The frame is saved under
     ``self.feature_metrics['corr_matrix']``; nothing is returned.

     Args:
         matrix: Data accepted by ``gu.create_data_frame``; presumably one
             column per feature -- TODO confirm against callers.
     """
     correlations = gu.create_data_frame(matrix).corr()

     # TODO: remove this synthetic 'f' prefix once real feature names are
     # available.
     # NOTE(review): add_prefix only renames the column labels, so the
     # 'feature_1' values carry the prefix while the 'feature' values
     # (taken from the row index) do not.
     correlations = correlations.add_prefix('f')

     # Keep the row labels while melting so they can become a column below.
     long_form = correlations.melt(ignore_index=False).rename(
         columns={"variable": "feature_1"})
     long_form = long_form.rename_axis('feature').reset_index(level=0)

     # Tag every row with the identifiers of the current run.
     for key in ('model', 'ts'):
         long_form[key] = self.atomic_metrics[key]
     self.feature_metrics['corr_matrix'] = long_form
예제 #7
0
def test_create_data_frame():
    """Check that ``gu.create_data_frame`` builds a typed pandas DataFrame.

    A dict holding integer, string and float columns is converted, and the
    result must be a ``pd.DataFrame`` whose columns have the dtypes
    ``int64``, ``object`` and ``float64`` respectively.
    """
    columns = {
        'col1': [1, 2, 3],
        'col2': ['abc', 'def', 'ghi'],
        'col3': [1.0, 2.0, 3.0],
    }
    expected_dtypes = {
        'col1': 'int64',
        'col2': 'object',
        'col3': 'float64',
    }

    frame = gu.create_data_frame(columns)

    assert isinstance(frame, pd.DataFrame)
    for column, dtype_name in expected_dtypes.items():
        assert frame[column].dtypes.name == dtype_name
예제 #8
0
 def get_importance(self):
     """Collect the booster's feature scores for several importance types.

     ``self.booster.get_score`` is queried once per importance type. A type
     whose query raises is skipped after printing the exception, so the
     result may contain fewer score columns than types requested.

     Returns:
         A data frame with a ``feature`` column (taken from the row index),
         one column per successfully retrieved importance type, and
         ``model`` / ``ts`` columns copied from ``self.atomic_metrics``.
     """
     scores_by_type = {}
     for kind in ('weight', 'gain', 'cover', 'total_gain', 'total_cover'):
         try:
             scores_by_type[kind] = self.booster.get_score(
                 importance_type=kind)
         except Exception as err:
             # Best effort: report the failure and continue with the
             # remaining importance types.
             print(err)

     table = gu.create_data_frame(scores_by_type)
     # Expose the row labels as an explicit 'feature' column.
     table = table.rename_axis('feature').reset_index(level=0)

     # Tag every row with the identifiers of the current run.
     for key in ('model', 'ts'):
         table[key] = self.atomic_metrics[key]
     return table