def try_on_v(self, model, X_test, y_test, k):
    # Evaluate the model on the validation split and write one row of metrics to the sheet.
    pred = model.predict(np.array(X_test))
    accuracy, sensitivity, specificity, auc, f_score, mcc = utils.calculate_metric(y_test, pred)
    # self.file.write("accuracy:" + str(k) + str(accuracy) + "\n")
    # self.file.write("sensitivity:" + str(k) + str(sensitivity) + "\n")
    # self.file.write("specificity:" + str(k) + str(specificity) + "\n")
    self.sheet.write(k, 8, str(accuracy))
    self.sheet.write(k, 9, str(sensitivity))
    self.sheet.write(k, 10, str(specificity))
    self.sheet.write(k, 11, str(auc))
    self.sheet.write(k, 12, str(f_score))
    self.sheet.write(k, 13, str(mcc))
def try_on_test(self, model, X_test, y_test, k):
    # Evaluate the model on the test split, write metrics to the sheet,
    # and checkpoint the model whenever accuracy improves.
    pred = model.predict(np.array(X_test))
    accuracy, sensitivity, specificity, auc, f_score, mcc = utils.calculate_metric(y_test, pred)
    # self.file.write("accuracy:" + str(k) + str(accuracy) + "\n")
    # self.file.write("sensitivity:" + str(k) + str(sensitivity) + "\n")
    # self.file.write("specificity:" + str(k) + str(specificity) + "\n")
    self.sheet.write(k, 1, str(accuracy))
    self.sheet.write(k, 2, str(sensitivity))
    self.sheet.write(k, 3, str(specificity))
    self.sheet.write(k, 4, str(auc))
    self.sheet.write(k, 5, str(f_score))
    self.sheet.write(k, 6, str(mcc))
    if self.accuracy_max < accuracy:
        model.save('./my_model.h5')
        self.accuracy_max = accuracy
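# The utils.calculate_metric helper used by try_on_v/try_on_test above is not shown.
# A minimal sketch of what it is assumed to compute from binary labels and predicted
# scores (accuracy, sensitivity, specificity, AUC, F-score, MCC). The 0.5 threshold
# and the metric choices are assumptions, not the project's actual implementation.
import numpy as np
from sklearn.metrics import (accuracy_score, confusion_matrix, f1_score,
                             matthews_corrcoef, roc_auc_score)

def calculate_metric(y_true, y_score):
    y_score = np.asarray(y_score).ravel()
    y_pred = (y_score >= 0.5).astype(int)  # assumed decision threshold
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    accuracy = accuracy_score(y_true, y_pred)
    sensitivity = tp / (tp + fn)   # true positive rate (recall)
    specificity = tn / (tn + fp)   # true negative rate
    auc = roc_auc_score(y_true, y_score)
    f_score = f1_score(y_true, y_pred)
    mcc = matthews_corrcoef(y_true, y_pred)
    return accuracy, sensitivity, specificity, auc, f_score, mcc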
test_loader = cifar_loader(batch_test)

if torch.cuda.is_available():
    # Release unnecessary cached memory on the GPU before evaluation.
    torch.cuda.empty_cache()

# ----------------- TESTING -----------------
test_losses = 0
precision, recall, f1, accuracy = [], [], [], []

with torch.no_grad():
    for i, data in enumerate(test_loader):
        X, y = data[0].to(device), data[1].to(device)

        outputs = net(X)  # forward pass: the network's predictions for this batch
        test_losses += criterion(outputs, y)

        predicted_classes = torch.max(outputs, 1)[1]  # class index with the highest score

        # Calculate P/R/F1/A metrics for the batch.
        for acc, metric in zip(
                (precision, recall, f1, accuracy),
                (precision_score, recall_score, f1_score, accuracy_score)):
            acc.append(calculate_metric(metric, y.cpu(), predicted_classes.cpu()))

# print(
#     f"\nEpoch {epoch + 1}/{num_epochs}, training loss: {epoch / len(train_loader)}, validation loss: {test_losses / len(test_loader)}")
print_scores(precision, recall, f1, accuracy, len(test_loader))
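# The calculate_metric and print_scores helpers in the testing loop above are assumed
# to be thin wrappers along these lines; the 'macro' averaging and the report format
# are assumptions, not the original project's code.
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

def calculate_metric(metric_fn, true_y, pred_y):
    # accuracy_score takes no 'average' argument; the other sklearn metrics do.
    if metric_fn is accuracy_score:
        return metric_fn(true_y, pred_y)
    return metric_fn(true_y, pred_y, average="macro")

def print_scores(p, r, f1, a, batch_count):
    # Report each metric averaged over all test batches.
    for name, scores in zip(("precision", "recall", "F1", "accuracy"), (p, r, f1, a)):
        print(f"\t{name}: {sum(scores) / batch_count:.4f}")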
    if filter_data == "Y" or filter_data == "N":
        break
    else:
        print("Enter valid input (Y/N)")
        continue

data_filter = False
min_edits = 0
if filter_data == "Y":
    data_filter = True
    while True:
        try:
            min_edits = int(input("Enter minimum number of edits required for each subject (positive integer): "))
            if min_edits >= 1:
                break
            else:
                print("Please enter a positive integer")
        except ValueError:
            print("Please enter a positive integer")
            continue

main_table_df, deadline_table_df, codestates_table_df = edit_data_filter.load_main_table(read_path, data_filter, min_edits)

# Verify that each table has the columns the metric needs before computing it.
checker = utils.check_attributes(main_table_df, ["SubjectID", "Order", "EventType", "AssignmentID", "ParentEventID", "EditType"])
checker2 = utils.check_attributes(deadline_table_df, ["AssignmentID", "X-Deadline"])
checker3 = utils.check_attributes(codestates_table_df, ["CodeStateID", "Code"])

if checker and checker2 and checker3:
    eo_map = utils.calculate_metric(main_table_df, calculate_eo, codestates_table_df)
    out.info(eo_map)
    utils.write_metric_map("EarlyandOften", eo_map, write_path)
                                             num_iteration=clf.best_iteration) / kf.n_splits

# Store feature importance for this fold.
fold_importance_df = pd.DataFrame()
fold_importance_df['feats'] = train.columns
fold_importance_df['importance'] = clf.feature_importance(importance_type='gain')
feature_importance_df = pd.concat([feature_importance_df, fold_importance_df], axis=0)

# Average and get the mean feature importance across folds; keep the top 30 features.
feature_importance_df = (feature_importance_df.groupby('feats')['importance']
                         .mean().reset_index()
                         .sort_values(by='importance', ascending=False).head(30))
logger.debug(f'feature importance for {cur_type}')
logger.debug(feature_importance_df)

# Log val score and feature importance.
cv_score = calculate_metric(oof, cur_type_target, np.full(len(cur_type_train), cur_type))
logger.debug(f'CV score for {cur_type}: {cv_score}')
cv_score_list.append(cv_score)

# Make submission file.
if not is_debug_mode:
    cv_score = sum(cv_score_list) / len(cv_score_list)
    sub = feather.read_dataframe('data/input/sample_submission.feather')
    sub['scalar_coupling_constant'] = predictions
    sub.to_csv(f'data/output/sub_{now}_{cv_score:.3f}.csv', index=False)
    logger.debug(f'data/output/sub_{now}_{cv_score:.3f}.csv')
val_data = lgb.Dataset(x_val, label=y_val, categorical_feature=categorical_cols)
clf = lgb.train(params, train_data, NUM_ROUNDS,
                valid_sets=[train_data, val_data],
                verbose_eval=False,
                early_stopping_rounds=100,
                callbacks=callbacks)

val_pred = clf.predict(x_val, num_iteration=clf.best_iteration)
predictions[cur_type_idx_test] = clf.predict(test.iloc[cur_type_idx_test],
                                             num_iteration=clf.best_iteration)

# Store feature importance for this fold.
feature_importance_df = pd.DataFrame()
feature_importance_df['feats'] = train.columns
feature_importance_df['importance'] = clf.feature_importance(importance_type='gain')
feature_importance_df = feature_importance_df.sort_values(by='importance', ascending=False).head(30)
logger.debug(f'feature importance for {cur_type}')
logger.debug(feature_importance_df)

# Log val score and feature importance.
val_score = calculate_metric(val_pred, y_val, np.full(len(y_val), cur_type))
logger.debug(f'val score for {cur_type}: {val_score}')
val_score_list.append(val_score)

# Make submission file.
if not is_debug_mode:
    val_score = sum(val_score_list) / len(val_score_list)
    sub = feather.read_dataframe('data/input/sample_submission.feather')
    sub['scalar_coupling_constant'] = predictions
    sub.to_csv(f'data/output/sub_{now}_{val_score:.3f}.csv', index=False)
    logger.debug(f'data/output/sub_{now}_{val_score:.3f}.csv')
                verbose_eval=False,
                early_stopping_rounds=100,
                callbacks=callbacks)

val_pred = clf.predict(val_cur_type, num_iteration=clf.best_iteration)

# Store feature importance for this fold.
feature_importance_df = pd.DataFrame()
feature_importance_df['feats'] = train.columns
feature_importance_df['importance'] = clf.feature_importance(importance_type='gain')

predictions[cur_type_idx_test] = clf.predict(test.iloc[cur_type_idx_test],
                                             num_iteration=clf.best_iteration)

# Log val score and feature importance.
val_score = calculate_metric(val_pred, target_val, np.full(len(val_cur_type), cur_type))
logger.debug(f'Val score for {cur_type}: {val_score}')
val_score_list.append(val_score)

# feature_importance_df = feature_importance_df.groupby('feats')['importance'].mean().reset_index()
feature_importance_df = feature_importance_df.sort_values(by='importance', ascending=False).head(30)
logger.debug(f'feature importance for {cur_type}')
logger.debug(feature_importance_df)

# Make submission file.
if not is_debug_mode:
    val_score = sum(val_score_list) / len(val_score_list)
    sub = feather.read_dataframe('data/input/sample_submission.feather')
    sub['scalar_coupling_constant'] = predictions
    sub.to_csv(f'data/output/sub_{now}_{val_score:.3f}.csv', index=False)
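# In the three LightGBM training loops above, calculate_metric takes (predictions,
# targets, coupling types). A minimal sketch, assuming it implements a group-wise
# mean log MAE over coupling types for the scalar coupling constant target; the
# 'floor' clamp is an assumption to keep the log finite.
import numpy as np
import pandas as pd

def calculate_metric(y_pred, y_true, types, floor=1e-9):
    df = pd.DataFrame({'type': types,
                       'err': np.abs(np.asarray(y_true) - np.asarray(y_pred))})
    # Log of the mean absolute error per coupling type, averaged over types.
    return (df.groupby('type')['err'].mean()
              .apply(lambda mae: np.log(max(mae, floor)))
              .mean())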