def main(args):
    """Train on each fold in the requested range and write results to disk.

    For every fold index ``i`` in ``[start, end)`` the split file
    ``<args.split_dir>/splits_<i>.csv`` is loaded via ``dataset.return_splits``,
    ``train`` is run on the (train, val, test) datasets, and the returned
    ``results`` object is pickled to ``split_<i>_results.pkl`` in
    ``args.results_dir``. A CSV with one row per fold (test/val AUC and
    accuracy as returned by ``train``) is written at the end.

    Attributes read from ``args``:
        results_dir: output directory; created (with parents) if missing.
        k_start: first fold index, or -1 to start at fold 0.
        k_end: one past the last fold index, or -1 to stop at ``args.k``.
        k: total number of folds; also decides the summary file name.
        seed: value passed to ``seed_torch`` before each fold.
        split_dir: directory containing the ``splits_<i>.csv`` files.

    The summary is named ``summary.csv`` when the number of folds run equals
    ``args.k``, otherwise ``summary_partial_<start>_<end>.csv``.
    """
    # makedirs(exist_ok=True) also creates missing parent directories and
    # avoids the check-then-create race of isdir() followed by mkdir().
    os.makedirs(args.results_dir, exist_ok=True)

    # -1 is the sentinel for "use the default bound".
    start = 0 if args.k_start == -1 else args.k_start
    end = args.k if args.k_end == -1 else args.k_end

    all_test_auc = []
    all_val_auc = []
    all_test_acc = []
    all_val_acc = []
    folds = np.arange(start, end)
    for i in folds:
        # Re-seed before every fold so each fold starts from the same seed.
        seed_torch(args.seed)
        train_dataset, val_dataset, test_dataset = dataset.return_splits(
            from_id=False,
            csv_path='{}/splits_{}.csv'.format(args.split_dir, i))

        datasets = (train_dataset, val_dataset, test_dataset)
        results, test_auc, val_auc, test_acc, val_acc = train(
            datasets, i, args)
        all_test_auc.append(test_auc)
        all_val_auc.append(val_auc)
        all_test_acc.append(test_acc)
        all_val_acc.append(val_acc)

        # Write this fold's results to a pickle file.
        filename = os.path.join(args.results_dir,
                                'split_{}_results.pkl'.format(i))
        save_pkl(filename, results)

    final_df = pd.DataFrame({
        'folds': folds,
        'test_auc': all_test_auc,
        'val_auc': all_val_auc,
        'test_acc': all_test_acc,
        'val_acc': all_val_acc
    })

    # A run covering fewer (or more) folds than args.k gets a name that
    # records the fold range it covered.
    if len(folds) != args.k:
        save_name = 'summary_partial_{}_{}.csv'.format(start, end)
    else:
        save_name = 'summary.csv'
    final_df.to_csv(os.path.join(args.results_dir, save_name))
def main(args):
    """Train on each fold in the requested range and write validation results.

    For every fold index ``i`` in ``[start, end)`` the split file
    ``<args.split_dir>/splits_<i>.csv`` is loaded via ``dataset.return_splits``,
    ``train`` is run on the (train, val) datasets, and the first value it
    returns is pickled to ``split_train_<i>_results.pkl`` in
    ``args.results_dir``. The second value returned by ``train`` is collected
    per fold and written, together with the fold indices, to
    ``summary_latest.csv``.

    Attributes read from ``args``:
        results_dir: output directory; created (with parents) if missing.
        k_start: first fold index, or -1 to start at fold 0.
        k_end: one past the last fold index, or -1 to stop at ``args.k``.
        k: total number of folds (used as the default upper bound).
        seed: value passed to ``seed_torch`` before each fold.
        split_dir: directory containing the ``splits_<i>.csv`` files.
        mode: when it contains ``'omic'``, ``args.omic_input_dim`` is set
            from ``train_dataset.genomic_features.shape[1]`` (this mutates
            ``args``).
    """
    # makedirs(exist_ok=True) also creates missing parent directories and
    # avoids the check-then-create race of isdir() followed by mkdir().
    os.makedirs(args.results_dir, exist_ok=True)

    # -1 is the sentinel for "use the default bound".
    start = 0 if args.k_start == -1 else args.k_start
    end = args.k if args.k_end == -1 else args.k_end

    val_cindex = []
    folds = np.arange(start, end)
    for i in folds:
        # Timing uses its own names so the fold bounds `start`/`end` are not
        # overwritten by timer readings.
        fold_start = timer()
        seed_torch(args.seed)
        train_dataset, val_dataset = dataset.return_splits(
            from_id=False,
            csv_path='{}/splits_{}.csv'.format(args.split_dir, i))
        print('training: {}, validation: {}'.format(len(train_dataset),
                                                    len(val_dataset)))
        datasets = (train_dataset, val_dataset)

        if 'omic' in args.mode:
            # Record the genomic feature width on args before calling train.
            args.omic_input_dim = train_dataset.genomic_features.shape[1]
            print("Genomic Dimension", args.omic_input_dim)

        val_df, cindex = train(datasets, i, args)
        val_cindex.append(cindex)

        # Write this fold's results to a pickle file.
        save_pkl(
            os.path.join(args.results_dir,
                         'split_train_{}_results.pkl'.format(i)), val_df)
        fold_end = timer()
        print('Fold %d Time: %f seconds' % (i, fold_end - fold_start))

    # The summary is always written to 'summary_latest.csv', regardless of
    # whether every fold in args.k was run.
    results_df = pd.DataFrame({'folds': folds, 'val_cindex': val_cindex})
    results_df.to_csv(os.path.join(args.results_dir, 'summary_latest.csv'))