def train_epochs(args, model, optimizer, params, dicts):
    """Main loop: trains the model and runs evaluation on the train/test sets."""
    metrics_hist = defaultdict(lambda: [])
    metrics_hist_te = defaultdict(lambda: [])
    metrics_hist_tr = defaultdict(lambda: [])

    test_only = args.test_model is not None
    print("\n\ntest_only: " + str(test_only))

    # Train for n_epochs unless the criterion metric does not improve for [patience] epochs.
    for epoch in range(args.n_epochs):
        # Only evaluate on the train/test sets on the very last epoch.
        if epoch == 0 and not args.test_model:
            model_dir = os.path.join(MODEL_DIR, '_'.join([args.model, args.desc,
                                                          time.strftime('%b_%d_%H:%M', time.gmtime())]))
            os.mkdir(model_dir)
        elif args.test_model:
            model_dir = os.getcwd()  # just save things to where this script was called

        metrics_all = one_epoch(model, optimizer, epoch, args.n_epochs, args.batch_size, args.data_path,
                                test_only, dicts, model_dir, args.samples, args.gpu, args.debug, args.quiet)

        # Distribute results from metrics_all to the respective history dicts.
        for name in metrics_all[0].keys():
            metrics_hist[name].append(metrics_all[0][name])
        for name in metrics_all[1].keys():
            metrics_hist_te[name].append(metrics_all[1][name])
        for name in metrics_all[2].keys():
            metrics_hist_tr[name].append(metrics_all[2][name])
        metrics_hist_all = (metrics_hist, metrics_hist_te, metrics_hist_tr)

        # Save metrics, model, and params at each epoch.
        persistence.save_everything(args, metrics_hist_all, model, model_dir, params, args.criterion)

        if test_only:
            break

        if args.criterion in metrics_hist.keys():
            if early_stop(metrics_hist, args.criterion, args.patience):
                # Stop training, run the final evaluation on the test and train sets, then exit.
                print("%s hasn't improved in %d epochs, early stopping..." % (args.criterion, args.patience))
                test_only = True
                # Load the best model (by criterion) for the final test.
                model = torch.load('%s/model_best_%s.pth' % (model_dir, args.criterion))

    return epoch + 1
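
# The early_stop helper used above is defined elsewhere in the repo. A minimal sketch of a
# patience-based check is given below for reference only, assuming the convention that higher
# is better for every criterion except a "loss"-style metric; the real helper may differ.
def early_stop_sketch(metrics_hist, criterion, patience):
    """Return True if `criterion` has not improved in the last `patience` epochs."""
    import numpy as np
    if len(metrics_hist[criterion]) < patience:
        return False
    if criterion.startswith("loss"):
        best_epoch = np.nanargmin(metrics_hist[criterion])  # lower is better for loss-style metrics
    else:
        best_epoch = np.nanargmax(metrics_hist[criterion])  # higher is better otherwise
    # No improvement if the best epoch is more than `patience` epochs old.
    return best_epoch < len(metrics_hist[criterion]) - patience
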
def train_epochs(args, model, optimizer, params, dicts, struc_feats, struc_labels):
    """Main loop: trains the model and runs evaluation on the train/test sets."""
    metrics_hist = defaultdict(lambda: [])
    metrics_hist_te = defaultdict(lambda: [])
    metrics_hist_tr = defaultdict(lambda: [])

    test_only = args.test_model is not None
    print("\n\ntest_only: " + str(test_only))

    # Convert the structured features to CSR sparse matrix form.
    X = struc_feats.tocsr()
    print(X.shape[0])

    # Split into train, val, and test --> the split sizes are passed as args.
    X_train = X[:args.len_train]
    y_train = struc_labels[:args.len_train]
    X_val = X[args.len_train:args.len_train + args.len_val]
    X_test = X[args.len_train + args.len_val:args.len_train + args.len_val + args.len_test]

    # Standardize features (the scaler is fit on the training split only).
    scaler = MaxAbsScaler().fit(X_train)
    X_train_std = scaler.transform(X_train)
    X_val_std = scaler.transform(X_val)
    X_test_std = scaler.transform(X_test)

    opt_thresh = 0.5  # placeholder, only needed when predicting on the test set

    # Train for n_epochs unless the criterion metric does not improve for [patience] epochs.
    for epoch in range(args.n_epochs):
        # Only evaluate on the train/test sets on the very last epoch.
        if epoch == 0 and not args.test_model:
            model_dir = os.path.join(MODEL_DIR, '_'.join([args.model, args.desc,
                                                          time.strftime('%b_%d_%H:%M', time.gmtime())]))
            os.mkdir(model_dir)
        elif args.test_model:
            model_dir = os.getcwd()  # just save things to where this script was called

        start = time.time()
        metrics_all = one_epoch(model, optimizer, epoch, args.n_epochs, args.batch_size, args.data_path,
                                test_only, dicts, model_dir, args.gpu, args.quiet,
                                X_train_std, X_val_std, X_test_std, y_train,
                                args.train_frac, args.test_frac, opt_thresh)
        end = time.time()
        print("\nEpoch Duration: " + str(end - start))

        # Distribute results from metrics_all to the respective history dicts.
        for name in metrics_all[0].keys():
            metrics_hist[name].append(metrics_all[0][name])
        for name in metrics_all[1].keys():
            metrics_hist_te[name].append(metrics_all[1][name])
        for name in metrics_all[2].keys():
            metrics_hist_tr[name].append(metrics_all[2][name])
        metrics_hist_all = (metrics_hist, metrics_hist_te, metrics_hist_tr)

        # Save metrics, model, and params at each epoch; the returned opt_thresh
        # is only needed when predicting on the test set.
        opt_thresh = persistence.save_everything(args, metrics_hist_all, model, model_dir, params, args.criterion)

        if test_only:
            break

        if args.criterion in metrics_hist.keys():
            if early_stop(metrics_hist, args.criterion, args.patience):
                # Stop training, run the final evaluation on the test and train sets, then exit.
                print("%s hasn't improved in %d epochs, early stopping..." % (args.criterion, args.patience))
                test_only = True
                # Load the best model (by criterion) for the final test.
                model = torch.load('%s/model_best_%s.pth' % (model_dir, args.criterion))

    return epoch + 1
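
# Design note on the scaling above: MaxAbsScaler divides each feature column by its maximum
# absolute value and performs no centering, so the CSR matrix stays sparse (centering would
# destroy sparsity, which is why scikit-learn's StandardScaler refuses sparse input unless
# with_mean=False). A minimal, self-contained illustration with toy data:
def _maxabs_scaling_demo():
    from scipy.sparse import csr_matrix
    from sklearn.preprocessing import MaxAbsScaler
    X_demo = csr_matrix([[0.0, 4.0], [2.0, 0.0]])
    X_scaled = MaxAbsScaler().fit_transform(X_demo)  # still sparse, values scaled into [-1, 1]
    return X_scaled.toarray()  # [[0., 1.], [1., 0.]]
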
def train_epochs(args, model, optimizer, params, dicts):
    """Main loop: trains the model and runs evaluation on the train/test sets."""
    metrics_hist = defaultdict(lambda: [])
    metrics_hist_te = defaultdict(lambda: [])
    metrics_hist_tr = defaultdict(lambda: [])

    test_only = args.test_model is not None
    print("\n\ntest_only: " + str(test_only))

    opt_thresh = None  # placeholder, only needed when predicting on the test set; updated below

    # Train for n_epochs unless the criterion metric does not improve for [patience] epochs.
    for epoch in range(args.n_epochs):
        # Only evaluate on the train/test sets on the very last epoch.
        if epoch == 0 and not args.test_model:
            model_dir = os.path.join(MODEL_DIR, '_'.join([args.model, args.desc,
                                                          time.strftime('%b_%d_%H:%M', time.gmtime())]))
            os.mkdir(model_dir)
        elif args.test_model:
            model_dir = os.getcwd()  # just save things to where this script was called

        start = time.time()
        metrics_all = one_epoch(model, optimizer, epoch, args.n_epochs, args.batch_size, args.data_path,
                                test_only, dicts, model_dir, args.gpu, args.quiet, opt_thresh, args.obs_limit)
        end = time.time()
        print("\nEpoch Duration: " + str(end - start))

        # Distribute results from metrics_all to the respective history dicts.
        for name in metrics_all[0].keys():
            metrics_hist[name].append(metrics_all[0][name])
        for name in metrics_all[1].keys():
            metrics_hist_te[name].append(metrics_all[1][name])
        for name in metrics_all[2].keys():
            metrics_hist_tr[name].append(metrics_all[2][name])
        metrics_hist_all = (metrics_hist, metrics_hist_te, metrics_hist_tr)

        # Save metrics, model, and params at each epoch.
        persistence.save_everything(args, metrics_hist_all, model, model_dir, params, args.criterion)

        if test_only:
            break

        # On the second-to-last epoch, fix the decision threshold that maximized the criterion
        # metric on the validation set, so the final epoch can use it when predicting on the test set.
        if epoch == args.n_epochs - 2:
            opt_thresh = metrics_hist["opt_f1_thresh_micro"][np.nanargmax(metrics_hist[args.criterion])]
            print("Optimal f1 threshold: " + str(opt_thresh))

        if args.criterion in metrics_hist.keys():
            if early_stop(metrics_hist, args.criterion, args.patience):
                # Stop training, run the final evaluation on the test and train sets, then exit.
                print("%s hasn't improved in %d epochs, early stopping or just completed last epoch"
                      % (args.criterion, args.patience))
                test_only = True
                opt_thresh = metrics_hist["opt_f1_thresh_micro"][np.nanargmax(metrics_hist[args.criterion])]
                print("Optimal f1 threshold: " + str(opt_thresh))
                # Load the best model (by criterion) for the final test.
                model = torch.load('%s/model_best_%s.pth' % (model_dir, args.criterion))

    return epoch + 1
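
# The threshold selection above pairs two per-epoch histories: metrics_hist[args.criterion]
# (the validation metric being optimized) and metrics_hist["opt_f1_thresh_micro"] (the threshold
# that maximized micro-F1 on the validation set that epoch). np.nanargmax picks the best epoch
# while ignoring NaN entries from epochs where the metric was not computed. A toy example with
# made-up numbers:
def _opt_thresh_demo():
    import numpy as np
    criterion_hist = [0.71, np.nan, 0.78, 0.74]  # hypothetical validation metric per epoch
    thresh_hist = [0.50, 0.45, 0.40, 0.55]       # hypothetical per-epoch optimal thresholds
    best_epoch = np.nanargmax(criterion_hist)    # -> 2 (the NaN entry is ignored)
    return thresh_hist[best_epoch]               # -> 0.40
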
def train_epochs(args, model, optimizer, params, dicts, struc_feats, struc_labels):
    """Main loop: trains the model and runs evaluation on the train/test sets."""
    metrics_hist = defaultdict(lambda: [])
    metrics_hist_te = defaultdict(lambda: [])
    metrics_hist_tr = defaultdict(lambda: [])

    test_only = args.test_model is not None
    print("\n\ntest_only: " + str(test_only))

    # Convert the structured features to CSR sparse matrix form.
    X = struc_feats.tocsr()

    # Split into train, val, and test --> the split sizes are passed as args.
    X_train = X[:args.len_train]
    y_train = struc_labels[:args.len_train]
    X_val = X[args.len_train:args.len_train + args.len_val]
    X_test = X[args.len_train + args.len_val:args.len_train + args.len_val + args.len_test]

    # Standardize features (the scaler is fit on the training split only).
    scaler = MaxAbsScaler().fit(X_train)
    X_train_std = scaler.transform(X_train)
    X_val_std = scaler.transform(X_val)
    X_test_std = scaler.transform(X_test)

    opt_thresh = None  # placeholder, only needed when predicting on the test set; updated below

    # Train for n_epochs unless the criterion metric does not improve for [patience] epochs.
    for epoch in range(args.n_epochs):
        # Only evaluate on the train/test sets on the very last epoch.
        if epoch == 0 and not args.test_model:
            model_dir = os.path.join(MODEL_DIR, '_'.join([args.model, args.desc,
                                                          time.strftime('%b_%d_%H:%M', time.gmtime())]))
            os.mkdir(model_dir)
        elif args.test_model:
            model_dir = os.getcwd()  # just save things to where this script was called

        start = time.time()
        metrics_all = one_epoch(model, optimizer, epoch, args.n_epochs, args.batch_size, args.data_path,
                                test_only, dicts, model_dir, args.gpu, args.quiet,
                                X_train_std, X_val_std, X_test_std, y_train,
                                args.train_frac, args.test_frac, opt_thresh,
                                args.struc_aux_loss_wt, args.conv_aux_loss_wt)
        end = time.time()
        print("\nEpoch Duration: " + str(end - start))

        # Distribute results from metrics_all to the respective history dicts.
        for name in metrics_all[0].keys():
            metrics_hist[name].append(metrics_all[0][name])
        for name in metrics_all[1].keys():
            metrics_hist_te[name].append(metrics_all[1][name])
        for name in metrics_all[2].keys():
            metrics_hist_tr[name].append(metrics_all[2][name])
        metrics_hist_all = (metrics_hist, metrics_hist_te, metrics_hist_tr)

        # Write the hyperparameters and metrics to csv.
        params['kernel_sizes'] = str(params['kernel_sizes'])
        params['val_auc'] = metrics_hist['auc']
        params['val_f1'] = metrics_hist['f1_micro']
        if test_only or (epoch == args.n_epochs - 1):
            params['test_f1'] = metrics_hist_te['f1_micro'][0]
            params['test_auc'] = metrics_hist_te['auc'][0]
        metric_df = pd.DataFrame(params)
        metric_df.to_csv(model_dir + "/results.csv", index=False)

        # Save metrics, model, and params at each epoch.
        persistence.save_everything(args, metrics_hist_all, model, model_dir, params, args.criterion)

        if test_only:
            break

        # On the second-to-last epoch, fix the decision threshold that maximized the criterion
        # metric on the validation set, so the final epoch can use it when predicting on the test set.
        if epoch == args.n_epochs - 2:
            opt_thresh = metrics_hist["opt_f1_thresh_micro"][np.nanargmax(metrics_hist[args.criterion])]
            print("Optimal f1 threshold: " + str(opt_thresh))

        if args.criterion in metrics_hist.keys():
            if early_stop(metrics_hist, args.criterion, args.patience):
                # Stop training, run the final evaluation on the test and train sets, then exit.
                print("%s hasn't improved in %d epochs, early stopping or just completed last epoch"
                      % (args.criterion, args.patience))
                test_only = True
                opt_thresh = metrics_hist["opt_f1_thresh_micro"][np.nanargmax(metrics_hist[args.criterion])]
                print("Optimal f1 threshold: " + str(opt_thresh))
                # Load the best model (by criterion) for the final test.
                model = torch.load('%s/model_best_%s.pth' % (model_dir, args.criterion))

    return epoch + 1
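
# Note on the csv write above: because params mixes scalar hyperparameters with the per-epoch
# metric lists (val_auc, val_f1), pd.DataFrame(params) yields one row per epoch with the scalar
# values broadcast down every row. A toy illustration with made-up values:
def _results_csv_demo():
    import pandas as pd
    demo_params = {"lr": 0.001, "val_auc": [0.81, 0.84], "val_f1": [0.42, 0.47]}
    return pd.DataFrame(demo_params)  # two rows; the scalar "lr" repeats on both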