Example #1
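All four examples below reference names that are never imported inside the snippets themselves. A minimal preamble they appear to assume is sketched here; one_epoch, early_stop, persistence and MODEL_DIR are project-specific and are assumed to come from the surrounding codebase, so they are only indicated in comments.

from collections import defaultdict
import os
import time

import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import MaxAbsScaler

# Project-specific pieces (locations are assumptions, not shown in these examples):
# from training import one_epoch, early_stop   # per-epoch runner and patience check
# import persistence                            # save_everything() checkpoint helper
# MODEL_DIR = "saved_models"                    # root directory for model checkpoints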
def train_epochs(args, model, optimizer, params, dicts):
    """
        Main loop. does train and test
    """
    metrics_hist = defaultdict(lambda: [])
    metrics_hist_te = defaultdict(lambda: [])
    metrics_hist_tr = defaultdict(lambda: [])

    test_only = args.test_model is not None
    
    print("\n\ntest_only: " + str(test_only))
        
    # train for up to n_epochs, stopping early if the criterion metric does not improve for [patience] epochs
    for epoch in range(args.n_epochs):
        
        # the train/test evaluation only runs on the very last epoch;
        # on the first epoch, create the directory everything will be saved to
        if epoch == 0 and not args.test_model:
            model_dir = os.path.join(MODEL_DIR, '_'.join([args.model, args.desc, time.strftime('%b_%d_%H:%M', time.gmtime())]))
            os.mkdir(model_dir) 
            
        elif args.test_model:
            
            model_dir = os.getcwd() #just save things to where this script was called
            
        metrics_all = one_epoch(model, optimizer, epoch, args.n_epochs, args.batch_size, args.data_path,
                                test_only, dicts, model_dir, args.samples, args.gpu, args.debug, args.quiet)

        # DISTRIBUTING results from metrics_all to respective dicts
        for name in metrics_all[0].keys():
            metrics_hist[name].append(metrics_all[0][name])
        for name in metrics_all[1].keys():
            metrics_hist_te[name].append(metrics_all[1][name])
        for name in metrics_all[2].keys():
            metrics_hist_tr[name].append(metrics_all[2][name])
        metrics_hist_all = (metrics_hist, metrics_hist_te, metrics_hist_tr)

        # save metrics, model and params at the end of each epoch
        persistence.save_everything(args, metrics_hist_all, model, model_dir, params, args.criterion)

        if test_only:
            break

        if args.criterion in metrics_hist.keys():
            if early_stop(metrics_hist, args.criterion, args.patience):
                #stop training, do tests on test and train sets, and then stop the script
                print("%s hasn't improved in %d epochs, early stopping..." % (args.criterion, args.patience))
                test_only = True
                model = torch.load('%s/model_best_%s.pth' % (model_dir, args.criterion)) # LOADING BEST MODEL FOR FINAL TEST
                
    return epoch+1
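A minimal sketch of how this first variant might be invoked, assuming an argparse-style namespace; every field shown is read somewhere in the function above, but the model name, dataset path and all values are placeholders rather than the project's real configuration.

from argparse import Namespace

# Placeholder configuration; a real run would build this with argparse.
args = Namespace(
    model="conv_attn", desc="baseline", n_epochs=20, batch_size=16,
    data_path="data/train.csv", test_model=None, samples=False,
    gpu=True, debug=False, quiet=False, criterion="f1_micro", patience=3,
)

# model, optimizer, params and dicts are constructed elsewhere in the project:
# last_epoch = train_epochs(args, model, optimizer, params, dicts)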
Example #2
def train_epochs(args, model, optimizer, params, dicts, struc_feats,
                 struc_labels):
    """
        Main loop. does train and test
    """
    metrics_hist = defaultdict(lambda: [])
    metrics_hist_te = defaultdict(lambda: [])
    metrics_hist_tr = defaultdict(lambda: [])

    test_only = args.test_model is not None

    print("\n\ntest_only: " + str(test_only))

    # Converting to csr sparse matrix form
    X = struc_feats.tocsr()

    print(X.shape[0])

    # Splitting into train, val and test --> need idx values passed as args
    X_train = X[:args.len_train]
    y_train = struc_labels[:args.len_train]

    X_val = X[args.len_train:args.len_train + args.len_val]
    X_test = X[args.len_train + args.len_val:args.len_train + args.len_val +
               args.len_test]

    # Scaling features to [-1, 1] by their max absolute value, fitting on the training split only
    scaler = MaxAbsScaler().fit(X_train)
    X_train_std = scaler.transform(X_train)
    X_val_std = scaler.transform(X_val)
    X_test_std = scaler.transform(X_test)
    ################################

    opt_thresh = 0.5  # Placeholder, only needed when predicting on test set

    # train for up to n_epochs, stopping early if the criterion metric does not improve for [patience] epochs
    for epoch in range(args.n_epochs):

        # the train/test evaluation only runs on the very last epoch;
        # on the first epoch, create the directory everything will be saved to
        if epoch == 0 and not args.test_model:
            model_dir = os.path.join(
                MODEL_DIR, '_'.join([
                    args.model, args.desc,
                    time.strftime('%b_%d_%H:%M', time.gmtime())
                ]))
            os.mkdir(model_dir)

        elif args.test_model:

            model_dir = os.getcwd()  # just save things to where this script was called from

        start = time.time()
        metrics_all = one_epoch(model, optimizer, epoch, args.n_epochs,
                                args.batch_size, args.data_path, test_only,
                                dicts, model_dir, args.gpu, args.quiet,
                                X_train_std, X_val_std, X_test_std, y_train,
                                args.train_frac, args.test_frac, opt_thresh)
        end = time.time()
        print("\nEpoch Duration: " + str(end - start))

        # DISTRIBUTING results from metrics_all to respective dicts
        for name in metrics_all[0].keys():
            metrics_hist[name].append(metrics_all[0][name])
        for name in metrics_all[1].keys():
            metrics_hist_te[name].append(metrics_all[1][name])
        for name in metrics_all[2].keys():
            metrics_hist_tr[name].append(metrics_all[2][name])
        metrics_hist_all = (metrics_hist, metrics_hist_te, metrics_hist_tr)

        # save metrics, model and params at the end of each epoch; the returned
        # opt_thresh is only needed when predicting on the test set
        opt_thresh = persistence.save_everything(
            args, metrics_hist_all, model, model_dir, params, args.criterion)

        if test_only:
            break

        if args.criterion in metrics_hist.keys():
            if early_stop(metrics_hist, args.criterion, args.patience):
                #stop training, do tests on test and train sets, and then stop the script
                print("%s hasn't improved in %d epochs, early stopping..." %
                      (args.criterion, args.patience))
                test_only = True
                model = torch.load(
                    '%s/model_best_%s.pth' %
                    (model_dir,
                     args.criterion))  # LOADING BEST MODEL FOR FINAL TEST

    return epoch + 1
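The structured-feature variants fit the MaxAbsScaler on the training rows only and reuse the fitted scaler for the validation and test splits, so no scaling statistics leak across splits. A self-contained sketch of that pattern with a toy sparse matrix (the data and split sizes here are made up for illustration):

import numpy as np
from scipy.sparse import csr_matrix
from sklearn.preprocessing import MaxAbsScaler

# Toy feature matrix: 4 rows, 2 columns, kept sparse like struc_feats above.
X = csr_matrix(np.array([[1.0, -4.0], [2.0, 2.0], [0.5, 8.0], [3.0, -1.0]]))
len_train, len_val = 2, 1

scaler = MaxAbsScaler().fit(X[:len_train])               # statistics from the train rows only
X_train_std = scaler.transform(X[:len_train])             # each column now lies in [-1, 1]
X_val_std = scaler.transform(X[len_train:len_train + len_val])
X_test_std = scaler.transform(X[len_train + len_val:])
print(X_train_std.toarray())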
Example #3
def train_epochs(args, model, optimizer, params, dicts):
    """
        Main loop. does train and test
    """
    metrics_hist = defaultdict(lambda: [])
    metrics_hist_te = defaultdict(lambda: [])
    metrics_hist_tr = defaultdict(lambda: [])

    test_only = args.test_model is not None
    
    print("\n\ntest_only: " + str(test_only))
    
    opt_thresh = None # Placeholder, only needed when predicting on test set, updated below
        
    # train for up to n_epochs, stopping early if the criterion metric does not improve for [patience] epochs
    for epoch in range(args.n_epochs):
        
        # the train/test evaluation only runs on the very last epoch;
        # on the first epoch, create the directory everything will be saved to
        if epoch == 0 and not args.test_model:
            model_dir = os.path.join(MODEL_DIR, '_'.join([args.model, args.desc, time.strftime('%b_%d_%H:%M', time.gmtime())]))
            os.mkdir(model_dir) 
            
        elif args.test_model:
            
            model_dir = os.getcwd() #just save things to where this script was called
        
        start = time.time()
        metrics_all = one_epoch(model, optimizer, epoch, args.n_epochs, args.batch_size, args.data_path,
                                test_only, dicts, model_dir, args.gpu, args.quiet, opt_thresh, args.obs_limit)
        end = time.time()
        print("\nEpoch Duration: " + str(end-start))

        # DISTRIBUTING results from metrics_all to respective dicts
        for name in metrics_all[0].keys():
            metrics_hist[name].append(metrics_all[0][name])
        for name in metrics_all[1].keys():
            metrics_hist_te[name].append(metrics_all[1][name])
        for name in metrics_all[2].keys():
            metrics_hist_tr[name].append(metrics_all[2][name])
        metrics_hist_all = (metrics_hist, metrics_hist_te, metrics_hist_tr)

        # save metrics, model and params at the end of each epoch
        persistence.save_everything(args, metrics_hist_all, model, model_dir, params, args.criterion)

        if test_only:
            break

        # just before the final epoch, fix the threshold from the best epoch so far
        if epoch == args.n_epochs - 2:
            opt_thresh = metrics_hist["opt_f1_thresh_micro"][np.nanargmax(metrics_hist[args.criterion])]
            print("Optimal f1 threshold: " + str(opt_thresh))

        if args.criterion in metrics_hist.keys():
            if early_stop(metrics_hist, args.criterion, args.patience):
                # stop training, run the final tests on the test and train sets, then stop the script
                print("%s hasn't improved in %d epochs (or the last epoch just finished), stopping..." % (args.criterion, args.patience))
                test_only = True
                opt_thresh = metrics_hist["opt_f1_thresh_micro"][np.nanargmax(metrics_hist[args.criterion])]
                print("Optimal f1 threshold: " + str(opt_thresh))
                model = torch.load('%s/model_best_%s.pth' % (model_dir, args.criterion)) # LOADING BEST MODEL FOR FINAL TEST
                
    return epoch+1
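This variant (and the next one) picks the classification threshold from the epoch whose criterion value was best, using np.nanargmax so that epochs where the metric came back as NaN are skipped. A small illustration with made-up history values:

import numpy as np

# Toy history: one criterion value and one per-epoch optimal threshold per epoch.
metrics_hist = {
    "f1_micro": [0.41, np.nan, 0.48, 0.46],
    "opt_f1_thresh_micro": [0.50, 0.35, 0.42, 0.44],
}
criterion = "f1_micro"

best_epoch = np.nanargmax(metrics_hist[criterion])            # -> 2, the NaN is ignored
opt_thresh = metrics_hist["opt_f1_thresh_micro"][best_epoch]  # -> 0.42
print("Optimal f1 threshold: " + str(opt_thresh))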
Example #4
def train_epochs(args, model, optimizer, params, dicts, struc_feats,
                 struc_labels):
    """
        Main loop. does train and test
    """
    metrics_hist = defaultdict(lambda: [])
    metrics_hist_te = defaultdict(lambda: [])
    metrics_hist_tr = defaultdict(lambda: [])

    test_only = args.test_model is not None

    print("\n\ntest_only: " + str(test_only))

    # Converting to csr sparse matrix form
    X = struc_feats.tocsr()

    # Splitting into train, val and test --> need idx values passed as args
    X_train = X[:args.len_train]
    y_train = struc_labels[:args.len_train]

    X_val = X[args.len_train:args.len_train + args.len_val]
    X_test = X[args.len_train + args.len_val:args.len_train + args.len_val +
               args.len_test]

    # Scaling features to [-1, 1] by their max absolute value, fitting on the training split only
    scaler = MaxAbsScaler().fit(X_train)
    X_train_std = scaler.transform(X_train)
    X_val_std = scaler.transform(X_val)
    X_test_std = scaler.transform(X_test)
    ################################

    opt_thresh = None  # Placeholder, only needed when predicting on test set, updated below

    # train for up to n_epochs, stopping early if the criterion metric does not improve for [patience] epochs
    for epoch in range(args.n_epochs):

        # the train/test evaluation only runs on the very last epoch;
        # on the first epoch, create the directory everything will be saved to
        if epoch == 0 and not args.test_model:
            model_dir = os.path.join(
                MODEL_DIR, '_'.join([
                    args.model, args.desc,
                    time.strftime('%b_%d_%H:%M', time.gmtime())
                ]))
            os.mkdir(model_dir)

        elif args.test_model:

            model_dir = os.getcwd()  # just save things to where this script was called from

        start = time.time()
        metrics_all = one_epoch(model, optimizer, epoch, args.n_epochs,
                                args.batch_size, args.data_path, test_only,
                                dicts, model_dir, args.gpu, args.quiet,
                                X_train_std, X_val_std, X_test_std, y_train,
                                args.train_frac, args.test_frac, opt_thresh,
                                args.struc_aux_loss_wt, args.conv_aux_loss_wt)
        end = time.time()
        print("\nEpoch Duration: " + str(end - start))

        # DISTRIBUTING results from metrics_all to respective dicts
        for name in metrics_all[0].keys():
            metrics_hist[name].append(metrics_all[0][name])
        for name in metrics_all[1].keys():
            metrics_hist_te[name].append(metrics_all[1][name])
        for name in metrics_all[2].keys():
            metrics_hist_tr[name].append(metrics_all[2][name])
        metrics_hist_all = (metrics_hist, metrics_hist_te, metrics_hist_tr)

        ### Writing to csv ###
        params['kernel_sizes'] = str(params['kernel_sizes'])
        params['val_auc'] = metrics_hist['auc']
        params['val_f1'] = metrics_hist['f1_micro']

        if test_only or (epoch == args.n_epochs - 1):
            params['test_f1'] = metrics_hist_te['f1_micro'][0]
            params['test_auc'] = metrics_hist_te['auc'][0]

        metric_df = pd.DataFrame(params)
        metric_df.to_csv(model_dir + "/results.csv", index=False)

        #save metrics, model, params
        persistence.save_everything(args, metrics_hist_all, model, model_dir,
                                    params, args.criterion)

        if test_only:
            break

        # just before the final epoch, fix the threshold from the best epoch so far
        if epoch == args.n_epochs - 2:
            opt_thresh = metrics_hist["opt_f1_thresh_micro"][np.nanargmax(
                metrics_hist[args.criterion])]
            print("Optimal f1 threshold: " + str(opt_thresh))

        if args.criterion in metrics_hist.keys():
            if early_stop(metrics_hist, args.criterion, args.patience):
                # stop training, run the final tests on the test and train sets, then stop the script
                print("%s hasn't improved in %d epochs (or the last epoch just finished), stopping..."
                      % (args.criterion, args.patience))
                test_only = True
                opt_thresh = metrics_hist["opt_f1_thresh_micro"][np.nanargmax(
                    metrics_hist[args.criterion])]
                print("Optimal f1 threshold: " + str(opt_thresh))
                model = torch.load(
                    '%s/model_best_%s.pth' % (model_dir, args.criterion))  # LOADING BEST MODEL FOR FINAL TEST

    return epoch + 1
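All four variants lean on an early_stop helper whose body is not shown. Going only by the comment "train for up to n_epochs, stopping early if the criterion metric does not improve for [patience] epochs", one plausible implementation is sketched below; the project's real helper may differ, for example in how it treats metrics where lower is better.

import numpy as np

def early_stop_sketch(metrics_hist, criterion, patience):
    """Return True when the criterion has not improved over the last `patience` epochs."""
    history = metrics_hist[criterion]
    if len(history) <= patience:
        return False
    best_epoch = int(np.nanargmax(history))   # assumes higher values are better
    return best_epoch < len(history) - patience

# Best value was 3 epochs ago with patience=3, so this prints True (stop).
print(early_stop_sketch({"f1_micro": [0.40, 0.48, 0.46, 0.45, 0.47]}, "f1_micro", 3))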