Example #1
def do_test(model, data, measures):
    start_time = time.time()
    input_size = len(_g.vocab)

    if not _g.args.quiet:
        print('Testing...')

    criterion = nn.NLLLoss(ignore_index=_g.vocab.stoi[_g.padding_symbol])

    losses = None
    so_far = 0

    try:
        for i, batch in zip(range(len(data)), data):  # TODO necessary for now to do it this way
            loss = _t.evaluate(model, criterion, _u.to_one_hot(batch.before, input_size), batch.after, measures)
            loss = loss.unsqueeze(dim=1)
            losses = loss if losses is None else torch.cat((losses, loss), dim=1)
            so_far = i+1

        if not _g.args.quiet:
            print('Testing done successfully')

    except KeyboardInterrupt:
        print('\nExiting earlier than expected. Wait a moment!')

    losses = losses.mean(dim=1)
    text = 'Test {} elements in {}.'.format(so_far * data.batch_size, _u.pretty_print_time(time.time() - start_time))
    eval_measures = _u.to_builtin({n: (x,y) for n,x,y in
                                   zip(['loss'] + list(measures.keys()), losses[::2], losses[1::2])})

    for i, j in eval_measures.items():
        text += ' ' + i + ' {:5.6f}({:5.6f}).'.format(j[0], j[1])
    if not _g.args.quiet:
        print(text)
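Both this example and Example #15 below feed batches through a `_u.to_one_hot` helper that the snippets themselves do not define. A minimal sketch of what such a helper could look like, assuming `batch.before` is a LongTensor of vocabulary indices (the function name and signature come from the call sites; the body is an assumption):

import torch

def to_one_hot(indices, num_classes):
    # Expand a tensor of token indices (any shape) into one-hot vectors
    # along a new trailing dimension of size num_classes.
    one_hot = torch.zeros(*indices.shape, num_classes, dtype=torch.float)
    return one_hot.scatter_(-1, indices.unsqueeze(-1), 1.0)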
Example #2
def train_transfer(output, models=['linear', 'tree', 'forest', 'svr']):
    data = get_predictions(output)
    print('Primary predictions loaded.')

    [X, y, X_train, y_train, X_test, y_test, X_scaled, y_scaled,
     X_train_scaled, y_train_scaled, X_test_scaled, y_scaler] \
        = pre.split_pipeline(data, output)
    print('Data preprocessed.')

    regressors = tra.build(X_train, y_train, X_train_scaled, y_train_scaled,
                           models)

    best_regressor = tra.evaluate(regressors, X_train, y_train, X_train_scaled,
                                  y_train_scaled, X_test, y_test,
                                  X_test_scaled, y_scaler)
    print('Regressors evaluated. Best regressor is:\n' + str(best_regressor))

    if 'SVR' in str(best_regressor):
        best_regressor.fit(X_scaled, y_scaled)
    else:
        best_regressor.fit(X, y)
    print('Regressor fit.')

    tra.print_results(best_regressor, X, X_scaled, y, y_scaler)

    tra.save(best_regressor, X, output + '_transfer')
    print('Regressor saved.')

    tra.upload(output + '_transfer')
    print('Regressor uploaded.')
Example #3
def main():
    args = get_training_args()
    # Logger
    log = Logger(file=args.log_file, verbose=args.verbose, flush=True)
    # Prepare data
    batches, dic = get_dataset(args, log)
    # Create model
    network = instantiate_network(args, dic, log)
    # Train model
    train(args, network, batches["train"], batches["dev"], log)
    # Test
    test_accuracy = evaluate(args, network, batches["test"])
    # Print final result
    log(f"Test accuracy: {test_accuracy*100:.2f}%")
    # Explain if the model is SoPa
    if args.model_type == "sopa":
        explain(args, network, batches["explain"], log)
Example #4
def test_model(dataroot, model_path, batch_size, device):
    trainset, validset, validset_subjects, class_weights = get_dataset(
        dataroot, folds_train=(0, 1, 2), folds_valid=(3, ))
    class_weights = class_weights.to(device)
    valid_loader = DataLoader(validset,
                              batch_size=batch_size,
                              num_workers=6,
                              shuffle=False)

    model = get_model()
    sd = torch.load(model_path)
    # PBT saves the model as part of a dict that also contains other information about the individual
    if 'model' in sd and 'sd' in sd['model']:
        sd = sd['model']['sd']
    model.load_state_dict(sd)
    model.to(device)

    valid_loss, cm, auc, prec, rec, f1 = evaluate(model, valid_loader,
                                                  class_weights, device)

    print(f"Results for model {model_path}")
    print(f"valid_loss={valid_loss:.4e}")
    print(f"auc={auc:.4f}")
    print(f"prec={prec:.4f}")
    print(f"rec={rec:.4f}")
    print(f"f1={f1:.4f}")
    print(f"cm=\n{cm}")

    return {
        'valid_loss': valid_loss,
        'cm': cm,
        'auc': auc,
        'prec': prec,
        'rec': rec,
        'f1': f1
    }
Example #5
def find_project_dir():
    # NOTE: the opening of this snippet was cut off in the source, so the original
    # condition is unknown; checking for the project git repo path is an assumption.
    if os.path.isdir("project_git_repo/cpd35-clustering-demo"):
        return os.path.realpath("project_git_repo/cpd35-clustering-demo")
    else:
        return os.getcwd()


PROJECT_DIR = find_project_dir()
SCRIPT_DIR = os.path.join(PROJECT_DIR, "assets/jupyterlab")
DATA_DIR = os.path.join(PROJECT_DIR, "assets/data_asset")
sys.path.append(os.path.normpath(SCRIPT_DIR))
print(SCRIPT_DIR)
print(DATA_DIR)

from training import train, evaluate, clusterings

reference_df = pd.read_csv(os.path.join(DATA_DIR, "credit_risk_reference.csv"))
input_df = reference_df.drop(['Risk'], axis=1)

# Train models and select the winning one

results = []

for (clustering_name, clustering_op) in clusterings:
    print(clustering_name)
    model = train(input_df, clustering_name, clustering_op)
    result = evaluate(reference_df, clustering_op)
    print("---")
    results.append(result)

best_score_idx = np.argmax([r['v_measure'] for r in results])
print("The winner is: '{}' with V-measure: {}!".format(
    clusterings[best_score_idx][0], results[best_score_idx]['v_measure']))
Example #6
def main(config_file):
    # read from config
    local_config = __import__(config_file)
    model_name = local_config.INPUTS['MODEL']
    # __import__ does not accept a leading '.', so import the models package by its plain name
    model = getattr(__import__('models', fromlist=[model_name]), model_name)
    batch_size = local_config.INPUTS['BATCHSIZE']
    optimizer = local_config.INPUTS['OPTIMIZER']
    num_epochs = local_config.INPUTS['EPOCHS']
    no_classes = local_config.INPUTS['CLASSES']
    learning_rate = local_config.INPUTS['LR']

    # logging
    start_time = time.time()
    date = time.strftime('%d-%m-%Y-%H-%M-%S', time.localtime())
    log_path = f'./birdsong/run_log/{model_name}_{date}'
    state_fname, log_fname, summ_tensor_board = logger.create_log(log_path)
    writer = SummaryWriter(str(summ_tensor_board))

    # Upsampling
    train_df = upsample_df(TRAIN, 400)

    # Augmentation
    noiser = SoundscapeNoise('storage/noise_slices', scaling=1)

    ds_train = SpectralDataset(train_df,
                               INPUT_DIR,
                               enhancement_func=exponent,
                               augmentation_func=noiser)
    ds_test = SpectralDataset(TEST, INPUT_DIR, enhancement_func=exponent)

    dl_train = DataLoader(ds_train,
                          batch_size,
                          num_workers=4,
                          pin_memory=PIN,
                          shuffle=True)
    dl_test = DataLoader(ds_test,
                         batch_size,
                         num_workers=4,
                         pin_memory=PIN,
                         shuffle=True)
    print('Dataloaders initialized')

    time_axis = ds_test.shape[1]
    freq_axis = ds_test.shape[0]
    net = model(time_axis=time_axis,
                freq_axis=freq_axis,
                no_classes=no_classes)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)

    # local vars
    best_acc = 0
    for epoch in range(num_epochs):
        train(net, dl_train, epoch, optimizer, criterion, DEVICE)

        train_stats, train_conf_matrix = evaluate(net, dl_train, criterion,
                                                  no_classes, DEVICE)
        print(
            f'Train Loss: {train_stats[0]:.5f}, Train Acc: {train_stats[1]:.5f}'
        )
        test_stats, test_conf_matrix = evaluate(net, dl_test, criterion,
                                                no_classes, DEVICE)
        print(f'Test Loss: {test_stats[0]:.5f}, Test Acc: {test_stats[1]:.5f}')

        is_best = test_stats[1] > best_acc
        best_acc = max(test_stats[1], best_acc)
        print('Best Accuracy: {:.5f}'.format(best_acc))

        logger.save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': net.state_dict(),
                'best_accuracy': best_acc
            },
            is_best,
            filename=state_fname)
        """
        print('Making images')
        img_path = log_path + '/train' + '_' + str(epoch) + '.png'
        img = plot_conf_mat(img_path, train_conf_matrix)

        img_path = log_path + '/test' + '_' + str(epoch) + '.png'
        img = plot_conf_mat(img_path, test_conf_matrix)
        """

        print('Writing logs')
        logger.write_summary(writer, epoch, train_stats, test_stats)
        logger.dump_log_txt(date, start_time, local_config, train_stats,
                            test_stats, best_acc, log_fname)

        print('Done for now')

    writer.close()
    print('Finished Training')
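Examples #6 (above) and #10 (below) read every hyperparameter from a config module that is imported by name. For reference, a minimal sketch of what such a module might contain; the key names come from the `local_config.INPUTS[...]` lookups, all values are placeholders:

# hypothetical_config.py -- passed as config_file='hypothetical_config'
INPUTS = {
    'MODEL': 'Cnn',         # class name resolved from the models package
    'BATCHSIZE': 32,
    'OPTIMIZER': 'Adam',    # attribute name resolved from torch.optim (Example #10)
    'EPOCHS': 50,
    'CLASSES': 10,
    'LR': 1e-3,
}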
Example #7
epochs = 100

train_losses = []
val_losses = []

for epoch in range(epochs):    # how many times to go through whole dataset?
    # ----- PURELY FOR PLOTTING THE HYPOTHESIS OVER THE WHOLE INPUT DOMAIN --------
    all_preds = nn(X).detach().numpy()                  # make predictions for all inputs (not just minibatch)
    h_plot = h_ax.plot(X.numpy(), all_preds, c='g')    # plot predictions for all inputs
    fig.canvas.draw()
    # -----------------------------------------------------------------------------

    train(nn, train_loader, criterion, optimiser, epoch, fig, loss_ax, train_losses, p)
    idx = len(train_loader) * epoch                     # index of current batch
    evaluate(nn, val_loader, criterion, epoch, fig, loss_ax, val_losses, idx)

    h_plot.pop(0).remove()                              # remove the previous plot


    '''
    #   --------- OLD ----------
    for batch_idx, batch in enumerate(train_loader):                                  # for each minibatch from dataloader
        print(batch)

        x, y = batch                                                               # unpack the minibatch
        h = nn(x)                                                                  # make predictions for this minibatch
        loss = criterion(h, y)                                                     # evaluate loss for this batch
        loss.backward()                   # differentiate loss with respect to parameters that the optimiser is tracking

        optimiser.step()                                                            # take optimisation step
    '''
Example #8
def train_binary_model(path,
                       epochs=100,
                       ft_epochs=100,
                       learning_rate=0.01,
                       classes_to_match: Union[int, List[int]] = 0,
                       classes_to_drop: Union[int, List[int]] = None):
    """
    Train a smaller binary model for empty/not empty classification and save it under the given path. The method first
    loads the datasets using :py:doc:`generate_datasets.py <training.generate_datasets.py>` methods. Then the model is
    trained, saved, and finally evaluated.

    Training is run in two steps: It is first trained with synthetic data and then finetuned with real data. Early
    stopping is used to prevent overfitting.

    Args:
        path(str): The directory to save the trained model to.
        epochs(int): The number of epochs. (Default value = 100)
        ft_epochs: The number of finetuning epochs. (Default value = 100)
        learning_rate: The learning rate for the Adadelta optimizer. (Default value = 0.01)
        classes_to_match(Union[int, list[int]]): The classes to match as class 1. (Default value = 0)
        classes_to_drop(Union[int, list[int]]): The classes to drop from the dataset. (Default value = None)

    Returns:
        None

    """
    os.makedirs(path, exist_ok=True)
    concat_machine, concat_hand, concat_out, real_training, real_validation = load_datasets(
        TRANSFORMED_DATASET_NAMES)

    batch_size = 192
    train_generator = ToBinaryGenerator(concat_machine.train,
                                        concat_hand.train,
                                        concat_out.train,
                                        classes_to_match=classes_to_match,
                                        classes_to_drop=classes_to_drop,
                                        batch_size=batch_size,
                                        shuffle=True,
                                        truncate=True)

    dev_generator = ToBinaryGenerator(concat_machine.test,
                                      concat_hand.test,
                                      concat_out.test,
                                      classes_to_match=classes_to_match,
                                      classes_to_drop=classes_to_drop,
                                      batch_size=batch_size,
                                      shuffle=True,
                                      truncate=True)

    ft_train_generator = ToBinaryGenerator(real_training.train,
                                           classes_to_match=classes_to_match,
                                           classes_to_drop=classes_to_drop,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           truncate=True)

    ft_dev_generator = ToBinaryGenerator(real_training.test,
                                         classes_to_match=classes_to_match,
                                         classes_to_drop=classes_to_drop,
                                         batch_size=batch_size,
                                         shuffle=True,
                                         truncate=True)

    test_generator = ToBinaryGenerator(real_validation.test,
                                       classes_to_match=classes_to_match,
                                       classes_to_drop=classes_to_drop,
                                       batch_size=batch_size,
                                       shuffle=False)

    # Run training on the GPU
    with tf.device('/GPU:0'):
        # Keras Model
        print("Creating model..")
        model = Sequential()
        model.add(Conv2D(16, (5, 5), strides=2, input_shape=(28, 28, 1)))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(4, 4)))
        model.add(Conv2D(32, (2, 2)))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Flatten())  # 32
        model.add(Dense(64, activation='relu'))
        model.add(Dropout(0.25))
        model.add(Dense(64, activation='relu'))
        model.add(Dense(1, activation='sigmoid'))

        # def mean_pred(_, y):
        #     return keras.backend.mean(y)

        print("Compiling model..")
        model.compile(
            loss=keras.losses.BinaryCrossentropy(from_logits=False),  # the final Dense layer already applies a sigmoid
            optimizer=keras.optimizers.Adadelta(learning_rate),
            metrics=[keras.metrics.binary_accuracy, 'mse'],
        )
        print(model.summary())

        print("Training model")
        model.fit_generator(train_generator,
                            validation_data=dev_generator,
                            epochs=epochs,
                            callbacks=[
                                EarlyStopping(monitor='val_accuracy',
                                              restore_best_weights=True,
                                              patience=3,
                                              min_delta=0.0001),
                            ])

        print("Finetuning model")
        model.fit_generator(ft_train_generator,
                            validation_data=ft_dev_generator,  # validate finetuning on the held-out dev split
                            epochs=ft_epochs,
                            callbacks=[
                                EarlyStopping(monitor='val_accuracy',
                                              restore_best_weights=True,
                                              patience=3,
                                              min_delta=0.0001),
                            ])

        models.save_model(model, path + "model.h5", save_format='h5')

        print("Evaluating")
        print(
            "Training dev",
            list(
                zip(model.metrics_names,
                    model.evaluate_generator(dev_generator))))
        print(
            "Finetuning dev",
            list(
                zip(model.metrics_names,
                    model.evaluate_generator(ft_dev_generator))))
        print(
            "Test",
            list(
                zip(model.metrics_names,
                    model.evaluate_generator(test_generator))))
        evaluate(model, test_generator, binary=True)
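A hypothetical invocation of the function above, with argument values chosen purely for illustration (the signature and defaults are taken from the docstring):

train_binary_model(
    path='models/binary_empty/',   # created if it does not exist
    epochs=50,
    ft_epochs=20,
    learning_rate=0.01,
    classes_to_match=0,            # classes mapped to label 1
    classes_to_drop=[9],           # classes removed from the dataset
)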
Example #9
def run():

    ## Load Config from JSON file
    dir_path = os.path.dirname(os.path.realpath(__file__))
    config_path = os.path.join(dir_path, "experiment", FLAGS.config)

    if not os.path.exists(config_path):
        raise FileNotFoundError

    if not os.path.exists(FLAGS.data_path):
        raise FileNotFoundError

    with open(config_path, "r") as f:
        config = json.load(f)

    config["gpu"] = torch.cuda.is_available()

    ## Load Data
    df = dl.load_raw_text_file(FLAGS.data_path, num_examples=30000)

    # index language for Input and Output
    inp_index = LanguageIndex(phrases=df["es"].values)
    targ_index = LanguageIndex(df["eng"].values)
    vocab_inp_size = len(inp_index.word2idx)
    vocab_tar_size = len(targ_index.word2idx)

    # Convert Sentences into tokenized tensors
    input_tensor, target_tensor = dl.convert_tensor(df, inp_index, targ_index)
    # Split to training and test set
    input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val = train_test_split(
        input_tensor, target_tensor, test_size=0.2)
    train_dataset = MyData(input_tensor_train, target_tensor_train)
    val_dataset = MyData(input_tensor_val, target_tensor_val)

    # Convert to DataLoader objects
    train_dataset = data.DataLoader(train_dataset,
                                    batch_size=config['batch_size'],
                                    drop_last=True,
                                    shuffle=True)

    eval_dataset = data.DataLoader(val_dataset,
                                   batch_size=config['batch_size'],
                                   drop_last=False,
                                   shuffle=True)
    # Models
    model = Seq2Seq(config, vocab_inp_size, vocab_tar_size)
    scorer = create_scorer(config['metrics'])

    if config['gpu']:
        model = model.cuda()

    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config.get("learning_rate", .001))

    for name, param in model.named_parameters():
        if 'bias' in name:
            torch.nn.init.constant_(param, 0.0)
        elif 'weight' in name:
            torch.nn.init.xavier_normal_(param)
    print("Weight Initialized")

    ## Train and Evaluate over epochs
    all_train_avg_loss = []
    all_eval_avg_loss = []
    all_eval_avg_acc = []

    for epoch in range(FLAGS.epochs):
        run_state = (epoch, FLAGS.epochs)

        # Train needs to return model and optimizer, otherwise the model keeps restarting from zero at every epoch
        model, optimizer, train_avg_loss = train(model, optimizer,
                                                 train_dataset, run_state,
                                                 config['debug'])
        all_train_avg_loss.append(train_avg_loss)

        # Return Val Set Loss and Accuracy
        eval_avg_loss, eval_acc = evaluate(model, eval_dataset, targ_index,
                                           scorer, config['debug'])
        all_eval_avg_loss.append(eval_avg_loss)
        all_eval_avg_acc.append(eval_acc)

        # Save Model Checkpoint
        checkpoint_dict = {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': eval_avg_loss,
        }

        checkpoint_path = '{}/epoch_{:0.0f}_val_loss_{:0.3f}.pt'.format(
            FLAGS.model_checkpoint_dir, epoch, eval_avg_loss)
        torch.save(checkpoint_dict, checkpoint_path)

    # Export Model Learning Curve Info
    df = pd.DataFrame({
        'epoch': range(FLAGS.epochs),
        'train_loss': all_train_avg_loss,
        'eval_loss': all_eval_avg_loss,
        'eval_acc': all_eval_avg_acc
    })

    now = datetime.now()
    current_time = now.strftime("%Y%m%d%H%M%S")
    export_path = '{}/{}_{:0.0f}_bz_{}_val_loss_{:0.3f}.csv'.format(
        FLAGS.metrics_dir, current_time, FLAGS.epochs, config['batch_size'],
        eval_avg_loss)
    df.to_csv(export_path, index=False)
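The checkpoints written by the loop above can later be restored with a matching load. A sketch under the assumption that the same config and vocabulary sizes are in scope (the checkpoint filename is a placeholder):

# Hypothetical restore of one checkpoint saved above.
checkpoint = torch.load('checkpoints/epoch_3_val_loss_0.812.pt')
model = Seq2Seq(config, vocab_inp_size, vocab_tar_size)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer = torch.optim.Adam(model.parameters(), lr=config.get('learning_rate', .001))
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
start_epoch = checkpoint['epoch'] + 1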
Example #10
def main(config_file):
    #read from config
    local_config = __import__(config_file)
    model_name = local_config.INPUTS['MODEL']
    # __import__ does not accept a leading '.', so import the models package by its plain name
    model = getattr(__import__('models', fromlist=[model_name]), model_name)
    batch_size = local_config.INPUTS['BATCHSIZE']
    optimizer_name = local_config.INPUTS['OPTIMIZER']
    optimizer = getattr(__import__('torch.optim', fromlist=[optimizer_name]), optimizer_name)
    num_epochs = local_config.INPUTS['EPOCHS']
    no_classes = local_config.INPUTS['CLASSES']
    learning_rate = local_config.INPUTS['LR']

    #logging
    start_time = time.time()
    date = time.strftime('%d-%m-%Y-%H-%M-%S', time.localtime())
    log_path = f'./birdsong/run_log/{model_name}_{date}'
    state_fname, log_fname, summ_tensor_board = logger.create_log(log_path)
    writer = SummaryWriter(str(summ_tensor_board))

    params = {'input_dir' : INPUT_DIR,
              'batchsize' : batch_size,
              'window' : 5000,
              'stride' : 2000,
              'spectrogram_func' : mel_s,
              'augmentation_func' : None}

    ds_test = SoundDataset(TEST, **params)
    ds_train = SoundDataset(TRAIN, **params)
    dl_test = DataLoader(ds_test, batch_size)
    dl_train = DataLoader(ds_train, batch_size)
    print('dataloaders initialized')

    time_axis = ds_test.shape[1]
    freq_axis = ds_test.shape[0]
    net = model(time_axis=time_axis, freq_axis=freq_axis, no_classes=no_classes)

    criterion = nn.CrossEntropyLoss()
    optimizer = optimizer(net.parameters(), lr=learning_rate)

    #local vars
    best_acc = 0
    for epoch in range(num_epochs):
        train(net, dl_train, epoch, optimizer, criterion, DEVICE)

        train_stats, train_conf_matrix = evaluate(net, dl_train, criterion, no_classes, DEVICE)
        print(f'Train Loss: {train_stats[0]:.5f}, Train Acc: {train_stats[1]:.5f}')
        test_stats, test_conf_matrix = evaluate(net, dl_test, criterion, no_classes, DEVICE)
        print(f'Test Loss: {test_stats[0]:.5f}, Test Acc: {test_stats[1]:.5f}')
        
        is_best = test_stats[1] > best_acc
        best_acc = max(test_stats[1], best_acc)
        print('Best Accuracy: {:.5f}'.format(best_acc))

        logger.save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': net.state_dict(),
            'best_accuracy': best_acc
        }, is_best, filename=state_fname)

        img_path = log_path + '/train' + '_' + str(epoch) + '.png'
        img = plot_conf_mat(img_path, train_conf_matrix) 

        img_path = log_path + '/test' + '_' + str(epoch) + '.png'
        img = plot_conf_mat(img_path, test_conf_matrix)
        
        logger.write_summary(writer, epoch, train_stats, test_stats, img)
        logger.dump_log_txt(date, start_time, local_config, train_stats, test_stats, best_acc, log_fname)

    writer.close()
    print('Finished Training')
Example #11
                                                )
  from collections import Counter                                              
  print(Counter(test_data.labels))
  

  """ Train """
  fc_dim = train_data[0][0].shape[0]
  #model = Net(fc_dim)
  model = LogisticRegression(fc_dim)
  
  print(f"Optimization parameters: {params['optimization']}")
  
  
  trained_model = Train(model,weights,train_data,test_data,**params['optimization'])
  print(trained_model.model)
  
  test_gen = torch.utils.data.DataLoader(test_data, batch_size=test_data.__len__(), shuffle=False)
  
  for test_data, test_labels in test_gen:
    evaluate(trained_model, test_data, test_labels)
  
  # model.load_state_dict(torch.load(PATH))
Example #12
def main(args):

    set_seed(SEED)

    train_transforms, test_transforms = get_transforms(args.dataset)
    print(f"Data transformations:\n{train_transforms}\n")

    # Get the dataloaders
    train_loader, test_loader = get_dataloaders(args.dataset, args.batch_size,
                                                args.workers, train_transforms,
                                                test_transforms)

    # Architecture
    if args.dataset == 'mnist':
        in_channels = 1
    else:
        raise NotImplementedError()
    if args.activation == 'relu':
        activation = nn.ReLU(inplace=True)
    else:
        raise NotImplementedError()
    if args.pooling == 'max':
        pooling = nn.MaxPool2d(kernel_size=(2, 2), stride=2)
    else:
        raise NotImplementedError()
    drop_rate = args.drop_rate

    # Build model
    model = LeNet5(in_channels, activation, pooling, drop_rate)
    if torch.cuda.is_available():
        torch.cuda.set_device(args.gpu)
        model = model.cuda()
    # Weight normal initialization
    if args.init_weights:
        model.apply(normal_initialization)

    start_epoch = 0

    # Loss function & optimizer
    if args.criterion == 'ce':
        criterion = nn.CrossEntropyLoss()
    else:
        raise NotImplementedError()
    if args.optimizer == 'sgd':
        # Issue
        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=args.weight_decay,
                              nesterov=args.nesterov)
    elif args.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)
    else:
        raise NotImplementedError()

    # Restore training state only after the optimizer exists (load_training_state needs it)
    if args.resume is not None:
        model, optimizer, start_epoch = load_training_state(
            model, optimizer, args.resume)

    scheduler = ReduceLROnPlateau(optimizer,
                                  factor=0.5,
                                  patience=0,
                                  threshold=1e-2,
                                  verbose=True)

    # Output folder
    output_folder = os.path.join(args.output_folder, args.training_name)
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    log_path = os.path.join(args.output_folder, 'logs', args.training_name)
    if os.path.exists(log_path):
        rmtree(log_path)
    logger = SummaryWriter(log_path)

    # Train
    best_loss = math.inf
    mb = master_bar(range(args.nb_epochs))
    for epoch_idx in mb:
        # Training
        train_epoch(model,
                    train_loader,
                    optimizer,
                    criterion,
                    mb,
                    tb_logger=logger,
                    epoch=start_epoch + epoch_idx)

        # Evaluation
        val_loss, accuracy = evaluate(model, test_loader, criterion)

        mb.first_bar.comment = f"Epoch {start_epoch+epoch_idx+1}/{start_epoch+args.nb_epochs}"
        mb.write(
            f'Epoch {start_epoch+epoch_idx+1}/{start_epoch+args.nb_epochs} - Validation loss: {val_loss:.4} (Acc@1: {accuracy:.2%})'
        )

        # State saving
        if val_loss < best_loss:
            print(
                f"Validation loss decreased {best_loss:.4} --> {val_loss:.4}: saving state..."
            )
            best_loss = val_loss
            torch.save(
                dict(epoch=start_epoch + epoch_idx,
                     model_state_dict=model.state_dict(),
                     optimizer_state_dict=optimizer.state_dict(),
                     val_loss=val_loss),
                os.path.join(output_folder, "training_state.pth"))

        if logger is not None:
            current_iter = (start_epoch + epoch_idx + 1) * len(train_loader)
            logger.add_scalar(f"Validation loss", val_loss, current_iter)
            logger.add_scalar(f"Error rate", 1 - accuracy, current_iter)
            logger.flush()
        scheduler.step(val_loss)
Example #13
def run():
    USE_CUDA = torch.cuda.is_available()

    config_path = os.path.join("experiments", FLAGS.config)

    if not os.path.exists(config_path):
        raise FileNotFoundError

    with open(config_path, "r") as f:
        config = json.load(f)

    config["gpu"] = torch.cuda.is_available()

    if "wancong" in config_path:
        dataset = PolyDataset()
        eval_dataset = PolyDataset("test_small.txt")
    else:
        dataset = ToyDataset(5, 15)
        eval_dataset = ToyDataset(5, 15, type='eval')

    BATCHSIZE = 30
    train_loader = data.DataLoader(dataset,
                                   batch_size=BATCHSIZE,
                                   shuffle=False,
                                   collate_fn=pad_collate,
                                   drop_last=True)
    eval_loader = data.DataLoader(eval_dataset,
                                  batch_size=BATCHSIZE,
                                  shuffle=False,
                                  collate_fn=pad_collate,
                                  drop_last=True)
    config["batch_size"] = BATCHSIZE

    # Models
    model = Seq2Seq(config)

    print(f"total number of parameters: {count_parameters(model)}")

    if USE_CUDA:
        model = model.cuda()

    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config.get("learning_rate", .001))

    print("=" * 60)
    print(model)
    print("=" * 60)
    for k, v in sorted(config.items(), key=lambda i: i[0]):
        print(" (" + k + ") : " + str(v))
    print()
    print("=" * 60)

    print("\nInitializing weights...")
    for name, param in model.named_parameters():
        if 'bias' in name:
            torch.nn.init.constant_(param, 0.0)
        elif 'weight' in name:
            torch.nn.init.xavier_normal_(param)

    for epoch in range(FLAGS.epochs):
        run_state = (epoch, FLAGS.epochs, FLAGS.train_size)

        # Train needs to return model and optimizer, otherwise the model keeps restarting from zero at every epoch
        model, optimizer = train(model, optimizer, train_loader, run_state)
        evaluate(model, eval_loader)
Example #14
    return train_iterator, valid_iterator, test_iterator, text_field.vocab

# load data
print("\nLoading data...")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
text_field = data.Field(tokenize='spacy', lower=True, include_lengths=True, batch_first=True)
label_field = data.Field(sequential=False, use_vocab=False, batch_first=True, dtype=torch.float)
docid_field = data.RawField()
train_iter, dev_iter, test_iter, text_voca = fall_data(docid_field, text_field, label_field, device=-1, repeat=False)

# train or eval
if args.test:
    best_model = model.LSTM().to(device)
    optimizer = optim.Adam(best_model.parameters(), lr=args.lr)
    t.load_checkpoint(destination_folder + '/model.pt', best_model, optimizer)
    t.evaluate(best_model, test_iter)
else:
    print('start training')
    wandb.init()
    model = model.LSTM(text_voca).to(device)
    wandb.watch(model)  # watch the instantiated network, not the imported module
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    eval_every = len(train_iter) // 2
    t.train(model=model, optimizer=optimizer, train_loader=train_iter, valid_loader=dev_iter,
            num_epochs=args.epochs, eval_every=eval_every, file_path=destination_folder, device=device)
Example #15
def do_train(model, tdata, vdata, measures):
    start_time = time.time()
    input_size = len(_g.vocab)

    # TODO parameter sanity checker

    torch.save(model, _g.args.path + '/' + _g.args.filename)
    if not _g.args.quiet:
        print('Training...')

    optimizer = optim.Adam(model.parameters(), lr=_g.args.lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, _g.args.decay, _g.args.decay_factor)
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.85, patience=30)
    criterion = nn.NLLLoss(ignore_index=_g.vocab.stoi[_g.padding_symbol], reduce=False)

    best_eval_loss = None
    t_prints, e_prints = None, None
    epoch_times = None
    saving = False

    try:
        for epoch, (tbatch, vbatch) in enumerate(zip(tdata, vdata)):
            if _g.args.time:
                if time.time() - start_time > _g.args.time:
                    break
            elif epoch == _g.args.epochs:
                break

            epoch_start_time = time.time()
            t_losses = _t.tbtt(model, criterion, optimizer, _u.to_one_hot(tbatch.before, input_size),
                                            tbatch.after)
            t_losses = t_losses.unsqueeze(dim=1)

            e_losses = _t.evaluate(model, criterion, _u.to_one_hot(vbatch.before, input_size), vbatch.after, measures)
            e_losses = e_losses.unsqueeze(dim=1)

            t_prints = t_losses if t_prints is None else torch.cat((t_prints, t_losses), dim=1)
            e_prints = e_losses if e_prints is None else torch.cat((e_prints, e_losses), dim=1)

            epoch_end_time = time.time()
            epoch_time = torch.tensor(epoch_end_time - epoch_start_time)
            epoch_times = epoch_time if epoch_times is None else torch.stack((epoch_times, epoch_time), dim=0)

            if (epoch + 1) % _g.args.print_every == 0:
                t_prints = t_prints.mean(dim=1)
                e_prints = e_prints.mean(dim=1)

                if not _g.args.quiet:
                    _u.pretty_print(_g.args.epochs, _g.args.time, epoch+1, epoch_end_time - start_time,
                                    _u.to_builtin(epoch_times.mean()), _u.to_builtin(torch.chunk(t_prints, 2)),
                                    _u.to_builtin(
                                        {n: (x, y) for n,x,y in
                                         zip(['loss'] + list(measures.keys()), e_prints[::2], e_prints[1::2])}
                                    )
                             )
                t_prints, e_prints = None, None

            if not best_eval_loss or e_losses[0].item() < best_eval_loss:
                saving = True
                best_eval_loss = e_losses[0].item()
                torch.save(model, _g.args.path + '/' + _g.args.filename)
                saving = False

            scheduler.step()

        if not _g.args.quiet:
            print('Training done successfully')

    except KeyboardInterrupt:
        print('\nExiting earlier than expected. Wait a moment!')

        if saving:  # In case it was interrupted while saving
            torch.save(model, _g.args.path + '/' + _g.args.filename)
Example #16
def run():
    USE_CUDA = torch.cuda.is_available()
    FLAGS.config = 'example_seq2seq.json'
    config_path = os.path.join("experiments", FLAGS.config)
    print(FLAGS.config)

    if not os.path.exists(config_path):
        raise FileNotFoundError

    with open(config_path, "r") as f:
        config = json.load(f)

    config["gpu"] = torch.cuda.is_available()

    writer = SummaryWriter('experiments/finally')

    #     dataset = ToyDataset(5, 15)
    #     eval_dataset = ToyDataset(5, 15, type='eval')
    dataset = Toy_Numbers(10)
    eval_dataset = Toy_Numbers(10, train=False)
    BATCHSIZE = 32
    train_loader = data.DataLoader(dataset,
                                   batch_size=BATCHSIZE,
                                   shuffle=False,
                                   collate_fn=pad_collate,
                                   drop_last=True)
    eval_loader = data.DataLoader(eval_dataset,
                                  batch_size=BATCHSIZE,
                                  shuffle=False,
                                  collate_fn=pad_collate,
                                  drop_last=True)
    config["batch_size"] = BATCHSIZE

    # Models
    model = Seq2Seq(config)
    model = model.float()

    # dataiter = iter(train_loader)
    # sample_input= dataiter.next()

    # writer.add_graph(model, sample_input)
    # writer.close()

    if USE_CUDA:
        model = model.cuda()

    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config.get("learning_rate", .001))

    print("=" * 60)
    print(model)
    print("=" * 60)
    for k, v in sorted(config.items(), key=lambda i: i[0]):
        print(" (" + k + ") : " + str(v))
    print()
    print("=" * 60)

    print("\nInitializing weights...")
    for name, param in model.named_parameters():
        if 'bias' in name:
            torch.nn.init.constant_(param, 0.0)
        elif 'weight' in name:
            torch.nn.init.xavier_normal_(param)

    for epoch in range(FLAGS.epochs):
        run_state = (epoch, FLAGS.epochs, FLAGS.train_size)

        # Train needs to return model and optimizer, otherwise the model keeps restarting from zero at every epoch
        model, optimizer = train(model, optimizer, train_loader, run_state,
                                 writer)
        # print("losses", l_list)
        # for i in l_list:
        #     # print(i)
        #     writer.add_scalar('Loss/train',i)
        evaluate(model, eval_loader, writer)
Example #17
def run(args):

    print('\nSettings: \n', args, '\n')

    args.model_signature = str(dt.datetime.now())[0:19].replace(' ', '_')
    args.model_signature = args.model_signature.replace(':', '_')

    ########## Find GPUs
    (gpu_config, n_gpu_used) = set_gpus(args.n_gpu)

    ########## Data, model, and optimizer setup
    mnist = MNIST(args)

    x = tf.placeholder(tf.float32, [None, 28, 28, 1])

    if args.model == 'hvae':
        if not args.K:
            raise ValueError('Must set number of flow steps when using HVAE')
        elif not args.temp_method:
            raise ValueError('Must set tempering method when using HVAE')
        model = HVAE(args, mnist.avg_logit)
    elif args.model == 'cnn':
        model = VAE(args, mnist.avg_logit)
    else:
        raise ValueError('Invalid model choice')

    elbo = model.get_elbo(x, args)
    nll = model.get_nll(x, args)

    optimizer = AdamaxOptimizer(learning_rate=args.learn_rate,
                                eps=args.adamax_eps)
    opt_step = optimizer.minimize(-elbo)

    ########## Tensorflow and saver setup
    sess = tf.Session(config=gpu_config)
    sess.run(tf.global_variables_initializer())

    saver = tf.train.Saver()
    savepath = os.path.join(args.checkpoint_dir, args.model_signature,
                            'model.ckpt')

    if not os.path.exists(args.checkpoint_dir):
        os.makedirs(args.checkpoint_dir)

    ########## Test that GPU memory is sufficient
    if n_gpu_used > 0:
        try:
            x_test = mnist.next_test_batch()
            (t_e, t_n) = sess.run((elbo, nll), {x: x_test})
            mnist.batch_idx_test = 0  # Reset batch counter if it works
        except:
            raise MemoryError("""
                Likely insufficient GPU memory
                Reduce test batch by lowering the -tbs parameter
                """)

    ########## Training Loop

    train_elbo_hist = []
    val_elbo_hist = []

    # For early stopping
    best_elbo = -np.inf
    es_epochs = 0
    epoch = 0

    train_times = []

    for epoch in range(1, args.epochs + 1):

        t0 = time.time()
        train_elbo = train(epoch, mnist, opt_step, elbo, x, args, sess)
        train_elbo_hist.append(train_elbo)
        train_times.append(time.time() - t0)
        print('One epoch took {:.2f} seconds'.format(time.time() - t0))

        val_elbo = validate(mnist, elbo, x, sess)
        val_elbo_hist.append(val_elbo)

        if val_elbo > best_elbo:

            # Save the model that currently generalizes best
            es_epochs = 0
            best_elbo = val_elbo
            saver.save(sess, savepath)
            best_model_epoch = epoch

        elif args.early_stopping_epochs > 0:

            es_epochs += 1

            if es_epochs >= args.early_stopping_epochs:
                print('***** STOPPING EARLY ON EPOCH {} of {} *****'.format(
                    epoch, args.epochs))
                break

        print('--> Early stopping: {}/{} (Best ELBO: {:.4f})'.format(
            es_epochs, args.early_stopping_epochs, best_elbo))
        print('\t Current val ELBO: {:.4f}\n'.format(val_elbo))

        if np.isnan(val_elbo):
            raise ValueError('NaN encountered!')

    train_times = np.array(train_times)
    mean_time = np.mean(train_times)
    std_time = np.std(train_times)
    print('Average train time per epoch: {:.2f} +/- {:.2f}'.format(
        mean_time, std_time))

    ########## Evaluation

    # Restore the best-performing model
    saver.restore(sess, savepath)

    test_elbos = np.zeros(args.n_nll_runs)
    test_nlls = np.zeros(args.n_nll_runs)

    for i in range(args.n_nll_runs):

        print('\n---- Test run {} of {} ----\n'.format(i + 1, args.n_nll_runs))
        (test_elbos[i], test_nlls[i]) = evaluate(mnist, elbo, nll, x, args,
                                                 sess)

    mean_elbo = np.mean(test_elbos)
    std_elbo = np.std(test_elbos)

    mean_nll = np.mean(test_nlls)
    std_nll = np.std(test_nlls)

    print('\nTest ELBO: {:.2f} +/- {:.2f}'.format(mean_elbo, std_elbo))
    print('Test NLL: {:.2f} +/- {:.2f}'.format(mean_nll, std_nll))

    ########## Logging, Saving, and Plotting

    with open(args.logfile, 'a') as ff:
        print('----------------- Test ID {} -----------------'.format(
            args.model_signature),
              file=ff)
        print(args, file=ff)
        print('Stopped after {} epochs'.format(epoch), file=ff)
        print('Best model from epoch {}'.format(best_model_epoch), file=ff)
        print('Average train time per epoch: {:.2f} +/- {:.2f}'.format(
            mean_time, std_time),
              file=ff)

        print('FINAL VALIDATION ELBO: {:.2f}'.format(val_elbo_hist[-1]),
              file=ff)
        print('Test ELBO: {:.2f} +/- {:.2f}'.format(mean_elbo, std_elbo),
              file=ff)
        print('Test NLL: {:.2f} +/- {:.2f}\n'.format(mean_nll, std_nll),
              file=ff)

    if not os.path.exists(args.pickle_dir):
        os.makedirs(args.pickle_dir)

    train_dict = {
        'train_elbo': train_elbo_hist,
        'val_elbo': val_elbo_hist,
        'args': args
    }
    pickle.dump(
        train_dict,
        open(os.path.join(args.pickle_dir, args.model_signature + '.p'), 'wb'))

    if not os.path.exists(args.plot_dir):
        os.makedirs(args.plot_dir)

    tf_gen_samples = model.get_samples(args)
    np_gen_samples = sess.run(tf_gen_samples)
    plot_digit_samples(np_gen_samples, args)

    plot_training_curve(train_elbo_hist, val_elbo_hist, args)

    ########## Email notification upon test completion

    try:

        msg_text = """Test completed for ID {0}.

        Parameters: {1}

        Test ELBO: {2:.2f} +/- {3:.2f}
        Test NLL: {4:.2f} +/- {5:.2f} """.format(args.model_signature, args,
                                                 mean_elbo, std_elbo, mean_nll,
                                                 std_nll)

        msg = MIMEText(msg_text)
        msg['Subject'] = 'Test ID {0} Complete'.format(args.model_signature)
        msg['To'] = args.receiver
        msg['From'] = args.sender

        s = smtplib.SMTP('localhost')
        s.sendmail(args.sender, [args.receiver], msg.as_string())
        s.quit()

    except:

        print('Unable to send email from sender {0} to receiver {1}'.format(
            args.sender, args.receiver))