Example #1
def main():
    start_epoch = 1
    model = Loop(args)
    model.cuda()

    if args.checkpoint != '':
        checkpoint_args_path = os.path.dirname(args.checkpoint) + '/args.pth'
        checkpoint_args = torch.load(checkpoint_args_path)

        start_epoch = checkpoint_args[3]
        model.load_state_dict(torch.load(args.checkpoint))

    criterion = MaskedMSE().cuda()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Keep track of losses
    train_losses = []
    eval_losses = []
    best_eval = float('inf')

    # Begin!
    for epoch in range(start_epoch, start_epoch + args.epochs):
        train(model, criterion, optimizer, epoch, train_losses)
        eval_loss = evaluate(model, criterion, epoch, eval_losses)
        if eval_loss < best_eval:
            torch.save(model.state_dict(), '%s/bestmodel.pth' % (args.expName))
            best_eval = eval_loss

        torch.save(model.state_dict(), '%s/lastmodel.pth' % (args.expName))
        torch.save([args, train_losses, eval_losses, epoch],
                   '%s/args.pth' % (args.expName))
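Note: Example #1 builds its criterion with MaskedMSE(), whose definition is not shown here; the idea is an MSE that ignores padded frames beyond each target's true length. A rough illustrative sketch of such a criterion in PyTorch (this is not the repository's implementation, and the class name is a placeholder):

import torch
import torch.nn as nn

class MaskedMSESketch(nn.Module):
    # Illustrative masked MSE: average squared error over valid frames only.
    # `output` and `target` are (batch, time, feats) tensors; `lengths` is a
    # tensor with the number of valid frames per sequence, so the padded tail
    # does not contribute to the loss.
    def forward(self, output, target, lengths):
        max_len = target.size(1)
        # mask[b, t] = 1 while t < lengths[b], 0 on the padded tail.
        mask = (torch.arange(max_len, device=target.device)[None, :]
                < lengths[:, None]).float().unsqueeze(-1)
        sq_err = (output - target) ** 2 * mask
        return sq_err.sum() / (mask.sum() * target.size(2))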
Example #2
def main():
    args = init()

    checkpoint = args.checkpoint

    # Recover the training-time arguments saved next to the checkpoint.
    checkpoint_args_path = os.path.dirname(checkpoint) + '/args.pth'
    train_args = torch.load(checkpoint_args_path)[0]
    train_args.noise = 0
    train_args.checkpoint = checkpoint

    # Build the model from the training-time arguments rather than the CLI ones.
    args_to_use = train_args

    print(args_to_use)
    model = Loop(args_to_use)

    model.cuda()
    model.load_state_dict(
        torch.load(args_to_use.checkpoint,
                   map_location=lambda storage, loc: storage))

    criterion = MaskedMSE().cuda()

    loader = get_loader(args.data, args.max_seq_len, args.batch_size,
                        args.nspk)

    eval_loss = evaluate(model, loader, criterion)

    print(eval_loss)
Example #3
def eval_loss(checkpoint='models/vctk/bestmodel.pth',
              data='data/vctk',
              max_seq_len=1000,
              nspk=22,
              gpu=0,
              batch_size=64,
              seed=1):
    #args = init()
    torch.cuda.set_device(gpu)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    print(checkpoint)
    print(os.getcwd())

    # Recover the training-time arguments saved next to the checkpoint.
    checkpoint_args_path = os.path.dirname(checkpoint) + '/args.pth'
    train_args = torch.load(checkpoint_args_path)[0]
    train_args.noise = 0
    train_args.checkpoint = checkpoint

    args_to_use = train_args

    print(args_to_use)
    model = Loop(args_to_use)

    model.cuda()
    model.load_state_dict(
        torch.load(args_to_use.checkpoint,
                   map_location=lambda storage, loc: storage))

    criterion = MaskedMSE().cuda()

    loader = get_loader(data, max_seq_len, batch_size, nspk)

    eval_loss, my_eval_loss, loss_workings = evaluate(model, loader, criterion)

    print(eval_loss)
    print(my_eval_loss)

    return eval_loss, loss_workings
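For completeness, the eval_loss helper in Example #3 can be called directly with keyword arguments; the values below are just its own defaults:

loss, workings = eval_loss(checkpoint='models/vctk/bestmodel.pth',
                           data='data/vctk',
                           gpu=0,
                           batch_size=64)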
Example #4
def main():

    cuda_available = torch.cuda.is_available()
    train_params, dataset_params = get_e_arguments()
    net = TimeStretch()
    epoch_trained = 0
    if train_params['restore_model']:
        net = load_model(net, train_params['restore_dir'],
                         train_params['restore_model'])
        if net is None:
            print("Initialize network and train from scratch.")
            net = TimeStretch()
        else:
            epoch_trained = 0

    train_loader, X_val_var, y_val_var, L_test = audio_data_loader(
        **dataset_params)

    if not cuda_available:
        warnings.warn(
            "CUDA is not available; cannot train the model on multiple GPUs.")
    if cuda_available:
        # Remove train_params["device_ids"] for single GPU
        if train_params["device_ids"]:
            batch_size = dataset_params["batch_size"]
            num_gpu = len(train_params["device_ids"])
            assert batch_size % num_gpu == 0
            net = nn.DataParallel(net, device_ids=train_params['device_ids'])
        torch.backends.cudnn.benchmark = True
        net = net.cuda()

    criterion = MaskedMSE()  # alternatively: nn.MSELoss()
    optimizer = get_optimizer(net, train_params['optimizer'],
                              train_params['learning_rate'],
                              train_params['momentum'])

    if cuda_available:
        criterion = criterion.cuda()
    if not os.path.exists(train_params['log_dir']):
        os.makedirs(train_params['log_dir'])
    if not os.path.exists(train_params['restore_dir']):
        os.makedirs(train_params['restore_dir'])
    train_loss_log_file = open(
        train_params['log_dir'] + 'train_loss_log_e.log', 'a')
    test_loss_log_file = open(train_params['log_dir'] + 'test_loss_log_e.log',
                              'a')

    # Log the training start time
    time = str(datetime.now())
    line = 'Training Started at ' + time + ' !!!\n'
    train_loss_log_file.writelines(line)
    train_loss_log_file.flush()

    # Keep track of losses
    train_losses = []
    eval_losses = []
    best_eval = float('inf')

    # Begin!
    for epoch in range(train_params['num_epochs']):
        train(net, criterion, optimizer, train_losses, train_params,
              train_loss_log_file, train_loader, cuda_available)
        eval_loss = evaluate(net, criterion, epoch, eval_losses, X_val_var,
                             y_val_var, L_test, test_loss_log_file,
                             cuda_available)

        #if eval_loss < best_eval:

        #    save_model(net,1,train_params['restore_dir'])

        #    torch.save(net.state_dict(), train_params['restore_dir'] +'bestmodel.pth')
        #    best_eval = eval_loss
        if epoch % train_params['check_point_every'] == 0:
            save_model(net, epoch_trained + epoch + 1,
                       train_params['restore_dir'])
            torch.save([train_losses, eval_losses, epoch],
                       train_params['restore_dir'] + 'data_params')

    # Log the training end time
    time = str(datetime.now())
    line = 'Training Ended at ' + time + ' !!!\n'
    train_loss_log_file.writelines(line)
    train_loss_log_file.flush()

    train_loss_log_file.close()
    test_loss_log_file.close()
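Example #4 wraps the network in nn.DataParallel when several device ids are configured and asserts that the batch size divides evenly across them. A minimal standalone sketch of that pattern (the toy model and device_ids below are placeholders, not the values from the example):

import torch
import torch.nn as nn

device_ids = [0, 1]          # placeholder GPU ids
batch_size = 64
net = nn.Linear(128, 63)     # stand-in for TimeStretch()

if torch.cuda.is_available() and device_ids:
    # DataParallel splits each batch across the listed GPUs, so the batch
    # size should divide evenly by the number of devices.
    assert batch_size % len(device_ids) == 0
    net = nn.DataParallel(net, device_ids=device_ids)
    torch.backends.cudnn.benchmark = True
    net = net.cuda()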
Example #5
def main():
    # load datasets
    train_dataset_path = os.path.join(args.data, 'numpy_features')
    train = NpzFolder(train_dataset_path)
    train.remove_too_long_seq(args.max_seq_len)
    train_loader = Dataset_Iter(train, batch_size=args.batch_size)
    train_loader.shuffle()

    valid_dataset_path = os.path.join(args.data, 'numpy_features_valid')
    valid = NpzFolder(valid_dataset_path)
    valid_loader = Dataset_Iter(valid, batch_size=args.batch_size)
    valid_loader.shuffle()

    # train_loader = Dataset_Iter(valid, batch_size=args.batch_size)

    # initiate tensorflow model
    input0 = tf.placeholder(tf.int64, [None, None])
    input1 = tf.placeholder(tf.float32, [None])  # contains length of sentence
    speaker = tf.placeholder(tf.int32, [None, 1])  # speaker identity
    target0 = tf.placeholder(tf.float32, [None, None, 63])
    target1 = tf.placeholder(tf.float32, [None])  # target sequence lengths (fed with tgtLengths)
    # idente  = tf.placeholder(tf.float32, [None,256])
    # s_t = tf.placeholder(tf.float32, [64,319,20])
    # mu_t = tf.placeholder(tf.float32, [64,10])
    # context  = tf.placeholder(tf.float32, [64,64,256])
    start = tf.placeholder(tf.bool, shape=(), name='start_new_batch')
    train_flag = tf.placeholder(tf.bool, shape=(), name='train_flag')
    # out_seq = tf.placeholder(tf.float32, [None, None, 63])
    # attns_seq = tf.placeholder(tf.float32, [None, None, 63])

    model = Loop(args)

    # Define loss and optimizer
    output, attns = model.forward(input0, speaker, target0, start, train_flag)
    loss_op = MaskedMSE(output, target0, target1)
    optimizer = tf.train.AdamOptimizer(learning_rate=args.lr)
    train_op, clip_flag = gradient_check_and_clip(loss_op, optimizer,
                                                  args.clip_grad,
                                                  args.ignore_grad)
    merged = tf.summary.merge_all()

    # Initialize the variables (i.e. assign their default value)
    init = tf.global_variables_initializer()

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver(global_variable_list)
    load_model = args.checkpoint != ''
    save_model = True
    best_eval = float('inf')
    sess_idx = 0
    train_losses = []
    valid_losses = []
    with tf.Session() as sess:
        # Run the initializer

        train_writer = tf.summary.FileWriter(
            "%s/%s/train" % (args.outpath, expName), sess.graph)
        valid_writer = tf.summary.FileWriter(
            "%s/%s/valid" % (args.outpath, expName), sess.graph)

        # Restore variables from disk.
        sess.run(init)
        if load_model:
            saver.restore(sess, args.checkpoint)
            print("Model restored from file: %s" % args.checkpoint)

        for epoch in range(args.epochs):
            train_enum = tqdm(train_loader,
                              desc='Train epoch %d' % epoch,
                              total=ceil_on_division(len(train_loader),
                                                     args.batch_size))
            # Train data
            for batch_ind in train_enum:
                batch_loss_list = []
                (srcBatch, srcLengths), (tgtBatch, tgtLengths), full_spkr = \
                    make_a_batch(train_loader.dataset, batch_ind)
                batch_iter = TBPTTIter((srcBatch, srcLengths),
                                       (tgtBatch, tgtLengths), full_spkr,
                                       args.seq_len)
                for (srcBatch,
                     srcLenths), (tgtBatch,
                                  tgtLengths), spkr, start2 in batch_iter:
                    loss, _, clip_flag1, summary = sess.run(
                        [loss_op, train_op, clip_flag, merged],
                        feed_dict={
                            input0: srcBatch,
                            speaker: spkr,
                            target0: tgtBatch,
                            target1: tgtLengths,
                            start: start2,
                            train_flag: True
                        })
                    train_writer.add_summary(summary, sess_idx)
                    sess_idx += 1
                    if not clip_flag1:
                        batch_loss_list.append(loss)
                    else:
                        # If too many '-' appear here, the gradients are exploding.
                        print('-')
                train_losses.append(batch_loss_list)
                if len(batch_loss_list) != 0:
                    batch_loss = sum(batch_loss_list) / len(batch_loss_list)
                    batch_loss_list.append(batch_loss)
                else:
                    batch_loss = -1.
                train_enum.set_description('Train (loss %.2f) epoch %d' %
                                           (batch_loss, epoch))
                train_enum.update(srcBatch.shape[0])

            # Validate data
            valid_enum = tqdm(valid_loader,
                              desc='Validating epoch %d' % epoch,
                              total=ceil_on_division(len(valid_loader),
                                                     args.batch_size))
            batch_loss_list = []
            for batch_ind in valid_enum:
                (srcBatch, srcLengths), (tgtBatch, tgtLengths), full_spkr = \
                    make_a_batch(valid_loader.dataset, batch_ind)

                loss, summary = sess.run(
                    [loss_op, merged],
                    feed_dict={
                        input0: srcBatch,
                        speaker: full_spkr,
                        target0: tgtBatch,
                        target1: tgtLengths,
                        start: True,
                        train_flag: False
                    })
                batch_loss_list.append(loss)
                valid_writer.add_summary(summary, sess_idx)
                sess_idx += 1
                valid_enum.set_description('Validating (loss %.2f) epoch %d' %
                                           (loss, epoch))
            if len(batch_loss_list) != 0:
                valid_losses.append(batch_loss_list)
                valid_loss = sum(batch_loss_list) / len(batch_loss_list)
            else:
                valid_loss = 99999.
            if valid_loss < best_eval and save_model:
                best_eval = valid_loss
                save_path = saver.save(sess,
                                       "%s/bestmodel.ckpt" % args.expName)
                print("NEW BEST MODEL!, model saved in file: %s" % save_path)
            print('Final validation loss for epoch %d is: %.2f' %
                  (epoch, valid_loss))
            train_loader.shuffle()
            valid_loader.shuffle()

        if save_model:
            save_path = saver.save(sess, "%s/model.ckpt" % args.expName)
            print("Model saved in file: %s" % save_path)

        train_writer.close()
        valid_writer.close()
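The training loop in Example #5 feeds each utterance to the graph in fixed-length chunks through TBPTTIter, passing start=True only on the first chunk so the recurrent state can be reset. A hypothetical, simplified version of that chunking pattern (this is not the repository's TBPTTIter, just an illustration):

import numpy as np

def tbptt_chunks(src, tgt, seq_len):
    # Yield (src, tgt_chunk, start) for truncated backpropagation through time.
    # The target (batch, time, feats) is cut into windows of `seq_len` frames;
    # `start` is True only for the first window of each sequence.
    for offset in range(0, tgt.shape[1], seq_len):
        yield src, tgt[:, offset:offset + seq_len], offset == 0

# Example: 2 sequences, 250 target frames, 100-frame windows.
src = np.zeros((2, 40), dtype=np.int64)
tgt = np.zeros((2, 250, 63), dtype=np.float32)
for _, chunk, start in tbptt_chunks(src, tgt, 100):
    print(chunk.shape, start)   # (2, 100, 63) True / (2, 100, 63) False / (2, 50, 63) False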
Example #6
def main():
    start_epoch = 1
    model = Loop(args)
    model.cuda()

    if args.checkpoint != '':
        checkpoint_args_path = os.path.dirname(args.checkpoint) + '/args.pth'
        checkpoint_args = torch.load(checkpoint_args_path)

        start_epoch = checkpoint_args[3]
        model.load_state_dict(
            torch.load(args.checkpoint,
                       map_location=lambda storage, loc: storage))

    criterion = MaskedMSE().cuda()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Keep track of losses
    train_losses = []
    eval_losses = []
    best_eval = float('inf')
    training_monitor = TrainingMonitor(file=args.expNameRaw,
                                       exp_name=args.expNameRaw,
                                       b_append=True,
                                       path='training_logs')

    # Begin!
    for epoch in range(start_epoch, start_epoch + args.epochs):
        # train model
        train(model, criterion, optimizer, epoch, train_losses)

        # evaluate on validation set
        eval_loss = evaluate(model, criterion, epoch, eval_losses)

        #chk, _, _, _ = ec.evaluate(model=model,
        #                                  criterion=criterion,
        #                                  epoch=epoch,
        #                                  loader=valid_loader,
        #                                  metrics=('loss')
        #                                  )

        # Save a checkpoint every epoch so evaluation metrics can be computed
        # across the whole training curve later on.
        torch.save(model.state_dict(),
                   '%s/epoch_%d.pth' % (args.expName, epoch))
        torch.save([args, train_losses, eval_losses, epoch],
                   '%s/args.pth' % (args.expName))

        if eval_loss < best_eval:
            # if this is the best model yet, save it as 'bestmodel'
            torch.save(model.state_dict(), '%s/bestmodel.pth' % (args.expName))
            best_eval = eval_loss

        # also keep a running copy of 'lastmodel'
        torch.save(model.state_dict(), '%s/lastmodel.pth' % (args.expName))
        torch.save([args, train_losses, eval_losses, epoch],
                   '%s/args.pth' % (args.expName))

        # evaluate on a randomised subset of the training set
        if epoch % args.eval_epochs == 0:
            train_eval_loader = ec.get_training_data_for_eval(
                data=args.data, len_valid=len(valid_loader.dataset))

            train_loss, _, _, _ = ec.evaluate(model=model,
                                              criterion=criterion,
                                              epoch=epoch,
                                              loader=train_eval_loader,
                                              metrics=('loss',))
        else:
            train_loss = None

        # store loss metrics
        training_monitor.insert(epoch=epoch,
                                valid_loss=eval_loss,
                                train_loss=train_loss)
        training_monitor.write()
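Since Examples #1 and #6 save [args, train_losses, eval_losses, epoch] to args.pth next to each model file, a run saved this way can be resumed roughly as follows (the checkpoint path is a placeholder, and Loop is assumed to be importable as in the examples):

import os
import torch

checkpoint = 'checkpoints/exp1/lastmodel.pth'   # placeholder path

# args.pth was written as [args, train_losses, eval_losses, epoch].
saved_args, train_losses, eval_losses, last_epoch = torch.load(
    os.path.dirname(checkpoint) + '/args.pth')

model = Loop(saved_args)
model.load_state_dict(
    torch.load(checkpoint, map_location=lambda storage, loc: storage))
start_epoch = last_epoch + 1   # continue from the following epoch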