import torch
import torch.nn as nn
import torch.optim as optim

# Dataset, ChainEncoder, Predictor, encode and the module-level `gpu` flag are
# assumed to be defined elsewhere in this project.
def train(features,
          fea_len,
          split_frac,
          out_file,
          save=False,
          save_folder=None):
    '''
    Hyperparameters:
        features   -- the input feature set
        split_frac -- fraction of the data used for training
        fea_len    -- length of the encoded feature vector
    '''
    if isinstance(out_file, str):
        out_file = open(out_file, 'w')
    d = Dataset(features, split_frac, 1, gpu)
    print('defining architecture')
    enc = ChainEncoder(d.get_v_fea_len(), d.get_e_fea_len(), fea_len, 'last')
    predictor = Predictor(fea_len)
    loss = nn.NLLLoss()
    if gpu:
        enc.cuda()
        predictor.cuda()
        loss.cuda()

    optimizer = optim.Adam(
        list(enc.parameters()) + list(predictor.parameters()))

    print('training')
    test_v_features, test_e_features, test_A_pls, test_B_pls, test_y = d.get_test_pairs()
    test_y = test_y.data.cpu().numpy()
    for train_iter in range(12000):
        v_features, e_features, A_pls, B_pls, y = d.get_train_pairs(100)
        enc.zero_grad()
        predictor.zero_grad()
        A_code, B_code = encode(enc, fea_len, v_features, e_features, A_pls,
                                B_pls)
        softmax_output = predictor(A_code, B_code)
        loss_val = loss(softmax_output, y)
        loss_val.backward()
        optimizer.step()

        enc.zero_grad()
        predictor.zero_grad()
        test_A_code, test_B_code = encode(enc, fea_len, test_v_features,
                                          test_e_features, test_A_pls,
                                          test_B_pls)
        softmax_output = predictor(test_A_code, test_B_code).data.cpu().numpy()
        test_y_pred = softmax_output.argmax(axis=1)
        cur_acc = (test_y_pred == test_y).mean()  # fraction of test pairs predicted correctly
        out_file.write('%f\n' % cur_acc)
        out_file.flush()
        if save and train_iter % 50 == 0:
            if save_folder[-1] == '/':
                save_folder = save_folder[:-1]
            torch.save(enc.state_dict(),
                       '%s/%i_enc.model' % (save_folder, train_iter))
            torch.save(predictor.state_dict(),
                       '%s/%i_pred.model' % (save_folder, train_iter))
    out_file.close()
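The helper names Dataset, ChainEncoder, Predictor, encode and the module-level gpu flag belong to the surrounding project and are not shown on this page. Below is a minimal invocation sketch; every argument value is an illustrative placeholder rather than a value taken from the source.

if __name__ == '__main__':
    train(features='features.pkl',    # hypothetical path/object holding the graph features
          fea_len=32,                 # length of the encoded feature vector
          split_frac=0.8,             # fraction of pairs used for training
          out_file='train_acc.log',   # per-iteration test accuracy is appended here
          save=True,
          save_folder='checkpoints')  # must already exist; models are saved every 50 iterations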
Example #2
def train(features, fea_len, split_frac, out_file):
    if isinstance(out_file, str):
        out_file = open(out_file, 'w')
    d = Dataset(features, split_frac, gpu)
    print('defining architecture')
    enc = ChainEncoder(d.get_v_fea_len(), d.get_e_fea_len(), fea_len, 'last')
    predictor = Predictor(fea_len)
    loss = nn.NLLLoss()
    if gpu:
        enc.cuda()
        predictor.cuda()
        loss.cuda()

    optimizer = optim.Adam(
        list(enc.parameters()) + list(predictor.parameters()))

    print('training')
    test_chain_A, test_chain_B, test_y = d.get_test_pairs()
    test_y = test_y.data.cpu().numpy()
    for train_iter in range(4000):
        chains_A, chains_B, y = d.get_train_pairs(1000)
        enc.zero_grad()
        predictor.zero_grad()
        output_A = enc(chains_A)
        output_B = enc(chains_B)
        softmax_output = predictor(output_A, output_B)
        loss_val = loss(softmax_output, y)
        loss_val.backward()
        optimizer.step()

        enc.zero_grad()
        predictor.zero_grad()
        output_test_A = enc(test_chain_A)
        output_test_B = enc(test_chain_B)
        softmax_output = predictor(output_test_A,
                                   output_test_B).data.cpu().numpy()
        test_y_pred = softmax_output.argmax(axis=1)
        cur_acc = (test_y_pred == test_y).mean()  # fraction of test pairs predicted correctly
        print('test acc:', cur_acc)
        out_file.write('%f\n' % cur_acc)
        if train_iter % 50 == 0:
            torch.save(enc.state_dict(), 'ckpt/%i_encoder.model' % train_iter)
            torch.save(predictor.state_dict(),
                       'ckpt/%i_predictor.model' % train_iter)
    out_file.close()
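Neither ChainEncoder nor Predictor is defined on this page. Judging only from how Predictor is used above (it receives two encoded chains and its output is fed to nn.NLLLoss and then argmaxed over two columns), one plausible sketch is a small comparison head returning log-probabilities over two classes. This is an assumption about the interface, not the project's actual implementation.

import torch
import torch.nn as nn
import torch.nn.functional as F

class Predictor(nn.Module):
    """Hypothetical comparison head: scores which of two encoded chains is preferred."""

    def __init__(self, fea_len):
        super(Predictor, self).__init__()
        self.fc = nn.Linear(2 * fea_len, 2)

    def forward(self, code_a, code_b):
        # Concatenate the two chain encodings and return log-probabilities,
        # the input format nn.NLLLoss expects.
        return F.log_softmax(self.fc(torch.cat([code_a, code_b], dim=1)), dim=1)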
Example #3
def main(exp, frame_sizes, dataset, **params):
    params = dict(default_params,
                  exp=exp,
                  frame_sizes=frame_sizes,
                  dataset=dataset,
                  **params)

    os.environ['CUDA_VISIBLE_DEVICES'] = params['gpu']

    results_path = setup_results_dir(params)
    tee_stdout(os.path.join(results_path, 'log'))

    model = SampleRNN(frame_sizes=params['frame_sizes'],
                      n_rnn=params['n_rnn'],
                      dim=params['dim'],
                      learn_h0=params['learn_h0'],
                      q_levels=params['q_levels'],
                      weight_norm=params['weight_norm'],
                      dropout=params['dropout'])
    predictor = Predictor(model)
    if params['cuda']:
        model = model.cuda()
        predictor = predictor.cuda()

    optimizer = gradient_clipping(
        torch.optim.Adam(predictor.parameters(), lr=params['lr']))

    data_loader = make_data_loader(model.lookback, params)
    test_split = 1 - params['test_frac']
    val_split = test_split - params['val_frac']

    criterion = sequence_nll_loss_bits

    checkpoints_path = os.path.join(results_path, 'checkpoints')
    checkpoint_data = load_last_checkpoint(checkpoints_path, params)
    if checkpoint_data is not None:
        (state_dict, epoch, iteration) = checkpoint_data
        start_epoch = int(epoch)
        global_step = iteration
        predictor.load_state_dict(state_dict)
    else:
        start_epoch = 0
        global_step = 0

    #writer = SummaryWriter("runs/{}-{}".format(params['dataset'], str(datetime.datetime.now()).split('.')[0].replace(' ', '-')))
    writer = SummaryWriter(
        os.path.join(
            results_path, "{}-{}".format(
                params['dataset'],
                str(datetime.datetime.now()).split('.')[0].replace(' ', '-'))))
    dataset_train = data_loader(0, val_split, eval=False)
    dataset_val = data_loader(val_split, test_split, eval=True)
    dataset_test = data_loader(test_split, 1, eval=True)

    generator = Generator(predictor.model, params['cuda'])
    best_val_loss = float('inf')

    for e in range(start_epoch, int(params['epoch_limit'])):
        for i, data in enumerate(dataset_train):

            batch_inputs = data[:-1]
            batch_target = data[-1]

            def wrap(input):
                if torch.is_tensor(input):
                    input = torch.autograd.Variable(input)
                    if params['cuda']:
                        input = input.cuda()
                return input

            batch_inputs = list(map(wrap, batch_inputs))

            batch_target = torch.autograd.Variable(batch_target)
            if params['cuda']:
                batch_target = batch_target.cuda()

            plugin_data = [None, None]

            def closure():
                batch_output = predictor(*batch_inputs)

                loss = criterion(batch_output, batch_target)
                loss.backward()

                if plugin_data[0] is None:
                    plugin_data[0] = batch_output.data
                    plugin_data[1] = loss.data

                return loss

            optimizer.zero_grad()
            optimizer.step(closure)
            train_loss = plugin_data[1]

            # stats: iteration
            writer.add_scalar('train/train loss', train_loss, global_step)
            print("E:{:03d}-S{:05d}: Loss={}".format(e, i, train_loss))
            global_step += 1

        # validation: per epoch
        predictor.eval()
        with torch.no_grad():
            loss_sum = 0
            n_examples = 0
            for data in dataset_val:
                batch_inputs = data[:-1]
                batch_target = data[-1]
                batch_size = batch_target.size()[0]

                def wrap(input):
                    if torch.is_tensor(input):
                        input = torch.autograd.Variable(input)
                        if params['cuda']:
                            input = input.cuda()
                    return input

                batch_inputs = list(map(wrap, batch_inputs))

                batch_target = torch.autograd.Variable(batch_target)
                if params['cuda']:
                    batch_target = batch_target.cuda()

                batch_output = predictor(*batch_inputs)

                loss_sum += criterion(batch_output,
                                      batch_target).item() * batch_size

                n_examples += batch_size

            val_loss = loss_sum / n_examples
            writer.add_scalar('validation/validation loss', val_loss,
                              global_step)
            print("== Validation Step E:{:03d}: Loss={} ==".format(
                e, val_loss))

        predictor.train()

        # saver: epoch
        last_pattern = 'ep{}-it{}'
        best_pattern = 'best-ep{}-it{}'
        if not params['keep_old_checkpoints']:
            pattern = os.path.join(checkpoints_path,
                                   last_pattern.format('*', '*'))
            for file_name in glob(pattern):
                os.remove(file_name)
        torch.save(
            predictor.state_dict(),
            os.path.join(checkpoints_path, last_pattern.format(e,
                                                               global_step)))

        cur_val_loss = val_loss
        if cur_val_loss < best_val_loss:
            # remove stale best checkpoints before writing the new one
            pattern = os.path.join(checkpoints_path,
                                   best_pattern.format('*', '*'))
            for file_name in glob(pattern):
                os.remove(file_name)
            torch.save(
                predictor.state_dict(),
                os.path.join(checkpoints_path,
                             best_pattern.format(e, global_step)))
            best_val_loss = cur_val_loss

        generate_sample(generator, params, writer, global_step, results_path,
                        e)

    # generate final results
    generate_sample(generator, params, None, global_step, results_path, 0)
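The gradient_clipping helper wrapped around the Adam optimizer is not shown on this page. A minimal sketch of such a wrapper, assuming its job is to clip gradient norms before every optimizer step (the default max_norm of 1.0 is an illustrative choice, not the project's setting):

import torch

def gradient_clipping(optimizer, max_norm=1.0):
    """Hypothetical wrapper: clip gradient norms before each optimizer step."""
    original_step = optimizer.step

    def step(closure=None):
        loss = None
        if closure is not None:
            # Run the closure first so gradients exist before clipping.
            loss = closure()
        for group in optimizer.param_groups:
            torch.nn.utils.clip_grad_norm_(group['params'], max_norm)
        original_step()
        return loss

    optimizer.step = step
    return optimizer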
Example #4

voc_size = len(Sigma)
lstm_dim = 10
batch_size = 128
num_of_layers = 1
num_of_directions = 1
num_epochs = 300
clip = 1.0

predictor = Predictor(voc_size, lstm_dim)
optimizer = optim.Adam(predictor.parameters())
criterion = nn.NLLLoss()

best_dev_acc = 0.0
best_model_wts = copy.deepcopy(predictor.state_dict())
best_test1_acc = 0.0
best_test2_acc = 0.0
best_test3_acc = 0.0
best_epoch_num = 0

total_epoch_num = 0
all_losses = []
all_acc_1 = []
all_acc_2 = []
all_acc_3 = []

for epoch in range(1, num_epochs + 1):
    total_epoch_num += 1
    shuffled_id_blocks = get_shuffled_ids(_data['tr'], batch_size)
    running_loss = 0.0
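    # The body of this loop is truncated in the listing above. The block below is a
    # purely illustrative sketch of how the pieces defined earlier (predictor,
    # criterion, optimizer, clip) might fit together for one batch; get_batch and
    # the tensor shapes are assumptions, not part of the original source.
    for id_block in shuffled_id_blocks:
        # Hypothetical helper: builds (inputs, targets) tensors for one block of ids.
        inputs, targets = get_batch(_data['tr'], id_block)

        optimizer.zero_grad()
        log_probs = predictor(inputs)          # assumed to return (N, voc_size) log-probabilities
        loss = criterion(log_probs, targets)   # nn.NLLLoss over log-probabilities
        loss.backward()
        # Bound the gradient norm with the clip value defined above before updating.
        nn.utils.clip_grad_norm_(predictor.parameters(), clip)
        optimizer.step()
        running_loss += loss.item()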
Example #5
        optimizer.zero_grad()
        loss_p1.backward()
        optimizer.step()
        # training phase 2 without long-term sequence
        pred_model.module.memory.memory_w.requires_grad = False  # do not train memory weights
        out_pred = pred_model(short_data, None, train_out_len, phase=2)
        loss_p2 = l1_loss(out_pred, out_gt) + l2_loss(out_pred, out_gt)
        optimizer.zero_grad()
        loss_p2.backward()
        optimizer.step()

        train_loss.update(float(loss_p1) + float(loss_p2))

        if (train_i + 1) % args.print_freq == 0:
            torch.save(
                pred_model.state_dict(), args.checkpoint_save_dir +
                '/trained_file_' + str(train_i + 1).zfill(6) + '.pt')

            # validation phase
            pred_model.eval()
            with torch.no_grad():
                for valid_data in validloader:
                    # define data indexes
                    short_start, short_end = 0, args.short_len
                    out_gt_start, out_gt_end = short_end, short_end + args.out_len

                    # obtain input data and output gt
                    valid_data = torch.stack(valid_data).to(device)
                    valid_data = valid_data.transpose(
                        dim0=0, dim1=1)  # make (N, T, C, H, W)
                    short_data = valid_data[:, short_start:short_end, :, :, :]