Example #1
0
def test_running():
    """Smoke-test a full training run of the character-level RNN on `device`."""
    epochs = 100000
    seq_batch_size = 100
    print_yes = 100  # report interval, in iterations
    loss_func = torch.nn.functional.nll_loss
    # Build the network, move it to the device, and attach an optimizer.
    net = RNN(100, 120, 150, 2)
    net.to(device)  # add cuda to device
    optim = torch.optim.Adam(net.parameters(), lr=3e-5)
    # Main training loop.
    for epoch in range(epochs):
        # Sample one sequence and encode every symbol as its vocabulary index.
        batch = get_batch(train_data, seq_batch_size)
        encoded = torch.LongTensor([vocab.find(sym) for sym in batch])
        # Input is everything but the last symbol; target is shifted by one.
        x_t, y_t = encoded[:-1], encoded[1:]
        hidden = net.init_hidden()
        # Inputs, targets, and hidden state all have to live on the device.
        x_t, y_t, hidden = x_t.to(device), y_t.to(device), hidden.to(device)
        # Forward pass and loss.
        logprob, hidden = net.forward(x_t, hidden)
        loss = loss_func(logprob, y_t)
        # Backward pass and parameter update.
        optim.zero_grad()
        loss.backward()
        optim.step()
        # Report progress every `print_yes` iterations.
        if epoch % print_yes == 0:
            print('*' * 100)
            print('\n epoch {}, loss:{} \n'.format(epoch, loss))
            # make sure to pass True flag for running on cuda
            print('sample speech:\n', run_words(net, vocab, 500, True))
Example #2
0
def train():
    """Build the RNN, train it with SGD, and persist the training log."""
    iterators, dataset = data_loaders(batch_size)
    class_weights = dataset.get_class_weights()
    model = RNN(input_size, hidden_size, num_layers, dropout, n_classes,
                class_weights)
    optimizer = optim.SGD(model.parameters(), lr)
    save_log(train_procedure(model, iterators, n_epochs, optimizer))
Example #3
0
def load_model(args):
    """Build (model, loss_fn, optimizer) from parsed command-line arguments.

    Supported choices: model in {rnn, ffnn, cnn3d}, loss_fn in {mse},
    optimizer in {adam}.

    Raises:
        Exception: when any of the three choices is unsupported.
    """
    if args.model == 'rnn':
        model = RNN(25, args.dim_hidden, 25)
    elif args.model == 'ffnn':
        model = FFNN(25,
                     args.dim_hidden,
                     memory=args.memory,
                     num_hidden_layers=args.num_hidden_layers)
    elif args.model == 'cnn3d':
        #/TODO un hard code
        model = CNN3D((10, 10), ((4, 4, 2), (7, 12, 2)), 10, 7, 10, 7, 2, 100)
    else:
        raise Exception(
            "Only rnn, ffnn, cnn3d model types currently supported")

    # Guard-clause style: reject unsupported choices up front.
    if args.loss_fn != 'mse':
        raise Exception("Only mse loss function currently supported")
    loss_fn = torch.nn.MSELoss()

    if args.optimizer != 'adam':
        raise Exception("Only adam optimizer currently supported")
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    return model, loss_fn, optimizer
Example #4
0
def attention_test(args):
    """Compare test accuracy/F1 with and without attention over 5 runs each.

    For each setting, per-run metrics plus their mean/std are appended to
    ``SAVE_DIR/attention.txt``.
    """
    chosen_params = dict(params)
    results = []

    for attention in [False, True]:
        chosen_params['attention'] = attention
        runs = 5
        acc_d = {}
        f1_d = {}

        for i in tqdm(range(runs)):
            # Reload data and embeddings every run so runs are independent.
            train_dataset, valid_dataset, test_dataset = data.load_dataset(
                args.train_batch_size,
                args.test_batch_size,
                min_freq=chosen_params['min_freq'])
            embedding = data.generate_embedding_matrix(
                train_dataset.dataset.text_vocab,
                rand=chosen_params['rand_emb'],
                freeze=chosen_params['freeze'])

            model = RNN(embedding, chosen_params)
            criterion = nn.BCEWithLogitsLoss()
            optimizer = torch.optim.Adam(model.parameters(),
                                         lr=chosen_params['lr'])

            for epoch in range(args.epochs):
                print(f'******* epoch: {epoch+1} *******')
                train(model, train_dataset, optimizer, criterion, args)
                evaluate(model, valid_dataset, criterion, 'Validation')

            acc, f1 = evaluate(model, test_dataset, criterion, 'Test')
            acc_d['acc_' + 'run' + str(i)] = acc
            f1_d['f1_' + 'run' + str(i)] = f1

        # Aggregate over the per-run values; the snapshot is taken before the
        # 'mean'/'std' keys are inserted so they do not pollute the stats.
        for d in (acc_d, f1_d):
            vals = list(d.values())
            d['mean'] = np.mean(vals)
            d['std'] = np.std(vals)

        results.append((acc_d, f1_d))

    with open(os.path.join(SAVE_DIR, 'attention.txt'), 'a') as f:
        # Fixed: was `print(f'', file=f)` -- an f-string with no placeholders.
        print(file=f)
        for idx, (acc, f1) in enumerate(results):
            print('[no attention]' if idx == 0 else '[attention]', file=f)
            print(acc, file=f)
            print(f1, file=f)
Example #5
0
def hyperparam_optim_test(args):
    """Random hyper-parameter search: 10 trials with freshly sampled settings;
    per-trial test accuracy/F1 are appended to SAVE_DIR/params_search.txt."""

    var_params = {
        'cell_name': ['lstm'],
        'hidden_size': [50, 150, 300],
        'num_layers': [2, 4, 5],
        'min_freq': [0, 100, 500],
        'lr': [1e-3, 1e-4, 1e-7],
        'dropout': [0, 0.4, 0.7],
        'freeze': [False, True],
        'rand_emb': [False, True],
        'attention': [False]
    }

    results = []
    for trial in tqdm(range(10)):
        # Sample one value per hyper-parameter for this trial.
        chosen_params = {name: random.choice(options)
                         for name, options in var_params.items()}

        # Fresh data and embeddings per trial (min_freq affects the vocab).
        train_dataset, valid_dataset, test_dataset = data.load_dataset(
            args.train_batch_size,
            args.test_batch_size,
            min_freq=chosen_params['min_freq'])
        embedding = data.generate_embedding_matrix(
            train_dataset.dataset.text_vocab,
            rand=chosen_params['rand_emb'],
            freeze=chosen_params['freeze'])

        model = RNN(embedding, chosen_params)
        criterion = nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=chosen_params['lr'])

        for epoch in range(args.epochs):
            print(f'******* epoch: {epoch+1} *******')
            train(model, train_dataset, optimizer, criterion, args)
            evaluate(model, valid_dataset, criterion, 'Validation')

        acc, f1 = evaluate(model, test_dataset, criterion, 'Test')
        record = dict(chosen_params)
        record['acc'] = acc
        record['f1'] = f1
        results.append(record)

    with open(os.path.join(SAVE_DIR, 'params_search.txt'), 'a') as f:
        for record in results:
            print(record, file=f)
Example #6
0
def cell_comparison_test(args):
    """Plot test accuracy of rnn/lstm/gru cells while sweeping one hyper-param.

    For every key in ``var_params`` one figure is produced: the chosen key is
    swept over its listed values while every other hyper-parameter is held at
    its default, and one accuracy curve per cell type is drawn.  Each figure
    is saved to ``SAVE_DIR/<key>.png``.
    """

    train_dataset, valid_dataset, test_dataset = data.load_dataset(
        args.train_batch_size, args.test_batch_size)
    embedding = data.generate_embedding_matrix(
        train_dataset.dataset.text_vocab)

    var_params = {
        'hidden_size': [50, 150, 300],
        'num_layers': [1, 2, 4],
        'dropout': [0.1, 0.4, 0.7],
        'bidirectional': [True, False],
        'attention': [False]
    }

    for idx, (key, values) in enumerate(var_params.items()):
        fig, ax = plt.subplots(nrows=1, ncols=1)
        ax.set_title('Variable ' + key)
        for cell_name in tqdm(['rnn', 'lstm', 'gru']):
            results = []
            for i in range(len(values)):
                # Sweep `key`; every other parameter takes its default value.
                # Fixed: plain `v[1]` raised IndexError for single-entry lists
                # such as 'attention': [False] -- fall back to v[0] then.
                current_params = {
                    k: v[i] if k == key else (v[1] if len(v) > 1 else v[0])
                    for (k, v) in var_params.items()
                }
                current_params['cell_name'] = cell_name

                # Fixed: removed a stray no-op `model` expression statement
                # (presumably the leftover of a `.cuda()`/`.to(device)` call).
                model = RNN(embedding, current_params)
                criterion = nn.BCEWithLogitsLoss()
                optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

                for epoch in range(args.epochs):
                    print(f'******* epoch: {epoch+1} *******')
                    train(model, train_dataset, optimizer, criterion, args)
                    evaluate(model, valid_dataset, criterion, 'Validation')

                result, _ = evaluate(model, test_dataset, criterion, 'Test')
                results.append(result)

            ax.plot(values, results, marker='o', label=cell_name)
        plt.legend(loc='best')
        plt.xlabel(key)
        plt.ylabel('accuracy')
        fig.savefig(os.path.join(SAVE_DIR, key + '.png'))
        plt.close(fig)
def main():
    """Train the character-level RNN on the book text and save the weights."""
    epochs = 301
    seq_batch_size = 200
    print_yes = 100  # report every this many epochs
    iscuda = False

    # Network, optimizer and loss function.
    net = RNN(len(chars), 100, 150, 2)
    optim = torch.optim.Adam(net.parameters(), lr=6e-4)
    loss_func = torch.nn.functional.nll_loss

    if iscuda:
        net = net.cuda()

    # Main training loop.
    for epoch in range(epochs):
        # Sample a sequence and store each character's index in `chars`.
        seq = getSequence(book, seq_batch_size)
        encoded = torch.LongTensor([chars.find(c) for c in seq])

        # Input drops the last symbol, target drops the first; move
        # everything (including the fresh hidden state) to the GPU if enabled.
        x_t, y_t = encoded[:-1], encoded[1:]
        hidden = net.init_hidden()
        if iscuda:
            x_t, y_t, hidden = x_t.cuda(), y_t.cuda(), hidden.cuda()

        # Forward pass and loss.
        logprob, hidden = net.forward(x_t, hidden)
        loss = loss_func(logprob, y_t)
        # Backward pass and parameter update.
        optim.zero_grad()
        loss.backward()
        optim.step()
        # Report progress every `print_yes` epochs.
        if epoch % print_yes == 0:
            print('*' * 60)
            print('\n epoch {}, loss:{} \n'.format(epoch, loss))
            print('sample speech:\n', test_words(net, chars, seq_batch_size))

    torch.save(net.state_dict(), 'trainedBook_v2.pt')
Example #8
0
def main(args):
    """Train an attention-enabled RNN and report validation/test metrics."""
    chosen_params = dict(params)
    chosen_params['attention'] = True

    train_dataset, valid_dataset, test_dataset = data.load_dataset(
        args.train_batch_size, args.test_batch_size)
    vocab = train_dataset.dataset.text_vocab
    embedding = data.generate_embedding_matrix(vocab, freeze=False)
    model = RNN(embedding, chosen_params)

    criterion = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    for epoch in range(args.epochs):
        print(f'******* epoch: {epoch} *******')
        train(model, train_dataset, optimizer, criterion, args)
        evaluate(model, valid_dataset, criterion, 'Validation')

    evaluate(model, test_dataset, criterion, 'Test')
Example #9
0
def no_test_forward():
    """Sanity-check one forward/backward pass of the RNN on `device`."""
    loss_func = torch.nn.functional.nll_loss
    net = RNN(100, 100, 100)
    net.to(device)  # add cuda to device
    optim = torch.optim.Adam(net.parameters(), lr=1e-4)
    # Build a 101-symbol training batch and encode it as vocabulary indices.
    batch = get_batch(train_data, 100)
    encoded = torch.LongTensor([vocab.find(sym) for sym in batch])
    # Split into input/target: the target is the input shifted by one symbol.
    x_t, y_t = encoded[:-1], encoded[1:]
    ho = net.init_hidden()
    # Everything the model touches -- inputs, targets, hidden state -- must
    # be moved to the device.
    x_t, y_t, ho = x_t.to(device), y_t.to(device), ho.to(device)
    # Forward pass.
    log_prob, hidden = net.forward(x_t, ho)
    # let's see if the forward pass of the next hidden state is already cuda
    #log_prob2, hidden2 = net.forward(x_t, hidden)
    loss = loss_func(log_prob, y_t)
    # Backward pass and one optimizer step.
    optim.zero_grad()
    loss.backward()
    optim.step()
Example #10
0
def embedding_baseline_test(args):
    """Compare RNN vs. Baseline with random and pretrained embeddings.

    Test accuracy/F1 for each (model, embedding) combination are appended to
    SAVE_DIR/embedding_baseline.txt.
    """
    chosen_params = dict(params)

    results = []

    for rand_emb in [True, False]:
        chosen_params['rand_emb'] = rand_emb
        # Rebuild the dataset and embedding matrix for this setting.
        train_dataset, valid_dataset, test_dataset = data.load_dataset(
            args.train_batch_size,
            args.test_batch_size,
            min_freq=chosen_params['min_freq'])
        embedding = data.generate_embedding_matrix(
            train_dataset.dataset.text_vocab,
            rand=chosen_params['rand_emb'],
            freeze=chosen_params['freeze'])

        result = {}
        for m in ['baseline', 'rnn']:
            if m == 'rnn':
                model = RNN(embedding, chosen_params)
            else:
                model = Baseline(embedding)
            criterion = nn.BCEWithLogitsLoss()
            optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

            for epoch in range(args.epochs):
                print(f'******* epoch: {epoch} *******')
                train(model, train_dataset, optimizer, criterion, args)
                evaluate(model, valid_dataset, criterion, 'Validation')

            acc, f1 = evaluate(model, test_dataset, criterion, 'Test')
            result[m + '_acc_rand_emb' + str(rand_emb)] = acc
            result[m + '_f1_rand_emb' + str(rand_emb)] = f1
        results.append(result)

    with open(os.path.join(SAVE_DIR, 'embedding_baseline.txt'), 'a') as f:
        for res in results:
            print(res, file=f)
Example #11
0
              'pretrained_word_embeddings_file': pretrained_word_embeddings_file, 'transform_train': transform_train, 
              'transform_val': transform_val, 'WEIGHT_DECAY': WEIGHT_DECAY, 'ADAM_FLAG': ADAM_FLAG, 'RNN_DROPOUT':RNN_DROPOUT,
              'CNN_DROPOUT': CNN_DROPOUT, 'GRAD_CLIP': GRAD_CLIP}


    print('Initializing models...')
    encoder = CNN(NO_WORD_EMBEDDINGS, pretrained_cnn_dir, freeze=True, dropout_prob=CNN_DROPOUT, model_name='resnet152')
    decoder = RNN(VOCAB_SIZE, NO_WORD_EMBEDDINGS, hidden_size=HIDDEN_SIZE, num_layers=NUM_LAYERS,
                  pre_trained_file=pretrained_word_embeddings_file, freeze=False, dropout_prob=RNN_DROPOUT)
    params['encoder'] = encoder
    params['decoder'] = decoder
    encoder.cuda()
    decoder.cuda()

    print('Initializing optimizer...')
    model_paras = list(encoder.parameters()) + list(decoder.parameters())
    optimizer = optim.Adam(model_paras, lr=LR, weight_decay=WEIGHT_DECAY)
    params['optimizer'] = optimizer


    pickle.dump(params, open(init_params_file, 'wb'))


# Initialize training-progress accumulators; presumably overwritten when a
# saved checkpoint is loaded to resume training (see below) -- confirm.
current_epoch = 1
batch_step_count = 1
time_used_global = 0.0  # cumulative training time (presumably seconds -- confirm)
checkpoint = 1


# load lastest model to resume training
Example #12
0
class Trainer:
    """
    Image-captioning trainer pairing a CNN encoder with an RNN decoder.

    Trains on DEVICE, logs epoch losses to TensorBoard, and saves the model
    whenever the validation CIDEr score matches or beats the best seen so far.
    """
    def __init__(self, _hparams):
        # Fix RNG seeds for reproducibility.
        utils.set_seed(_hparams.fixed_seed)

        self.train_loader = get_train_loader(_hparams)
        self.val_loader = get_val_loader(_hparams)
        self.encoder = CNN().to(DEVICE)
        self.decoder = RNN(fea_dim=_hparams.fea_dim,
                           embed_dim=_hparams.embed_dim,
                           hid_dim=_hparams.hid_dim,
                           max_sen_len=_hparams.max_sen_len,
                           vocab_pkl=_hparams.vocab_pkl).to(DEVICE)
        self.loss_fn = nn.CrossEntropyLoss()
        # Only decoder parameters are optimized initially (see get_params).
        self.optimizer = torch.optim.Adam(self.get_params(), lr=_hparams.lr)
        self.writer = SummaryWriter()

        self.max_sen_len = _hparams.max_sen_len
        self.val_cap = _hparams.val_cap
        self.ft_encoder_lr = _hparams.ft_encoder_lr  # encoder LR for fine-tuning
        self.ft_decoder_lr = _hparams.ft_decoder_lr  # decoder LR for fine-tuning
        self.best_CIDEr = 0  # best validation CIDEr seen so far

    def fine_tune_encoder(self, fine_tune_epochs, val_interval, save_path,
                          val_path):
        """
        Unfreeze the encoder and jointly fine-tune encoder and decoder.

        :param fine_tune_epochs: number of fine-tuning epochs to run
        :param val_interval: validate every this many epochs
        :param save_path: path to save the best model to
        :param val_path: path for sentences generated during validation
        :return:
        """
        print('*' * 20, 'fine tune encoder for', fine_tune_epochs, 'epochs',
              '*' * 20)
        self.encoder.fine_tune()
        # Use separate learning rates for the encoder and decoder groups.
        self.optimizer = torch.optim.Adam([
            {
                'params': self.encoder.parameters(),
                'lr': self.ft_encoder_lr
            },
            {
                'params': self.decoder.parameters(),
                'lr': self.ft_decoder_lr
            },
        ])
        self.training(fine_tune_epochs, val_interval, save_path, val_path)
        # Re-freeze the encoder once fine-tuning is done.
        self.encoder.froze()
        print('*' * 20, 'fine tune encoder complete', '*' * 20)

    def get_params(self):
        """
        All parameters the model should optimize.  The encoder is deliberately
        left untrained at this stage, so only decoder parameters are returned.

        :return: list of decoder parameters
        """
        return list(self.decoder.parameters())

    def training(self, max_epochs, val_interval, save_path, val_path):
        """
        Main training loop.

        :param val_path: path for sentences generated during validation
        :param save_path: path to save the model to
        :param val_interval: validate every this many epochs
        :param max_epochs: maximum number of training epochs
        :return:
        """
        print('*' * 20, 'train', '*' * 20)
        for epoch in range(max_epochs):
            self.set_train()

            epoch_loss = 0
            epoch_steps = len(self.train_loader)
            for step, (img, cap,
                       cap_len) in tqdm(enumerate(self.train_loader)):
                # batch_size * 3 * 224 * 224
                img = img.to(DEVICE)
                cap = cap.to(DEVICE)

                self.optimizer.zero_grad()

                features = self.encoder.forward(img)
                outputs = self.decoder.forward(features, cap)

                # Pack predictions and targets so padded positions are
                # excluded from the loss; the targets drop each caption's
                # leading token (cap[:, 1:]) to align with the predictions.
                outputs = pack_padded_sequence(outputs,
                                               cap_len - 1,
                                               batch_first=True)[0]
                targets = pack_padded_sequence(cap[:, 1:],
                                               cap_len - 1,
                                               batch_first=True)[0]
                train_loss = self.loss_fn(outputs, targets)
                epoch_loss += train_loss.item()
                train_loss.backward()
                self.optimizer.step()

            epoch_loss /= epoch_steps
            self.writer.add_scalar('epoch_loss', epoch_loss, epoch)
            print('epoch_loss: {}, epoch: {}'.format(epoch_loss, epoch))
            if (epoch + 1) % val_interval == 0:
                # Keep only the checkpoint with the best CIDEr so far.
                CIDEr = self.validating(epoch, val_path)
                if self.best_CIDEr <= CIDEr:
                    self.best_CIDEr = CIDEr
                    self.save_model(save_path, epoch)

    def save_model(self, save_path, train_epoch):
        """
        Save the current best model.

        :param save_path: path of the saved model file
        :param train_epoch: current training epoch
        :return:
        """
        model_state_dict = {
            'encoder_state_dict': self.encoder.state_dict(),
            'decoder_state_dict': self.decoder.state_dict(),
            # NOTE(review): key is spelled 'tran_epoch' (sic); any checkpoint
            # loader must use the same spelling.
            'tran_epoch': train_epoch,
        }
        print('*' * 20, 'save model to: ', save_path, '*' * 20)
        torch.save(model_state_dict, save_path)

    def validating(self, train_epoch, val_path):
        """
        Validate: generate captions, run COCO evaluation, log every metric.

        :param val_path: path for sentences generated during validation
        :param train_epoch: current training epoch
        :return: the CIDEr score
        """
        print('*' * 20, 'validate', '*' * 20)
        self.set_eval()
        sen_json = []
        with torch.no_grad():
            for val_step, (img, img_id) in tqdm(enumerate(self.val_loader)):
                img = img.to(DEVICE)
                features = self.encoder.forward(img)
                sens, _ = self.decoder.sample(features)
                sen_json.append({'image_id': int(img_id), 'caption': sens[0]})

        with open(val_path, 'w') as f:
            json.dump(sen_json, f)

        result = coco_eval(self.val_cap, val_path)
        scores = {}
        for metric, score in result:
            scores[metric] = score
            self.writer.add_scalar(metric, score, train_epoch)

        return scores['CIDEr']

    def set_train(self):
        # Put both sub-modules into training mode.
        self.encoder.train()
        self.decoder.train()

    def set_eval(self):
        # Put both sub-modules into evaluation mode.
        self.encoder.eval()
        self.decoder.eval()
Example #13
0
# Build sliding-window training samples from the scaled series.
train_X, train_y = util.create_dataset(training_set_scaled, input_size=INPUT_SIZE)
# print(train_X.shape)
''' (672, 60) '''

# Reshape to (batch, time_step, input_size) -- the shape fed into the LSTM.
train_X = train_X.reshape(train_X.shape[0], 1, train_X.shape[1])
# print(train_X.shape)
''' (672, 1, 60) '''



# Part 2 - Building the RNN
rnn = RNN(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, OUTPUT_SIZE)


optimiser = torch.optim.Adam(rnn.parameters(), lr=args.lr)
loss_func = nn.MSELoss()
if CUDA:
    loss_func.cuda()


# Live plot of training progress (interactive mode).
plt.figure(1, figsize=(12, 5))
plt.ion()

# Hidden state carried across epochs; presumably initialized by the RNN
# when None -- confirm against the model's forward().
hidden_state = None
for epoch in range(args.epochs):

    inputs = Variable(torch.from_numpy(train_X).float())
    labels = Variable(torch.from_numpy(train_y).float())
    if CUDA:
        inputs, labels = inputs.cuda(), labels.cuda()
Example #14
0
class TextClassifier:
    """Bundles data loading, an RNN model, and train/eval loops for text
    classification."""

    def __init__(self, batch_size, iterations, initial_lr, hidden_size,
                 dropout, kernel_sz, num_layers):
        """Build the data iterators and the RNN model.

        ``kernel_sz`` is currently unused (leftover from a CNN variant) and is
        kept only for interface compatibility.
        """
        self.use_cuda = torch.cuda.is_available()
        self.device = torch.device('cuda:0' if self.use_cuda else 'cpu')

        self.data = DataReader()
        train_iter, val_iter, test_iter = self.data.init_dataset(
            batch_size, ('cuda:0' if self.use_cuda else 'cpu'))
        self.train_batch_loader = BatchGenerator(train_iter, 'text', 'label')
        self.val_batch_loader = BatchGenerator(val_iter, 'text', 'label')
        self.test_batch_loader = BatchGenerator(test_iter, 'text', 'label')

        # Store hyperparameters
        self.batch_size = batch_size
        self.iterations = iterations
        self.initial_lr = initial_lr

        # Create the model on top of the pretrained embedding vectors.
        emb_size, emb_dim = self.data.TEXT.vocab.vectors.size()
        self.model = RNN(emb_size=emb_size,
                         emb_dimension=emb_dim,
                         pretrained_emb=self.data.TEXT.vocab.vectors,
                         output_size=len(self.data.LABEL.vocab),
                         num_layers=num_layers,
                         hidden_size=hidden_size,
                         dropout=dropout)

        if self.use_cuda:
            self.model.cuda()

    def train(self, min_stride=3):
        """Train for ``self.iterations`` epochs with SGD.

        The test set is re-evaluated only when validation accuracy improves.
        ``min_stride`` is unused (kept for interface compatibility).

        Returns:
            (train_loss_hist, train_acc_hist, val_loss_hist, val_acc_hist,
            test_acc_hist) -- per-iteration histories; test_acc_hist holds
            the single best-validation test accuracy.
        """
        train_loss_hist = []
        val_loss_hist = []
        train_acc_hist = []
        val_acc_hist = []
        test_acc_hist = []

        best_score = 0.0
        # Fix: these were previously unbound when validation accuracy never
        # exceeded best_score, crashing at the final append below.
        test_loss, test_acc = 0.0, 0.0

        # Vanilla SGD holds no optimizer state, so creating it once is
        # equivalent to the old per-iteration re-creation, just cheaper.
        optimizer = optim.SGD(self.model.parameters(), lr=self.initial_lr)

        for itr in range(self.iterations):
            print("\nIteration: " + str(itr + 1))
            self.model.train()
            total_loss = 0.0
            total_acc = 0.0
            steps = 0

            data_iter = iter(self.train_batch_loader)

            for i in range(len(self.train_batch_loader)):
                ((x_batch, x_len_batch), y_batch) = next(data_iter)

                optimizer.zero_grad()

                # The model returns (loss, logits) directly.
                loss, logits = self.model.forward(x_batch, y_batch)
                acc = torch.sum(torch.argmax(logits, dim=1) == y_batch)

                total_loss += loss.item()
                total_acc += acc.item()
                steps += 1

                loss.backward()
                optimizer.step()

            train_loss_hist.append(total_loss / steps)
            train_acc_hist.append(total_acc / len(self.data.train_data))

            val_loss, val_acc = self.eval_model(self.val_batch_loader,
                                                len(self.data.val_data))
            val_loss_hist.append(val_loss)
            val_acc_hist.append(val_acc)

            # Evaluate on the test set only when validation improves.
            if val_acc > best_score:
                best_score = val_acc
                test_loss, test_acc = self.eval_model(self.test_batch_loader,
                                                      len(self.data.test_data))

            print("Train: {Loss: " + str(total_loss / steps) + ", Acc: " +
                  str(total_acc / len(self.data.train_data)) + " }")
            print("Val: {Loss: " + str(val_loss) + ", Acc: " + str(val_acc) +
                  " }")

        test_acc_hist.append(test_acc)

        return train_loss_hist, train_acc_hist, val_loss_hist, val_acc_hist, test_acc_hist

    def eval_model(self, batch_loader, N, min_stride=3):
        """Evaluate on one loader; returns (loss / N, accuracy / N).

        ``N`` is the number of examples in the split; ``min_stride`` is unused
        (kept for interface compatibility).
        """
        self.model.eval()

        total_loss = 0.0
        total_acc = 0.0

        batch_iterator = iter(batch_loader)

        with torch.no_grad():
            for i in range(len(batch_loader)):
                ((x_batch, x_len_batch), y_batch) = next(batch_iterator)

                loss, logits = self.model(x_batch, y_batch)
                acc = torch.sum(torch.argmax(logits, dim=1) == y_batch)

                total_loss += loss.item()
                total_acc += acc.item()

        return (total_loss / N), (total_acc / N)
Example #15
0
        # different things here than in the RNNs.
        # Also, the Transformer also has other hyperparameters
        # (such as the number of attention heads) which can change it's behavior.
        model = TRANSFORMER(vocab_size=vocab_size, n_units=args.hidden_size,
                            n_blocks=args.num_layers, dropout=1.-args.dp_keep_prob)
    # these 3 attributes don't affect the Transformer's computations;
    # they are only used in run_epoch
    model.batch_size=args.batch_size
    model.seq_len=args.seq_len
    model.vocab_size=vocab_size
else:
  print("Model type not recognized.")

model = model.to(device)

# Count all model parameters and record the total in the experiment config.
total_params = sum(p.numel() for p in model.parameters())
print("===> Total Model Parameters: ", total_params)

with open (os.path.join(experiment_path,'exp_config.txt'), 'a') as f:
	f.write('Total Model Parameters:'+'    '+str(total_params)+'\n')

# LOSS FUNCTION
loss_fn = nn.CrossEntropyLoss()
# NOTE(review): `optimizer` is only defined when args.optimizer == 'ADAM';
# other values would leave it unbound -- confirm allowed argument values.
if args.optimizer == 'ADAM':
    optimizer = torch.optim.Adam(model.parameters(), lr=args.initial_lr)

# LEARNING RATE SCHEDULE
lr = args.initial_lr
lr_decay_base = 1 / 1.15  # decay factor (presumably applied per epoch after the flat period -- confirm)
m_flat_lr = 14.0 # we will not touch lr for the first m_flat_lr epochs
Example #16
0
args = get_args()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Ensure the preprocessing directory exists before loading tensors from it.
if not os.path.exists(args.prepro_root):
    os.makedirs(args.prepro_root)
train_dataset = torch.load(args.prepro_root + args.train_dataset_path)
valid_dataset = torch.load(args.prepro_root + args.valid_dataset_path)
train_loader = data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
valid_loader = data.DataLoader(valid_dataset, batch_size=args.batch_size, shuffle=False)
# Pre-computed writer/keyword tensor handed to the model.  NOTE(review):
# despite the .pkl suffix this is loaded with torch.load -- confirm format.
valid_tensor = torch.load(args.prepro_root+'valid_writer_keywd.pkl').to(device)

model = RNN(args.num_readers, args.num_writers, args.num_keywords,
            args.num_items, args.num_magazines, args.hid_dim, valid_tensor).to(device)

optimizer = optim.Adam(model.parameters(), lr=args.lr)
# ignore_index=0 excludes padding targets from the loss.
criterion = torch.nn.CrossEntropyLoss(ignore_index=0)
# Halve the learning rate every 5 epochs.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

# Count trainable parameters only.
model_parameters = filter(lambda p: p.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print(model)
print('# of params : ', params)

# Optionally resume from a previously saved checkpoint.
if args.start_epoch:
    model.load_state_dict(torch.load(args.save_path+'%d_rnn_attention.pkl' % args.start_epoch))

best_loss = 9999999  # sentinel: any real validation loss will be smaller
for epoch in range(args.num_epochs):
    model.train()
    for i, data in enumerate(tqdm.tqdm(train_loader, desc='Train')):
Example #17
0
# chunk into sequences of length seq_length + 1 (input plus one-step target)
batches = list(chunks(batches, seq_length + 1))

# chunk sequences into batches
batches = list(chunks(batches, minibatch_size))

# convert batches to tensors; transpose_(0, 1) swaps the first two
# dimensions in place (presumably making the sequence dimension come
# first -- confirm against the model's expected input layout)
batches = [torch.LongTensor(batch).transpose_(0, 1) for batch in batches]

loss_function = nn.CrossEntropyLoss()

# Input and output sizes are both the character-vocabulary size (chars_len).
model = RNN(minibatch_size, chars_len, hidden_size, chars_len, n_layers,
            minibatch_size).to(DEVICE)
hidden = Variable(model.create_hidden()).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)


def loop(itr, extras, stateful):
    batch_tensor = next(itr)
    if use_cuda:
        batch_tensor = batch_tensor.cuda()

    # reset the model
    model.zero_grad()

    # everything except the last
    input_variable = Variable(batch_tensor[:-1]).to(DEVICE)

    # everything except the first, flattened
    target_variable = Variable(
Example #18
0
def main():
    """Train and evaluate a multilingual RNN language model.

    Pipeline: parse args, build train/valid/test splits (train excludes the
    dev+target languages; test is zero-shot on the target languages), train
    with optional Apex mixed precision and a choice of Bayesian prior
    ('laplace', 'ninf', 'vi', 'hmc'), then optionally refine on ~100 samples
    per held-out target language and report per-language few-shot results.
    """
    args = get_args()

    log.info(f'Parsed arguments: \n{pformat(args.__dict__)}')
    assert args.cond_type.lower() in ['none', 'platanios', 'oestling']

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    log.info('Using device {}.'.format(device))

    # Mixed precision via Nvidia Apex only when CUDA and --fp16 are both on;
    # otherwise `amp` stays None and FP32 paths are used throughout.
    use_apex = False
    if torch.cuda.is_available() and args.fp16:
        log.info('Loading Nvidia Apex and using AMP')
        from apex import amp, optimizers
        use_apex = True
    else:
        log.info('Using FP32')
        amp = None

    log.info(f'Using time stamp {timestamp} to save models and logs.')

    if not args.no_seed:
        log.info(f'Setting random seed to {args.seed} for reproducibility.')
        torch.manual_seed(args.seed)
        random.seed(args.seed)

    data = Corpus(args.datadir)

    # Split spec: train on everything EXCEPT dev+target languages
    # (invert_include), validate on dev languages, zero-shot test on targets.
    data_splits = [
        {
            'split': 'train',
            'languages': args.dev_langs + args.target_langs,
            'invert_include': True,
        },
        {
            'split': 'valid',
            'languages': args.dev_langs,
        },
        {
            'split': 'test',
            'languages': args.target_langs,
        },
    ]

    if args.refine:
        # Extra 100-sample-per-language split used later for few-shot refinement
        data_splits.append({
            'split': 'train_100',
            'languages': args.target_langs,
            'ignore_missing': True,
        })

    data_splits = data.make_datasets(data_splits, force_rebuild=args.rebuild)
    train_set, val_set, test_set = data_splits['train'], data_splits[
        'valid'], data_splits['test']
    dictionary = data_splits['dictionary']

    # Per-language sampling probabilities to balance corpora of unequal size
    train_language_distr = get_sampling_probabilities(train_set, 1.0)
    train_set = Dataset(train_set,
                        batchsize=args.batchsize,
                        bptt=args.bptt,
                        reset_on_iter=True,
                        language_probabilities=train_language_distr)
    val_set = Dataset(val_set,
                      make_config=True,
                      batchsize=args.valid_batchsize,
                      bptt=args.bptt,
                      eval=True)
    test_set = Dataset(test_set,
                       make_config=True,
                       batchsize=args.test_batchsize,
                       bptt=args.bptt,
                       eval=True)

    train_loader = DataLoader(train_set, num_workers=args.workers)
    val_loader = DataLoader(val_set, num_workers=args.workers)
    test_loader = DataLoader(test_set, num_workers=args.workers)

    if args.refine:
        # One small single-language Dataset per target language
        refine_set = dict()
        for lang, lang_d in data_splits['train_100'].items():
            refine_set[lang] = Dataset({lang: lang_d},
                                       batchsize=args.valid_batchsize,
                                       bptt=args.bptt,
                                       make_config=True)

    n_token = len(dictionary.idx2tkn)

    # Load and preprocess matrix of typological features
    # TODO: implement this, the OEST
    # prior_matrix = load_prior(args.prior, corpus.dictionary.lang2idx)
    # n_components = min(50, *prior_matrix.shape)
    # pca = PCA(n_components=n_components, whiten=True)
    # prior_matrix = pca.fit_transform(prior_matrix)
    prior = None

    model = RNN(args.cond_type,
                prior,
                n_token,
                n_input=args.emsize,
                n_hidden=args.nhidden,
                n_layers=args.nlayers,
                dropout=args.dropouto,
                dropoute=args.dropoute,
                dropouth=args.dropouth,
                dropouti=args.dropouti,
                wdrop=args.wdrop,
                wdrop_layers=[0, 1, 2],
                tie_weights=True).to(device)

    # Split softmax loss for FP32; plain CE under apex O2 (pure FP16)
    if args.opt_level != 'O2':
        loss_function = SplitCrossEntropyLoss(args.emsize,
                                              splits=[]).to(device)
    else:
        loss_function = CrossEntropyLoss().to(
            device)  # Should be ok to use with a vocabulary of this small size

    if use_apex:
        optimizer = optimizers.FusedAdam(model.parameters(),
                                         lr=args.lr,
                                         weight_decay=args.wdecay)
    else:
        # SplitCrossEntropyLoss has trainable parameters of its own, so they
        # must be handed to the optimizer alongside the model's.
        params = list(filter(lambda p: p.requires_grad,
                             model.parameters())) + list(
                                 loss_function.parameters())
        optimizer = Adam(params, lr=args.lr, weight_decay=args.wdecay)

    if use_apex:
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=args.opt_level)

    # Shared kwargs threaded through train/evaluate/refine/checkpoint helpers
    parameters = {
        'model': model,
        'optimizer': optimizer,
        'loss_function': loss_function,
        'use_apex': use_apex,
        'amp': amp if use_apex else None,
        'clip': args.clip,
        'alpha': args.alpha,
        'beta': args.beta,
        'bptt': args.bptt,
        'device': device,
        'prior': args.prior,
    }

    # Add backward hook for gradient clipping
    # (element-wise value clamping via torch.clamp, not norm clipping)
    if args.clip:
        if use_apex:
            for p in amp.master_params(optimizer):
                p.register_hook(
                    lambda grad: torch.clamp(grad, -args.clip, args.clip))
        else:
            for p in model.parameters():
                p.register_hook(
                    lambda grad: torch.clamp(grad, -args.clip, args.clip))

    if args.prior == 'vi':
        # Variational inference: re-sample the model's weights before every
        # forward pass via a pre-hook.
        prior = VIPrior(model, device=device)
        parameters['prior'] = prior

        def sample_weights(module: torch.nn.Module, input: torch.Tensor):
            prior.sample_weights(module)

        sample_weights_hook = model.register_forward_pre_hook(sample_weights)

    # Load model checkpoint if available
    start_epoch = 1
    if args.resume:
        if args.checkpoint is None:
            log.error(
                'No checkpoint passed. Specify it using the --checkpoint flag')
            checkpoint = None
        else:
            log.info('Loading the checkpoint at {}'.format(args.checkpoint))
            checkpoint = load_model(args.checkpoint, **parameters)

            start_epoch = checkpoint['epoch']

        # Re-apply weight drop settings after loading the checkpoint
        if args.wdrop:
            for rnn in model.rnns:
                if isinstance(rnn, WeightDrop):
                    rnn.dropout = args.wdrop
                elif rnn.zoneout > 0:
                    rnn.zoneout = args.wdrop

    saved_models = list()

    result_str = '| Language {} | test loss {:5.2f} | test ppl {:8.2f} | test bpc {:8.3f}'

    def test():
        """Evaluate on the test set and log overall + per-language metrics."""
        log.info('=' * 89)
        log.info('Running test set (zero-shot results)...')
        test_loss, avg_loss = evaluate(test_loader, **parameters)
        log.info('Test set finished | test loss {} | test bpc {}'.format(
            test_loss, test_loss / math.log(2)))

        for lang, avg_l_loss in avg_loss.items():
            langstr = dictionary.idx2lang[lang]
            log.info(
                result_str.format(langstr, avg_l_loss, math.exp(avg_l_loss),
                                  avg_l_loss / math.log(2)))

        log.info('=' * 89)

    if args.train:
        # f counts LR decays performed; thresholds sit at f/3 of total epochs
        f = 1.
        stored_loss = 1e32
        epochs_no_improve = 0

        val_losses = list()

        # calculate specific language lr
        # (languages with less data get proportionally larger LR weights)
        data_spec_count = sum([len(ds) for l, ds in train_set.data.items()])
        data_spec_avg = data_spec_count / len(train_set.data.items())
        data_spec_lrweights = dict([(l, data_spec_avg / len(ds))
                                    for l, ds in train_set.data.items()])

        # estimate total number of steps
        total_steps = sum(
            [len(ds) // args.bptt
             for l, ds in train_set.data.items()]) * args.no_epochs
        steps = 0

        try:
            pbar = tqdm.trange(start_epoch,
                               args.no_epochs + 1,
                               position=1,
                               dynamic_ncols=True)
            for epoch in pbar:

                steps = train(train_loader,
                              lr_weights=data_spec_lrweights,
                              **parameters,
                              total_steps=total_steps,
                              steps=steps,
                              scaling=args.scaling,
                              n_samples=args.n_samples,
                              tb_writer=tb_writer)

                val_loss, _ = evaluate(val_loader, **parameters)
                pbar.set_description('Epoch {} | Val loss {}'.format(
                    epoch, val_loss))

                # Save model
                # (temporarily detach the VI sampling hook so checkpointing
                # does not trigger weight re-sampling)
                if args.prior == 'vi':
                    sample_weights_hook.remove()

                filename = path.join(
                    args.checkpoint_dir, '{}_epoch{}{}_{}.pth'.format(
                        timestamp, epoch, '_with_apex' if use_apex else '',
                        args.prior))
                torch.save(make_checkpoint(epoch + 1, **parameters), filename)
                saved_models.append(filename)

                if args.prior == 'vi':
                    sample_weights_hook = model.register_forward_pre_hook(
                        sample_weights)

                # Early stopping
                if val_loss < stored_loss:
                    epochs_no_improve = 0
                    stored_loss = val_loss
                else:
                    epochs_no_improve += 1

                if epochs_no_improve == args.patience:
                    log.info('Early stopping at epoch {}'.format(epoch))
                    break

                val_losses.append(val_loss)

                # Reduce lr every 1/3 total epochs
                if epoch - 1 > f / 3 * args.no_epochs:
                    log.info('Epoch {}/{}. Dividing LR by 10'.format(
                        epoch, args.no_epochs))
                    for g in optimizer.param_groups:
                        g['lr'] = g['lr'] / 10

                    f += 1.
            test()
        except KeyboardInterrupt:
            log.info('Registered KeyboardInterrupt. Stopping training.')
            log.info('Saving last model to disk')

            if args.prior == 'vi':
                sample_weights_hook.remove()

            torch.save(
                make_checkpoint(epoch, **parameters),
                path.join(
                    args.checkpoint_dir, '{}_epoch{}{}_{}.pth'.format(
                        timestamp, epoch, '_with_apex' if use_apex else '',
                        args.prior)))
            return
    elif args.test:
        test()

    # Only test on existing languages if there are no held out languages
    if not args.target_langs:
        exit(0)

    importance = 1e-5

    # If use UNIV, calculate informed prior, else use boring prior
    if args.prior == 'laplace':
        if not isinstance(
                prior,
                LaplacePrior):  # only calculate matrix if it is not supplied.
            log.info('Creating laplace approximation dataset')
            laplace_set = Dataset(data_splits['train'],
                                  batchsize=args.batchsize,
                                  bptt=100,
                                  reset_on_iter=True)
            laplace_loader = DataLoader(laplace_set, num_workers=args.workers)
            log.info('Creating Laplacian prior')
            prior = LaplacePrior(model,
                                 loss_function,
                                 laplace_loader,
                                 use_apex=use_apex,
                                 amp=amp,
                                 device=device)
            parameters['prior'] = prior

            torch.save(
                make_checkpoint('fisher_matrix', **parameters),
                path.join(
                    args.checkpoint_dir, '{}_fishers_matrix{}_{}.pth'.format(
                        timestamp, '_with_apex' if use_apex else '',
                        args.prior)))
        importance = 1e5

    elif args.prior == 'ninf':
        log.info('Creating non-informative Gaussian prior')
        parameters['prior'] = GaussianPrior()
    elif args.prior == 'vi':
        importance = 1e-5
    elif args.prior == 'hmc':
        raise NotImplementedError
    else:
        raise ValueError(
            f'Passed prior {args.prior} is not an implemented inference technique.'
        )

    # NOTE(review): picks the most recently saved checkpoint, not the one with
    # the best validation loss — confirm this is intended.
    best_model = saved_models[-1] if not len(
        saved_models) == 0 else args.checkpoint

    # Remove sampling hook from model
    if args.prior == 'vi':
        sample_weights_hook.remove()

    # Refine on 100 samples on each target
    if args.refine:
        # reset learning rate
        optimizer.param_groups[0]['lr'] = args.lr
        loss = 0

        results = dict()

        # Create individual tests sets
        test_sets = dict()
        for lang, lang_d in data_splits['test'].items():
            test_sets[lang] = DataLoader(Dataset({lang: lang_d},
                                                 make_config=True,
                                                 batchsize=args.test_batchsize,
                                                 bptt=args.bptt,
                                                 eval=True),
                                         num_workers=args.workers)

        for lang, lang_data in tqdm.tqdm(refine_set.items()):
            final_loss = False
            refine_dataloader = DataLoader(lang_data, num_workers=args.workers)
            # Restore the pre-refinement weights before each language
            load_model(best_model, **parameters)

            log.info(f'Refining for language {dictionary.idx2lang[lang]}')
            for epoch in range(1, args.refine_epochs + 1):
                refine(refine_dataloader, **parameters, importance=importance)
                if epoch % 5 == 0:
                    final_loss = True
                    loss, avg_loss = evaluate(test_sets[lang],
                                              model,
                                              loss_function,
                                              only_l=lang,
                                              report_all=True,
                                              device=device)

                    # NOTE(review): this inner `lang` shadows the refine-loop
                    # `lang` variable — verify this is harmless here.
                    for lang, avg_l_loss in avg_loss.items():
                        langstr = dictionary.idx2lang[lang]
                        log.debug(
                            result_str.format(langstr, avg_l_loss,
                                              math.exp(avg_l_loss),
                                              avg_l_loss / math.log(2)))

            # Ensure a final evaluation happens when refine_epochs is not a
            # multiple of 5
            if not final_loss:
                loss, avg_loss = evaluate(test_sets[lang],
                                          model,
                                          loss_function,
                                          only_l=lang,
                                          report_all=True,
                                          device=device)

            for lang, avg_l_loss in avg_loss.items():
                langstr = dictionary.idx2lang[lang]
                log.info(
                    result_str.format(langstr, avg_l_loss,
                                      math.exp(avg_l_loss),
                                      avg_l_loss / math.log(2)))
                results[lang] = avg_l_loss

        log.info('=' * 89)
        log.info('FINAL FEW SHOT RESULTS: ')
        log.info('=' * 89)
        for lang, avg_l_loss in results.items():
            langstr = dictionary.idx2lang[lang]
            log.info(
                result_str.format(langstr, avg_l_loss, math.exp(avg_l_loss),
                                  avg_l_loss / math.log(2)))
        log.info('=' * 89)
Example #19
0
def main(args):
    """Train/evaluate a two-stream (spatial + temporal) RNN classifier.

    Builds a DataLayer per phase, runs training phases every epoch and test
    phases only at epochs listed in ``args.test_intervals``, and saves the
    model state dict at those test-interval epochs.
    """
    this_dir = osp.join(osp.dirname(__file__), '.')
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    datasets = {
        phase: DataLayer(
            data_root=osp.join(args.data_root, phase),
            phase=phase,
        )
        for phase in args.phases
    }

    data_loaders = {
        phase: data.DataLoader(
            datasets[phase],
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.num_workers,
        )
        for phase in args.phases
    }

    model = Model(
        input_size=args.input_size,
        hidden_size=args.hidden_size,
        bidirectional=args.bidirectional,
        num_classes=args.num_classes,
    ).apply(utl.weights_init).to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    softmax = nn.Softmax(dim=1).to(device)
    optimizer = optim.RMSprop(model.parameters(), lr=args.lr)

    for epoch in range(args.start_epoch, args.start_epoch + args.epochs):
        # Per-phase accumulators for the epoch summary printed below
        losses = {phase: 0.0 for phase in args.phases}
        corrects = {phase: 0.0 for phase in args.phases}

        start = time.time()
        for phase in args.phases:
            training = 'Test' not in phase
            if training:
                model.train(True)
            else:
                # Only run the test phase at the configured interval epochs
                if epoch in args.test_intervals:
                    model.train(False)
                else:
                    continue

            with torch.set_grad_enabled(training):
                for batch_idx, (spatial, temporal, length,
                                target) in enumerate(data_loaders[phase]):
                    spatial_input = torch.zeros(*spatial.shape)
                    temporal_input = torch.zeros(*temporal.shape)
                    target_input = []
                    length_input = []

                    # Reorder the batch by descending sequence length, as
                    # required by pack_padded_sequence (enforce_sorted default)
                    # (assumes utl.argsort returns ascending indices — confirm)
                    index = utl.argsort(length)[::-1]
                    for i, idx in enumerate(index):
                        spatial_input[i] = spatial[idx]
                        temporal_input[i] = temporal[idx]
                        target_input.append(target[idx])
                        length_input.append(length[idx])

                    spatial_input = spatial_input.to(device)
                    temporal_input = temporal_input.to(device)
                    target_input = torch.LongTensor(target_input).to(device)
                    pack1 = pack_padded_sequence(spatial_input,
                                                 length_input,
                                                 batch_first=True)
                    pack2 = pack_padded_sequence(temporal_input,
                                                 length_input,
                                                 batch_first=True)

                    score = model(pack1, pack2)
                    loss = criterion(score, target_input)
                    # Weight the batch loss by batch size so the epoch average
                    # divides out correctly below
                    losses[phase] += loss.item() * target_input.shape[0]
                    if args.debug:
                        print(loss.item())

                    if training:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()
                    else:
                        # Accuracy only tracked for test phases
                        pred = torch.max(softmax(score), 1)[1].cpu()
                        corrects[phase] += torch.sum(
                            pred == target_input.cpu()).item()
        end = time.time()

        # NOTE(review): assumes 'Train', 'Validation' and 'Test' are all in
        # args.phases; Test numbers are stale on non-interval epochs.
        print('Epoch {:2} | '
              'Train loss: {:.5f} Val loss: {:.5f} | '
              'Test loss: {:.5f} accuracy: {:.5f} | '
              'running time: {:.2f} sec'.format(
                  epoch,
                  losses['Train'] / len(data_loaders['Train'].dataset),
                  losses['Validation'] /
                  len(data_loaders['Validation'].dataset),
                  losses['Test'] / len(data_loaders['Test'].dataset),
                  corrects['Test'] / len(data_loaders['Test'].dataset),
                  end - start,
              ))

        if epoch in args.test_intervals:
            torch.save(
                model.state_dict(),
                osp.join(this_dir,
                         './state_dict-epoch-' + str(epoch) + '.pth'))
Example #20
0
# init model: dispatch on the requested recurrent architecture
if arch == "rnn":
    model = RNN(**params_model, output_size=num_classes).to(device=device)
elif arch == "gru":
    model = GRU(**params_model, output_size=num_classes).to(device=device)
elif arch == "lstm":
    model = LSTM(**params_model, output_size=num_classes).to(device=device)
else:
    raise Exception(
        "Only 'rnn', 'gru', and 'lstm' are available model options.")

print("Model size: {0}".format(count_parameters(model)))

# NLLLoss expects log-probabilities (the model presumably ends in
# log_softmax — TODO confirm)
criterion = nn.NLLLoss()
op = torch.optim.SGD(model.parameters(), **params_op)
# Halve LR when the monitored metric plateaus for 4 epochs
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(op,
                                                       patience=4,
                                                       factor=0.5,
                                                       verbose=True)

# resume from checkpoint
if args.resume:
    if os.path.isfile(args.resume):
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        best_acc1 = checkpoint['best_acc1']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> loaded checkpoint '{}' (epoch {})".format(
Example #21
0
def main(argv):
    """Train an RNN on visit sequences and report test AUROC/AUPR.

    Loads pickled train/valid/test sequences and labels, trains with
    class-weighted cross entropy, tracks the best validation loss, and
    evaluates/saves the model only when validation improves.
    """
    global args
    args = parser.parse_args(argv)
    if args.threads == -1:
        # Leave one core free; fall back to 1 on single-core machines
        args.threads = torch.multiprocessing.cpu_count() - 1 or 1
    print('===> Configuration')
    print(args)

    cuda = args.cuda
    if cuda:
        if torch.cuda.is_available():
            print('===> {} GPUs are available'.format(
                torch.cuda.device_count()))
        else:
            raise Exception("No GPU found, please run with --no-cuda")

    # Fix the random seed for reproducibility
    # random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if cuda:
        torch.cuda.manual_seed(args.seed)

    # Data loading
    print('===> Loading entire datasets')
    with open(args.data_path + 'train.seqs', 'rb') as f:
        train_seqs = pickle.load(f)
    with open(args.data_path + 'train.labels', 'rb') as f:
        train_labels = pickle.load(f)
    with open(args.data_path + 'valid.seqs', 'rb') as f:
        valid_seqs = pickle.load(f)
    with open(args.data_path + 'valid.labels', 'rb') as f:
        valid_labels = pickle.load(f)
    with open(args.data_path + 'test.seqs', 'rb') as f:
        test_seqs = pickle.load(f)
    with open(args.data_path + 'test.labels', 'rb') as f:
        test_labels = pickle.load(f)

    # Feature count = largest medical code id across all splits, plus one
    # (assumes seqs are nested lists of int codes — confirm against prepro)
    max_code = max(
        map(lambda p: max(map(lambda v: max(v), p)),
            train_seqs + valid_seqs + test_seqs))
    num_features = max_code + 1

    print("     ===> Construct train set")
    train_set = VisitSequenceWithLabelDataset(train_seqs,
                                              train_labels,
                                              num_features,
                                              reverse=False)
    print("     ===> Construct validation set")
    valid_set = VisitSequenceWithLabelDataset(valid_seqs,
                                              valid_labels,
                                              num_features,
                                              reverse=False)
    print("     ===> Construct test set")
    test_set = VisitSequenceWithLabelDataset(test_seqs,
                                             test_labels,
                                             num_features,
                                             reverse=False)

    train_loader = DataLoader(dataset=train_set,
                              batch_size=args.batch_size,
                              shuffle=True,
                              collate_fn=visit_collate_fn,
                              num_workers=args.threads)
    valid_loader = DataLoader(dataset=valid_set,
                              batch_size=args.eval_batch_size,
                              shuffle=False,
                              collate_fn=visit_collate_fn,
                              num_workers=args.threads)
    test_loader = DataLoader(dataset=test_set,
                             batch_size=args.eval_batch_size,
                             shuffle=False,
                             collate_fn=visit_collate_fn,
                             num_workers=args.threads)
    print('===> Dataset loaded!')

    # Create model
    print('===> Building a Model')

    model = RNN(dim_input=num_features, dim_emb=128, dim_hidden=128)

    if cuda:
        model = model.cuda()
    print(model)
    print('===> Model built!')

    # Inverse-frequency class weights: class 0 weighted by the positive-label
    # fraction, class 1 by the negative fraction (assumes binary 0/1 labels)
    weight_class0 = torch.mean(torch.FloatTensor(train_set.labels))
    weight_class1 = 1.0 - weight_class0
    weight = torch.FloatTensor([weight_class0, weight_class1])

    criterion = nn.CrossEntropyLoss(weight=weight)
    if args.cuda:
        criterion = criterion.cuda()

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=args.momentum,
                                nesterov=False,
                                weight_decay=args.weight_decay)
    scheduler = ReduceLROnPlateau(optimizer, 'min')

    best_valid_epoch = 0
    best_valid_loss = sys.float_info.max

    train_losses = []
    valid_losses = []

    # (assumes args.save ends with a path separator — TODO confirm)
    if not os.path.exists(args.save):
        os.makedirs(args.save)

    for ei in trange(args.epochs, desc="Epochs"):
        # Train
        _, _, train_loss = rnn_epoch(train_loader,
                                     model,
                                     criterion=criterion,
                                     optimizer=optimizer,
                                     train=True)
        train_losses.append(train_loss)

        # Eval
        _, _, valid_loss = rnn_epoch(valid_loader, model, criterion=criterion)
        valid_losses.append(valid_loss)

        scheduler.step(valid_loss)

        is_best = valid_loss < best_valid_loss

        if is_best:
            best_valid_epoch = ei
            best_valid_loss = valid_loss

            # evaluate on the test set
            # (only on improvement, so the final printed test metrics come
            # from the best-validation epoch)
            test_y_true, test_y_pred, test_loss = rnn_epoch(
                test_loader, model, criterion=criterion)

            if args.cuda:
                test_y_true = test_y_true.cpu()
                test_y_pred = test_y_pred.cpu()

            # Column 1 holds the positive-class scores
            test_auc = roc_auc_score(test_y_true.numpy(),
                                     test_y_pred.numpy()[:, 1],
                                     average="weighted")
            test_aupr = average_precision_score(test_y_true.numpy(),
                                                test_y_pred.numpy()[:, 1],
                                                average="weighted")

            with open(args.save + 'train_result.txt', 'w') as f:
                f.write('Best Validation Epoch: {}\n'.format(ei))
                f.write('Best Validation Loss: {}\n'.format(valid_loss))
                f.write('Train Loss: {}\n'.format(train_loss))
                f.write('Test Loss: {}\n'.format(test_loss))
                f.write('Test AUROC: {}\n'.format(test_auc))
                f.write('Test AUPR: {}\n'.format(test_aupr))

            torch.save(model, args.save + 'best_model.pth')
            torch.save(model.state_dict(), args.save + 'best_model_params.pth')

        # plot
        if args.plot:
            plt.figure(figsize=(12, 9))
            plt.plot(np.arange(len(train_losses)),
                     np.array(train_losses),
                     label='Training Loss')
            plt.plot(np.arange(len(valid_losses)),
                     np.array(valid_losses),
                     label='Validation Loss')
            plt.xlabel('epoch')
            plt.ylabel('Loss')
            plt.legend(loc="best")
            plt.tight_layout()
            plt.savefig(args.save + 'loss_plot.eps', format='eps')
            plt.close()

    print('Best Validation Epoch: {}\n'.format(best_valid_epoch))
    print('Best Validation Loss: {}\n'.format(best_valid_loss))
    print('Train Loss: {}\n'.format(train_loss))
    print('Test Loss: {}\n'.format(test_loss))
    print('Test AUROC: {}\n'.format(test_auc))
    print('Test AUPR: {}\n'.format(test_aupr))
if __name__ == "__main__":
    # Build data iterators and vocab, then train an RNN or CNN text classifier.
    train_iter, dev_iter, test_iter, TEXT, LABEL = load_iters(
        batch_size, device, data_path, vectors)
    vocab_size = len(TEXT.vocab.itos)
    # build model
    if use_rnn:
        model = RNN(vocab_size, embed_size, hidden_size, num_layers,
                    num_classes, bidirectional, dropout_rate)
    else:
        model = CNN(vocab_size, embed_size, num_classes, num_filters,
                    kernel_sizes, dropout_rate)
    if vectors is not None:
        # NOTE(review): nn.Embedding.from_pretrained is a classmethod that
        # returns a NEW embedding; calling it on model.embed and discarding
        # the result may not load the vectors — verify weights are applied.
        model.embed.from_pretrained(TEXT.vocab.vectors, freeze=freeze)
    model.to(device)

    optimizer = Adam(model.parameters(), lr=learning_rate)
    loss_func = nn.CrossEntropyLoss()
    writer = SummaryWriter('logs', comment="rnn")
    for epoch in trange(train_epochs, desc="Epoch"):
        model.train()
        ep_loss = 0
        for step, batch in enumerate(tqdm(train_iter, desc="Iteration")):
            (inputs, lens), labels = batch.text, batch.label
            outputs = model(inputs, lens)
            loss = loss_func(outputs, labels)
            ep_loss += loss.item()

            # zero grads on the model (covers all params handed to Adam here),
            # clip by global norm, then step
            model.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), clip)
            optimizer.step()
Example #23
0
def train():
    """Train the world-model RNN on latent sequences from a frozen VAE.

    Loads the newest pretrained VAE checkpoint, optionally resumes the RNN
    from its newest checkpoint, then trains the RNN to predict the next
    latent vector from the current (latent, action) pair, logging losses
    and dumping decoded sample images at fixed step intervals.
    """
    global_step = 0

    # Load the most recent pretrained VAE checkpoint (filenames like
    # '012k.pth.tar' sort by step count) and freeze it in eval mode.
    vae = VAE(hp.vsize).to(DEVICE)
    ckpt = sorted(glob.glob(os.path.join(hp.ckpt_dir, 'vae',
                                         '*k.pth.tar')))[-1]
    vae_state = torch.load(ckpt)
    vae.load_state_dict(vae_state['model'])
    vae.eval()
    print('Loaded vae ckpt {}'.format(ckpt))

    rnn = RNN(hp.vsize, hp.asize, hp.rnn_hunits).to(DEVICE)
    # Resume the RNN from the newest checkpoint if one exists; the step
    # counter is recovered from the filename ('012k.pth.tar' -> 12 * 1000).
    ckpts = sorted(glob.glob(os.path.join(hp.ckpt_dir, 'rnn', '*k.pth.tar')))
    if ckpts:
        ckpt = ckpts[-1]
        rnn_state = torch.load(ckpt)
        rnn.load_state_dict(rnn_state['model'])
        global_step = int(os.path.basename(ckpt).split('.')[0][:-1]) * 1000
        print('Loaded rnn ckpt {}'.format(ckpt))

    data_path = hp.data_dir if not hp.extra else hp.extra_dir
    # optimizer = torch.optim.RMSprop(rnn.parameters(), lr=1e-3)
    optimizer = torch.optim.Adam(rnn.parameters(), lr=1e-4)
    dataset = GameEpisodeDataset(data_path, seq_len=hp.seq_len)
    loader = DataLoader(dataset,
                        batch_size=1,
                        shuffle=True,
                        drop_last=True,
                        num_workers=hp.n_workers,
                        collate_fn=collate_fn)
    testset = GameEpisodeDataset(data_path, seq_len=hp.seq_len, training=False)
    test_loader = DataLoader(testset,
                             batch_size=1,
                             shuffle=False,
                             drop_last=False,
                             collate_fn=collate_fn)

    ckpt_dir = os.path.join(hp.ckpt_dir, 'rnn')
    sample_dir = os.path.join(ckpt_dir, 'samples')
    os.makedirs(sample_dir, exist_ok=True)

    l1 = nn.L1Loss()

    while global_step < hp.max_step:
        # GO_states = torch.zeros([hp.batch_size, 1, hp.vsize+hp.asize]).to(DEVICE)
        with tqdm(enumerate(loader), total=len(loader), ncols=70,
                  leave=False) as t:
            t.set_description('Step {}'.format(global_step))
            for idx, (obs, actions) in t:
                obs, actions = obs.to(DEVICE), actions.to(DEVICE)
                # Encode observations to latents with the frozen VAE; the
                # posterior mean is used directly instead of sampling.
                with torch.no_grad():
                    latent_mu, latent_var = vae.encoder(obs)  # (B*T, vsize)
                    z = latent_mu
                    # z = vae.reparam(latent_mu, latent_var) # (B*T, vsize)
                    z = z.view(-1, hp.seq_len, hp.vsize)  # (B*n_seq, T, vsize)
                # import pdb; pdb.set_trace()

                # Teacher forcing: predict z[t+1] from (z[t], action[t]).
                next_z = z[:, 1:, :]
                z, actions = z[:, :-1, :], actions[:, :-1, :]
                states = torch.cat([z, actions], dim=-1)  # (B, T, vsize+asize)
                # states = torch.cat([GO_states, next_states[:,:-1,:]], dim=1)
                # NOTE(review): x is assumed to be the RNN's next-latent
                # prediction; the other two outputs (presumably hidden
                # state) are discarded — confirm against RNN.forward.
                x, _, _ = rnn(states)

                # L1 distance between predicted and true next latents.
                loss = l1(x, next_z)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                global_step += 1

                if global_step % hp.log_interval == 0:
                    # Evaluate on the held-out loader, append to the text
                    # log, and decode one sequence (batch index 2 — assumes
                    # batch dim > 2; TODO confirm) for visual inspection.
                    eval_loss = evaluate(test_loader, vae, rnn, global_step)
                    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                    with open(os.path.join(ckpt_dir, 'train.log'), 'a') as f:
                        log = '{} || Step: {}, train_loss: {:.4f}, loss: {:.4f}\n'.format(
                            now, global_step, loss.item(), eval_loss)
                        f.write(log)
                    S = 2
                    y = vae.decoder(x[S, :, :])
                    v = vae.decoder(next_z[S, :, :])
                    save_image(
                        y,
                        os.path.join(sample_dir,
                                     '{:04d}-rnn.png'.format(global_step)))
                    save_image(
                        v,
                        os.path.join(sample_dir,
                                     '{:04d}-vae.png'.format(global_step)))
                    save_image(
                        obs[S:S + hp.seq_len - 1],
                        os.path.join(sample_dir,
                                     '{:04d}-obs.png'.format(global_step)))

                if global_step % hp.save_interval == 0:
                    # Periodic checkpoint; filename encodes the step in
                    # thousands to match the resume logic above.
                    d = {
                        'model': rnn.state_dict(),
                        'optimizer': optimizer.state_dict(),
                    }
                    torch.save(
                        d,
                        os.path.join(
                            ckpt_dir,
                            '{:03d}k.pth.tar'.format(global_step // 1000)))
# All 14 years of data
X_test_dep_std = X
# Add a leading batch dimension of 1 so shapes become (1, T, features).
X_train_dep_std = np.expand_dims(X_train_dep_std, axis=0)
y_train_dep_std = np.expand_dims(y_train_dep_std, axis=0)
X_test_dep_std = np.expand_dims(X_test_dep_std, axis=0)

# Convert numpy arrays to float tensors.  torch.autograd.Variable is
# deprecated since PyTorch 0.4 (this script already relies on .item(),
# which requires >= 0.4); plain tensors participate in autograd directly.
X_train_dep_std = torch.from_numpy(X_train_dep_std).float()
y_train_dep_std = torch.from_numpy(y_train_dep_std).float()
X_test_dep_std = torch.from_numpy(X_test_dep_std).float()

# Define rnn model
model = RNN(input_size=5, hidden_size=40, num_layers=2, class_size=1,
            dropout=0.5, rnn_type='lstm', dropout_bool=True)
# Define optimization function (optimize all rnn parameters)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
# Define loss function
loss_func = nn.MSELoss()

# Start training.  The loop variable was renamed from `iter`, which
# shadowed the `iter` builtin.
for step in range(10000):
    model.train()
    prediction = model(X_train_dep_std)
    loss = loss_func(prediction, y_train_dep_std)
    optimizer.zero_grad()  # clear gradients for this training step
    loss.backward()        # back propagation, compute gradients
    optimizer.step()
    if step % 100 == 0:
        print("iteration: %s, loss: %s" % (step, loss.item()))

# Save model
Example #25
0
# Optionally keep the pretrained embedding vectors fixed during training.
if args.freeze_embedding:
    model.embedding.weight.requires_grad = False

# Move the model to the selected device (CPU or GPU).
model = model.to(device)


def count_parameters(model):
    """Return the number of trainable (requires_grad) parameters in *model*."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total


myprint(f'The model has {count_parameters(model):,} trainable parameters')

# Build the optimizer requested on the command line (SGD or Adam).
if args.optimizer == "SGD":
    print("Using SGD")
    optimizer = optim.SGD(
        model.parameters(),
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay,
        nesterov=args.nesterov,
    )
else:
    print("Using Adam")
    optimizer = optim.Adam(
        model.parameters(),
        lr=args.lr,
        weight_decay=args.weight_decay,
        amsgrad=args.amsgrad,
    )

criterion = nn.CrossEntropyLoss().to(device)

# Task-appropriate metric: MIMIC-D is scored with F1, everything else
# with plain categorical accuracy.
accuracy = f1_score if args.task == "MIMIC-D" else categorical_accuracy
Example #26
0
    # Signer-independent split: the training set excludes the held-out
    # validation signer(s).
    train_set = PCDataset(DATA_ROOT, exclude_patterns=val_signer)
    # NOTE(review): the validation set excludes `signerlist` — presumably
    # the training signers, leaving only val_signer's samples — confirm
    # against how signerlist/val_signer are built in the enclosing scope.
    val_set = PCDataset(DATA_ROOT, exclude_patterns=signerlist)

    print('-' * 80)
    print(
        f'[INFO] Training on {len(train_set)} samples from {len(signerlist)} signers'
    )
    print(
        f'[INFO] Validating on {len(val_set)} samples from {len(val_signer)} signers'
    )

    train_loader = PCDataLoader(train_set)
    val_loader = PCDataLoader(val_set)

    # RNN classifier trained with NLL loss; SGD with momentum and a
    # step LR schedule that decays by 10x every 10 epochs.
    model = RNN(60, 100, 30)
    criterion = torch.nn.NLLLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=10,
                                                gamma=0.1)

    _, history = train(model, train_loader, val_loader, criterion, optimizer,
                       scheduler, NUM_EPOCH, None, SAVE_PATH)

    print(
        f'[INFO] Training finished with best validation accuracy: {max(history["val_acc"]):.4f}'
    )
    print('-' * 80)

    # Persist the loss/accuracy curves as an SVG figure.
    plot_history(history, SAVE_PATH, 'svg')
Example #27
0
                            n_blocks=args.num_layers,
                            dropout=1. - args.dp_keep_prob)
    # these 3 attributes don't affect the Transformer's computations;
    # they are only used in run_epoch
    model.batch_size = args.batch_size
    model.seq_len = args.seq_len
    model.vocab_size = vocab_size
else:
    print("Model type not recognized.")

model = model.to(device)

# LOSS FUNCTION
loss_fn = nn.CrossEntropyLoss()
# NOTE(review): no torch optimizer is created for non-ADAM settings —
# presumably SGD variants are applied manually later in the script; if
# not, `optimizer` is left undefined for those settings.
if args.optimizer == 'ADAM':
    optimizer = torch.optim.Adam(model.parameters(), lr=args.initial_lr)

# LEARNING RATE SCHEDULE: decay by 1/1.15 per epoch after a flat period.
lr = args.initial_lr
lr_decay_base = 1 / 1.15
m_flat_lr = 14.0  # we will not touch lr for the first m_flat_lr epochs

###############################################################################
#
# DEFINE COMPUTATIONS FOR PROCESSING ONE EPOCH
#
###############################################################################


def repackage_hidden(h):
    """
Example #28
0
else:
    # no embedding layer (one-hot encoding)
    model = OneHotRNN(vocabulary=dataset.vocabulary,
                      rnn_type=args.rnn_type,
                      hidden_size=args.hidden_size,
                      n_layers=args.n_layers,
                      dropout=args.dropout,
                      bidirectional=args.bidirectional,
                      nonlinearity=args.nonlinearity)

# Optionally warm-start the model from a pretrained checkpoint file.
if args.pretrain_model is not None:
    model.load_state_dict(torch.load(args.pretrain_model))

# Adam with its default moment/epsilon settings; only the learning rate
# comes from the command line.
optimizer = optim.Adam(
    model.parameters(),
    lr=args.learning_rate,
    betas=(0.9, 0.999),
    eps=1e-08,
)

# Stop training once validation stops improving for `patience` checks.
early_stop = EarlyStopping(patience=args.patience)

# CSV file that records the training schedule for this sample index.
sched_filename = f"training_schedule-{args.sample_idx + 1}.csv"
sched_file = os.path.join(args.output_dir, sched_filename)

# Running counter used by the epoch loop below.
counter = 0
for epoch in range(args.max_epochs):
    # iterate over batches
Example #29
0
# Report how many examples landed in each split.
print("Train Size: ", len(loader_train.sampler.indices))
print("Validation Size: ", len(loader_val.sampler.indices))

# One output unit per encoded label class.
num_classes = len(train_set.label_encoder.classes_)
model = RNN(embeddings, num_classes=num_classes, **_hparams)

# Per-class weights counteract label imbalance in the loss.
weights = class_weigths(train_set.labels)

if torch.cuda.is_available():
    model.cuda()
    weights = weights.cuda()

criterion = torch.nn.CrossEntropyLoss(weight=weights)

# Only pass trainable (unfrozen) parameters to the optimizer.
parameters = (p for p in model.parameters() if p.requires_grad)
optimizer = torch.optim.Adam(parameters, lr=lr)

#############################################################
# Train
#############################################################

best_val_loss = None

# Per-epoch metric tables, one row per epoch, written during training.
colms_l = ['Train_Loss', 'Val_Loss']
colms_acc = ['Train_Acc', 'Val_Acc']
colms_f1 = ['Train_F1', 'Val_F1']

epoch_index = range(1, EPOCHS + 1)
df_l = pd.DataFrame(columns=colms_l, index=epoch_index)
df_acc = pd.DataFrame(columns=colms_acc, index=epoch_index)
df_f1 = pd.DataFrame(columns=colms_f1, index=epoch_index)
Example #30
0
def main(args):
    """Train a CNN-encoder / RNN-decoder image-captioning pair on COCO.

    Builds train/val data loaders, optionally restores a checkpoint, runs
    the training loop with periodic validation and sampling, and saves
    models plus loss histories on exit (including on Ctrl-C).

    NOTE(review): this uses the pre-0.4 PyTorch API (``volatile=True``
    Variables and ``loss.data[0]``); it will not run unmodified on
    modern PyTorch.
    """
    # hyperparameters
    batch_size = args.batch_size
    num_workers = 1

    # Image Preprocessing: resize, flip augmentation, and ImageNet
    # normalization statistics.
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    # load COCOs dataset
    IMAGES_PATH = 'data/train2014'
    CAPTION_FILE_PATH = 'data/annotations/captions_train2014.json'

    vocab = load_vocab()
    train_loader = get_coco_data_loader(path=IMAGES_PATH,
                                        json=CAPTION_FILE_PATH,
                                        vocab=vocab,
                                        transform=transform,
                                        batch_size=batch_size,
                                        shuffle=True,
                                        num_workers=num_workers)

    IMAGES_PATH = 'data/val2014'
    CAPTION_FILE_PATH = 'data/annotations/captions_val2014.json'
    val_loader = get_coco_data_loader(path=IMAGES_PATH,
                                      json=CAPTION_FILE_PATH,
                                      vocab=vocab,
                                      transform=transform,
                                      batch_size=batch_size,
                                      shuffle=True,
                                      num_workers=num_workers)

    losses_val = []
    losses_train = []

    # Build the models
    ngpu = 1
    initial_step = initial_epoch = 0
    embed_size = args.embed_size
    num_hiddens = args.num_hidden
    learning_rate = 1e-3
    num_epochs = 3
    log_step = args.log_step
    save_step = 500
    checkpoint_dir = args.checkpoint_dir

    encoder = CNN(embed_size)
    decoder = RNN(embed_size,
                  num_hiddens,
                  len(vocab),
                  1,
                  rec_unit=args.rec_unit)

    # Loss
    criterion = nn.CrossEntropyLoss()

    # Either resume everything (weights, optimizer, step/epoch, loss
    # history) from a checkpoint, or build a fresh optimizer over the
    # decoder plus only the encoder's trainable head (linear + batchnorm).
    if args.checkpoint_file:
        encoder_state_dict, decoder_state_dict, optimizer, *meta = utils.load_models(
            args.checkpoint_file, args.sample)
        initial_step, initial_epoch, losses_train, losses_val = meta
        encoder.load_state_dict(encoder_state_dict)
        decoder.load_state_dict(decoder_state_dict)
    else:
        params = list(decoder.parameters()) + list(
            encoder.linear.parameters()) + list(encoder.batchnorm.parameters())
        optimizer = torch.optim.Adam(params, lr=learning_rate)

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    # In sample mode, just generate captions from the loaded models.
    if args.sample:
        return utils.sample(encoder, decoder, vocab, val_loader)

    # Train the Models
    total_step = len(train_loader)
    try:
        for epoch in range(initial_epoch, num_epochs):

            for step, (images, captions,
                       lengths) in enumerate(train_loader, start=initial_step):

                # Set mini-batch dataset.  Targets are the flattened,
                # padding-free caption tokens from pack_padded_sequence.
                images = utils.to_var(images, volatile=True)
                captions = utils.to_var(captions)
                targets = pack_padded_sequence(captions,
                                               lengths,
                                               batch_first=True)[0]

                # Forward, Backward and Optimize
                decoder.zero_grad()
                encoder.zero_grad()

                if ngpu > 1:
                    # run on multiple GPU
                    features = nn.parallel.data_parallel(
                        encoder, images, range(ngpu))
                    outputs = nn.parallel.data_parallel(
                        decoder, features, range(ngpu))
                else:
                    # run on single GPU
                    features = encoder(images)
                    outputs = decoder(features, captions, lengths)

                train_loss = criterion(outputs, targets)
                # NOTE(review): .data[0] is the pre-0.4 scalar accessor.
                losses_train.append(train_loss.data[0])
                train_loss.backward()
                optimizer.step()

                # Run validation set and predict
                if step % log_step == 0:
                    # Only the encoder's batchnorm is switched to eval;
                    # dropout (if any) elsewhere stays in train mode.
                    encoder.batchnorm.eval()
                    # run validation set
                    batch_loss_val = []
                    for val_step, (images, captions,
                                   lengths) in enumerate(val_loader):
                        images = utils.to_var(images, volatile=True)
                        captions = utils.to_var(captions, volatile=True)

                        targets = pack_padded_sequence(captions,
                                                       lengths,
                                                       batch_first=True)[0]
                        features = encoder(images)
                        outputs = decoder(features, captions, lengths)
                        val_loss = criterion(outputs, targets)
                        batch_loss_val.append(val_loss.data[0])

                    losses_val.append(np.mean(batch_loss_val))

                    # predict: greedy-sample a caption for the first image
                    # of the last validation batch and print it next to
                    # the ground truth.
                    sampled_ids = decoder.sample(features)
                    sampled_ids = sampled_ids.cpu().data.numpy()[0]
                    sentence = utils.convert_back_to_text(sampled_ids, vocab)
                    print('Sample:', sentence)

                    true_ids = captions.cpu().data.numpy()[0]
                    sentence = utils.convert_back_to_text(true_ids, vocab)
                    print('Target:', sentence)

                    print(
                        'Epoch: {} - Step: {} - Train Loss: {} - Eval Loss: {}'
                        .format(epoch, step, losses_train[-1], losses_val[-1]))
                    encoder.batchnorm.train()

                # Save the models
                if (step + 1) % save_step == 0:
                    utils.save_models(encoder, decoder, optimizer, step, epoch,
                                      losses_train, losses_val, checkpoint_dir)
                    utils.dump_losses(
                        losses_train, losses_val,
                        os.path.join(checkpoint_dir, 'losses.pkl'))

    except KeyboardInterrupt:
        pass
    finally:
        # Do final save.
        # NOTE(review): `step`/`epoch` are loop variables — if training is
        # interrupted before the first iteration they are undefined and
        # this raises NameError.
        utils.save_models(encoder, decoder, optimizer, step, epoch,
                          losses_train, losses_val, checkpoint_dir)
        utils.dump_losses(losses_train, losses_val,
                          os.path.join(checkpoint_dir, 'losses.pkl'))