Example no. 1
    def create_model(self, device):
        ntokens = len(self.corpus.dictionary)
        net = model.RNNModel(self.model_type, ntokens, self.emsize, self.nhid, self.nlayers, self.dropout, self.tied).to(device)

        self.criterion = nn.NLLLoss()

        return net
Example no. 2
def objective(hparams):
    """Minimizing validation loss wrt our hyperparameters"""
    nlayers = int(hparams[0])
    emsize = int(hparams[1])
    nhid = int(hparams[2])
    rnn_type = str(hparams[3])

    ntokens = len(corpus.dictionary)

    global model
    # Note: this rebinds the global name `model` (originally the imported
    # module) to the network instance, so `model.RNNModel` only resolves on
    # the first call to `objective`.
    model = model.RNNModel(rnn_type, ntokens, emsize, nhid, nlayers, args.dropout, args.tied).to(device)
    #model = nn.DataParallel(model)

    global lr
    lr = args.lr
    best_val_loss = None
    for epoch in range(1, args.epochs+1):
        epoch_start_time = time.time()
        train()
        val_loss = evaluate(val_data)

        print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                'valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time),
                                           val_loss, math.exp(val_loss)))

        if not best_val_loss or val_loss < best_val_loss:
            best_val_loss = val_loss
        else:
            # Anneal the learning rate if no improvement has been seen in the validation dataset.
            lr /= 4.0

    return val_loss
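The positional unpacking of hparams suggests this objective is meant to be driven by a black-box optimizer. A minimal sketch of such a driver using scikit-optimize (an assumption: the original search script is not shown, and the bounds below are illustrative):

from skopt import gp_minimize
from skopt.space import Integer, Categorical

# Hypothetical search space, matching the unpacking order above:
# [nlayers, emsize, nhid, rnn_type].
space = [Integer(1, 4),
         Integer(64, 512),
         Integer(64, 512),
         Categorical(['LSTM', 'GRU'])]

result = gp_minimize(objective, space, n_calls=20)
print('best val loss:', result.fun, 'with hparams:', result.x)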
Example no. 3

def test_torch_lm():
    # device = torch.device('cuda:4')
    net = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                         args.nlayers, args.dropout, args.tied)
    # net = loaded_model(pretrained=False)

    x = np.arange(80 * 5 * 32 // 20).reshape(20, 32).astype('int64')
    init_hidden = np.zeros((20, 32, 200), dtype='float32')
    init_hidden = torch.tensor(init_hidden)
    init_cell = np.zeros((20, 32, 200), dtype='float32')
    pre_cell = torch.tensor(init_cell)
    # x = torch.LongTensor(x)

    # net.to(device)
    # x = x.to(device)
    # init_hidden = init_hidden.to(device)
    # init_cell = pre_cell.to(device)

    with torch.no_grad():
        # warmup
        out = net(x, init_hidden, pre_cell)

        t1 = time.time()
        for i in range(100):
            out = net(x, init_hidden, pre_cell)
        t2 = time.time()
        # (t2 - t1) is the total time in seconds for 100 iterations, so
        # multiplying by 10 yields milliseconds per iteration.
        print('torch cost: {} ms.'.format((t2 - t1) * 10))
Example no. 4
    def __init__(self):
        # TODO can we just change this to 1?
        self.eval_batch_size = 10
        self.train_data = self.batchify(corpus.train, args.batch_size)
        self.val_data = self.batchify(corpus.valid, self.eval_batch_size)
        self.test_data = self.batchify(corpus.test, self.eval_batch_size)
        # self.test_data = self.batchify(corpus.test, 1)

        ###############################################################################
        # Build the model
        ###############################################################################

        ntokens = len(corpus.dictionary)
        if args.load:
            with open(args.save, 'rb') as f:
                self.model = torch.load(f)
        else:
            self.model = model.RNNModel(args.model, ntokens, args.emsize,
                                        args.nhid, args.nlayers, args.dropout,
                                        args.tied)
        if args.cuda:
            self.model.cuda()

        self.criterion = nn.CrossEntropyLoss()
Example no. 5
    def model_func(wrapped_import, inputs):
        ###############################################################################
        # Build the model
        ###############################################################################
        if wrapped_import:
            nn = wrapped_import("torch.nn")
            model = wrapped_import("model")
        else:
            from torch import nn
            import model

        if args.model == 'Transformer':
            net = model.TransformerModel(ntokens, args.emsize, args.nhead,
                                         args.nhid, args.nlayers, args.dropout)
        else:
            net = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                                 args.nlayers, args.dropout, args.tied)

        net.eval()  # for verification, need no random elements (e.g. dropout)
        # criterion = nn.NLLLoss()

        if args.model != 'Transformer':
            hidden = net.init_hidden(args.batch_size)
        else:
            hidden = None
        with torch.no_grad():
            if args.model == 'Transformer':
                output = net(inputs)
                output = output.view(-1, ntokens)
            else:
                output, hidden = net(inputs, hidden)

            return output
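For reference, a minimal stand-in for the wrapped_import hook used above, assuming it only needs to behave like a plain import, can be built on importlib:

import importlib

def wrapped_import(name):
    # Hypothetical minimal implementation: resolve a module by its dotted
    # name, exactly as a regular import statement would.
    return importlib.import_module(name)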
Example no. 6
def train_rnn_model(x_train, y_train, x_val, y_val, params):
    rnn_model = model.RNNModel(
        max_seq_length=x_train.shape[1],
        input_size=params["input_size"],
        output_size=params["output_size"],
        embed_dim=params["embed_dim"],
        emb_trainable=params["emb_trainable"],
        model_name=params["model_name"],
        hidden_activation=params["hidden_activation"],
        out_activation=params["out_activation"],
        hidden_dim=params["hidden_dims"][0],
        kernel_initializer=params["kernel_initializer"],
        kernel_regularizer=params["kernel_regularizer"],
        recurrent_regularizer=params["recurrent_regularizer"],
        input_dropout=params["input_dropout"],
        recurrent_dropout=params["recurrent_dropout"],
        rnn_unit_type=params["rnn_unit_type"],
        bidirectional=params["bidirectional"],
        attention=params["attention"],
        embs_matrix=params["embs_matrix"])
    history = train_model(rnn_model,
                          x_train,
                          y_train,
                          out_dir=params["out_dir"],
                          validation_data=(x_val, y_val),
                          save_checkpoint=params["save_checkpoint"],
                          n_epochs=params["n_epochs"],
                          batch_size=params["batch_size"],
                          verbose=params["verbose"],
                          early_stopping=params["early_stopping"],
                          learning_rate=params["learning_rate"],
                          loss=params["loss"],
                          ckpt_name_prefix=utils.get_file_name(params))
    return utils.extract_results_from_history(history)
Example no. 7
def _get_model(is_train):
    if FLAGS.model == 'rnn':
        return model.RNNModel(is_train=is_train)
    elif FLAGS.model == 'sprnn':
        return model.SPRNNModel(is_train=is_train)
    else:
        raise AttributeError("Model unimplemented: " + FLAGS.model)
Example no. 8
def main():
    data_loader = DataLoader(args)
    if (args.process_data):
        data_loader.process_data()
        return
    torch.cuda.set_device(args.gpu_device)

    data_loader.load()
    if (args.model == "rnn"):
        myModel = model.RNNModel(args, data_loader.vocab_size, 8,
                                 data_loader.id_2_vec).cuda()
    elif (args.model == "cnn"):
        myModel = model.CNNModel(args, data_loader.vocab_size, 8,
                                 data_loader.id_2_vec).cuda()
    elif (args.model == "baseline"):
        myModel = model.Baseline(args, data_loader.vocab_size, 8,
                                 data_loader.id_2_vec).cuda()
    else:
        print("invalid model type")
        exit(1)

    if (args.test_only):
        test(myModel, data_loader, args)
    else:
        train(myModel, data_loader, args)
Example no. 9
def main_func(datasets, context_len, epochs):
    for dataset in datasets:
        for bptt in context_len:
            train_d, valid_d, test_d, data = data_generator(dataset)
            train_data = batchify(train_d, bptt)
            val_data = batchify(valid_d, bptt)
            test_data = batchify(test_d, bptt)

            ntokens = len(set(data))
            best_val_loss = None
            lr = args.lr
            if args.model == 'Transformer':
                model = model1.TransformerModel(ntokens, args.emsize,
                                                args.nhead, args.nhid,
                                                args.nlayers,
                                                args.dropout).to(device)
            else:
                model = model1.RNNModel(args.model, ntokens, args.emsize,
                                        args.nhid, args.nlayers, args.dropout,
                                        args.tied).to(device)
            criterion = nn.CrossEntropyLoss()
            optimizer = torch.optim.Adam(model.parameters(), args.lr)

            for epoch in range(1, epochs + 1):
                epoch_start_time = time.time()
                train(train_data, bptt, ntokens, model, criterion, optimizer,
                      epoch)
                val_loss = evaluate(val_data, bptt, ntokens, model, criterion)
                print('-' * 89)
                print(
                    '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                    'valid ppl {:8.2f}'.format(
                        epoch, (time.time() - epoch_start_time), val_loss,
                        math.exp(val_loss)))
                print('-' * 89)
                # Save the model if the validation loss is the best we've seen so far.
                if not best_val_loss or val_loss < best_val_loss:
                    with open(args.save, 'wb') as f:
                        torch.save(model, f)
                    best_val_loss = val_loss
                else:
                    # Anneal the learning rate if no improvement has been seen.
                    lr /= 4.0

            # Load the best saved model.
            with open(args.save, 'rb') as f:
                model = torch.load(f)
                if args.model in ['RNN_TANH', 'RNN_RELU', 'LSTM', 'GRU']:
                    model.rnn.flatten_parameters()
            # Run on test data.
            test_loss = evaluate(test_data, bptt, ntokens, model, criterion)
            print('=' * 89)
            print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.
                  format(test_loss, math.exp(test_loss)))
            print('=' * 89)
            fr.write("test loss for len %d and dataset %s is %f\n" %
                     (bptt, dataset, test_loss))
            #fr.close()
    return
Example no. 10

def evaluate_rnn(params):
    x_data, y_data = utils.load_data(params["eval_x_data"],
                                     params["eval_y_data"])
    rnn_model = model.RNNModel(
        max_seq_length=x_data.shape[1],
        input_size=params["input_size"],
        output_size=params["output_size"],
        embed_dim=params["embed_dim"],
        emb_trainable=params["emb_trainable"],
        model_name=params["model_name"],
        hidden_activation=params["hidden_activation"],
        out_activation=params["out_activation"],
        hidden_dim=params["hidden_dims"][0],
        kernel_initializer=params["kernel_initializer"],
        kernel_regularizer=params["kernel_regularizer"],
        recurrent_regularizer=params["recurrent_regularizer"],
        input_dropout=0.0,
        recurrent_dropout=0.0,
        rnn_unit_type=params["rnn_unit_type"],
        bidirectional=params["bidirectional"],
        attention=params["attention"],
        embs_matrix=params["embs_matrix"])
    utils.load_model(params["eval_weights_ckpt"], rnn_model,
                     params["learning_rate"])
    print("Model from checkpoint %s was loaded." % params["eval_weights_ckpt"])
    # if y_data is not None:
    #     metrics_names, scores = rnn_model.evaluate(x_data, y_data, batch_size=params["batch_size"], verbose=params["verbose"])
    #     loss = scores[0]
    #     print("Evaluation loss: %.3f"%loss)
    sample_idxs = np.random.randint(x_data.shape[0],
                                    size=params["attn_sample_size"])
    x_data_sample = x_data[sample_idxs, :]
    cPickle.dump(
        sample_idxs,
        open(os.path.join(params["eval_res_folder"], "sample_idxs.pickle"),
             "wb"))
    if params["attention"]:
        attention_weights = get_attention_weights(rnn_model, x_data_sample)
        print("Attention weights shape: ", attention_weights.shape)
        cPickle.dump(
            attention_weights,
            open(
                os.path.join(params["eval_res_folder"], "attn_weights.pickle"),
                "wb"))
        vocab = cPickle.load(open(params["vocab_file"], "rb"))
        if params["plot_attn"]:
            inverse_vocab = {value: key for key, value in vocab.items()}
            utils.plot_attention_weights(x_data_sample,
                                         attention_weights,
                                         inverse_vocab,
                                         params["eval_res_folder"],
                                         ids=sample_idxs)

    predictions = rnn_model.predict(x_data,
                                    batch_size=params["batch_size"],
                                    verbose=params["verbose"])
    utils.save_predictions(predictions, params["eval_res_folder"],
                           rnn_model.model_name + "_predictions.txt")
Example no. 11

def save_torch_ptb_lm():
    RNNModel = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                              args.nlayers, args.dropout, args.tied)
    x = np.arange(80 * 5 * 32 // 20).reshape(20, 32).astype('int64')
    init_hidden = np.zeros((20, 32, 200), dtype='float32')
    init_hidden = torch.tensor(init_hidden)
    init_cell = np.zeros((20, 32, 200), dtype='float32')
    pre_cell = torch.tensor(init_cell)
    x = torch.LongTensor(x)

    RNNModel = torch.jit.trace(RNNModel, [x, init_hidden, pre_cell])
    RNNModel.save(torch_model_dir + "RNNModel.pth")
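Once saved, the traced module can be reloaded and run without the original model class on the import path. A minimal sketch, assuming the same torch_model_dir and the input tensors prepared above:

loaded = torch.jit.load(torch_model_dir + "RNNModel.pth")
with torch.no_grad():
    # Inputs must match the shapes used during tracing:
    # (20, 32) token ids and (20, 32, 200) hidden/cell states.
    out = loaded(x, init_hidden, pre_cell)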
Example no. 12
def train_lstm():
    ###########################################################################
    # Build the model
    ###########################################################################
    train_data, val_data = read_data(args.data_dir)
    model_instance = model.RNNModel(args.model, args.nclasses,
                                    train_data[0].size(-1), args.nhid,
                                    args.nlayers, args.dropout).cuda()

    if args.multilabel:
        criterion = nn.BCEWithLogitsLoss()
    else:
        criterion = nn.CrossEntropyLoss()

    # Loop over epochs.
    lr = args.lr
    best_val_acc = None

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        for epoch in range(1, args.epochs + 1):
            epoch_start_time = time.time()
            train(lr, model_instance, criterion, train_data)
            val_acc, final_res, _, _, _ = evaluate(model_instance, val_data)
            print('-' * 89)
            print('| end of epoch {:3d} | time: {:5.2f}s | '
                  'valid acc {:5.4f} | '.format(
                      epoch, (time.time() - epoch_start_time), val_acc))
            print('-' * 89)
            # Save the model if the validation loss is the best we've seen so
            # far.
            if not best_val_acc or val_acc > best_val_acc:
                with open(args.save, 'wb') as f:
                    torch.save(model_instance, f)
                best_val_acc = val_acc
            else:
                # Anneal the learning rate if no improvement has been
                # seen in the validation dataset.
                lr /= 4.0
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')
    # Run final test, and store the predictions
    _, _, idx, _, preds = evaluate(model_instance, val_data)
    preds_to_store = []
    for idx_i, pred_i in zip(idx.tolist(), preds.tolist()):
        preds_to_store.append((idx_i, pred_i))
    with open(osp.join(osp.dirname(args.save), 'results_probs_lstm.pkl'),
              'wb') as f:
        pkl.dump(preds_to_store, f, protocol=2)
    return final_res
Example no. 13
    def initialize(self):

        cuda = torch.cuda.is_available()

        self.model_test = model.RNNModel(self.model_nn, self.data_size,
                                         self.emsize, self.nhid, self.nlayers,
                                         self.dropout, self.tied)

        if cuda:
            self.model_test.cuda()

        with open(self.load_model, 'rb') as f:
            self.sd = torch.load(f)

        try:
            self.model_test.load_state_dict(self.sd)
            print('Model loaded state dict')
        except RuntimeError:
            # The checkpoint was saved with weight norm applied; retry after
            # applying it to the RNN, then strip it again.
            apply_weight_norm(self.model_test.rnn)
            self.model_test.load_state_dict(self.sd)
            remove_weight_norm(self.model_test)

        # Get the neuron and polarity
        self.neuron, self.polarity = get_neuron_and_polarity(
            self.sd, self.neuron)
        self.neuron = self.neuron if self.visualize or self.overwrite is not None else None
        self.mask = self.overwrite is not None

        # model_test train ?
        self.model_test.eval()

        # Computing

        self.hidden = self.model_test.rnn.init_hidden(1)
        self.input = Variable(torch.LongTensor([int(ord('\n'))]))

        if cuda:
            self.input = self.input.cuda()

        self.input = self.input.view(1, 1).contiguous()
        model_step(self.model_test, self.input, self.neuron, self.mask,
                   self.overwrite, self.polarity)
        self.input.data.fill_(int(ord(' ')))
        out = model_step(self.model_test, self.input, self.neuron, self.mask,
                         self.overwrite, self.polarity)
        if self.neuron is not None:
            out = out[0]
        self.input.data.fill_(sample(out, self.temperature))
Example no. 14
def build_model(corpus, model_name, emsize, nhid, nlayers, dropout, dropouth,
                dropouti, dropoute, wdrop, lr, tied, resume, cuda):
    criterion = None

    ntokens = len(corpus.dictionary)
    model = model_module.RNNModel(model_name, ntokens, emsize, nhid, nlayers,
                                  dropout, dropouth, dropouti, dropoute, wdrop,
                                  tied)
    ###
    if resume:
        print('Resuming model ...')
        model, criterion, optimizer = model_load(resume)
        optimizer.param_groups[0]['lr'] = lr
        model.dropouti, model.dropouth, model.dropout, model.dropoute = dropouti, dropouth, dropout, dropoute
        if wdrop:
            from weight_drop import WeightDrop
            for rnn in model.rnns:
                if type(rnn) == WeightDrop: rnn.dropout = wdrop
                elif rnn.zoneout > 0: rnn.zoneout = wdrop
    ###
    if not criterion:
        splits = []
        if ntokens > 500000:
            # One Billion
            # This produces fairly even matrix mults for the buckets:
            # 0: 11723136, 1: 10854630, 2: 11270961, 3: 11219422
            splits = [4200, 35000, 180000]
        elif ntokens > 75000:
            # WikiText-103
            splits = [2800, 20000, 76000]
        print('Using', splits)
        criterion = SplitCrossEntropyLoss(emsize, splits=splits, verbose=False)
    ###
    if cuda:
        model = model.cuda()
        criterion = criterion.cuda()
    ###
    params = list(model.parameters()) + list(criterion.parameters())
    total_params = sum(x.size()[0] *
                       x.size()[1] if len(x.size()) > 1 else x.size()[0]
                       for x in params if x.size())
    print('Args:', args)
    print('Model total parameters:', total_params)

    return model, criterion, None
Example no. 15
def main(device):
    lines, vocab_size = readData(args.data + '/train.txt', args.vsize)
    print("Vocabulary size : " + str(vocab_size))
    with open('vocab.pickle', 'wb') as handle:
        pickle.dump(vocab, handle, protocol=pickle.HIGHEST_PROTOCOL)

    rnn = model.RNNModel('LSTM', vocab_size, args.nhid, args.nhid, 2,
                         0.2).to(device)
    criterion = nn.CrossEntropyLoss()

    num_epochs = 40
    start_time = timeit.default_timer()
    num_tokens = len(lines[0].split())
    lines = ['<start> ' + line + ' <end>' for line in lines]
    text = ' '.join(lines).split()
    num_batches = len(text) // (BPTT * args.bsz)
    print("# of batches %d" % num_batches)
    for e in range(num_epochs):
        for i in range(num_batches):
            hidden = rnn.initHidden(args.bsz, device)
            input_batch, target_batch = get_batch_continuous(text, i, device)
            train(rnn, hidden, criterion, args.lr, input_batch, target_batch)

        elapsed = timeit.default_timer() - start_time
        print('##################')
        print('Epoch %d :' % e)
        print('Time elapsed : %s' % (get_readable_time(int(elapsed))))

        if args.cuda:
            loss, perp = evaluate(args.data + '/validation.txt',
                                  rnn,
                                  vocab,
                                  cuda=True)
        else:
            loss, perp = evaluate(args.data + '/validation.txt', rnn, vocab)
        print('Validation loss : %.1f' % loss)
        print('Validation perplexity : %.1f' % perp)
        rnn.eval()
        samples = generate(rnn, vocab, args.cuda)
        print('Samples : ')
        for sample in samples:
            print(sample)
        with open('model.pt', 'wb') as f:
            torch.save(rnn, f)
Example no. 16
def main():
    if args.mode == 'sentence':
        corpus = data.Corpus(args.data)
    else:
        corpus = data.CorpusCharacter(args.data)
    eval_batch_size = 10

    train_data = utils.batchify(corpus.train, args.batch_size, device)
    val_data = utils.batchify(corpus.valid, eval_batch_size, device)
    test_data = utils.batchify(corpus.test, eval_batch_size, device)
    ntokens = len(corpus.dictionary)

    _model = model.RNNModel(args.model, ntokens, args.embsize, args.nhidden,
                            args.nlayers, args.dropout).to(device)
    _criterion = nn.NLLLoss()
    _lr = args.lr
    best_val_loss = None

    try:
        for epoch in range(1, args.epochs + 1):
            epoch_start_time = time.time()
            train(_model, _criterion, train_data, ntokens, _lr, epoch)
            val_loss = evaluate(_model, _criterion, val_data, eval_batch_size)
            print('-' * 89)
            print(
                '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                'valid ppl {:8.2f}'.format(epoch,
                                           (time.time() - epoch_start_time),
                                           val_loss, math.exp(val_loss)))
            print('-' * 89)
            if not best_val_loss or val_loss < best_val_loss:
                with open(args.save, 'wb') as f:
                    torch.save(_model, f)
                best_val_loss = val_loss
            else:
                _lr /= 4.0
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')

    test(_criterion, test_data, eval_batch_size)
Example no. 17
    train_label.reshape(len(train_label))).long()
    if args.cuda:
        # Unlike nn.Module, tensor.cuda() returns a copy of the tensor in GPU
        # memory instead of modifying it in place, so the result must be
        # reassigned, i.e. tensor = tensor.cuda().
        train_data, train_label = train_data.cuda(), train_label.cuda()
        val_data, val_label = val_data.cuda(), val_label.cuda()

eval_batch_size = len(val_label)  # evaluate the whole validation set at once
print(len(val_label))
'''###############################################################################
# Build the model
###############################################################################'''
model = model.RNNModel(rnn_type=args.model,
                       ninp=args.ninp,
                       nhid=args.nhid,
                       nlayers=args.nlayers,
                       dropout=args.dropout,
                       dropouth=args.dropouth,
                       wdrop=args.wdrop,
                       tie_weights=args.tied,
                       class_num=args.class_num)
criterion = nn.CrossEntropyLoss()
image = mate_data()

if args.resume:
    print('Resuming model ...')
    model_load(args.resume)
    optimizer.param_groups[0]['lr'] = args.lr
    model.dropouti, model.dropouth, model.dropout, model.dropoute = args.dropouti, args.dropouth, args.dropout, args.dropoute
    if args.wdrop:
        from weight_drop import WeightDrop
        for rnn in model.rnns:
Example no. 18
eval_batch_size = 10
train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
if args.model == 'Transformer':
    model = model.TransformerModel(ntokens, args.emsize, args.nhead, args.nhid,
                                   args.nlayers, args.dropout).to(device)
else:
    model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                           args.nlayers, args.dropout, args.tied).to(device)

criterion = nn.NLLLoss()

###############################################################################
# Training code
###############################################################################


def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""

    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)
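Detaching the hidden state between batches is what keeps truncated backpropagation through time tractable: gradients stop at the batch boundary instead of flowing through the entire history. A sketch of the training loop that typically accompanies this helper (get_batch and the plain SGD step are assumptions modeled on the stock word-language-model script):

def train_one_epoch(model, train_data, criterion, ntokens, lr):
    model.train()
    hidden = model.init_hidden(args.batch_size)
    for i in range(0, train_data.size(0) - 1, args.bptt):
        data, targets = get_batch(train_data, i)
        # Detach the hidden state so backprop stops at this batch.
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()
        # Plain SGD update on the raw gradients.
        for p in model.parameters():
            p.data.add_(p.grad, alpha=-lr)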
Example no. 19
corpus = data.Corpus(args.data)

eval_batch_size = 10
test_batch_size = 1
train_data = batchify(corpus.train, args.batch_size, args)
val_data = batchify(corpus.valid, eval_batch_size, args)
test_data = batchify(corpus.test, test_batch_size, args)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                       args.nlayers, args.dropout, args.dropouth,
                       args.dropouti, args.dropoute, args.wdrop, args.tied)
if args.cuda:
    model.cuda()
total_params = sum(x.size()[0] *
                   x.size()[1] if len(x.size()) > 1 else x.size()[0]
                   for x in model.parameters())
print('Args:', args)
print('Model total parameters:', total_params)

criterion = nn.CrossEntropyLoss()

###############################################################################
# Training code
###############################################################################

Example no. 20

eval_batch_size = 10
train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
model = model.RNNModel(args.model,
                       ntokens,
                       args.emsize,
                       args.nhid,
                       args.nlayers,
                       dropout=args.dropout,
                       tie_weights=args.tied)

if torch.cuda.device_count() > 1:
    LOGGER.info("Let's use %d GPUs!", torch.cuda.device_count())
    # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
    model = nn.DataParallel(model)

if args.cuda:
    model.cuda()

criterion = nn.CrossEntropyLoss()

###############################################################################
Example no. 21
test_batch_size = 1
train_data = batchify(corpus.train, args.batch_size, args)
val_data = batchify(corpus.valid, eval_batch_size, args)
test_data = batchify(corpus.test, test_batch_size, args)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
if args.continue_train:
    model = torch.load(os.path.join(args.save, 'model.pt'))
else:
    model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                           args.nhidlast, args.nlayers, args.dropout,
                           args.dropouth, args.dropouti, args.dropoute,
                           args.wdrop, args.tied, args.dropoutl,
                           args.n_experts, args.num4embed, args.num4first,
                           args.num4second)

if args.cuda:
    if args.single_gpu:
        parallel_model = model.cuda()
    else:
        parallel_model = nn.DataParallel(model, dim=1).cuda()
else:
    parallel_model = model

total_params = sum(x.data.nelement() for x in model.parameters())
logging('Args: {}'.format(args))
logging('Model total parameters: {}'.format(total_params))
Example no. 22
eval_batch_size = 10
test_batch_size = 1
train_data = batchify(corpus.train, args.batch_size, args)
val_data = batchify(corpus.valid, eval_batch_size, args)
test_data = batchify(corpus.test, test_batch_size, args)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
if args.continue_train:
    model = torch.load(os.path.join(args.save, 'model.pt'))
else:
    model = model.RNNModel(ntokens, args.emsize, args.nhid, args.nhidlast,
                           args.nlayers, 0., 0., 0., 0., 0., 0.,
                           args.n_experts, args.emblocks, args.emdensity,
                           sparse_mode=args.sparse_mode,
                           sparse_fract=args.sparse_fract)

if args.cuda:
    if not args.multi_gpu:
        parallel_model = model.cuda()
    else:
        parallel_model = nn.DataParallel(model, dim=1).cuda()
else:
    parallel_model = model

logging('Args: {}'.format(args))

params_total, params_encoder, params_rnns = 0, 0, 0
for n, p in model.named_parameters():
    #print('param {}: {}'.format(n, p.nelement()))
Example no. 23
              num_operations=args.num_operations,
              intermediate_nodes=args.num_intermediate_nodes,
              args=args,
              genos_init=args.uniform_genos_init)

# swarm = Swarm(args, population_size=args.population_size)

# initial genotype
genotype = swarm.global_best.genotype()

# initializing the model
if args.use_pretrained:
    logger.info('PRETRAINED MODEL LOADED!')
    model = torch.load(os.path.join(args.pretrained_dir, 'model.pt'))
else:
    model = model_module.RNNModel(ntokens, args, genotype=genotype)

size = 0
for p in model.parameters():
    size += p.nelement()
logger.info('param size: {}'.format(size))
logger.info('initial genotype:')
logger.info(model.genotype())

if args.cuda:
    if args.single_gpu:
        parallel_model = model.cuda()
    else:
        parallel_model = nn.DataParallel(model, dim=1).cuda()
else:
    parallel_model = model
Example no. 24

eval_batch_size = args.batch_size
train_data = batchify(corpus.train, args.batch_size)
print(train_data.size())
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

T = (train_data.size(0) // args.bptt) * args.epochs
###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                       args.nlayers, args.dropout, args.tied,
                       args.bidirectional)

# Load checkpoint
if args.checkpoint != '':
    if args.cuda:
        model = torch.load(args.checkpoint)
    else:
        # Load GPU model on CPU
        model = torch.load(args.checkpoint,
                           map_location=lambda storage, loc: storage)

if args.finetune:
    assert args.pretrained, "you must specify a pre-trained model"

    with open(args.pretrained, 'rb') as f:
Example no. 25
################################################

criterion = None

ntokens = len(corpus.dictionary)  # 10000

# pre_emb,_= tools.load_fasttext_embd(args.emb_path, corpus, words_to_load=100000, reload=False)
if args.wvec:
    model = model.RNNModel(
        args.model,
        ntokens,
        args.emsize,
        args.nhid,
        args.chunk_size,
        args.nlayers,
        args.dropout,
        args.dropouth,
        args.dropouti,
        args.dropoute,
        args.wdrop,
        args.tied,
        pre_emb=pre_emb,
    )
else:
    model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                           args.chunk_size, args.nlayers, args.dropout,
                           args.dropouth, args.dropouti, args.dropoute,
                           args.wdrop, args.tied)
###
if args.resume:
    tools.print_log(args.save, 'Resuming model ...')
Example no. 26
if not args.test and not args.interact:
    if args.load_checkpoint:
        # Load the best saved model.
        print('  Continuing training from previous checkpoint')
        with open(args.model_file, 'rb') as f:
            if args.cuda:
                model = torch.load(f).to(device)
            else:
                model = torch.load(f, map_location='cpu')
    else:
        ntokens = len(corpus.dictionary)
        model = model.RNNModel(
            args.model,
            ntokens,
            args.emsize,
            args.nhid,
            args.nlayers,
            embedding_file=args.embedding_file,
            dropout=args.dropout,
            tie_weights=args.tied,
            freeze_embedding=args.freeze_embedding).to(device)

    if args.cuda and (not args.single) and (torch.cuda.device_count() > 1):
        # If applicable, use multi-gpu for training
        # Scatters minibatches (in dim=1) across available GPUs
        model = nn.DataParallel(model, dim=1)
    if isinstance(model, torch.nn.DataParallel):
        # if multi-gpu, access real model for training
        model = model.module
    # after load the rnn params are not a continuous chunk of memory
    # this makes them a continuous chunk, and will speed up forward pass
    model.rnn.flatten_parameters()
Example no. 27
eval_batch_size = 10
train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

# # load glove embeddings to tensor
# glove_dict = load_glove_to_dict(args.glove_path, args.emsize)
# glove_tensor = glove_dict_to_tensor(corpus.dictionary.word2idx, glove_dict)

# Build the model
ntokens = len(corpus.dictionary)
# model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.glove, glove_tensor, args.dropout, args.tied)
# modification 3
# model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.glove, args.dropout, args.tied)
model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                       args.nlayers, args.dropout, args.tied,
                       args.glove)  #glove_tensor
if args.cuda:
    model.cuda()

criterion = nn.CrossEntropyLoss()

# Loop over epochs.
lr = args.lr
best_val_loss = None

# At any point you can hit Ctrl + C to break out of training early.
try:
    for epoch in range(1, args.epochs + 1):
        epoch_start_time = time.time()
        train()
Example no. 28
    if args.cuda:
        data = data.cuda()
    return data


eval_batch_size = 10
train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                       args.nlayers, args.dropout, args.init)
if args.cuda:
    model.cuda()

criterion = nn.CrossEntropyLoss()

###############################################################################
# Training code
###############################################################################


def clip_gradient(model, clip):
    """Computes a gradient clipping coefficient based on gradient norm."""
    totalnorm = 0
    for p in model.parameters():
        modulenorm = p.grad.data.norm()
Example no. 29
    print("load test data - complete")

    cutoffs = args.cutoffs + [ntokens]


# with doing('Constructing model'):
    # if not args.lm1b:
    #     criterion = AdaptiveLoss(cutoffs)
    # else:
    #     criterion = SplitCrossEntropyLoss(args.emsize, args.cutoffs, verbose=False)
    #     criterion.cuda()
logging.info("Constructing model")
criterion = AdaptiveLoss(cutoffs).cuda()
if args.old is None:
    logging.info("building model")
    model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, cutoffs, args.proj, args.dropout, args.tied,
                           args.lm1b)
else:
    with open(args.old, 'rb') as model_file:
        model = torch.load(model_file)
if args.cuda:
    model.cuda()

optimizer = optim.Adagrad(model.parameters(), args.lr, weight_decay=1e-6)
eval_batch_size = 1


###############################################################################
# Training code
###############################################################################

# Loop over epochs.
Example no. 30
device = torch.device("cuda" if args.cuda else "cpu")
train_data = batchify(corpus.train, args.batch_size, device)
val_data = batchify(corpus.valid, eval_batch_size, device)
test_data = batchify(corpus.test, eval_batch_size, device)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
if args.load is None:
    model = model.RNNModel(args.model,
                           ntokens,
                           args.emsize,
                           args.nhid,
                           args.nlayers,
                           args.dropout,
                           args.tied,
                           corpus=corpus,
                           embeddings=embeddings)
else:
    with open(args.load, 'rb') as f:
        model = torch.load(f)

model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                       lr=args.lr)

###############################################################################