Example 1
def train():
    # Turn on training mode which enables dropout.
    print(f'starting training for epoch {epoch}')
    if args.model == 'QRNN': model.reset()
    global STEP
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch = 0
    for tr_data_shard in corpus.iterate_train_shards():
        print('opening new data shard!')
        train_data = batchify(tr_data_shard, args.batch_size, args)
        i = 0
        while i < train_data.size(0) - 1 - 1:
            bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
            # Prevent excessively small or negative sequence lengths
            seq_len = max(5, int(np.random.normal(bptt, 5)))
            # There's a very small chance that it could select a very long sequence length resulting in OOM
            # seq_len = min(seq_len, args.bptt + 10)

            lr2 = optimizer.param_groups[0]['lr']
            optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
            model.train()
            data, targets = get_batch(train_data, i, args, seq_len=seq_len)

            # Starting each batch, we detach the hidden state from how it was previously produced.
            # If we didn't, the model would try backpropagating all the way to start of the dataset.
            hidden = repackage_hidden(hidden)
            optimizer.zero_grad()

            output, hidden, rnn_hs, dropped_rnn_hs = model(data,
                                                           hidden,
                                                           return_h=True)
            raw_loss = criterion(model.decoder.weight, model.decoder.bias,
                                 output, targets)

            loss = raw_loss
            # Activation Regularization
            if args.alpha:
                loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean()
                                  for dropped_rnn_h in dropped_rnn_hs[-1:])
            # Temporal Activation Regularization (slowness)
            if args.beta:
                loss = loss + sum(args.beta *
                                  (rnn_h[1:] - rnn_h[:-1]).pow(2).mean()
                                  for rnn_h in rnn_hs[-1:])
            loss.backward()
            STEP += 1
            writer.add_scalar('Loss/train-batch', raw_loss.data, STEP)

            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            if args.clip: torch.nn.utils.clip_grad_norm_(params, args.clip)
            optimizer.step()

            total_loss += raw_loss.data
            optimizer.param_groups[0]['lr'] = lr2
            if batch % args.log_interval == 0 and batch > 0:
                cur_loss = total_loss.item() / args.log_interval
                elapsed = time.time() - start_time
                print(
                    '| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f} | bpc {:8.3f}'.format(
                        epoch, batch,
                        len(train_data) // args.bptt,
                        optimizer.param_groups[0]['lr'],
                        elapsed * 1000 / args.log_interval, cur_loss,
                        math.exp(cur_loss), cur_loss / math.log(2)))
                writer.add_scalar('Loss/train', cur_loss, STEP)
                writer.add_scalar('BPC/train', cur_loss / math.log(2), STEP)
                total_loss = 0
                start_time = time.time()
            ###
            batch += 1
            i += seq_len
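
The loop above relies on a `repackage_hidden` helper to truncate backpropagation at batch boundaries, as described in the comment inside the loop. A minimal sketch of what such a helper typically looks like (the actual implementation lives elsewhere in the repository):

import torch

def repackage_hidden(h):
    # Detach hidden states from the graph of the previous batch so gradients
    # do not flow back beyond the current BPTT window.
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)
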
Example 2
    parser = argparse.ArgumentParser(description='LSTM language model')

    parser.add_argument('--test', type=str, help='test data')
    parser.add_argument('--model', type=str, required=True)
    parser.add_argument('--vocab', type=str, required=True)

    args = parser.parse_args()

    # load dictionary
    word2idx = utils.read_json(args.vocab)
    # load data
    test = utils.process_valid_data(args.test, word2idx)
    # load model #
    params = torch.load(args.model)

    test_batches = utils.batchify(test, params["sequence_length"],
                                  params["batch_size"], word2idx)

    model = lstm_lm.LSTMLM(params)
    optimizer = optim.SGD(model.parameters(), lr=0.1)
    loss_function = nn.NLLLoss()
    load_model(params, model, optimizer)

    if params["use_gpu"]:
        model = model.cuda()

    model.eval()  # change state to evaluation mode
    print(
        "Test perplexity: ",
        utils.evaluate(model, loss_function, test_batches, params["use_gpu"]) /
        len(test_batches))
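
`load_model` is assumed to restore the saved weights and optimizer state before evaluation. A minimal sketch, assuming a hypothetical checkpoint layout with "checkpoint_path", "model" and "optimizer" keys (the real key names may differ):

import torch

def load_model(params, model, optimizer):
    # Hypothetical checkpoint layout: adjust the keys to match the real training script.
    checkpoint = torch.load(params["checkpoint_path"], map_location="cpu")
    model.load_state_dict(checkpoint["model"])
    optimizer.load_state_dict(checkpoint["optimizer"])
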
Example 3
            decoder_args=dec_args)

conf.experiment_name = experiment_name
conf.save(os.path.join(train_dir, 'configuration'))

reset_tf_graph()
ae = PointNetAutoEncoder(conf.experiment_name, conf)

#buf_size = 1 # flush each line
#fout = open(os.path.join(conf.train_dir, 'train_stats.txt'), 'a', buf_size)
#train_stats = ae.train(pcd_dataset, conf, log_file=fout)
#fout.close()

ae.restore_model('data/shapenet_1024_ae_128', 90, True)

print("Transforming Training data")
X_train_trans = []
for x_b in batchify(X_train, 100):
    X_train_trans.append(ae.transform(x_b))
X_train_trans = np.concatenate(X_train_trans)

print("Transforming test data")
X_test_trans = []
for x_b in batchify(X_test, 100):
    X_test_trans.append(ae.transform(x_b))
X_test_trans = np.concatenate(X_test_trans)

print("Fitting svm")
svm = LinearSVC()
svm.fit(X_train_trans, y_train[:len(X_train_trans)])
print(svm.score(X_test_trans, y_test[:len(X_test_trans)]))
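
Here `batchify` is used as a simple chunking generator over the point-cloud arrays rather than the tensor-reshaping variant seen in the language-model examples. A plausible sketch of such a helper:

def batchify(items, batch_size):
    # Yield consecutive chunks of `items`; the last chunk may be shorter.
    for start in range(0, len(items), batch_size):
        yield items[start:start + batch_size]
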
Example 4
if torch.cuda.is_available():
    if not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")
    else:
        torch.cuda.set_device(0)
        torch.cuda.manual_seed(args.seed)

###############################################################################
# Load data
###############################################################################

corpus = data_ori_type.Corpus(args.data)

eval_batch_size = 10
test_batch_size = 1
train_data = batchify(corpus.train, args.batch_size, args)
val_data = batchify(corpus.valid, eval_batch_size, args)
test_data = batchify(corpus.test, test_batch_size, args)

train_data_type = batchify(corpus.train_type, args.batch_size, args)
val_data_type = batchify(corpus.valid_type, eval_batch_size, args)
test_data_type = batchify(corpus.test_type, test_batch_size, args)


corpus2 = data.Corpus(args.data_type)


train_data2 = batchify(corpus2.train, args.batch_size, args)
val_data2 = batchify(corpus2.valid, eval_batch_size, args)
test_data2 = batchify(corpus2.test, test_batch_size, args)
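
These language-model scripts all assume an AWD-LSTM-style `batchify(data, bsz, args)` helper that trims the token stream to a multiple of the batch size and reshapes it column-wise. A sketch of that helper:

def batchify(data, bsz, args):
    # Work out how many whole batches fit, drop the remainder, and reshape
    # to (nbatch, bsz) so each column is a contiguous stream of text.
    nbatch = data.size(0) // bsz
    data = data.narrow(0, 0, nbatch * bsz)
    data = data.view(bsz, -1).t().contiguous()
    if args.cuda:
        data = data.cuda()
    return data
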
Example 5
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    if not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )
    else:
        torch.cuda.set_device(args.gpu)
        cudnn.benchmark = True
        cudnn.enabled = True
        torch.cuda.manual_seed_all(args.seed)

corpus = data.Corpus(args.data)
test_batch_size = 1
test_data = batchify(corpus.test, test_batch_size, args)


def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        print(i, data_source.size(0) - 1)
        data, targets = get_batch(data_source, i, args, evaluation=True)
        targets = targets.view(-1)

        log_prob, hidden = parallel_model(data, hidden)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data
        total_loss += loss * len(data)
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
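
`get_batch` slices one BPTT window of inputs and next-token targets out of the batchified data. A sketch of the usual helper (variants differ in whether the targets come pre-flattened, hence the `targets.view(-1)` above):

def get_batch(source, i, args, seq_len=None, evaluation=False):
    # Take up to `seq_len` (default args.bptt) timesteps starting at position i;
    # the targets are the same slice shifted forward by one token.
    seq_len = min(seq_len if seq_len else args.bptt, len(source) - 1 - i)
    data = source[i:i + seq_len]
    target = source[i + 1:i + 1 + seq_len]
    return data, target
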
Example 6
def worker(gpu, ngpus_per_node, config_in):
    # init
    config = copy.deepcopy(config_in)
    args = config
    jobid = os.environ["SLURM_JOBID"]
    procid = int(os.environ["SLURM_PROCID"])
    config.gpu = gpu

    if config.gpu is not None:
        writer_name = "tb.{}-{:d}-{:d}".format(jobid, procid, gpu)
        logger_name = "{}.{}-{:d}-{:d}.search.log".format(config.name, jobid, procid, gpu)
        model_name = "{}-{:d}-{:d}-model.pt".format(jobid, procid, gpu)
        optimizer_name = "{}-{:d}-{:d}-optimizer.pt".format(jobid, procid, gpu)
        misc_name = "{}-{:d}-{:d}-misc.pt".format(jobid, procid, gpu)
        ck_name = "{}-{:d}-{:d}".format(jobid, procid, gpu)
    else:
        writer_name = "tb.{}-{:d}-all".format(jobid, procid)
        logger_name = "{}.{}-{:d}-all.search.log".format(config.name, jobid, procid)
        model_name = "{}-{:d}-all-model.pt".format(jobid, procid)
        optimizer_name = "{}-{:d}-all-optimizer.pt".format(jobid, procid)
        misc_name = "{}-{:d}-all-misc.pt".format(jobid, procid)
        ck_name = "{}-{:d}-all".format(jobid, procid)

    writer = SummaryWriter(log_dir=os.path.join(config.path, writer_name))
    # writer.add_text('config', config.as_markdown(), 0)
    logger = get_logger(os.path.join(config.path, logger_name))

    # get cuda device
    device = torch.device('cuda', gpu)

    # ==============================  begin  ==============================
    logger.info("Logger is set - training start")
    logger.info('Args: {}'.format(args))

    if config.dist_url == "env://" and config.rank == -1:
        config.rank = int(os.environ["RANK"])

    if config.mp_dist:
        # For multiprocessing distributed training, rank needs to be the
        # global rank among all the processes
        config.rank = config.rank * ngpus_per_node + gpu
    # print('back:{}, dist_url:{}, world_size:{}, rank:{}'.format(config.dist_backend, config.dist_url, config.world_size, config.rank))
    dist.init_process_group(backend=config.dist_backend, init_method=config.dist_url,
                            world_size=config.world_size, rank=config.rank)

    # get data
    corpus = data.Corpus(args.data)

    eval_batch_size = 10
    test_batch_size = 1

    train_data = batchify(corpus.train, args.batch_size, args)
    search_data = batchify(corpus.valid, args.batch_size, args)
    val_data = batchify(corpus.valid, eval_batch_size, args)
    test_data = batchify(corpus.test, test_batch_size, args)

    # split data ( with respect to GPU_id)
    def split_set(set_in):
        per_set_length = set_in.size(0) // config.world_size
        set_out = set_in[per_set_length*config.rank + 0: per_set_length*config.rank + per_set_length]
        return set_out
    train_data = split_set(train_data).to(device)
    search_data = split_set(search_data).to(device)
    val_data = split_set(val_data).to(device)
    test_data = split_set(test_data).to(device)

    if config.dist_privacy:
        logger.info("PRIVACY ENGINE ON")

    # build model
    ntokens = len(corpus.dictionary)
    genotype = eval("genotypes.%s" % args.arch)
    if args.continue_train:
        model = torch.load(os.path.join(args.save, model_name))
    else:
        model = model_aug.RNNModel(ntokens, args.emsize, args.nhid, args.nhidlast,
                        args.dropout, args.dropouth, args.dropoutx, args.dropouti, args.dropoute,
                        cell_cls=model_aug.DARTSCell, genotype=genotype)
    # make model distributed
    if config.gpu is not None:
        torch.cuda.set_device(config.gpu)
        # model = model.to(device)
        model.cuda(config.gpu)
        # When using a single GPU per process and per DistributedDataParallel, we need to divide
        # the batch size ourselves based on the total number of GPUs we have
        # config.batch_size = int(config.batch_size / ngpus_per_node)
        config.workers = int((config.workers + ngpus_per_node - 1) / ngpus_per_node)
        # model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[config.rank])
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[config.gpu])
        # model = torch.nn.parallel.DistributedDataParallel(model, device_ids=None, output_device=None)
    else:
        model.cuda()
        # DistributedDataParallel will divide and allocate batch_size to all
        # available GPUs if device_ids are not set
        model = torch.nn.parallel.DistributedDataParallel(model)

    total_params = sum(x.data.nelement() for x in model.parameters())
    logger.info('Model total parameters: {}'.format(total_params))
    logger.info('Genotype: {}'.format(genotype))


    # Loop over epochs.
    lr = args.lr
    best_val_loss = []
    stored_loss = 100000000

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        if args.continue_train:
            optimizer_state = torch.load(os.path.join(args.save, optimizer_name))
            if 't0' in optimizer_state['param_groups'][0]:
                optimizer = torch.optim.ASGD(model.parameters(), lr=args.lr, t0=0, lambd=0., weight_decay=args.wdecay)
            else:
                optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.wdecay)
            optimizer.load_state_dict(optimizer_state)
        else:
            optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.wdecay)

        epoch = 1
        while epoch < args.epochs + 1:
            epoch_start_time = time.time()
            try:
                # train()
                train(model, epoch, corpus, train_data, search_data, optimizer,
                    device, logger, writer, args)
            except Exception:
                logger.info('rolling back to the previous best model ...')
                model = torch.load(os.path.join(args.save, model_name))
                model = model.cuda()

                optimizer_state = torch.load(os.path.join(args.save, optimizer_name))
                if 't0' in optimizer_state['param_groups'][0]:
                    optimizer = torch.optim.ASGD(model.parameters(), lr=args.lr, t0=0, lambd=0., weight_decay=args.wdecay)
                else:
                    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.wdecay)
                optimizer.load_state_dict(optimizer_state)

                epoch = torch.load(os.path.join(args.save, misc_name))['epoch']
                continue

            if 't0' in optimizer.param_groups[0]:
                tmp = {}
                for prm in model.parameters():
                    tmp[prm] = prm.data.clone()
                    prm.data = optimizer.state[prm]['ax'].clone()

                val_loss2 = evaluate(model, corpus, args, val_data)
                logger.info('-' * 89)
                logger.info('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                        'valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time),
                                                val_loss2, math.exp(val_loss2)))
                logger.info('-' * 89)

                if val_loss2 < stored_loss:
                    save_checkpoint(model, optimizer, epoch, args.save, dist_name=ck_name)
                    logger.info('Saving Averaged!')
                    stored_loss = val_loss2

                for prm in model.parameters():
                    prm.data = tmp[prm].clone()

            else:
                val_loss = evaluate(model, corpus, args, val_data, eval_batch_size)
                logger.info('-' * 89)
                logger.info('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                        'valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time),
                                                val_loss, math.exp(val_loss)))
                logger.info('-' * 89)

                if val_loss < stored_loss:
                    save_checkpoint(model, optimizer, epoch, args.save, dist_name=ck_name)
                    logger.info('Saving Normal!')
                    stored_loss = val_loss

                if 't0' not in optimizer.param_groups[0] and (len(best_val_loss)>args.nonmono and val_loss > min(best_val_loss[:-args.nonmono])):
                    logger.info('Switching!')
                    optimizer = torch.optim.ASGD(model.parameters(), lr=args.lr, t0=0, lambd=0., weight_decay=args.wdecay)
                best_val_loss.append(val_loss)

            epoch += 1

    except KeyboardInterrupt:
        logger.info('-' * 89)
        logger.info('Exiting from training early')

    # Load the best saved model.
    model = torch.load(os.path.join(args.save, model_name))
    model = model.cuda()

    test_loss = evaluate(model, corpus, args, test_data, test_batch_size)
    logger.info('=' * 89)
    logger.info('| End of training & Testing | test loss {:5.2f} | test ppl {:8.2f}'.format(
        test_loss, math.exp(test_loss)))
    logger.info('=' * 89)
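
`save_checkpoint` is assumed to write the per-rank model, optimizer and bookkeeping files whose names are constructed at the top of `worker()`. A hypothetical sketch consistent with how those files are loaded back above (the real helper may differ):

import os
import torch

def save_checkpoint(model, optimizer, epoch, save_dir, dist_name=''):
    # Hypothetical layout: one file each for the model, the optimizer state dict and
    # misc bookkeeping, named after the per-rank prefix (ck_name) built in worker().
    torch.save(model, os.path.join(save_dir, '{}-model.pt'.format(dist_name)))
    torch.save(optimizer.state_dict(), os.path.join(save_dir, '{}-optimizer.pt'.format(dist_name)))
    torch.save({'epoch': epoch}, os.path.join(save_dir, '{}-misc.pt'.format(dist_name)))
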
Example 7

import os
import hashlib

fn = 'corpus'
if os.path.exists(fn):
    print('Loading cached dataset...')
    corpus = torch.load(fn)
else:
    print('Producing dataset...')
    corpus = data.Corpus(args.data)
    torch.save(corpus, fn)
eval_batch_size = 10
test_batch_size = 1
train_data, train_rps = batchify(corpus.train, corpus.train_rps,
                                 args.batch_size, args)
val_data, val_rps = batchify(corpus.valid, corpus.valid_rps, eval_batch_size,
                             args)
test_data, test_rps = batchify(corpus.test, corpus.test_rps, test_batch_size,
                               args)
print('Args:', args)


def evaluate(data_source, rps, batch_size=10):
    # Turn on evaluation mode which disables dropout.

    criterion = torch.nn.CrossEntropyLoss()
    ntokens = len(corpus.dictionary)
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
Example 8
 def _get_online_predictions(self, lines: List[str], types: List[str] = None) -> List[int]:
     """retrieves predictions by triggering google cloud function, which
     invokes ml-engine to make a prediction for each line.
     """
     contexts = self._get_line_context(lines, n=CONTEXT_N_LINES)
     instances = []
     for i, line in enumerate(lines):
         context = contexts[i]
         if MAX_LENGTH > 0:
             if len(line) > MAX_LENGTH:
                 line = line[:MAX_LENGTH]
                 context = ''
             elif (len(line) + len(context)) > MAX_LENGTH:
                 context = context[:MAX_LENGTH-len(line)]
             assert (len(line) + len(context)) <= MAX_LENGTH
         instances.append({'inputs': line, 'context': context})
     if self.verbosity > 1:
         raw_instances = instances.copy()
     if LABEL_SPEECHES_ONLY:
         assert types is not None, '`types` must be provided when LABEL_SPEECHES_ONLY == True.'
         assert len(types) == len(lines), f'types must have same length as lines, but {len(types)} != {len(lines)}.'
         speeches = []
         speeches_idx = []
         for i, instance in enumerate(instances):
             if types[i] == 'speech':
                 speeches.append(instance)
                 speeches_idx.append(i)
         instances = speeches
     if self.verbosity > 0:
         print(f'Making "speaker span" predictions for {len(instances)} lines...')
     problem_class = PROBLEM_CLASSES[PROBLEM]
     problem = problem_class()
     encoders = problem.feature_encoders(data_dir=DATA_DIR)
     instances_b64 = []
     for instance in instances:
         if 'targets' not in instance:
             instance['targets'] = ''
         encoded_instance = problem.encode_example(instance, encoders)
         # encoded_sample.pop('targets')
         # encoded_sample.pop('context')
         serialized_instance = to_example(encoded_instance).SerializeToString()
         instances_b64.append({"b64": base64.b64encode(serialized_instance).decode('utf-8')})
     instances = instances_b64
     preds = []
     batch_generator = batchify(instances, BATCH_SIZE)
     if self.verbosity > 0:
         batch_generator = tqdm(batch_generator, total=np.ceil(len(instances)/BATCH_SIZE).astype(int))
     for batch in batch_generator:
         try:
             # print([len(inst['inputs']) + len(inst['context']) for inst in raw_instances[len(preds):len(preds)+BATCH_SIZE]])
             if LOCAL:
                 res = requests.post(LOCAL_URL, data=json.dumps({"instances": batch}),
                     headers={"Content-Type": "application/json"})
             else:
                 res = self._get_cloud_predictions(project=PROJECT, model=MODEL, instances=batch, version=VERSION)
             assert res.ok, f'request failed. Reason: {res.reason}.'
             predictions = json.loads(res.content)
             predictions = predictions['predictions']
             for i, pred in enumerate(predictions):
                 pred_out = pred['outputs']
                 pred_out = encoders['targets'].decode(pred_out)
                 # removes spaces.
                 pred_out = re.sub(r'\s+', '', pred_out)
                 try:
                     eos_idx = pred_out.lower().index('<eos>')
                     pred_out = pred_out[:eos_idx]
                 except ValueError:
                     if self.verbosity > 1:
                         logging.warn(f'<eos> not found in prediction: {pred_out}')
                 preds.append(pred_out)
                 # preds.append([token_pred[0][0] for token_pred in pred['outputs']])
         except AssertionError as e:
             print(e)
             for i in range(len(batch)):
                 preds.append(None)
     if LABEL_SPEECHES_ONLY:
         preds_all_lines = []
         for i, line in enumerate(lines):
             pred = 'O' * len(line)
             preds_all_lines.append(pred)
         n_preds = 0
         assert len(speeches_idx) == len(preds)
         for i, idx in enumerate(speeches_idx):
             preds_all_lines[idx] = preds[i]
             n_preds += 1
         # sanity check.
         assert n_preds == len(preds)
         preds = preds_all_lines
     if self.verbosity > 1:
         for i, pred in enumerate(preds):
             instance = raw_instances[i]
             if 'targets' in instance:
                 instance.pop('targets')
             if 'label' in instance:
                 instance.pop('label')
             print(f'INPUT (len={len(instance["inputs"])}): {instance}\nOUTPUT (len={len(pred) if pred is not None else None}): {pred}')
     return preds
Example 9
File: main.py Project: sacmehta/PRU
def trainEvalLM(args):
    fn = 'corpus.{}.data'.format(hashlib.md5(args.data.encode()).hexdigest())
    if os.path.exists(fn):
        print('Loading cached dataset...')
        corpus = torch.load(fn)
    else:
        print('Producing dataset...')
        corpus = data.Corpus(args.data)
        torch.save(corpus, fn)

    if torch.cuda.is_available():
        args.cuda = True

    ntokens = len(corpus.dictionary)
    eval_batch_size = 10
    train_data = batchify(corpus.train, args.batch_size, args)
    val_data = batchify(corpus.valid, eval_batch_size, args)

    # Build the model and loss function
    model = lmModel.RNNModel(args.model,
                             ntokens,
                             args.emsize,
                             args.nhid,
                             args.nlayers,
                             args.dropout,
                             args.tied,
                             g=args.g,
                             k=args.k)
    criterion = nn.CrossEntropyLoss()
    if torch.cuda.is_available():
        model = model.cuda()
        criterion = criterion.cuda()

    #compute network parameters
    params = list(model.parameters())
    total_params = np.sum([np.prod(p.size()) for p in params])
    print(
        '\033[1;32;40mTotal parameters (in million):\033[0m\033[1;31;40m {:0.2f} \033[0m\n'
        .format(total_params / 1e6, 2))

    optimizer = torch.optim.SGD(params, lr=args.lr, weight_decay=args.wdecay)
    start_epoch = 1
    if args.resume:
        print('Resuming model ...')
        model, criterion, optimizer, start_epoch = model_load(args.resume)
        optimizer.param_groups[0]['lr'] = args.lr
        model.dropout = args.dropout

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        #Create folder for saving model and log files
        args.saveDir += '_' + args.model
        # =====================
        if not os.path.isdir(args.saveDir):
            os.mkdir(args.saveDir)

        save_str = 'nl_' + str(args.nlayers) + '_nh_' + str(
            args.nhid) + '_g_' + str(args.g) + '_k_' + str(args.k)
        args.save = args.saveDir + '/model_' + save_str + '.pt'

        logFileLoc = args.saveDir + '/logs_' + save_str + '.txt'
        logger = open(logFileLoc, 'w')
        logger.write(str(args))
        logger.write('\n Total parameters (in million): {:0.2f}'.format(
            total_params / 1e6, 2))
        logger.write('\n\n')
        logger.write(
            "\n%s\t%s\t%s\t%s\t%s" %
            ('Epoch', 'Loss(Tr)', 'Loss(val)', 'ppl (tr)', 'ppl (val)'))
        logger.flush()

        best_val_loss = []
        stored_loss = 100000000
        # Loop over epochs.
        for epoch in range(start_epoch, args.epochs + 1):
            epoch_start_time = time.time()
            train_loss = train(args, model, criterion, optimizer, epoch,
                               train_data, ntokens)

            ### TRAIN WITH ASGD
            if 't0' in optimizer.param_groups[0]:
                tmp = {}
                for prm in model.parameters():
                    tmp[prm] = prm.data.clone()
                    prm.data = optimizer.state[prm]['ax'].clone()

                val_loss = evaluate(args, model, criterion, val_data, ntokens,
                                    eval_batch_size)

                print('-' * 89)
                print(
                    '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                    'valid ppl {:8.2f}'.format(
                        epoch, (time.time() - epoch_start_time), val_loss,
                        math.exp(val_loss)))
                print('-' * 89)

                logger.write("\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f" %
                             (epoch, train_loss, val_loss,
                              math.exp(train_loss), math.exp(val_loss)))
                logger.flush()

                if val_loss < stored_loss:
                    model_save(args.save, model, criterion, optimizer, epoch)
                    print('Saving Averaged (new best validation)')
                    stored_loss = val_loss

                for prm in model.parameters():
                    prm.data = tmp[prm].clone()

            else:
                val_loss = evaluate(args, model, criterion, val_data, ntokens,
                                    eval_batch_size)

                print('-' * 89)
                print(
                    '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                    'valid ppl {:8.2f}'.format(
                        epoch, (time.time() - epoch_start_time), val_loss,
                        math.exp(val_loss)))
                print('-' * 89)

                logger.write("\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f" %
                             (epoch, train_loss, val_loss,
                              math.exp(train_loss), math.exp(val_loss)))
                logger.flush()

                if val_loss < stored_loss:
                    model_save(args.save, model, criterion, optimizer, epoch)
                    print('Saving model (new best validation)')
                    stored_loss = val_loss

                if 't0' not in optimizer.param_groups[0] and (
                        len(best_val_loss) > args.nonmono
                        and val_loss > min(best_val_loss[:-args.nonmono])):
                    print('Switching to ASGD')
                    optimizer = torch.optim.ASGD(model.parameters(),
                                                 lr=args.lr,
                                                 t0=0,
                                                 lambd=0.,
                                                 weight_decay=args.wdecay)
                best_val_loss.append(val_loss)
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')
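
`model_save` and `model_load` here appear to bundle the model, criterion, optimizer and epoch into a single file, as implied by the call sites above. A minimal sketch under that assumption:

import torch

def model_save(path, model, criterion, optimizer, epoch):
    # Persist everything needed to resume training in one file.
    with open(path, 'wb') as f:
        torch.save([model, criterion, optimizer, epoch], f)


def model_load(path):
    with open(path, 'rb') as f:
        model, criterion, optimizer, epoch = torch.load(f)
    return model, criterion, optimizer, epoch
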
Example 10
def get_df(text):
    fn = 'corpus.{}.data'.format(hashlib.md5(args.data.encode()).hexdigest())
    if args.philly:
        fn = os.path.join(os.environ['PT_OUTPUT_DIR'], fn)
    if os.path.exists(fn):
        print('Loading cached dataset...')
        corpus = torch.load(fn)
    else:
        print('Producing dataset...')
        corpus = data.Corpus(data_path, mode=mode)
        torch.save(corpus, fn)

    ntokens = len(corpus.dictionary)

    #initialize the model
    model = RNNModel(args.model, ntokens, args.emsize, args.nhid,
                     args.chunk_size, args.nlayers, args.dropout,
                     args.dropouth, args.dropouti, args.dropoute, args.wdrop,
                     args.tied)

    with open(model_path, "rb") as f:
        model, criterion, optimizer = torch.load(f)

    #prepare data
    eval_batch_size = 10
    test_batch_size = 1
    train_data = batchify(corpus.train, args.batch_size, args)
    val_data = batchify(corpus.valid, eval_batch_size, args)
    test_data = batchify(corpus.test, test_batch_size, args)

    def idx2text(index):
        # corpus is captured from the enclosing get_df scope
        text = [corpus.dictionary.idx2word[idx] for idx in index]
        text = " ".join(text)
        return text

    def text2idx(text, mode="chinese"):
        # corpus is captured from the enclosing get_df scope
        if mode == "chinese":
            idx = [
                corpus.dictionary.word2idx.get(word,
                                               corpus.dictionary.word2idx['K'])
                for word in text
            ]
        else:
            idx = [
                corpus.dictionary.word2idx.get(
                    word, corpus.dictionary.word2idx['<unk>'])
                for word in text.split()
            ]
        return idx

    idx = torch.tensor(text2idx(text, mode=mode)).unsqueeze(dim=-1).cuda()
    # seq_len = idx.size(0)
    hidden = model.init_hidden(args.batch_size)
    hidden = repackage_hidden(hidden)
    output, hidden, distances = model(idx, hidden, return_d=True)

    target_layer = 2
    target_idx = 0

    df = distances[0].cpu().data.numpy()

    target_text = [word for word in text]
    df = df[target_layer, :, target_idx]

    return df
Example 11
    else:
        # to convert model trained on cuda to cpu model
        model = torch.load(f, map_location=lambda storage, loc: storage)
model.eval()

if args.cuda:
    model.cuda()
else:
    model.cpu()

eval_batch_size = 1
seq_len = 20

dictionary = dictionary_corpus.Dictionary(args.data)
vocab_size = len(dictionary)
print("Vocab size", vocab_size)
print("TESTING")

# assuming the mask file contains one number per line indicating the index of the target word
index_col = 0

mask = create_target_mask(args.path + ".text", args.path + ".eval", index_col)
mask_data = batchify(torch.LongTensor(mask), eval_batch_size, args.cuda)
test_data = batchify(
    dictionary_corpus.tokenize(dictionary, args.path + ".text"),
    eval_batch_size, args.cuda)

f_output = open(args.path + ".output_" + args.suffix, 'w')
evaluate(test_data, mask_data)
f_output.close()
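
`create_target_mask` turns the per-sentence target-word indices from the `.eval` file into a flat 0/1 mask over the tokens of the `.text` file, matching the comment above. A sketch under that assumption (the real reader may parse a different column layout):

def create_target_mask(text_file, eval_file, index_col):
    # Assumes one sentence per line in text_file and one line per sentence in eval_file,
    # where column `index_col` holds the position of the target word.
    mask = []
    with open(text_file) as f_text, open(eval_file) as f_eval:
        for sent, info in zip(f_text, f_eval):
            target_idx = int(info.split()[index_col])
            sent_mask = [0] * len(sent.split())
            sent_mask[target_idx] = 1
            mask.extend(sent_mask)
    return mask
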
Example 12
def main(argv):
    parser = argparse.ArgumentParser(
        description='WikiText-2 language modeling')
    parser.add_argument('--batch-size',
                        type=int,
                        default=20,
                        metavar='N',
                        help='input batch size for training (default: 20)'),
    parser.add_argument('--eval-batch-size',
                        type=int,
                        default=20,
                        metavar='N',
                        help='input batch size for evaluation (default: 20)'),
    parser.add_argument('--save-directory',
                        type=str,
                        default='output/wikitext-2',
                        help='output directory')
    parser.add_argument('--model-save-directory',
                        type=str,
                        default='models/',
                        help='output directory')
    parser.add_argument('--epochs',
                        type=int,
                        default=5,
                        metavar='N',
                        help='number of epochs to train')
    parser.add_argument('--base-seq-len',
                        type=int,
                        default=35,
                        metavar='N',
                        help='base sequence length'),
    parser.add_argument('--min-seq-len',
                        type=int,
                        default=35,
                        metavar='N',
                        help='minimum sequence length'),
    parser.add_argument('--seq-prob',
                        type=float,
                        default=0.95,
                        metavar='N',
                        help='prob of being divided by 2'),
    parser.add_argument('--seq-std',
                        type=int,
                        default=5,
                        metavar='N',
                        help='sequence length std'),
    parser.add_argument('--hidden-dim',
                        type=int,
                        default=200,
                        metavar='N',
                        help='Hidden dim')
    parser.add_argument('--embedding-dim',
                        type=int,
                        default=200,
                        metavar='N',
                        help='Embedding dim')
    parser.add_argument('--lr',
                        type=float,
                        default=20,
                        metavar='N',
                        help='learning rate'),
    parser.add_argument('--weight-decay',
                        type=float,
                        default=2e-6,
                        metavar='N',
                        help='weight decay'),
    parser.add_argument('--tag',
                        type=str,
                        default='drop-out-training.pt',
                        metavar='N',
                        help='checkpoint file name'),
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')

    args = parser.parse_args(argv)
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    # load dataset
    train_data, val_data, vocabulary = (np.load('./dataset/wiki.train.npy'),
                                        np.load('./dataset/wiki.valid.npy'),
                                        np.load('./dataset/vocab.npy'))

    word_count = len(vocabulary)

    #model = models.RNNModel(word_count, args)
    loss_fn = torch.nn.CrossEntropyLoss()

    checkpoint_path = os.path.join(args.model_save_directory, args.tag)

    if not os.path.exists(checkpoint_path):
        model = models.LSTMModelSingle(word_count, args.embedding_dim,
                                       args.hidden_dim)
    else:
        print("Using pre-trained model")
        print("*" * 90)
        model = models.LSTMModelSingle(word_count, args.embedding_dim,
                                       args.hidden_dim)
        checkpoint_path = os.path.join(args.model_save_directory, args.tag)
        model.load_state_dict(torch.load(checkpoint_path))

    if args.cuda:
        model = model.cuda()
        loss_fn = loss_fn.cuda()
    '''
    generated = utils.generate(
        model,
        sequence_length=10,
        batch_size=2,
        stochastic=True,
        args=args).data.cpu().numpy()
    utils.print_generated(
        utils.to_text(
            preds=generated,
            vocabulary=vocabulary))
    '''
    print('Model: ', model)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    logging = dict()
    logging['loss'] = []
    logging['train_acc'] = []
    logging['val_loss'] = []
    val_data_ = utils.batchify(utils.to_tensor(np.concatenate(val_data)),
                               args.eval_batch_size)
    val_data_loader = utils.custom_data_loader(val_data_,
                                               args,
                                               evaluation=True)
    #X, y, seq_len = next(val_data_loader)
    #model.eval()
    #hidden = model.init_hidden(args.batch_size)
    #output = model.generate(X, hidden, 10)
    #print('output: ', output.shape)
    model.train()
    for epoch in range(args.epochs):

        epoch_time = time.time()
        np.random.shuffle(train_data)
        train_data_ = utils.batchify(
            utils.to_tensor(np.concatenate(train_data)), args.batch_size)

        train_data_loader = utils.custom_data_loader(train_data_,
                                                     args,
                                                     evaluation=True)
        val_data_loader = utils.custom_data_loader(val_data_,
                                                   args,
                                                   evaluation=True)
        # number of words
        train_size = train_data_.size(0) * train_data_.size(1)
        val_size = val_data_.size(0) * val_data_.size(1)

        n_batchs = len(train_data_)
        n_batchs_val = len(val_data_)
        correct = 0
        epoch_loss = 0
        batch_index = 0
        seq_len = 0
        counter = 0
        hidden = model.init_hidden(args.batch_size)
        while (batch_index < n_batchs - 1):

            #optimizer.zero_grad()

            X, y, seq_len = next(train_data_loader)
            #print('X: ', X.shape, 'y: ', y.shape)
            hidden = repackage_hidden(hidden)
            #out, hidden = model(X, hidden)
            model.zero_grad()

            out, hidden = model(X, hidden)

            loss = loss_fn(out.view(-1, word_count), y)

            loss.backward()
            # scale lr with respect the size of the seq_len
            #utils.adjust_learning_rate(optimizer, args, seq_len)
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)

            for p in model.parameters():
                p.data.add_(-args.lr, p.grad.data)

            #optimizer.step()
            #utils.adjust_learning_rate(optimizer, args, args.base_seq_len)

            epoch_loss += loss.data.sum()
            batch_index += seq_len
            if counter % 200 == 0 and counter != 0:
                print('|batch {:3d}|train loss {:5.2f}|'.format(
                    counter, epoch_loss / counter))

            counter += 1

        train_loss = epoch_loss / counter
        val_loss = validate(model, val_data_loader, loss_fn, n_batchs_val,
                            word_count)

        logging['loss'].append(train_loss)
        logging['val_loss'].append(val_loss)
        utils.save_model(model, checkpoint_path)

        print('=' * 83)
        print('|epoch {:3d}|time: {:5.2f}s|valid loss {:5.2f}|'
              'train loss {:8.2f}'.format(epoch + 1,
                                          (time.time() - epoch_time), val_loss,
                                          train_loss))
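
`utils.custom_data_loader` is assumed to yield `(input, target, seq_len)` tuples, varying the sequence length around `--base-seq-len` during training (which is what `--seq-prob`, `--seq-std` and `--min-seq-len` suggest). A hypothetical sketch of such a loader:

import numpy as np

def custom_data_loader(data, args, evaluation=False):
    # Hypothetical generator: step through the (nbatch, bsz) tensor, yielding slices
    # whose length is fixed during evaluation and randomly perturbed during training.
    i = 0
    while i < data.size(0) - 1:
        if evaluation:
            seq_len = args.base_seq_len
        else:
            base = args.base_seq_len if np.random.random() < args.seq_prob else args.base_seq_len / 2.
            seq_len = max(args.min_seq_len, int(np.random.normal(base, args.seq_std)))
        seq_len = min(seq_len, data.size(0) - 1 - i)
        X = data[i:i + seq_len]
        y = data[i + 1:i + 1 + seq_len].contiguous().view(-1)
        yield X, y, seq_len
        i += seq_len
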
Example 13
def main():

    parser = argparse.ArgumentParser(
        description='PyTorch PennTreeBank RNN/LSTM Language Model')
    parser.add_argument('--data',
                        type=str,
                        default='data/penn/',
                        help='location of the data corpus')
    parser.add_argument('--model',
                        type=str,
                        default='LSTM',
                        help='type of recurrent net (LSTM, QRNN, GRU)')
    parser.add_argument('--emsize',
                        type=int,
                        default=400,
                        help='size of word embeddings')
    parser.add_argument('--nhid',
                        type=int,
                        default=1150,
                        help='number of hidden units per layer')
    parser.add_argument('--nlayers',
                        type=int,
                        default=3,
                        help='number of layers')
    parser.add_argument('--lr',
                        type=float,
                        default=30,
                        help='initial learning rate')
    parser.add_argument('--clip',
                        type=float,
                        default=0.25,
                        help='gradient clipping')
    parser.add_argument('--epochs',
                        type=int,
                        default=8000,
                        help='upper epoch limit')
    parser.add_argument('--max-steps-per-epoch',
                        type=int,
                        default=-1,
                        help='upper limit on steps per epoch')
    parser.add_argument('--batch-size',
                        type=int,
                        default=80,
                        metavar='N',
                        help='batch size')
    parser.add_argument('--bptt', type=int, default=70, help='sequence length')
    parser.add_argument('--warmup',
                        type=int,
                        default=4000,
                        help='warmup for learning rate')
    parser.add_argument('--cooldown',
                        type=int,
                        default=None,
                        help='cooldown for learning rate')
    parser.add_argument(
        '--accumulate',
        type=int,
        default=1,
        help='number of batches to accumulate before gradient update')
    parser.add_argument('--dropout',
                        type=float,
                        default=0.4,
                        help='dropout applied to layers (0 = no dropout)')
    parser.add_argument('--dropouth',
                        type=float,
                        default=0.3,
                        help='dropout for rnn layers (0 = no dropout)')
    parser.add_argument(
        '--dropouti',
        type=float,
        default=0.65,
        help='dropout for input embedding layers (0 = no dropout)')
    parser.add_argument(
        '--dropoute',
        type=float,
        default=0.1,
        help='dropout to remove words from embedding layer (0 = no dropout)')
    parser.add_argument(
        '--wdrop',
        type=float,
        default=0.0,
        help=
        'amount of weight dropout to apply to the RNN hidden to hidden matrix')
    parser.add_argument('--seed', type=int, default=1111, help='random seed')
    parser.add_argument('--nonmono', type=int, default=5, help='non-monotonic interval (epochs) before switching to ASGD')
    parser.add_argument('--cuda', action='store_false', help='use CUDA')
    parser.add_argument('--log-interval',
                        type=int,
                        default=200,
                        metavar='N',
                        help='report interval')
    randomhash = ''.join(str(time.time()).split('.'))
    parser.add_argument('--save',
                        type=str,
                        default=randomhash + '.pt',
                        help='path to save the final model')
    parser.add_argument(
        '--alpha',
        type=float,
        default=2,
        help=
        'alpha L2 regularization on RNN activation (alpha = 0 means no regularization)'
    )
    parser.add_argument(
        '--beta',
        type=float,
        default=1,
        help=
        'beta slowness regularization applied on RNN activation (beta = 0 means no regularization)'
    )
    parser.add_argument('--wdecay',
                        type=float,
                        default=1.2e-6,
                        help='weight decay applied to all weights')
    parser.add_argument('--resume',
                        type=str,
                        default='',
                        help='path of model to resume')
    parser.add_argument('--optimizer',
                        type=str,
                        default='sgd',
                        help='optimizer to use (sgd, adam)')
    parser.add_argument(
        '--when',
        nargs="+",
        type=int,
        default=[-1],
        help=
        'When (which epochs) to divide the learning rate by 10 - accepts multiple'
    )
    parser.add_argument(
        '--discard-highest-losses',
        type=float,
        default=0.0,
        help=
        'discard highest percentage of prediction losses before executing an optimizer step'
    )
    parser.add_argument(
        '--enlarge-model-every-n-epochs',
        type=int,
        default=-1,
        help='enlarge model (hidden and embedding dims) after every n epochs')

    args = parser.parse_args()
    args.tied = True

    # Set the random seed manually for reproducibility.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        if not args.cuda:
            print(
                "WARNING: You have a CUDA device, so you should probably run with --cuda"
            )
        else:
            torch.cuda.manual_seed(args.seed)

    ###############################################################################
    # Load data
    ###############################################################################

    import os
    import hashlib
    fn = 'corpus.{}.data'.format(hashlib.md5(args.data.encode()).hexdigest())
    if os.path.exists(fn):
        print('Loading cached dataset...')
        corpus = torch.load(fn)
    else:
        print('Producing dataset...')
        corpus = data.Corpus(args.data)
        torch.save(corpus, fn)

    eval_batch_size = min(100, args.batch_size)
    print('Eval batch size of', eval_batch_size)
    test_batch_size = 8
    train_data = batchify(corpus.train, args.batch_size, args)
    val_data = batchify(corpus.valid, eval_batch_size, args)
    test_data = batchify(corpus.test, test_batch_size, args)

    ###############################################################################
    # Build the model
    ###############################################################################

    from splitcross import SplitCrossEntropyLoss
    criterion = None

    ntokens = len(corpus.dictionary)
    print('Total number of tokens:', ntokens)
    #model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.dropouth, args.dropouti, args.dropoute, args.wdrop, args.tied)
    #model = model.BoomRNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.dropouth, args.dropouti, args.dropoute, args.wdrop, args.tied)
    if args.enlarge_model_every_n_epochs <= 0:
        model = SHARNN(args.model, ntokens, args.emsize, args.nhid,
                       args.nlayers, args.dropout, args.dropouth,
                       args.dropouti, args.dropoute, args.wdrop, args.tied)
    else:
        model = None
    #model = model.AttnRNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.dropouth, args.dropouti, args.dropoute, args.wdrop, args.tied)
    #model = model.RecAttn(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.dropouth, args.dropouti, args.dropoute, args.wdrop, args.tied)
    #model = model.LNRNN(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.dropouth, args.dropouti, args.dropoute, args.wdrop, args.tied)
    #model = model.LNRR(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.dropouth, args.dropouti, args.dropoute, args.wdrop, args.tied)
    ###

    splits = []
    if ntokens > 500000:
        # One Billion
        # This produces fairly even matrix mults for the buckets:
        # 0: 11723136, 1: 10854630, 2: 11270961, 3: 11219422
        splits = [4200, 35000, 180000]
    elif ntokens > 75000:
        # WikiText-103
        splits = [2800, 20000, 76000]
    print('Using', splits)

    if model is not None:
        if args.resume and args.epochs > 0:
            print('Resuming model ...')
            criterion = model_load(args.resume, model)
            #optimizer.param_groups[0]['lr'] = args.lr
            model.dropouti, model.dropouth, model.dropout, args.dropoute = args.dropouti, args.dropouth, args.dropout, args.dropoute
            #if args.wdrop:
            #    from weight_drop import WeightDrop
            #    for rnn in model.rnns:
            #        if type(rnn) == WeightDrop: rnn.dropout = args.wdrop
            #        elif rnn.zoneout > 0: rnn.zoneout = args.wdrop
        ###
        if not criterion:

            criterion = SplitCrossEntropyLoss(args.emsize,
                                              splits=splits,
                                              verbose=False)
        ###
        if args.cuda:
            model = model.cuda()
            criterion = criterion.cuda()
        if False:  # or args.jit:
            print('Jitting ...')
            model.eval()
            model.lmr = torch.jit.trace(model.lmr, (torch.rand([
                args.bptt, args.batch_size, args.emsize
            ]).cuda(), torch.rand([1, args.batch_size, args.emsize]).cuda()))
        #model = torch.jit.trace_module(model, torch.zeros((args.bptt, args.batch_size), dtype=torch.long))
        ###

    ###############################################################################
    # Training code
    ###############################################################################

    # Loop over epochs.
    #lr = args.lr
    best_val_loss = []
    stored_loss = 100000000

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        if model is not None:
            model, optimizer, params = init_optimizer(args, model, criterion)

        for epoch in range(1, args.epochs + 1):
            epoch_start_time = time.time()
            discard_highest_losses = args.discard_highest_losses * (
                args.epochs - epoch + 1) / args.epochs
            if args.enlarge_model_every_n_epochs > 0 and (
                    epoch - 1) % args.enlarge_model_every_n_epochs == 0:
                prev_model = model
                current_factor = (args.enlarge_model_every_n_epochs + epoch -
                                  1) / (args.enlarge_model_every_n_epochs +
                                        args.epochs)
                emsize = int(args.emsize * current_factor)
                nhid = int(args.nhid * current_factor)
                print(
                    f'enlarge model: emsize={emsize}, nhid={nhid} (discard_highest_losses={discard_highest_losses})'
                )
                model = SHARNN(args.model, ntokens, emsize, nhid, args.nlayers,
                               args.dropout, args.dropouth, args.dropouti,
                               args.dropoute, args.wdrop, args.tied)
                criterion = SplitCrossEntropyLoss(emsize,
                                                  splits=splits,
                                                  verbose=False)
                if args.cuda:
                    model = model.cuda()
                    criterion = criterion.cuda()
                if prev_model is not None:
                    model.load_from_smaller_and_freeze(prev_model)
                model, optimizer, params = init_optimizer(
                    args, model, criterion)

            train(model,
                  optimizer,
                  criterion,
                  args,
                  train_data,
                  params,
                  epoch=epoch - 1,
                  max_steps=args.max_steps_per_epoch,
                  discard_highest_losses=discard_highest_losses)
            if 't0' in optimizer.param_groups[0]:
                tmp = {}
                for prm in model.parameters():
                    tmp[prm] = prm.data.clone()
                    prm.data = optimizer.state[prm]['ax'].clone()

                val_loss2 = evaluate(model, criterion, args, val_data)
                print('-' * 89)
                print(
                    '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                    'valid ppl {:8.2f} | valid bpc {:8.3f}'.format(
                        epoch, (time.time() - epoch_start_time), val_loss2,
                        math.exp(val_loss2), val_loss2 / math.log(2)))
                print('-' * 89)

                if val_loss2 < stored_loss:
                    model_save(args.save, model, criterion)
                    print('Saving Averaged!')
                    stored_loss = val_loss2

                for prm in model.parameters():
                    prm.data = tmp[prm].clone()

            else:
                val_loss = evaluate(model, criterion, args, val_data,
                                    eval_batch_size)
                print('-' * 89)
                print(
                    '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                    'valid ppl {:8.2f} | valid bpc {:8.3f}'.format(
                        epoch, (time.time() - epoch_start_time), val_loss,
                        math.exp(val_loss), val_loss / math.log(2)))
                print('-' * 89)

                if val_loss < stored_loss:
                    model_save(args.save, model, criterion)
                    print('Saving model (new best validation)')
                    stored_loss = val_loss

                if args.optimizer == 'sgd' and 't0' not in optimizer.param_groups[
                        0] and (len(best_val_loss) > args.nonmono and
                                val_loss > min(best_val_loss[:-args.nonmono])):
                    print('Switching to ASGD')
                    optimizer = torch.optim.ASGD(model.parameters(),
                                                 lr=args.lr,
                                                 t0=0,
                                                 lambd=0.,
                                                 weight_decay=args.wdecay)

                if epoch in args.when:
                    print('Saving model before learning rate decreased')
                    model_save('{}.e{}'.format(args.save, epoch), model,
                               criterion)
                    print('Dividing learning rate by 10')
                    optimizer.param_groups[0]['lr'] /= 10.

                best_val_loss.append(val_loss)

    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')

    # Load the best saved model.
    criterion = model_load(args.save, model)

    params = list(model.parameters()) + list(criterion.parameters())
    total_params = sum(x.size()[0] *
                       x.size()[1] if len(x.size()) > 1 else x.size()[0]
                       for x in params if x.size())
    print('Model total parameters:', total_params)

    # Run on test data.
    test_loss = evaluate(model, criterion, args, test_data, test_batch_size)
    print('=' * 89)
    print(
        '| End of training | test loss {:5.2f} | test ppl {:8.2f} | test bpc {:8.3f}'
        .format(test_loss, math.exp(test_loss), test_loss / math.log(2)))
    print('=' * 89)
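
`init_optimizer` is assumed to gather the trainable parameters of the model and the split-softmax criterion and build the optimizer selected by `--optimizer`. A minimal sketch consistent with how its return values are used above (the real helper may do more):

import torch

def init_optimizer(args, model, criterion):
    # Parameters of the SplitCrossEntropyLoss are trained jointly with the model.
    params = list(model.parameters()) + list(criterion.parameters())
    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(params, lr=args.lr, weight_decay=args.wdecay)
    else:
        optimizer = torch.optim.Adam(params, lr=args.lr, weight_decay=args.wdecay)
    return model, optimizer, params
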
Example 14

import os
import hashlib
fn = 'corpus.{}.data'.format(hashlib.md5(args.data.encode()).hexdigest())
if os.path.exists(fn):
    print('Loading cached dataset...')
    corpus = torch.load(fn)
else:
    print('Producing dataset...')
    corpus = data.Corpus(args.data)
    torch.save(corpus, fn)

eval_batch_size = 10
test_batch_size = 1
train_data = batchify(corpus.train, args.batch_size, args)
val_data = batchify(corpus.valid, eval_batch_size, args)
test_data = batchify(corpus.test, test_batch_size, args)

###############################################################################
# Build the model
###############################################################################

from splitcross import SplitCrossEntropyLoss
criterion = None

ntokens = len(corpus.dictionary)
model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                       args.nlayers, args.dropout, args.dropouth,
                       args.dropouti, args.dropoute, args.wdrop, args.tied)
###
Example 15
    vocabdict = json.load(f)
vocabdict = {k: int(v) for k, v in vocabdict.items()}
corpus = Corpus(datafiles,
                maxlen=args.maxlen,
                vocab_size=args.vocab_size,
                lowercase=args.lowercase,
                vocab=vocabdict)

# save arguments
ntokens = len(corpus.dictionary.word2idx)
print("Vocabulary Size: {}".format(ntokens))
args.ntokens = ntokens

eval_batch_size = 100
en_data = batchify(corpus.data[args.corpus_name],
                   eval_batch_size,
                   shuffle=False)
print(len(en_data))
print("Loaded data!")

model_args, idx2word, autoencoder, gan_gen, gan_disc = load_models(
    args.outf, args.epochs, twodecoders=True)

if args.cuda:
    autoencoder = autoencoder.cuda()
    gan_gen = gan_gen.cuda()
    gan_disc = gan_disc.cuda()

one = to_gpu(args.cuda, torch.FloatTensor([1]))
mone = one * -1
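
# Illustrative aside: `one`/`mone` are the usual +1/-1 constants for WGAN-style
# critic updates, passed to backward() so the same scalar critic output can be
# pushed in opposite directions for real vs. generated codes. A hypothetical
# sketch (errD_real, real_hidden and fake_hidden are assumptions, not defined here):
#
#     errD_real = gan_disc(real_hidden)
#     errD_real.backward(one)    # maximize the critic score on real codes
#     errD_fake = gan_disc(fake_hidden)
#     errD_fake.backward(mone)   # minimize the critic score on generated codes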
Esempio n. 16
0
parser.add_argument('--theta', type=float, default=0.6625523432485668,
                    help='mix between uniform distribution and pointer softmax distribution over previous words')
parser.add_argument('--lambdasm', type=float, default=0.12785920428335693,
                    help='linear mix between only pointer (1) and only vocab (0) distribution')
args = parser.parse_args()

###############################################################################
# Load data
###############################################################################

corpus = data.Corpus(args.data)

eval_batch_size = 1
test_batch_size = 1
#train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, test_batch_size, args)
test_data = batchify(corpus.test, test_batch_size, args)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
criterion = nn.CrossEntropyLoss()

def one_hot(idx, size, cuda=True):
    a = np.zeros((1, size), np.float32)
    a[0][idx] = 1
    v = Variable(torch.from_numpy(a))
    if cuda: v = v.cuda()
    return v
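
# Quick usage check (illustrative, not part of the original script): index 2 in a
# vocabulary of size 5 gives a (1, 5) row vector with a single 1 at column 2.
_v = one_hot(2, 5, cuda=False)
print(_v.size())  # torch.Size([1, 5])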
Esempio n. 17
0
def train(source, target, encoder, decoder, lr, conf):
    """
    ----------
    @params
        source: list of list, sequences of source language
        target: list of list, sequences of target language
        encoder: Encoder, object of encoder in NMT
        decoder: Decoder, object of decoder in NMT
        lr: float, learning rate
        conf: Config, wraps anything needed
    ----------
    """
    encoder.train()
    decoder.train()
    enc_opt = optim.Adam(encoder.parameters(), lr=lr)
    dec_opt = optim.Adam(decoder.parameters(), lr=lr)
    loss_fn = nn.NLLLoss()

    total_loss = 0
    for batch, (x, x_len, y, mask) in enumerate(utils.batchify(
        source, target, conf.stride, conf.batch_size, True)):
        enc_opt.zero_grad()
        dec_opt.zero_grad()
        loss = 0

        x = x[:,1:] # skip <SOS>
        batch_size, src_len = x.shape
        x = Variable(torch.LongTensor(x.tolist()), volatile=False)
        y = Variable(torch.LongTensor(y.tolist()))

        enc_h = encoder.init_hidden(batch_size)

        if conf.cuda:
            x = x.cuda()
            y = y.cuda()
            enc_h = enc_h.cuda()

        encoder_out, enc_h = encoder(x, enc_h, x_len-1)
        # use last forward hidden state in encoder
        dec_h = enc_h[:decoder.n_layers]
        #dec_h = decoder.init_hidden(enc_h)

        target_len = y.size(1)
        decoder_input = y[:, 0:1]

        # Scheduled sampling
        use_teacher_forcing = random.random() < conf.teaching_ratio

        if use_teacher_forcing:
            for i in range(1, target_len):
                decoder_out, dec_h = decoder(decoder_input, dec_h)
                loss += utils.loss_in_batch(decoder_out, y[:,i], mask[:,i], loss_fn)
                decoder_input = y[:, i:i+1]

        else:
            for i in range(1, target_len):
                decoder_out, dec_h = decoder(decoder_input, dec_h)
                loss += utils.loss_in_batch(decoder_out, y[:,i], mask[:,i], loss_fn)

                topv, topi = decoder_out.data.topk(1)
                ni = topi[:,:1]
                decoder_input = Variable(torch.LongTensor(ni.tolist()))
                if conf.cuda:
                    decoder_input = decoder_input.cuda()

        total_loss += loss.data[0]
        loss /= batch_size
        loss.backward()

        enc_opt.step()
        dec_opt.step()

    return total_loss / len(source)
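
# Hypothetical usage sketch (Encoder, Decoder, Config, src_seqs and tgt_seqs are
# assumptions, not defined in this snippet):
#
#     conf = Config(stride=1, batch_size=32, teaching_ratio=0.5,
#                   cuda=torch.cuda.is_available())
#     encoder, decoder = Encoder(...), Decoder(...)
#     for epoch in range(n_epochs):
#         avg_loss = train(src_seqs, tgt_seqs, encoder, decoder, lr=1e-3, conf=conf)
#         print('epoch {}: avg loss {:.4f}'.format(epoch, avg_loss))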
Esempio n. 18
0
parser.add_argument('--reward', type=int, default=80)
parser.add_argument('--mu0', type=int, default=1)  # mean of the Gaussian distribution
parser.add_argument('--sigma0', type=int, default=1)  # variance of the Gaussian distribution
parser.add_argument('--sigma_tilde', type=int, default=1)
args = parser.parse_args()

np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True  # let cuDNN's (non-deterministic) auto-tuner pick the most efficient algorithms for the current configuration
    cudnn.enabled = True

corpus = data.Corpus(data_path)
train_data = batchify(corpus.train, train_batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, test_batch_size)

n_tokens = len(corpus.dictionary)
model = RNNModel(n_tokens,
                 embed_size,
                 n_hid,
                 n_hid_last,
                 dropout,
                 dropout_h,
                 dropout_x,
                 dropout_i,
                 dropout_e,
                 cell_cls=DARTSCell)
parallel_model = model.cuda()
Esempio n. 19
0
# ************** CREATE DATASET, MODEL AND OPTIMIZER******************

bpe = yttm.BPE(model=args.bpe_path)
TEXT = torchtext.data.Field(tokenize=lambda x: utils.bpe_tokenize(x, bpe),
                            lower=True)
train_txt, val_txt, test_txt = utils.get_datasets(args.dataset).splits(TEXT)
print('Dataset fetched')
TEXT.build_vocab(train_txt)
vocab_size = len(TEXT.vocab.stoi)
print(f"Unique tokens in vocabulary: {len(TEXT.vocab)}")

device = torch.device(
    f"cuda:{args.gpu_id}" if torch.cuda.is_available() else "cpu")
print(f'device={device}')

train_data = utils.batchify(train_txt, TEXT, args.batch_size, device)
val_data = utils.batchify(val_txt, TEXT, args.batch_size, device)

layernorm = not args.nolayernorm
model = transformer.LMTransformer(vocab_size,
                                  args.dmodel,
                                  args.nheads,
                                  args.dff,
                                  args.nlayers,
                                  args.dropout,
                                  tie_embeddings=args.tie_embeddings,
                                  dfa=args.dfa,
                                  no_training=args.no_training,
                                  dfa_after_vocab=args.dfa_after_vocab,
                                  dfa_embed=args.dfa_embed,
                                  attn=args.attention,
Esempio n. 20
0
    def __init__(self, args, dataset):
        """Constructor for training algorithm.

        Args:
            args: From command line, picked up by `argparse`.
            dataset: Currently only `data.text.Corpus` is supported.

        Initializes:
            - Data: train, val and test.
            - Model: shared and controller.
            - Inference: optimizers for shared and controller parameters.
            - Criticism: cross-entropy loss for training the shared model.
        """
        self.args = args
        self.controller_step = 0
        self.cuda = args.cuda
        self.dataset = dataset
        self.epoch = 0
        self.shared_step = 0
        self.start_epoch = 0
        # best_evaluated_dag on the validation set
        self.best_evaluated_dag = None
        self.best_ppl = np.inf
        self.best_epoch = None

        logger.info('regularizing:')
        for regularizer in [('activation regularization',
                             self.args.activation_regularization),
                            ('temporal activation regularization',
                             self.args.temporal_activation_regularization),
                            ('norm stabilizer regularization',
                             self.args.norm_stabilizer_regularization)]:
            if regularizer[1]:
                logger.info(f'{regularizer[0]}')

        self.train_data = utils.batchify(dataset.train, args.batch_size,
                                         self.cuda)
        # NOTE(brendan): The validation set data is batchified twice
        # separately: once for computing rewards during the Train Controller
        # phase (valid_data, batch size == 64), and once for evaluating ppl
        # over the entire validation set (eval_data, batch size == 1)
        self.valid_data = utils.batchify(dataset.valid, args.batch_size,
                                         self.cuda)
        self.eval_data = utils.batchify(dataset.valid, args.test_batch_size,
                                        self.cuda)
        self.test_data = utils.batchify(dataset.test, args.test_batch_size,
                                        self.cuda)

        self.max_length = self.args.shared_rnn_max_length

        if args.use_tensorboard:
            self.tb = TensorBoard(args.model_dir)
        else:
            self.tb = None
        self.build_model()

        if self.args.load_path:
            self.load_model()

        shared_optimizer = _get_optimizer(self.args.shared_optim)
        controller_optimizer = _get_optimizer(self.args.controller_optim)

        self.shared_optim = shared_optimizer(
            self.shared.parameters(),
            lr=self.shared_lr,
            weight_decay=self.args.shared_l2_reg)

        self.controller_optim = controller_optimizer(
            self.controller.parameters(), lr=self.args.controller_lr)

        self.ce = nn.CrossEntropyLoss()
Esempio n. 21
0
        print ("Error: input files directory does not exist")
        exit(0)

    params = {}
    params["embedding_size"] = args.embedding_size
    params["rnn_size"] = args.rnn_size
    params["rnn_layers"] = args.rnn_layers
    params["dropout"] = args.dropout
    params["use_gpu"] = use_gpu
    params["sequence_length"] = args.sequence_length
    params["batch_size"] = args.batch_size

    train, valid, test, word2idx = load_data(args.data_dir)
    params["vocab_size"] = len(word2idx)

    train_batches = utils.batchify(train, args.sequence_length, args.batch_size, word2idx)
    valid_batches = utils.batchify(valid, args.sequence_length, args.batch_size, word2idx)
    test_batches =  utils.batchify(test, args.sequence_length, args.batch_size, word2idx)
    
    # define loss, model and optimization
    model = LSTMLM(params)
    if use_gpu:
        print ("CUDA found!")
        model.cuda()
    loss_function = nn.NLLLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.1)

    # model summary
    print (str(model))

    # training
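    # The snippet is truncated at this point; a minimal epoch loop consistent with
    # the objects defined above might look like the sketch below. This is an
    # assumption -- the original training code is not shown, and the exact batch
    # layout produced by utils.batchify may differ.
    #
    #     for epoch in range(args.epochs):
    #         epoch_loss = 0.0
    #         for inputs, targets in train_batches:
    #             model.zero_grad()
    #             log_probs = model(inputs)  # LSTMLM is assumed to return log-probabilities
    #             loss = loss_function(log_probs.view(-1, params["vocab_size"]),
    #                                  targets.view(-1))
    #             loss.backward()
    #             optimizer.step()
    #             epoch_loss += loss.item()
    #         print("epoch {}: loss {:.4f}".format(epoch, epoch_loss / len(train_batches)))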
Esempio n. 22
0
    def __init__(self, save_path, seed, batch_size, grad_clip, config='eval'):
        if config == 'search':
            args = {
                'emsize': 300,
                'nhid': 300,
                'nhidlast': 300,
                'dropoute': 0,
                'wdecay': 5e-7
            }
        elif config == 'eval':
            args = {
                'emsize': 850,
                'nhid': 850,
                'nhidlast': 850,
                'dropoute': 0.1,
                'wdecay': 8e-7
            }
        args['config'] = config

        args['data'] = '/home/liamli4465/darts/data/penn'
        args['lr'] = 20
        args['clip'] = grad_clip
        args['batch_size'] = batch_size
        args['search_batch_size'] = 256 * 4
        args['small_batch_size'] = batch_size
        args['bptt'] = 35
        args['dropout'] = 0.75
        args['dropouth'] = 0.25
        args['dropoutx'] = 0.75
        args['dropouti'] = 0.2
        args['seed'] = seed
        args['nonmono'] = 5
        args['log_interval'] = 50
        args['save'] = save_path
        args['alpha'] = 0
        args['beta'] = 1e-3
        args['max_seq_length_delta'] = 20
        args['unrolled'] = True
        args['gpu'] = 0
        args['cuda'] = True
        args = AttrDict(args)
        self.args = args
        self.seed = seed

        np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        torch.cuda.set_device(args.gpu)
        cudnn.benchmark = True
        cudnn.enabled = True
        torch.cuda.manual_seed_all(args.seed)

        corpus = data.Corpus(args.data)
        self.corpus = corpus

        eval_batch_size = 10
        test_batch_size = 1

        self.train_data = batchify(corpus.train, args.batch_size, args)
        self.search_data = batchify(corpus.valid, args.search_batch_size, args)
        self.val_data = batchify(corpus.valid, eval_batch_size, args)
        self.test_data = batchify(corpus.test, test_batch_size, args)
        self.batch = 0
        self.steps = 0
        self.epochs = 0
        self.total_loss = 0
        self.start_time = time.time()

        ntokens = len(corpus.dictionary)
        # if args.continue_train:
        #    model = torch.load(os.path.join(args.save, 'model.pt'))
        try:
            model = torch.load(os.path.join(args.save, 'model.pt'))
            print('Loaded model from checkpoint')
        except Exception as e:
            print(e)
            model = RNNModel(ntokens,
                             args.emsize,
                             args.nhid,
                             args.nhidlast,
                             args.dropout,
                             args.dropouth,
                             args.dropoutx,
                             args.dropouti,
                             args.dropoute,
                             genotype=genotypes.DARTS)

        size = 0
        for p in model.parameters():
            size += p.nelement()
        logging.info('param size: {}'.format(size))
        logging.info('initial genotype:')
        logging.info(model.rnns[0].genotype)

        total_params = sum(x.data.nelement() for x in model.parameters())
        logging.info('Args: {}'.format(args))
        logging.info('Model total parameters: {}'.format(total_params))

        self.model = model.cuda()
        self.optimizer = torch.optim.SGD(model.parameters(),
                                         lr=args.lr,
                                         weight_decay=args.wdecay)
Esempio n. 23
0
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )
    else:
        torch.cuda.manual_seed_all(args.seed)

###############################################################################
# Load data
###############################################################################

corpus = data.Corpus(args.data)

eval_batch_size = 10
test_batch_size = 1

train_data_src, train_data_trg = batchify(corpus.train_src, corpus.train_trg,
                                          args.batch_size, args)
val_data_src, val_data_trg = batchify(corpus.valid_src, corpus.valid_trg,
                                      eval_batch_size, args)
test_data_src, test_data_trg = batchify(
    corpus.valid_src, corpus.valid_trg, test_batch_size,
    args)  # the test split here reuses the validation data, just with a different batch size

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)

if args.continue_train:  # probably needs to be fixed
    model = torch.load(os.path.join(args.save, 'model.pt'))
    print("Loaded existing model.")
forget_gates = {}
input_gates = {}
output_gates = {}
cell_states = {}
hidden_states = {}

relevant_labels = {}

with open(input_path) as input_file:
    for line in input_file:
        sentence, labels = parse_line(line)

        tokenized_data = corpus.safe_tokenize_sentence(sentence.strip())

        batch_size = 1
        input_data = batchify(tokenized_data, batch_size, False)
        gate_data, outputs = evaluate(input_data, batch_size)

        for lyr, gates in enumerate(gate_data):
            if lyr not in forget_gates:
                forget_gates[lyr] = []
            if lyr not in input_gates:
                input_gates[lyr] = []
            if lyr not in output_gates:
                output_gates[lyr] = []
            if lyr not in cell_states:
                cell_states[lyr] = []
            if lyr not in hidden_states:
                hidden_states[lyr] = []
            if lyr not in relevant_labels:
                relevant_labels[lyr] = []
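
        # Design aside (illustrative alternative, not in the original): the
        # per-layer bookkeeping above could be written more compactly with
        # collections.defaultdict, which creates the empty list on first access:
        #
        #     from collections import defaultdict
        #     forget_gates = defaultdict(list)
        #     input_gates = defaultdict(list)
        #     output_gates = defaultdict(list)
        #     cell_states = defaultdict(list)
        #     hidden_states = defaultdict(list)
        #     relevant_labels = defaultdict(list)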
Esempio n. 25
0
args = parser.parse_args()

scope_autoencoder = 'autoencoder'
scope_critic = 'critic'
scope_generator = 'generator'

corpus = Corpus(args.data_path,
                maxlen=args.maxlen,
                vocab_size=args.vocab_size,
                lowercase=True)

# Prepare data
ntokens = len(corpus.dictionary.word2idx)
args.ntokens = ntokens

test_data = batchify(corpus.test, args.batch_size, args.maxlen, shuffle=False)
train_data = batchify(corpus.train,
                      args.batch_size,
                      args.maxlen,
                      shuffle=False)

tf.reset_default_graph()

# Build graph
fixed_noise = tf.Variable(
    tf.random_normal(shape=(args.batch_size, args.z_size),
                     mean=0.0,
                     stddev=1.0,
                     dtype=tf.float32))

with tf.variable_scope(scope_autoencoder):
Esempio n. 26
0
        model, criterion, optimizer = torch.load(f)

import os
import hashlib
fn = 'corpus.{}.data'.format(hashlib.md5(args.data.encode()).hexdigest())
if os.path.exists(fn):
    print('Loading cached dataset...')
    corpus = torch.load(fn)
else:
    print('Producing dataset...')
    corpus = data.Corpus(args.data)
    torch.save(corpus, fn)

eval_batch_size = 10
test_batch_size = 1
train_data = batchify(corpus.train, args.batch_size, args)
val_data = batchify(corpus.valid, eval_batch_size, args)
test_data = batchify(corpus.test, test_batch_size, args)

###############################################################################
# Build the model
###############################################################################

from splitcross import SplitCrossEntropyLoss
criterion = None

ntokens = len(corpus.dictionary)
model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                       args.nlayers, args.dropout, args.dropouth,
                       args.dropouti, args.dropoute, args.wdrop, args.tied)
###
if args.resume:
    print('Resuming model ...')
# dumping vocabulary
with open('{}/vocab.json'.format(args.outf), 'w') as f:
    json.dump(corpus.dictionary.word2idx, f)

# save arguments
ntokens = len(corpus.dictionary.word2idx)
print("Vocabulary Size: {}".format(ntokens))
args.ntokens = ntokens
with open('{}/args.json'.format(args.outf), 'w') as f:
    json.dump(vars(args), f)
with open("{}/log.txt".format(args.outf), 'w') as f:
    f.write(str(vars(args)))
    f.write("\n\n")

eval_batch_size = 100
test1_data = batchify(corpus.data['valid1'], eval_batch_size, shuffle=False)
test2_data = batchify(corpus.data['valid2'], eval_batch_size, shuffle=False)
train1_data = batchify(corpus.data['train1'], args.batch_size, shuffle=True)
train2_data = batchify(corpus.data['train2'], args.batch_size, shuffle=True)

print("Loaded data!")

###############################################################################
# Build the models
###############################################################################

ntokens = len(corpus.dictionary.word2idx)
autoencoder = Seq2Seq2Decoder(emsize=args.emsize,
                              nhidden=args.nhidden,
                              ntokens=ntokens,
                              nlayers=args.nlayers,
Esempio n. 28
0
                    maxlen=args.maxlen,
                    vocab_size=args.vocab_size,
                    lowercase=args.lowercase,
                    load_vocab=cur_dir + '/vocab.json')
else:
    corpus = Corpus(args.data_path,
                    maxlen=args.maxlen,
                    vocab_size=args.vocab_size,
                    lowercase=args.lowercase)

eval_batch_size = 10
if not args.convolution_enc:
    args.packed_rep = True
train_data = batchify(corpus.train,
                      args.batch_size,
                      args.maxlen,
                      packed_rep=args.packed_rep,
                      shuffle=True)
corpus_test = SNLIDataset(
    train=False,
    vocab_size=41578,
    reset_vocab="/home/ddua/data/arae/output/example/1504200881/vocab.json")
testloader = torch.utils.data.DataLoader(corpus_test,
                                         batch_size=10,
                                         collate_fn=collate_snli,
                                         shuffle=False)
test_data = iter(testloader)

classifier1 = Baseline_Embeddings(100, maxlen=10, gpu=True, vocab_size=41578)
classifier1.load_state_dict(
    torch.load("/home/ddua/data/snli/baseline/model_emb.pt"))
Esempio n. 29
0
# load conditional information
test_C = np.load('data/test_weight-YAGO.npy')
train_C = np.load('data/train_weight-YAGO.npy')

test_C = preprocessing.normalize(test_C, norm='l2')
train_C = preprocessing.normalize(train_C, norm='l2')

test_data, test_c = batchify_C(corpus.test,
                               test_C,
                               eval_batch_size,
                               shuffle=False)
train_data, train_c = batchify_C(corpus.train,
                                 train_C,
                                 args.batch_size,
                                 shuffle=False)
test_final = batchify(test_C, len(test_C), shuffle=False)

print("Loaded data!")

###############################################################################
# Build the models
###############################################################################

ntokens = len(corpus.dictionary.word2idx)
autoencoder = Seq2Seq(emsize=args.emsize,
                      nhidden=args.nhidden,
                      ntokens=ntokens,
                      nlayers=args.nlayers,
                      noise_radius=args.noise_radius,
                      hidden_init=args.hidden_init,
                      dropout=args.dropout,
Esempio n. 30
0
# dumping vocabulary
with open('./output/{}/vocab.json'.format(args.outf), 'w') as f:
    json.dump(corpus.dictionary.word2idx, f)

# save arguments
ntokens = len(corpus.dictionary.word2idx)
print("Vocabulary Size: {}".format(ntokens))
args.ntokens = ntokens
with open('./output/{}/args.json'.format(args.outf), 'w') as f:
    json.dump(vars(args), f)
with open("./output/{}/logs.txt".format(args.outf), 'w') as f:
    f.write(str(vars(args)))
    f.write("\n\n")

eval_batch_size = 10
test_data = batchify(corpus.test, eval_batch_size, shuffle=False)
train_data = batchify(corpus.train, args.batch_size, shuffle=True)

print("Loaded data!")

###############################################################################
# Build the models
###############################################################################

ntokens = len(corpus.dictionary.word2idx)
autoencoder = Seq2Seq(emsize=args.emsize,
                      nhidden=args.nhidden,
                      ntokens=ntokens,
                      nlayers=args.nlayers,
                      noise_radius=args.noise_radius,
                      hidden_init=args.hidden_init,
Esempio n. 31
0
                    help='linear mix between only pointer (1) and only vocab (0) distribution')
# ThinkNet params
add_tn_params(parser)

args = parser.parse_args()

###############################################################################
# Load data
###############################################################################

corpus = data.Corpus(args.data)

eval_batch_size = 1
test_batch_size = 1
#train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, test_batch_size, args)
test_data = batchify(corpus.test, test_batch_size, args)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
criterion = nn.CrossEntropyLoss()

def one_hot(idx, size, cuda=True):
    a = np.zeros((1, size), np.float32)
    a[0][idx] = 1
    v = Variable(torch.from_numpy(a))
    if cuda: v = v.cuda()
    return v
Esempio n. 32
0
    if args.philly:
        fn = os.path.join(os.environ['PT_OUTPUT_DIR'], fn)
    if os.path.exists(fn):
        tools.print_log(args.save, 'Loading cached dataset...')
        corpus = torch.load(fn)
    else:
        tools.print_log(args.save, 'Producing dataset...')
        corpus = data.Corpus(args.data)
        torch.save(corpus, fn)

# Generate data

eval_batch_size = 10
test_batch_size = 1

train_data = batchify(corpus.train, args.batch_size, args)  # shape (46479, 20); PTB train: 929,589 tokens (887,521 words plus <eos>)
val_data = batchify(corpus.valid, eval_batch_size, args)  # shape (7376, 10); 70,390 words plus <eos> tokens
test_data = batchify(corpus.test, test_batch_size, args)  # shape (82430, 1); 78,669 words plus 3,761 <eos> tokens

if args.debug:
    train_data = train_data[:50]
    val_data = val_data[:50]
    test_data = test_data[:50]

###############################################################################
# Build the model
###############################################################################

###############################################################################

criterion = None
Esempio n. 33
0
# dumping vocabulary
with open('./output/{}/vocab.json'.format(args.outf), 'w') as f:
    json.dump(corpus.dictionary.word2idx, f)

# save arguments
ntokens = len(corpus.dictionary.word2idx)
print("Vocabulary Size: {}".format(ntokens))
args.ntokens = ntokens
with open('./output/{}/args.json'.format(args.outf), 'w') as f:
    json.dump(vars(args), f)
with open("./output/{}/logs.txt".format(args.outf), 'w') as f:
    f.write(str(vars(args)))
    f.write("\n\n")

eval_batch_size = 10
test_data = batchify(corpus.test, eval_batch_size, shuffle=False)
train_data = batchify(corpus.train, args.batch_size, shuffle=True)

print("Loaded data!")

###############################################################################
# Build the models
###############################################################################

ntokens = len(corpus.dictionary.word2idx)
autoencoder = Seq2Seq(emsize=args.emsize,
                      nhidden=args.nhidden,
                      ntokens=ntokens,
                      nlayers=args.nlayers,
                      noise_radius=args.noise_radius,
                      hidden_init=args.hidden_init,