Example #1
def test_model():
    vocab, embeddings = data_helper.load_embeddings(config.get('data', 'embedding_file'))
    model = RNNModel(embeddings, num_classes=5)
    model.load(config.get('data', 'model_dir'))
    test_data = data_helper.load_data(os.path.join(config.get('data', 'treebank_dir'), 'test.txt'))
    numeric_test_samples = data_helper.convert_to_numeric_samples(test_data, vocab, num_classes=5)
    model.eval(numeric_test_samples)
Example #2
def export_onnx(path, batch_size, seq_len):
    print('The model is also exported in ONNX format at {}'.format(
        os.path.realpath(path)))
    model.eval()
    dummy_input = torch.LongTensor(seq_len * batch_size).zero_().view(
        -1, batch_size).to(device)
    hidden = model.init_hidden(batch_size)
    torch.onnx.export(model, (dummy_input, hidden), path)
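The export above writes the graph but does not show how to consume it. A minimal sketch of loading and running the file with onnxruntime, assuming that package is installed; the input names are whatever torch.onnx.export auto-generated, so they are discovered from the session rather than hard-coded, and the zero-filled feeds are placeholders:

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession('model.onnx', providers=['CPUExecutionProvider'])
feeds = {}
for inp in sess.get_inputs():
    # Fill dynamic dimensions with 1 and pick a dtype from the declared tensor type.
    shape = [d if isinstance(d, int) else 1 for d in inp.shape]
    dtype = np.int64 if 'int' in inp.type else np.float32
    feeds[inp.name] = np.zeros(shape, dtype=dtype)
outputs = sess.run(None, feeds)
print([o.shape for o in outputs])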
Example #3
def evaluate(data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0.
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(eval_batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i)
            output, hidden = model(data, hidden)
            output_flat = output.view(-1, ntokens)
            total_loss += len(data) * criterion(output_flat, targets).item()
            hidden = repackage_hidden(hidden)
    return total_loss / (len(data_source) - 1)
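This evaluation loop, like several of the examples below, calls a repackage_hidden helper to cut the autograd graph between BPTT windows. A minimal sketch of that helper in the spirit of the PyTorch word-language-model example; the version in the original repository may differ:

import torch

def repackage_hidden(h):
    """Detach hidden states from their history so backprop stops at the window boundary."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)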
Example #4
def train_model():
    vocab, embeddings = data_helper.load_embeddings(config.get('data', 'embedding_file'))
    train_data = data_helper.load_data(os.path.join(config.get('data', 'treebank_dir'), 'train.txt'))
    numeric_train_samples = data_helper.convert_to_numeric_samples(train_data, vocab, num_classes=5)
    model = RNNModel(embeddings, num_classes=5, model_config=config['model'])
    dev_data = data_helper.load_data(os.path.join(config.get('data', 'treebank_dir'), 'dev.txt'))
    numeric_dev_samples = data_helper.convert_to_numeric_samples(dev_data, vocab, num_classes=5)
    eval_func = lambda: model.eval(numeric_dev_samples)
    model.train(numeric_train_samples, eval_func)
    model.save(config.get('data', 'model_dir'))
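Examples #1 and #4 read their paths and hyperparameters from a config object that is never shown. A minimal sketch of the layout implied by the config.get('data', ...) and config['model'] accesses, assuming a standard configparser INI file; the key names come from the code above, while the values and the single [model] entry are placeholders:

import configparser

EXAMPLE_INI = """
[data]
embedding_file = /path/to/embeddings.txt
treebank_dir = /path/to/treebank
model_dir = /path/to/model_dir

[model]
# placeholder entry; the real keys consumed via model_config=config['model'] are not shown
hidden_size = 128
"""

config = configparser.ConfigParser()
config.read_string(EXAMPLE_INI)
print(config.get('data', 'embedding_file'), dict(config['model']))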
Example #5
def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('-l', '--load_path', default=None)
    args = parser.parse_args()
    return args


cl_args = parse_args()
dataset = Corpus()
dataset.process_data()
sos = dataset.target_dict.word2idx['<sos>']
eos = dataset.target_dict.word2idx['<eos>']
args = np.load(os.path.join(cl_args.load_path, 'args.npy')).tolist()

model = RNNModel(args).cuda()
model.eval()
if cl_args.load_path:
    file = os.path.join(cl_args.load_path, 'model.pt')
    model.load_state_dict(torch.load(file))

itr = dataset.create_epoch_iterator('test', 1)
for i in xrange(50):
    source, target = itr.next()
    output = model.sample(source, sos, eos)

    print "Source: ", ''.join([
        dataset.source_dict.idx2word[x]
        for x in source.cpu().data.numpy()[:, 0]
    ])

    print "Original: ", ''.join([
Example #6
        # target: term0, term1
        lm_input, lens, lm_output = batch
        predictions, _, lens = model(cudalize(lm_input), cudalize(lens))
        loss = loss_function(predictions, cudalize(lm_output), lens)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        avg_loss = total_loss / (batch_num + 1)
        ppl = math.exp(avg_loss)
        print(f'\rLM Training: epoch:{epoch} train batch:{batch_num} ' +
              f'loss:{avg_loss} ppl:{ppl}',
              end='')
    print()
    valid_loss = 0
    model = model.eval()
    for batch_num, batch in enumerate(valid_loader):
        lm_input, lens, lm_output = batch
        predictions, _, lens = model(cudalize(lm_input), cudalize(lens))
        loss = loss_function(predictions, cudalize(lm_output), lens)
        valid_loss += loss.item()
        valid_avg_loss = valid_loss / (batch_num + 1)
        valid_ppl = math.exp(valid_avg_loss)
        print(f'\rvalid batch:{batch_num} loss: {valid_avg_loss:.4f} ' +
              f'ppl:{valid_ppl:.4f}',
              end='')
    print()
    if valid_loss / (batch_num + 1) < min_loss:
        min_loss = valid_loss / (batch_num + 1)
        earlystop_count = 0
        torch.save(model.state_dict(), './model.pt')
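The fragment stops right after recording a new best validation loss. A hypothetical continuation with the usual else branch that counts stagnant epochs; patience is a placeholder and not a value from the original snippet:

    else:
        earlystop_count += 1
        if earlystop_count >= patience:   # `patience` is a placeholder limit
            print(f'\nearly stopping after {earlystop_count} stagnant epochs')
            # break out of the enclosing epoch loop here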
Example #7
if USE_HOLIDAY == 'no_use':
    X_train[:, :, [1]] = 0
    # X_val[:, :, [1]] = 0
    X_test[:, :, [1]] = 0

    model = RNNModel(
        lookback=LOOKBACK, lookahead=LOOKAHEAD, input_dim=2, hid_dim=40, 
        device='cuda', data_dir=DATA_DIR, task=USE_HOLIDAY
    )
    model.fit(
        X_train=X_train, y_train=y_train, 
        X_val=X_test, y_val=y_test, 
        metric='mae', max_epoch=MAX_EPOCH, patience=3000000, 
        batch_size=128, lr=1e-3, weight_decay=1e-3
    )
    forecast = model.eval(X_test=X_test)


elif USE_HOLIDAY == 'feature':
    model = RNNModel(
        lookback=LOOKBACK, lookahead=LOOKAHEAD, input_dim=2, hid_dim=40, 
        device='cuda', data_dir=DATA_DIR, task=USE_HOLIDAY
    )
    model.fit(
        X_train=X_train, y_train=y_train, 
        X_val=X_test, y_val=y_test, 
        metric='mae', max_epoch=MAX_EPOCH, patience=3000000, 
        batch_size=128, lr=1e-3, weight_decay=1e-3
    )
    forecast = model.eval(X_test=X_test)
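Both branches of the USE_HOLIDAY switch treat channel 1 of the inputs as the holiday indicator, which pins down the array layout the example assumes. A tiny self-contained illustration; the shapes and the LOOKBACK value here are placeholders:

import numpy as np

LOOKBACK = 24                               # placeholder window length
X_demo = np.random.rand(8, LOOKBACK, 2)     # (windows, time steps, channels)
X_demo[:, :, [1]] = 0                       # zero the holiday channel, as in 'no_use'
print(X_demo.shape, float(X_demo[..., 1].max()))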
Example #8
class DartsTrainer():
    def __init__(self, arm):
        # Default params for eval network
        args = {
            'emsize': 850,
            'nhid': 850,
            'nhidlast': 850,
            'dropoute': 0.1,
            'wdecay': 8e-7
        }

        args['data'] = '/home/liamli4465/darts/data/penn'
        args['lr'] = 20
        args['clip'] = 0.25
        args['batch_size'] = 64
        args['search_batch_size'] = 256 * 4
        args['small_batch_size'] = 64
        args['bptt'] = 35
        args['dropout'] = 0.75
        args['dropouth'] = 0.25
        args['dropoutx'] = 0.75
        args['dropouti'] = 0.2
        args['seed'] = arm['seed']
        args['nonmono'] = 5
        args['log_interval'] = 50
        args['save'] = arm['dir']
        args['alpha'] = 0
        args['beta'] = 1e-3
        args['max_seq_length_delta'] = 20
        args['unrolled'] = True
        args['gpu'] = 0
        args['cuda'] = True
        args['genotype'] = arm['genotype']
        args = AttrDict(args)
        self.args = args
        self.epoch = 0

        np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        torch.cuda.set_device(args.gpu)
        cudnn.benchmark = True
        cudnn.enabled = True
        torch.cuda.manual_seed_all(args.seed)

        corpus = data.Corpus(args.data)
        self.corpus = corpus

        self.eval_batch_size = 10
        self.test_batch_size = 1

        self.train_data = batchify(corpus.train, args.batch_size, args)
        self.search_data = batchify(corpus.valid, args.search_batch_size, args)
        self.val_data = batchify(corpus.valid, self.eval_batch_size, args)
        self.test_data = batchify(corpus.test, self.test_batch_size, args)

        self.ntokens = len(corpus.dictionary)

    def model_save(self, fn, to_save):
        if self.epoch % 150 == 0:
            with open(
                    os.path.join(self.args.save,
                                 "checkpoint-incumbent-%d" % self.epoch),
                    'wb') as f:
                torch.save(to_save, f)

        with open(fn, 'wb') as f:
            torch.save(to_save, f)

    def model_load(self, fn):
        with open(fn, 'rb') as f:
            self.model, self.optimizer, rng_state, cuda_state = torch.load(f)
            torch.set_rng_state(rng_state)
            torch.cuda.set_rng_state(cuda_state)

    def model_resume(self, filename):
        logging.info('Resuming model from %s' % filename)
        self.model_load(filename)
        self.optimizer.param_groups[0]['lr'] = self.args.lr
        for rnn in self.model.rnns:
            rnn.genotype = self.args.genotype

    def train_epochs(self, epochs):
        args = self.args
        resume_filename = os.path.join(self.args.save, "checkpoint.incumbent")
        if os.path.exists(resume_filename):
            self.model_resume(resume_filename)
            logging.info('Loaded model from checkpoint')
        else:
            self.model = RNNModel(self.ntokens,
                                  args.emsize,
                                  args.nhid,
                                  args.nhidlast,
                                  args.dropout,
                                  args.dropouth,
                                  args.dropoutx,
                                  args.dropouti,
                                  args.dropoute,
                                  genotype=args.genotype)
            self.optimizer = torch.optim.SGD(self.model.parameters(),
                                             lr=args.lr,
                                             weight_decay=args.wdecay)

        size = 0
        for p in self.model.parameters():
            size += p.nelement()
        logging.info('param size: {}'.format(size))
        logging.info('initial genotype:')
        logging.info(self.model.rnns[0].genotype)

        total_params = sum(x.data.nelement() for x in self.model.parameters())
        logging.info('Args: {}'.format(args))
        logging.info('Model total parameters: {}'.format(total_params))

        self.model = self.model.cuda()
        # Loop over epochs.
        lr = args.lr
        best_val_loss = []
        stored_loss = 100000000

        # At any point you can hit Ctrl + C to break out of training early.
        try:
            for epoch in range(epochs):
                epoch_start_time = time.time()
                self.train()
                if 't0' in self.optimizer.param_groups[0]:
                    tmp = {}
                    for prm in self.model.parameters():
                        tmp[prm] = prm.data.clone()
                        prm.data = self.optimizer.state[prm]['ax'].clone()

                    val_loss2 = self.evaluate(self.val_data)
                    logging.info('-' * 89)
                    logging.info(
                        '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                        'valid ppl {:8.2f} | valid bpc {:8.3f}'.format(
                            self.epoch,
                            (time.time() - epoch_start_time), val_loss2,
                            math.exp(val_loss2), val_loss2 / math.log(2)))
                    logging.info('-' * 89)

                    if val_loss2 < stored_loss:
                        self.model_save(
                            os.path.join(args.save, 'checkpoint.incumbent'), [
                                self.model, self.optimizer,
                                torch.get_rng_state(),
                                torch.cuda.get_rng_state()
                            ])
                        logging.info('Saving Averaged!')
                        stored_loss = val_loss2

                    for prm in self.model.parameters():
                        prm.data = tmp[prm].clone()

                else:
                    val_loss = self.evaluate(self.val_data,
                                             self.eval_batch_size)
                    logging.info('-' * 89)
                    logging.info(
                        '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                        'valid ppl {:8.2f} | valid bpc {:8.3f}'.format(
                            self.epoch,
                            (time.time() - epoch_start_time), val_loss,
                            math.exp(val_loss), val_loss / math.log(2)))
                    logging.info('-' * 89)

                    if val_loss < stored_loss:
                        self.model_save(
                            os.path.join(args.save, 'checkpoint.incumbent'), [
                                self.model, self.optimizer,
                                torch.get_rng_state(),
                                torch.cuda.get_rng_state()
                            ])
                        logging.info('Saving model (new best validation)')
                        stored_loss = val_loss

                    if (self.epoch > 75
                            and 't0' not in self.optimizer.param_groups[0] and
                        (len(best_val_loss) > args.nonmono
                         and val_loss > min(best_val_loss[:-args.nonmono]))):
                        logging.info('Switching to ASGD')
                        self.optimizer = torch.optim.ASGD(
                            self.model.parameters(),
                            lr=args.lr,
                            t0=0,
                            lambd=0.,
                            weight_decay=args.wdecay)

                    best_val_loss.append(val_loss)

        except Exception as e:
            logging.info('-' * 89)
            logging.info(e)
            logging.info('Exiting from training early')
            return 0, 10000, 10000

        # Load the best saved model.
        self.model_load(os.path.join(args.save, 'checkpoint.incumbent'))

        # Run on test data.
        val_loss = self.evaluate(self.val_data, self.eval_batch_size)
        logging.info(math.exp(val_loss))
        test_loss = self.evaluate(self.test_data, self.test_batch_size)
        logging.info('=' * 89)
        logging.info(
            '| End of training | test loss {:5.2f} | test ppl {:8.2f} | test bpc {:8.3f}'
            .format(test_loss, math.exp(test_loss), test_loss / math.log(2)))
        logging.info('=' * 89)

        return 0, math.exp(val_loss), math.exp(test_loss)

    def train(self):
        args = self.args
        corpus = self.corpus
        total_loss = 0
        start_time = time.time()
        hidden = [
            self.model.init_hidden(args.small_batch_size)
            for _ in range(args.batch_size // args.small_batch_size)
        ]
        batch, i = 0, 0

        while i < self.train_data.size(0) - 1 - 1:
            bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
            # Prevent excessively small or negative sequence lengths
            seq_len = max(5, int(np.random.normal(bptt, 5)))
            # There's a very small chance that it could select a very long sequence length resulting in OOM
            seq_len = min(seq_len, args.bptt + args.max_seq_length_delta)

            lr2 = self.optimizer.param_groups[0]['lr']
            self.optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
            self.model.train()
            data, targets = get_batch(self.train_data,
                                      i,
                                      args,
                                      seq_len=seq_len)

            self.optimizer.zero_grad()

            start, end, s_id = 0, args.small_batch_size, 0
            while start < args.batch_size:
                cur_data = data[:, start:end]
                cur_targets = targets[:, start:end].contiguous().view(-1)

                # Starting each batch, we detach the hidden state from how it was previously produced.
                # If we didn't, the model would try backpropagating all the way to start of the dataset.
                hidden[s_id] = repackage_hidden(hidden[s_id])

                log_prob, hidden[s_id], rnn_hs, dropped_rnn_hs = self.model(
                    cur_data, hidden[s_id], return_h=True)
                raw_loss = nn.functional.nll_loss(
                    log_prob.view(-1, log_prob.size(2)), cur_targets)

                loss = raw_loss
                # Activation Regularization
                if args.alpha > 0:
                    loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean()
                                      for dropped_rnn_h in dropped_rnn_hs[-1:])
                # Temporal Activation Regularization (slowness)
                loss = loss + sum(args.beta *
                                  (rnn_h[1:] - rnn_h[:-1]).pow(2).mean()
                                  for rnn_h in rnn_hs[-1:])
                loss *= args.small_batch_size / args.batch_size
                total_loss += raw_loss.data * args.small_batch_size / args.batch_size
                loss.backward()

                s_id += 1
                start = end
                end = start + args.small_batch_size

                gc.collect()

            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
            torch.nn.utils.clip_grad_norm(self.model.parameters(), args.clip)
            self.optimizer.step()

            # total_loss += raw_loss.data
            self.optimizer.param_groups[0]['lr'] = lr2

            if np.isnan(total_loss[0]):
                raise ValueError('training loss became NaN')

            #if batch % args.log_interval == 0 and batch > 0:
            #    cur_loss = total_loss[0] / args.log_interval
            #    elapsed = time.time() - start_time
            #    logging.info('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
            #            'loss {:5.2f} | ppl {:8.2f}'.format(
            #        self.epoch, batch, len(self.train_data) // args.bptt, self.optimizer.param_groups[0]['lr'],
            #        elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            #    total_loss = 0
            #    start_time = time.time()
            batch += 1
            i += seq_len
        self.epoch += 1

    def evaluate(self, data_source, batch_size=10):
        # Turn on evaluation mode which disables dropout.
        self.model.eval()
        total_loss = 0
        hidden = self.model.init_hidden(batch_size)
        for i in range(0, data_source.size(0) - 1, self.args.bptt):
            data, targets = get_batch(data_source,
                                      i,
                                      self.args,
                                      evaluation=True)
            targets = targets.view(-1)

            log_prob, hidden = self.model(data, hidden)
            loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)),
                                          targets).data

            total_loss += loss * len(data)

            hidden = repackage_hidden(hidden)
        return total_loss[0] / len(data_source)
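A hypothetical driver for the class above. The arm dict keys (seed, dir, genotype) are exactly the ones __init__ reads; the values, and the genotype object in particular, are placeholders for whatever the surrounding architecture search provides:

arm = {
    'seed': 0,
    'dir': '/tmp/darts_eval',
    'genotype': None,   # stand-in for a DARTS recurrent-cell genotype
}
trainer = DartsTrainer(arm)
# train_epochs returns (status, validation perplexity, test perplexity).
status, val_ppl, test_ppl = trainer.train_epochs(1)
print(val_ppl, test_ppl)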
Example #9
def train():
    best_val_loss = 100

    ntokens = len(corpus.dictionary)
    train_data = batchify(corpus.train, args.batch_size)  # num_batches, batch_size
    val_data = batchify(corpus.valid, args.batch_size)
    model = RNNModel(rnn_type=args.model,
                     ntoken=ntokens,
                     ninp=args.emsize,
                     nfeat=args.nfeat,
                     nhid=args.nhid,
                     nlayers=args.nlayers,
                     font_path=args.font_path,
                     font_size=args.font_size,
                     dropout=args.dropout,
                     tie_weights=args.tied,
                     ).to(device)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    print('start training...')
    hidden = model.init_hidden(args.batch_size)
    epoch_start_time = time.time()

    for epoch in range(args.epochs):

        model.eval()  # evaluate on the validation set
        total_loss = 0.
        with torch.no_grad():
            for idx in range(0, val_data.size(0) - 1, args.bptt):
                data, targets = get_batch(val_data, idx)
                output, hidden = model(data, hidden)
                output_flat = output.view(-1, ntokens)  # (seq_len, batch, ntokens) -> (seq_len*batch, ntokens)
                total_loss += len(data) * criterion(output_flat, targets.view(-1)).item()
                hidden = repackage_hidden(hidden)
        val_loss = total_loss / len(val_data)
        best_val_loss = min(best_val_loss, val_loss)
        print('-' * 100)
        print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | valid ppl {:8.2f} | best valid ppl {:8.2f}'
              .format(epoch, (time.time() - epoch_start_time), val_loss, math.exp(val_loss), math.exp(best_val_loss)))
        print('-' * 100)
        epoch_start_time = time.time()
        if val_loss == best_val_loss:  # Save the model if the validation loss is best so far.
            torch.save(model, os.path.join(args.save, 'model.pkl'))
        else:
            # Anneal the learning rate when validation loss does not improve.
            args.lr /= 4.0
            for group in optimizer.param_groups:
                group['lr'] = args.lr

        model.train()  # train on the training set
        total_loss = 0.
        start_time = time.time()
        for i, idx in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
            data, targets = get_batch(train_data, idx)
            hidden = repackage_hidden(hidden)
            model.zero_grad()  # compute the loss and gradients
            output, hidden = model(data, hidden)
            loss = criterion(output.view(-1, ntokens), targets.view(-1))
            loss.backward()
            total_loss += loss.item()

            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)  # clip gradients, then update the parameters
            optimizer.step()
            # for p in model.parameters():
            #     p.data.add_(-args.lr, p.grad.data)

            if i % args.log_interval == 0 and i > 0:
                cur_loss = total_loss / args.log_interval
                elapsed = time.time() - start_time
                print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} |loss {:5.2f} | ppl {:8.2f}'
                      .format(epoch + 1, i, len(train_data) // args.bptt, args.lr, elapsed * 1000 / args.log_interval,
                              cur_loss, math.exp(cur_loss)))
                total_loss = 0
                start_time = time.time()
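Examples #3, #8 and #9 all rely on batchify and get_batch helpers that are not reproduced here. A minimal sketch in the spirit of the PyTorch word-language-model example; the device and bptt handling is simplified, and the signatures in the actual repositories differ slightly (Example #8, for instance, also passes args and an evaluation flag):

import torch

def batchify(data, bsz, device='cpu'):
    # Trim tokens that do not fit and reshape into (steps, batch_size) columns.
    nbatch = data.size(0) // bsz
    data = data.narrow(0, 0, nbatch * bsz)
    return data.view(bsz, -1).t().contiguous().to(device)

def get_batch(source, i, bptt=35):
    # Inputs are source[i : i + seq_len]; targets are the same tokens shifted by one step.
    seq_len = min(bptt, len(source) - 1 - i)
    data = source[i:i + seq_len]
    target = source[i + 1:i + 1 + seq_len].view(-1)
    return data, target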
Example #10
    def decompress(self, compressedfile):
        start = time.time()
        filename_split = compressedfile.split('_')
        checkpoint = torch.load(compressedfile, map_location=self.device)
        body = checkpoint['bytes']
        dictionary = Dictionary()
        dictionary.word2idx = checkpoint['word2idx']
        dictionary.idx2word = checkpoint['idx2word']
        context_map = Context(dictionary)
        ntokens = len(dictionary)
        model = RNNModel('LSTM',
                         ntokens,
                         200,
                         200,
                         2,
                         dropout=0.2,
                         tie_weights=False)
        model.load_state_dict(checkpoint['model_state_dict'])
        model.to(self.device)
        model.eval()
        bit_string = ''
        join_body = list(body)
        for i in join_body:
            bit_string += "{0:08b}".format(i)
        encoded_text = self.remove_padding(bit_string)
        # decompress start here
        current_code = ''
        decoded_text = ''
        # we define an initial context
        # then we predict the initial huffman tree
        # read bits until we get to a leaf
        # convert the leaf to a char and add it to decompressed text
        # update the context and repeat the process
        context = ['<s>'] * 10

        def tree_from_context(context):
            huffman = HuffmanCoding()
            prob = huffman.make_context_frequency_dict(
                context,
                model,
                context_map,
                self.device,
                threshold=self.args.threshold)
            huffman.make_heap_node(prob)
            huffman.merge_nodes()
            huffman.encode()
            huffman.reverse_mapping = {v: k for k, v in huffman.codes.items()}
            return huffman

        huffman = tree_from_context(context)
        fixed_huffman = HuffmanCoding()
        counts = checkpoint['fixed_huffman_counts']
        fixed_huffman.make_heap_node(counts)
        fixed_huffman.merge_nodes()
        fixed_huffman.encode()
        fixed_huffman.reverse_mapping = {
            v: k
            for k, v in fixed_huffman.codes.items()
        }
        flag = None
        for bit in encoded_text:
            if flag == '0':
                current_code += bit
                if current_code in huffman.reverse_mapping:
                    next_char = huffman.reverse_mapping[current_code]
                    decoded_text += next_char
                    current_code = ''
                    context = context[1:] + [next_char]
                    huffman = tree_from_context(context)
                    flag = None
                continue
            elif flag == '1':
                current_code += bit
                if current_code in fixed_huffman.reverse_mapping:
                    next_char = fixed_huffman.reverse_mapping[current_code]
                    decoded_text += next_char
                    current_code = ''
                    context = context[1:] + [next_char]
                    huffman = tree_from_context(context)
                    flag = None
                continue
            else:
                flag = bit
        # write decompressed file
        with open(filename_split[0] + "_decompressed.txt", 'w') as f:
            f.writelines(decoded_text)
        print('Decompression Done!')
        end = time.time()
        print(round((end - start), 3), "s")
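make_context_frequency_dict is not shown, but the adaptive Huffman tree only needs a table of next-character probabilities for the current context. A hypothetical sketch of that step; whether RNNModel already applies log_softmax, and the exact output shape, depend on the model definition, so the softmax here is an assumption:

import torch

def context_char_probs(context, model, dictionary, device):
    # Encode the context as a (seq_len, batch=1) LongTensor of character indices.
    idxs = torch.tensor([[dictionary.word2idx[ch]] for ch in context],
                        dtype=torch.long, device=device)
    hidden = model.init_hidden(1)
    with torch.no_grad():
        output, _ = model(idxs, hidden)
    scores = output.view(-1, len(dictionary))[-1]   # scores for the next character
    probs = torch.softmax(scores, dim=-1)
    return {dictionary.idx2word[i]: probs[i].item() for i in range(len(dictionary))}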