Beispiel #1
0
def slave(comm):
    """MPI worker loop: receive flattened controller solutions from the
    master (rank 0), roll them out in the learned world model, and send
    the resulting rewards back.

    Message tags (presumably by convention with the master -- confirm):
      1 -> evaluate a candidate solution via world-model rollouts
      2 -> reply carrying a reward back to the master
      3 -> evaluate the current solution with the full VAE pipeline
    """
    # Initial latent-state statistics for sampling rollout start states.
    mus, logvars = load_init_z()
    vae = VAE()
    vae.load_state_dict(torch.load(cfg.vae_save_ckpt, map_location=lambda storage, loc: storage)['model'])
    model = RNNModel()
    model.load_state_dict(torch.load(cfg.rnn_save_ckpt, map_location=lambda storage, loc: storage)['model'])
    count = 1
    status = MPI.Status()

    # Round-robin GPU assignment over 4 devices. Currently informational
    # only: the .to(device) calls below are commented out, so the models
    # stay on CPU.
    gpuid = comm.rank % 4
    # device = torch.device('cuda:{}'.format(gpuid))
    # vae.to(device)
    # model.to(device)
    print('Worker {} Started, model on GPU {}'.format(comm.rank, gpuid))



    while True:
        # Block until the master sends a solution; the tag selects the action.
        solution = comm.recv(source=0, tag=MPI.ANY_TAG, status=status)
        tag = status.Get_tag()

        if tag == 1:
            print('Worker {} received solution {}'.format(comm.rank, count))
            # Fresh initial latents for each trial of this population member.
            zs = [sample_init_z(mus, logvars) for _ in range(cfg.trials_per_pop)]
            controller = deflatten_controller(solution)
            reward = rollout(model, controller, zs)
            print('Worker {} finished solution {}, reward: mean {} | max {} | min {} | std {}'.format(
                comm.rank, count, reward.mean(), reward.max(), reward.min(), reward.std()))
            comm.send(reward.mean(), dest=0, tag=2)
            count += 1
        elif tag == 3:
            # Evaluation request: score the controller with VAE + world model.
            print('Worker {} evaluate current solution'.format(comm.rank))
            controller = deflatten_controller(solution)
            reward = evaluate(model, vae, controller)
            comm.send(reward, dest=0, tag=2)
Beispiel #2
0
def test_real(epi):
    """Run one episode in the real DoomTakeCover environment with the
    trained VAE + RNN world model + controller, record every frame, and
    write/upload the resulting video.

    NOTE(review): the printed "Real Reward" is the final ``step`` index
    (survival time), not the accumulated env reward -- the per-step
    ``reward`` value is discarded. Presumably intentional for this task;
    confirm.
    """
    vae = VAE()
    vae.load_state_dict(torch.load(cfg.vae_save_ckpt)['model'])

    model = RNNModel()
    model.load_state_dict(torch.load(cfg.rnn_save_ckpt)['model'])

    controller = Controller()
    controller.load_state_dict(torch.load(cfg.ctrl_save_ckpt)['model'])

    env = DoomTakeCover(True)
    obs = env.reset()
    model.reset()
    frames = []
    for step in range(cfg.max_steps):
        # Keep a resized copy of the raw observation for the output video.
        frames.append(cv2.resize(obs, (256, 256)))
        # HWC uint8 frame -> NCHW float in [0, 1] for the VAE.
        obs = torch.from_numpy(obs.transpose(2, 0,
                                             1)).unsqueeze(0).float() / 255.0
        mu, logvar, _, z = vae(obs)

        # Controller input: RNN hidden/cell state plus the current latent.
        inp = torch.cat((model.hx.detach(), model.cx.detach(), z), dim=1)
        y = controller(inp)
        y = y.item()
        action = encode_action(y)

        # Advance the world model alongside the real environment.
        model.step(z.unsqueeze(0), action.unsqueeze(0))
        obs_next, reward, done, _ = env.step(action.item())
        obs = obs_next
        if done:
            break
    print('Episode {}: Real Reward {}'.format(epi, step))
    write_video(frames, 'real_{}.avi'.format(epi), (256, 256))
    os.system('mv real_{}.avi /home/bzhou/Dropbox/share'.format(epi))
Beispiel #3
0
def model_load(fn):
    """Load a saved RNN language model plus its criterion and optimizer.

    Reads ``fn + '.opt'`` (a JSON dict of model hyper-parameters) to
    rebuild the architecture, then restores weights from ``fn + '.model'``.

    Returns:
        (model, criterion, optimizer) exactly as stored by training.
    """
    # Use a context manager so the .opt file handle is closed promptly
    # (the original left the handle to the garbage collector).
    with open(fn + '.opt') as f:
        model_opt = json.load(f)
    model = RNNModel(model_opt['rnn_type'], model_opt['ntoken'],
                     model_opt['ninp'], model_opt['nhid'],
                     model_opt['nlayers'], model_opt['dropout'],
                     model_opt['dropouth'], model_opt['dropouti'],
                     model_opt['dropoute'], model_opt['wdrop'],
                     model_opt['tie_weights'])
    with open(fn + '.model', 'rb') as f:
        model_dict, criterion, optimizer = torch.load(f)
        model.load_state_dict(model_dict)
    return model, criterion, optimizer
Beispiel #4
0
def generate_flow(epoch=3):
    """Load the model checkpoint saved at `epoch` and generate new text.

    (Docstring translated from Chinese: "read the stored model, generate
    new words".)
    """
    corpus = Corpus(train_dir)
    config = Config()
    # Vocabulary size depends on the corpus, so set it after loading.
    config.vocab_size = len(corpus.dictionary)

    model = RNNModel(config)
    model_file = os.path.join(save_dir, model_name.format(epoch))
    assert os.path.exists(model_file), 'File %s does not exist.' % model_file
    # map_location forces CPU loading regardless of the training device.
    model.load_state_dict(
        torch.load(model_file, map_location=lambda storage, loc: storage))

    word_list = generate(model, corpus.dictionary.idx2word, word_len=50)
    print(''.join(word_list))
Beispiel #5
0
def main():
    """Entry point: parse CLI args, build a GRU language model, then
    optionally train it and report test-set perplexity."""
    parse = argparse.ArgumentParser()

    parse.add_argument("--batch_size", default=16, type=int)
    # NOTE(review): store_true combined with default=True means neither of
    # these flags can ever be disabled from the command line; preserved
    # as-is for backward compatibility.
    parse.add_argument("--do_train",
                       default=True,
                       action="store_true",
                       help="Whether to run training.")
    parse.add_argument("--do_eval",
                       default=True,
                       action="store_true",
                       help="Whether to run evaluation.")  # fixed copy-pasted help text
    # NOTE(review): "--learnning_rate" is misspelled, but it is the public
    # CLI name; renaming it would break existing invocations.
    parse.add_argument("--learnning_rate", default=1e-4, type=float)
    parse.add_argument("--num_epoch", default=5, type=int)
    parse.add_argument("--max_vocab_size", default=50000, type=int)
    parse.add_argument("--embed_size", default=300, type=int)
    parse.add_argument("--warmup_steps",
                       default=0,
                       type=int,
                       help="Linear warmup over warmup_steps.")
    parse.add_argument("--hidden_size", default=1000, type=int)
    parse.add_argument("--num_layers", default=2, type=int)
    parse.add_argument("--GRAD_CLIP", default=1, type=float)
    args = parse.parse_args()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args.device = device

    setseed()

    VOCAB_SIZE, train_iter, dev_iter, test_iter, weight_matrix = preprocess_data(
        args)

    model = RNNModel(weight_matrix, 'GRU', VOCAB_SIZE, args.embed_size,
                     args.hidden_size, args.num_layers)
    model.to(device)

    loss_fn = nn.CrossEntropyLoss()  # cross-entropy loss
    if args.do_train:
        train(args, model, train_iter, dev_iter, loss_fn, VOCAB_SIZE)

    if args.do_eval:
        # Reload the best checkpoint written during training.
        model.load_state_dict(torch.load('lm-best-GRU.th'))
        model.to(device)

        test_loss = evaluate(args, model, test_iter, loss_fn, VOCAB_SIZE)
        LOG_FILE = "language_model_GRU.log"
        with open(LOG_FILE, 'a') as fout:
            fout.write("test perplexity: {} ".format(np.exp(test_loss)))
        print("perplexity: ", np.exp(test_loss))
Beispiel #6
0
def slave(comm):
    """MPI worker: load VAE, RNN world-model, and controller checkpoints,
    run ``cfg.trials_per_pop * 4`` episodes in the real DoomTakeCover
    environment, and send the per-episode rewards (survival steps) to the
    master (rank 0) with tag 1."""
    vae = VAE()
    vae.load_state_dict(
        torch.load(cfg.vae_save_ckpt,
                   map_location=lambda storage, loc: storage)['model'])

    model = RNNModel()
    model.load_state_dict(
        torch.load(cfg.rnn_save_ckpt,
                   map_location=lambda storage, loc: storage)['model'])

    controller = Controller()
    controller.load_state_dict(
        torch.load(cfg.ctrl_save_ckpt,
                   map_location=lambda storage, loc: storage)['model'])

    env = DoomTakeCover(False)

    rewards = []
    for epi in range(cfg.trials_per_pop * 4):
        obs = env.reset()
        model.reset()
        for step in range(cfg.max_steps):
            # HWC uint8 frame -> NCHW float in [0, 1] for the VAE.
            obs = torch.from_numpy(obs.transpose(
                2, 0, 1)).unsqueeze(0).float() / 255.0
            mu, logvar, _, z = vae(obs)

            # Controller sees RNN hidden/cell state plus the latent code.
            inp = torch.cat((model.hx.detach(), model.cx.detach(), z), dim=1)
            y = controller(inp)
            y = y.item()
            action = encode_action(y)

            model.step(z.unsqueeze(0), action.unsqueeze(0))
            obs_next, reward, done, _ = env.step(action.item())
            obs = obs_next
            if done:
                break
        # Reward for this task is survival time (final step index).
        rewards.append(step)
        # Fixed typo in log message: 'Workder' -> 'Worker'.
        print('Worker {} got reward {} at epi {}'.format(
            comm.rank, step, epi))
    rewards = np.array(rewards)
    comm.send(rewards, dest=0, tag=1)
    print('Worker {} sent rewards to master'.format(comm.rank))
Beispiel #7
0
def load_model_corpora(checkpoint):
    """ Load the model the checkpoint pointed at by `checkpoint' is for and the
        corpora indicated in the arguments within the checkpoint.

    Returns:
        (model, corpora) -- `corpora` is None unless args.load was set.

    Raises:
        RuntimeError: if the checkpoint lacks the [args, params] entries.
    """
    try:
        checkpoint = load_checkpoint(checkpoint)
        args = checkpoint['args']
        params = checkpoint['params']
    except Exception as e:
        # Fixed typo in user-facing message: 'ocurred' -> 'occurred'.
        print('The following exception occurred:')
        print(e)
        raise RuntimeError('The first object in checkpoint must be a '
                           'dictionary containing at least [args,params].')
    # Use the arguments to create a model that is the same as the one we have
    # the parameters for.
    if args.load:
        # NOTE(review): pickle.load can execute arbitrary code on load --
        # only use with trusted checkpoint files.
        with open(args.load, 'rb') as f:
            stored_dict = pickle.load(f)
        corpora = Corpus(args.corpus,
                         load=True,
                         vocab=stored_dict['vocabulary'],
                         vectors=stored_dict['vectors'])
    else:
        # I never do load = False.
        corpora = None
    # Older checkpoints predate the `old_model` flag; default it off.
    if not hasattr(args, 'old_model'):
        args.old_model = False
    if args.old_model:
        model = old_model('LSTM', len(corpora.vocab), args.encoder_size,
                          args.hidden_size, args.layers, args.dropout)
    else:
        encoder = Encoder(50, len(corpora.vocab), corpora.vectors)
        model = RNNModel(encoder.encoding_size,
                         args.hidden_size,
                         len(corpora.vocab),
                         args.layers,
                         encoder,
                         dropout=args.dropout)
    # load the parameters from checkpoint
    model.load_state_dict(params)
    return model, corpora
Beispiel #8
0
def model_fn(model_dir):
    """Load the PyTorch model from the `model_dir` directory."""
    print("Loading model.")

    # Hyper-parameters saved alongside the weights at training time.
    info_path = os.path.join(model_dir, 'model_info.pth')
    with open(info_path, 'rb') as fh:
        model_info = torch.load(fh)

    print("model_info: {}".format(model_info))

    # Rebuild the network and pick whichever device is available.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = RNNModel(model_info['vocab_size'], model_info['embedding_dim'],
                     model_info['hidden_dim'], model_info['n_layers'],
                     model_info['drop_rate'])

    # Restore the trained parameters (always deserialize onto CPU first).
    with open(os.path.join(model_dir, 'model.pth'), 'rb') as fh:
        state = torch.load(fh, map_location=lambda storage, loc: storage)
        model.load_state_dict(state)

    # Attach the character <-> integer vocabularies to the model object.
    with open(os.path.join(model_dir, 'char_dict.pkl'), 'rb') as fh:
        model.char2int = pickle.load(fh)
    with open(os.path.join(model_dir, 'int_dict.pkl'), 'rb') as fh:
        model.int2char = pickle.load(fh)

    model.to(device).eval()

    print("Done loading model.")
    return model
def main():
    """Train a word-level RNN/LSTM language model with torchtext data
    pipelines, resuming from --checkpoint when given, keeping the best
    validation model, and finally reporting test-set perplexity."""
    # Add ckp
    parser = argparse.ArgumentParser(
        description='PyTorch PennTreeBank RNN/LSTM Language Model')
    parser.add_argument(
        '--data',
        type=str,
        default='/input',  # /input
        help='location of the data corpus')
    parser.add_argument('--checkpoint',
                        type=str,
                        default='',
                        help='model checkpoint to use')
    parser.add_argument(
        '--model',
        type=str,
        default='LSTM',
        help='type of recurrent net (RNN_TANH, RNN_RELU, LSTM, GRU)')
    parser.add_argument('--emsize',
                        type=int,
                        default=200,
                        help='size of word embeddings')
    parser.add_argument('--nhid',
                        type=int,
                        default=200,
                        help='number of hidden units per layer')
    parser.add_argument('--nlayers',
                        type=int,
                        default=2,
                        help='number of layers')
    parser.add_argument('--lr',
                        type=float,
                        default=0.001,
                        help='initial learning rate')
    parser.add_argument('--clip',
                        type=float,
                        default=0.25,
                        help='gradient clipping')
    parser.add_argument('--epochs',
                        type=int,
                        default=40,
                        help='upper epoch limit')
    parser.add_argument('--batch_size',
                        type=int,
                        default=256,
                        metavar='N',
                        help='batch size')
    parser.add_argument('--dropout',
                        type=float,
                        default=0.2,
                        help='dropout applied to layers (0 = no dropout)')
    parser.add_argument('--tied',
                        action='store_true',
                        help='tie the word embedding and softmax weights')
    parser.add_argument('--seed', type=int, default=1111, help='random seed')
    parser.add_argument('--cuda', action='store_true', help='use CUDA')
    parser.add_argument('--log-interval',
                        type=int,
                        default=200,
                        metavar='N',
                        help='report interval')
    parser.add_argument(
        '--save',
        type=str,
        default='/output/model.pt',  # /output
        help='path to save the final model')
    args = parser.parse_args()

    # Set the random seed manually for reproducibility.
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        if not args.cuda:
            print(
                "WARNING: You have a CUDA device, so you should probably run with --cuda"
            )
        else:
            torch.cuda.manual_seed(args.seed)

    # Load checkpoint (the `field` carries vocab + tokenization state).
    build_vocab = False
    if args.checkpoint != '' and os.path.exists(args.checkpoint):
        print(f'Loading field from {args.checkpoint}')
        save_dict = torch.load(args.checkpoint)
        field = save_dict['field']
        start_epoch = save_dict['start_epoch']
    else:
        save_dict = None
        field = Field(tokenize=split_tokenize, init_token='<init>')
        build_vocab = True
        start_epoch = 0

    ###############################################################################
    # Load data
    ###############################################################################

    train_data, val_data, test_data = TabularDataset.splits(
        path=args.data,
        train='train.txt',
        validation='valid.txt',
        test='test.txt',
        format='tsv',
        fields=[('text', field)])
    print(train_data, len(train_data), val_data, len(val_data), test_data,
          len(test_data))
    if build_vocab:
        # Temporarily set eos_token so '<eos>' lands in the vocabulary,
        # without the field appending it to every numericalized example.
        field.eos_token = '<eos>'
        field.build_vocab(train_data, val_data, min_freq=1000)
        field.eos_token = None
    eos_id = field.vocab.stoi['<eos>']
    pad_id = field.vocab.stoi[field.pad_token]

    train_iter = BucketIterator(train_data,
                                args.batch_size,
                                train=True,
                                repeat=False,
                                device='cuda:0' if args.cuda else 'cpu:0')
    val_iter = Iterator(val_data,
                        args.batch_size,
                        repeat=False,
                        device='cuda:0' if args.cuda else 'cpu:0')
    test_iter = Iterator(test_data,
                         args.batch_size,
                         repeat=False,
                         device='cuda:0' if args.cuda else 'cpu:0')
    print(train_iter, len(train_iter), val_iter, len(val_iter), test_iter,
          len(test_iter))

    ###############################################################################
    # Build the model
    ###############################################################################

    ntokens = len(field.vocab)
    model = RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers,
                     args.dropout, args.tied)

    if save_dict is not None:
        model.load_state_dict(save_dict['model'])

    if args.cuda:
        model.cuda()
    else:
        model.cpu()
    print(model)

    # NOTE(review): the whole optimizer object is (de)serialized here, not
    # just its state_dict -- fragile across PyTorch versions, but kept for
    # compatibility with existing checkpoints.
    if save_dict:
        opt = save_dict['optimizer']
    else:
        opt = torch.optim.Adam(model.parameters(), lr=args.lr)

    if args.checkpoint:
        torch.save(
            dict(field=field,
                 model=model.state_dict(),
                 optimizer=opt,
                 start_epoch=start_epoch), args.checkpoint)

    ###############################################################################
    # Training code
    ###############################################################################

    criterion = torch.nn.CrossEntropyLoss(ignore_index=pad_id)

    def make_target(text):
        """Shift the (seq, batch) token matrix left one step, append <eos>."""
        batch_size = text.size()[1]
        eos_vector = torch.full((1, batch_size),
                                eos_id,
                                dtype=text.dtype,
                                device='cuda:0' if args.cuda else 'cpu:0')
        target = torch.cat((text[1:], eos_vector), dim=0)
        return target

    def compute_loss(output, text):
        """Cross-entropy of flattened logits against the shifted text."""
        output_flat = output.view(-1, ntokens)
        target = make_target(text)
        target_flat = target.view(-1)

        return criterion(output_flat, target_flat)

    def evaluate(data_source):
        """Return the mean batch loss over `data_source` (no grad/dropout)."""
        # Turn on evaluation mode which disables dropout.
        with torch.no_grad():
            model.eval()
            total_loss = 0
            for batch in data_source:
                output, hidden = model(batch.text)
                loss = compute_loss(output, batch.text)

                total_loss += loss.item()
            return total_loss / len(data_source)

    def train():
        """Run one epoch over train_iter with gradient clipping."""
        # Turn on training mode which enables dropout.
        model.train()
        total_loss = 0
        start_time = time.time()
        for i, batch in enumerate(train_iter):
            model.zero_grad()

            output, hidden = model(batch.text)
            # (Removed a dead `target = make_target(batch.text)` assignment;
            # compute_loss builds the target itself.)
            loss = compute_loss(output, batch.text)
            loss.backward()

            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
            opt.step()

            total_loss += loss.item()

            if i % args.log_interval == 0 and i > 0:
                cur_loss = total_loss / args.log_interval
                elapsed = time.time() - start_time
                print(
                    '| epoch {:3d} | {:5d}/{:5d} batches | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                        epoch, i, len(train_iter),
                        elapsed * 1000 / args.log_interval, cur_loss,
                        math.exp(cur_loss)))
                total_loss = 0
                start_time = time.time()

    # Loop over epochs.
    best_val_loss = None

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        for epoch in range(start_epoch, args.epochs):
            epoch_start_time = time.time()
            train()
            val_loss = evaluate(val_iter)
            print('-' * 89)
            print(
                '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                'valid ppl {:8.2f}'.format(epoch,
                                           (time.time() - epoch_start_time),
                                           val_loss, math.exp(val_loss)))
            print('-' * 89)
            # Save the model if the validation loss is the best we've seen so far.
            if not best_val_loss or val_loss < best_val_loss:
                if args.checkpoint:
                    torch.save(
                        dict(field=field,
                             model=model.state_dict(),
                             optimizer=opt,
                             start_epoch=epoch), args.checkpoint)
                best_val_loss = val_loss
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')

    # Final export: vocab + weights + architecture settings for inference.
    torch.save(
        dict(vocab=field.vocab.itos,
             model=model.state_dict(),
             settings=dict(rnn_type=args.model,
                           emsize=args.emsize,
                           nhid=args.nhid,
                           nlayers=args.nlayers)), args.save)

    # Load the best saved model.
    #with open(args.save, 'rb') as f:
    #    save_dict = torch.load(f)
    #    field = save_dict['field']
    #    if save_dict is not None:
    #        model.load_state_dict(save_dict['model'])
    #
    #    if args.cuda:
    #        model.cuda()
    #    else:
    #        model.cpu()

    # Run on test data.
    test_loss = evaluate(test_iter)
    print('=' * 89)
    print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(
        test_loss, math.exp(test_loss)))
    print('=' * 89)
Beispiel #10
0
from flask import Flask, request
app = Flask(__name__)
# Fix: `pickle` is used below but was never imported (NameError at startup).
import pickle
import torch

from model import RNNModel, embed_size, hidden_dims, generate_poetry

# Vocabulary mappings saved at training time.
with open("word2ix.pkl", 'rb') as outfile:
    word2ix = pickle.load(outfile)
with open("ix2word.pkl", 'rb') as outfile:
    ix2word = pickle.load(outfile)

# Inference runs on CPU only.
device = torch.device('cpu')
model = RNNModel(len(word2ix), embed_size, hidden_dims)
init_checkpoint = 'model.bin'
model.load_state_dict(torch.load(init_checkpoint, map_location='cpu'))


@app.route('/')
def hello():
    """Root endpoint: trivial liveness check."""
    greeting = 'Hello World!'
    return greeting


@app.route('/peom')
def predict():
    """Generate a poem seeded by the `text` query parameter.

    (Removed unreachable leftover lines after the return -- an
    `args = parser.parse_args(); return args` fragment referencing an
    undefined `parser`, evidently spliced in from another snippet.)
    """
    begin_word = request.args.get('text', '')
    ret = generate_poetry(model, word2ix, ix2word, device, begin_word)

    return ret


# Python 2 script fragment: greedy-decode 50 test sequences with a trained
# seq2seq RNN and print the source and reference strings for inspection.
cl_args = parse_args()
dataset = Corpus()
dataset.process_data()
# Special-token ids delimiting decoding.
sos = dataset.target_dict.word2idx['<sos>']
eos = dataset.target_dict.word2idx['<eos>']
# Training-time hyper-parameters were stored via np.save; .tolist()
# recovers the original Python object.
args = np.load(os.path.join(cl_args.load_path, 'args.npy')).tolist()

model = RNNModel(args).cuda()
model.eval()
if cl_args.load_path:
    file = os.path.join(cl_args.load_path, 'model.pt')
    model.load_state_dict(torch.load(file))

itr = dataset.create_epoch_iterator('test', 1)
for i in xrange(50):
    source, target = itr.next()
    output = model.sample(source, sos, eos)

    print "Source: ", ''.join([
        dataset.source_dict.idx2word[x]
        for x in source.cpu().data.numpy()[:, 0]
    ])

    print "Original: ", ''.join([
        dataset.target_dict.idx2word[x]
        for x in target.cpu().data.numpy()[:, 0]
    ])
Beispiel #12
0
# Script fragment: prepare data, loss, model, and optimizer for training.
dataset.process_data()

# Vocabulary sizes are only known after the corpus has been processed.
cf.ntokens_source = len(dataset.source_dict)
cf.ntokens_target = len(dataset.target_dict)

if not os.path.exists(args.save_path):
    os.makedirs(args.save_path)

# Padding positions are excluded from the loss.
criterion = nn.CrossEntropyLoss(
    ignore_index=dataset.target_dict.word2idx['<pad>'])

model = RNNModel(cf).cuda()
optimizer = torch.optim.Adam(model.parameters(), weight_decay=1e-4)

# Optionally resume from a previously saved model.
if args.load_path:
    model.load_state_dict(torch.load(os.path.join(args.load_path, 'model.pt')))


def loop(which_set, lr=None):
    if which_set is 'train':
        model.train()
    else:
        model.eval()

    total_loss = 0
    total_acc = 0.
    total_length = 0

    start_time = time.time()
    itr = dataset.create_epoch_iterator(which_set, cf.batch_size)
    for i, (source, lengths, target) in enumerate(itr):
Beispiel #13
0
from model import RNNModel
import sys
import pickle

if __name__ == '__main__':
    # Normalisation constants computed at training time. Use a context
    # manager so the handle is closed (the original never closed it).
    with open('norm.pkl', 'rb') as f:
        norm = pickle.load(f)
    x_test, test_id = load_test_data('mfcc', sys.argv[1])
    x_test = x_test / norm
    output = []

    test_loader = DataLoader(dataset=TensorDataset(torch.FloatTensor(x_test),
                                                   torch.FloatTensor(x_test)),
                             batch_size=64,
                             shuffle=False,
                             num_workers=4)

    model = RNNModel(41, 40).cuda()
    model.load_state_dict(torch.load('model/model_rnn.pt'))
    model.eval()

    for i, (x, _) in enumerate(test_loader):
        # Greedy per-frame prediction, then collapse to a phone sequence.
        y = model(Variable(x).cuda())
        y = torch.max(y, -1)[1].data.cpu().numpy()
        y = encode(y)
        output += y

    # Write the CSV and close the handle deterministically (the original
    # opened sys.argv[2] and never closed it).
    with open(sys.argv[2], 'w') as output_file:
        print('id,phone_sequence', file=output_file)
        for i in range(len(output)):
            print('{},{}'.format(test_id[i], output[i]), file=output_file)
Beispiel #14
0
            print(
                'Epoch: {:3d} | {:5d}/{:5d} batches | lr {:.6f} | ms/batch {:5.2f} | loss {:5.2f} | ppl {:8.2f}'
                .format(epoch, batch, batch_len, scheduler.lr,
                        elapsed * 1000 / interval, loss.item(),
                        math.exp(loss.item())))
            start_time = time.time()
            sys.stdout.flush()
        #if (batch+1)/10>=1:
        #    break


# Load the saved model.
if args.save and os.path.isfile(args.save):
    print("Loading Saved Model")
    with open(args.save, 'rb') as f:
        net.load_state_dict(torch.load(f))
        # Re-compact the RNN weights after loading so cuDNN can use one
        # contiguous buffer (avoids the non-contiguous-weights warning).
        net.rnn.flatten_parameters()
else:
    print("Random Initialization - No Saved Model")

# At any point you can hit Ctrl + C to break out of training early.
try:
    for epoch in range(1, args.epochs + 1):
        epoch_start_time = time.time()
        train()
        if args.save:
            with open(args.save, 'wb') as f:
                torch.save(net.state_dict(), f)

#        test_loader = test_corpus.batch_generator(seq_length=1, batch_size=1, shuffle=False)
#        val_loss = evaluate(test_corpus, test_loader)
Beispiel #15
0
def train():
    """Train the RNN language model.

    Loads the corpus, optionally restores a checkpoint (args.load), then
    runs ``config.num_epochs`` epochs of truncated-BPTT with a manual SGD
    update, saving the weights and logging a generated sample each epoch.
    (Comments translated from Chinese.)
    """
    # Load data and configure the model.
    logger.info("Loading data...")
    corpus = Corpus(train_dir)
    logger.info(corpus)

    config = Config()
    config.vocab_size = len(corpus.dictionary)
    train_data = batchify(corpus.train, config.batch_size)
    train_len = train_data.size(0)
    seq_len = config.seq_len

    logger.info("Configuring model...")
    model = RNNModel(config)
    if use_cuda:
        model.cuda()
    logger.info(model)
    if args.load != 0:
        model_file = os.path.join(save_dir, model_name.format(args.load))
        assert os.path.exists(
            model_file), 'File %s does not exist.' % model_file
        model.load_state_dict(
            torch.load(model_file, map_location=lambda storage, loc: storage))
        logger.info("Loaded model file %s" % model_file)
    else:
        logger.info("No loaded model file...")

    criterion = nn.CrossEntropyLoss()
    lr = config.learning_rate  # initial learning rate
    start_time = time.time()

    logger.info("Training and generating...")
    for epoch in range(1, config.num_epochs + 1):  # multi-epoch training
        total_loss = 0.0
        model.train()  # dropout is only active in training mode
        hidden = model.init_hidden(config.batch_size)  # fresh hidden state

        for ibatch, i in enumerate(range(0, train_len - 1, seq_len)):
            data, targets = get_batch(train_data, i, seq_len)  # one batch
            # Detach the hidden state from the graph built so far; otherwise
            # backprop would reach all the way back to the start of the data.
            logger.info("batch %s starting..." % ibatch)
            hidden = repackage_hidden(hidden)
            model.zero_grad()

            output, hidden = model(data, hidden)
            loss = criterion(output.view(-1, config.vocab_size), targets)
            loss.backward()  # backpropagation

            # clip_grad_norm_ helps prevent exploding gradients in RNNs/LSTMs.
            # (Replaces the deprecated clip_grad_norm, matching the usage
            # elsewhere in this file.)
            torch.nn.utils.clip_grad_norm_(model.parameters(), config.clip)
            for p in model.parameters():  # manual SGD weight update
                # Modern signature for the deprecated add_(-lr, grad).
                p.data.add_(p.grad.data, alpha=-lr)

            # Accumulate as a Python float: the old `loss.data` +
            # `total_loss[0]` pattern fails on 0-dim tensors (torch >= 0.4).
            total_loss += loss.item()

            if ibatch % config.log_interval == 0 and ibatch > 0:  # periodic status
                cur_loss = total_loss / config.log_interval
                elapsed = get_time_dif(start_time)
                logger.info(
                    "Epoch {:3d}, {:5d}/{:5d} batches, lr {:2.3f}, loss {:5.2f}, ppl {:8.2f}, time {}"
                    .format(epoch, ibatch, train_len // seq_len, lr, cur_loss,
                            math.exp(cur_loss), elapsed))
                total_loss = 0.0
        lr /= 4.0  # anneal the learning rate after each epoch

        # Save the model parameters every epoch.
        #if epoch % config.save_interval == 0:
        torch.save(model.state_dict(),
                   os.path.join(save_dir, model_name.format(epoch)))

        logger.info(''.join(
            generate(model, corpus.dictionary.VocabID_to_vocab,
                     len(corpus.dictionary))))
                       hidden_size=args.hiddensz,
                       num_layers=args.numlayers,
                       dropout=args.dropout,
                       padid=padid,
                       tieweights=args.tieweights)
else:
    LMModel_start = torch.load(args.start_model).cpu()
    # Note: watch out if the model class has different methods from the loaded one to start with !!!
    LMModel = RNNModel(vocab_size=vocab_size,
                       embed_size=args.embedsz,
                       hidden_size=args.hiddensz,
                       num_layers=args.numlayers,
                       dropout=args.dropout,
                       padid=padid,
                       tieweights=args.tieweights)
    LMModel.load_state_dict(LMModel_start.state_dict())

# LMModel = torch.load(args.save).cpu()

# Report the total parameter count. (Fixed typo in the logged message:
# 'tatal' -> 'total'.)
model_size = sum(p.nelement() for p in LMModel.parameters())
logging('-' * 30, f_log=f_log)
logging(f'Model total parameters: {model_size}', f_log=f_log)
logging('-' * 30, f_log=f_log)

# print('-' * 30)
# print(f'Model tatal parameters: {model_size}')
# print('-' * 30)

# Fixed identity comparison with a string literal (`is not 'cpu'`): `is`
# tests object identity, which is implementation-dependent for strings and
# a SyntaxWarning on Python >= 3.8; `!=` compares values as intended.
if torch.cuda.is_available() and cuda_device != 'cpu':
    LMModel = LMModel.cuda(cuda_device)
Beispiel #17
0
def test_rnn(epi):
    """Roll out one episode entirely inside the learned RNN world model.

    Loads the trained VAE, RNN (MDN-RNN) and controller checkpoints, then
    "dreams" a trajectory: the controller picks actions from the RNN state,
    the RNN predicts the next latent z, and the VAE decodes each latent to
    a frame for visualisation. The resulting frames are written to a video
    file named after the episode index.

    Args:
        epi: episode index, used only for logging and the output filename.
    """
    mus, logvars = load_init_z()

    vae = VAE()
    vae.load_state_dict(torch.load(cfg.vae_save_ckpt)['model'])

    model = RNNModel()
    model.load_state_dict(torch.load(cfg.rnn_save_ckpt)['model'])

    controller = Controller()
    controller.load_state_dict(torch.load(cfg.ctrl_save_ckpt)['model'])

    model.reset()
    # Initial latent sampled from the stored per-dimension (mu, logvar) stats.
    z = sample_init_z(mus, logvars)
    frames = []

    for step in range(cfg.max_steps):
        z = torch.from_numpy(z).float().unsqueeze(0)
        # Decode the current latent to an image purely for visualisation.
        curr_frame = vae.decode(z).detach().numpy()

        # NCHW -> HWC, scale from [0, 1] to [0, 255] for video output.
        frames.append(curr_frame.transpose(0, 2, 3, 1)[0] * 255.0)
        # cv2.imshow('game', frames[-1])
        # k = cv2.waitKey(33)

        # Controller input: RNN hidden and cell states concatenated with z.
        inp = torch.cat((model.hx.detach(), model.cx.detach(), z), dim=1)
        y = controller(inp)
        y = y.item()
        # NOTE(review): assumes encode_action returns a tensor (unsqueeze is
        # called on it below) — confirm against its definition.
        action = encode_action(y)

        # One world-model step: mixture-density params for the next latent
        # plus a "done" probability.
        logmix, mu, logstd, done_p = model.step(z.unsqueeze(0),
                                                action.unsqueeze(0))

        # Normalise mixture log-weights via a numerically stable logsumexp.
        # logmix = logmix - reduce_logsumexp(logmix)
        logmix_max = logmix.max(dim=1, keepdim=True)[0]
        logmix_reduce_logsumexp = (logmix - logmix_max).exp().sum(
            dim=1, keepdim=True).log() + logmix_max
        logmix = logmix - logmix_reduce_logsumexp

        # Adjust temperature
        logmix = logmix / cfg.temperature
        logmix -= logmix.max(dim=1, keepdim=True)[0]
        logmix = F.softmax(logmix, dim=1)

        # Sample one mixture component per latent dimension.
        m = Categorical(logmix)
        idx = m.sample()

        # Gather the chosen component's mean/log-std for each dimension.
        new_mu = torch.FloatTensor([mu[i, j] for i, j in enumerate(idx)])
        new_logstd = torch.FloatTensor(
            [logstd[i, j] for i, j in enumerate(idx)])
        # Sample the next latent; sqrt(temperature) scales exploration noise.
        z_next = new_mu + new_logstd.exp() * torch.randn_like(
            new_mu) * np.sqrt(cfg.temperature)

        z = z_next.detach().numpy()
        # Stop when the model predicts the episode has ended.
        if done_p.squeeze().item() > 0:
            break

    frames = [cv2.resize(frame, (256, 256)) for frame in frames]

    # Reward is reported as survival steps — presumably the env's reward is
    # time survived (DoomTakeCover-style); confirm against the real env.
    print('Episode {}: RNN Reward {}'.format(epi, step))
    write_video(frames, 'rnn_{}.avi'.format(epi), (256, 256))
    os.system('mv rnn_{}.avi /home/bzhou/Dropbox/share'.format(epi))
Beispiel #18
0
    def decompress(self, compressedfile):
        """Decompress a file produced by the matching LM-driven compressor.

        The checkpoint bundles the compressed byte stream, the vocabulary
        mappings, the language-model weights, and the counts for a fixed
        (fallback) Huffman code. Each encoded symbol is preceded by one flag
        bit: '0' means the symbol was coded with the adaptive, context-
        predicted Huffman tree; '1' means the fixed fallback tree was used.
        The decoded text is written to "<prefix>_decompressed.txt".

        Args:
            compressedfile: path to the compressed checkpoint; the text
                before the first '_' is used as the output file prefix.
        """
        start = time.time()
        filename_split = compressedfile.split('_')
        checkpoint = torch.load(compressedfile, map_location=self.device)
        body = checkpoint['bytes']
        # Rebuild the vocabulary exactly as it was at compression time.
        dictionary = Dictionary()
        dictionary.word2idx = checkpoint['word2idx']
        dictionary.idx2word = checkpoint['idx2word']
        context_map = Context(dictionary)
        ntokens = len(dictionary)
        # Model hyperparameters are hard-coded; they must match the ones used
        # during compression or the predicted trees will differ.
        model = RNNModel('LSTM',
                         ntokens,
                         200,
                         200,
                         2,
                         dropout=0.2,
                         tie_weights=False)
        model.load_state_dict(checkpoint['model_state_dict'])
        model.to(self.device)
        model.eval()
        # Expand the stored bytes back into a bit string.
        bit_string = ''
        join_body = list(body)
        for i in join_body:
            bit_string += "{0:08b}".format(i)
        encoded_text = self.remove_padding(bit_string)
        # decompress start here
        current_code = ''
        decoded_text = ''
        # we define an initial context
        # then we predict the initial huffman tree
        # read bits until we get to a leaf
        # convert the leaf to a char and add it to decompressed text
        # update the context and repeat the process
        context = ['<s>'] * 10

        def tree_from_context(context):
            # Build the adaptive Huffman tree from the LM's next-symbol
            # probabilities for the current 10-symbol context.
            huffman = HuffmanCoding()
            prob = huffman.make_context_frequency_dict(
                context,
                model,
                context_map,
                self.device,
                threshold=self.args.threshold)
            huffman.make_heap_node(prob)
            huffman.merge_nodes()
            huffman.encode()
            huffman.reverse_mapping = {v: k for k, v in huffman.codes.items()}
            return huffman

        huffman = tree_from_context(context)
        # Fixed fallback tree built from global counts saved at compression
        # time; used when the adaptive tree did not cover the symbol.
        fixed_huffman = HuffmanCoding()
        counts = checkpoint['fixed_huffman_counts']
        fixed_huffman.make_heap_node(counts)
        fixed_huffman.merge_nodes()
        fixed_huffman.encode()
        fixed_huffman.reverse_mapping = {
            v: k
            for k, v in fixed_huffman.codes.items()
        }
        # flag is None while waiting for the next symbol's tree-selector bit.
        flag = None
        for bit in encoded_text:
            if flag == '0':
                # Adaptive tree: accumulate bits until a codeword matches.
                current_code += bit
                if current_code in huffman.reverse_mapping:
                    next_char = huffman.reverse_mapping[current_code]
                    decoded_text += next_char
                    current_code = ''
                    # Slide the context window and rebuild the adaptive tree.
                    context = context[1:] + [next_char]
                    huffman = tree_from_context(context)
                    flag = None
                continue
            elif flag == '1':
                # Fixed fallback tree: same accumulation, different codebook.
                current_code += bit
                if current_code in fixed_huffman.reverse_mapping:
                    next_char = fixed_huffman.reverse_mapping[current_code]
                    decoded_text += next_char
                    current_code = ''
                    context = context[1:] + [next_char]
                    huffman = tree_from_context(context)
                    flag = None
                continue
            else:
                # First bit of a symbol selects which tree decodes it.
                flag = bit
        # write decompressed file
        with open(filename_split[0] + "_decompressed.txt", 'w') as f:
            f.writelines(decoded_text)
        print('Decompression Done!')
        end = time.time()
        print(round((end - start), 3), "s")
Beispiel #19
0
# Configure logging: stream to the console with --stdout, otherwise write to
# train.log inside the output directory.
if args.stdout:
    logging.basicConfig(format='%(asctime)s: %(message)s',
                        datefmt='%H:%M:%S',
                        level=logging.INFO)
else:
    logging.basicConfig(format='%(asctime)s: %(message)s',
                        datefmt='%H:%M:%S',
                        filename=os.path.join(args.out, 'train.log'),
                        level=logging.INFO)
tb.configure(args.out)
# Seed every RNG source for reproducible runs.
random.seed(1024)
torch.manual_seed(1024)
torch.cuda.manual_seed_all(1024)

# 123 input features, 62 output classes, 250 hidden units, 3 layers.
model = RNNModel(123, 62, 250, 3, args.dropout, bidirectional=args.bi)
if args.init:
    model.load_state_dict(torch.load(args.init))
else:
    for param in model.parameters():
        # uniform_ is the in-place initializer; the non-underscore
        # torch.nn.init.uniform alias is deprecated and removed in
        # modern PyTorch.
        torch.nn.init.uniform_(param, -0.1, 0.1)
if args.cuda:
    model.cuda()

optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=.9)
criterion = CTCLoss()

# data set
trainset = SequentialLoader('train', args.batch_size)
devset = SequentialLoader('dev', args.batch_size)

# Running counters for train / cross-validation iterations.
tri = cvi = 0

Beispiel #20
0
        end = end + l
        loss.append(torch.min(losses[start:end]))
        start = start + l
    return loss


# Load the tokenizer saved at training time; its vocabulary defines the
# model's input/output dimension.
with open('./tokenizer.pkl', 'rb') as f:
    tokenizer = pickle.load(f)
ntokens = tokenizer.vocab_size
# Language model with 100-dim embedding/hidden size; dropout disabled
# because the model is only used for scoring (eval mode below).
model = RNNModel(ntokens,
                 100,
                 100,
                 dropout=0.0,
                 pad_token=tokenizer.term2id['PAD'])
model = model.cuda()
model.load_state_dict(torch.load('./model.pt'))
model = model.eval()
# reduction='none' keeps per-token losses so sequences can be scored
# individually downstream.
loss_fn = nn.CrossEntropyLoss(reduction='none')

# Cache mapping term sets to previously selected paths; used_cache counts
# cache hits.
original_termset2select_path = {}
used_cache = 0

# Accumulates the scored results, periodically dumped to JSON.
result = {}
for count, (story_id, img_ids) in enumerate(story_id2image_ids.items()):
    if count % 100 == 1:
        with open('./vist_scored_terms_6_path.json', 'w') as f:
            print('saving until', count)
            json.dump(result, f, indent=4)
    temp_results = []
    original_termsets = [img2terms[img_id] for img_id in img_ids]
    ppls = []
Beispiel #21
0
                     None,
                     args.rnndrop,
                     args.dropout,
                     reset=args.reset,
                     classnorm=args.classnorm,
                     tied=args.tied)
# Warm-start from a previously trained checkpoint, copying over only the
# parameters whose names exist in the current model.
if args.load_from != '':
    current_state = model.state_dict()
    loaded_state = torch.load(args.load_from).state_dict()
    overlap = {name: tensor
               for name, tensor in loaded_state.items()
               if name in current_state}
    current_state.update(overlap)
    model.load_state_dict(current_state)

# Pick the loss family: NLLLoss expects log-probabilities (class-normalised
# outputs), CrossEntropyLoss expects raw logits. interpCrit keeps per-item
# losses (reduction='none') for interpolation/analysis.
if args.classnorm:
    criterion, interpCrit = nn.NLLLoss(), nn.NLLLoss(reduction='none')
else:
    criterion = nn.CrossEntropyLoss()
    interpCrit = nn.CrossEntropyLoss(reduction='none')
# Attention loss ignores padding (index 0) and keeps per-item values.
att_criterion = nn.CrossEntropyLoss(reduction='none', ignore_index=0)

if args.cuda:
    model.cuda()
###############################################################################
# Training code
###############################################################################