Example #1
def main(argv):
    logging.getLogger("transformers.tokenization_utils").setLevel(
        logging.ERROR)
    FLAGS = flags.FLAGS
    utils.print_flags(FLAGS)

    # Random seed initialization.
    random.seed(FLAGS.random_seed)
    np.random.seed(FLAGS.random_seed)
    torch.manual_seed(FLAGS.random_seed)
    # Configuration and paths.
    cfg = yaml.load(open(FLAGS.config, 'r'), Loader=yaml.BaseLoader)
    PATH_DATA = cfg['path_data']
    PATH_CORPUS = '{}/{}'.format(PATH_DATA, cfg['corpus'])
    PATH_DATA_PREFIX = '{}/{}'.format(PATH_DATA, cfg['data_prefix'])

    # Set up the experimental environment.
    exp = experiment.Experiment(FLAGS, cfg, dumpflag=False)

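    # Change attention window size.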
    for i, layer in enumerate(exp.model.base.encoder.layer):
        layer.attention.self.attention_window = FLAGS.window_size

    # Load the corpus.
    corpus = utils.Corpus(PATH_CORPUS, FLAGS)
    # Load test data.
    test_data = utils.Data(PATH_DATA_PREFIX + 'test', corpus, FLAGS)

    # Evaluate test data.
    test_eval = exp.eval_dump(test_data, FLAGS.num_sample_eval,
                              'Evaluating test queries')
    print('Test Evaluation', test_eval, file=sys.stderr)
Example #2

def main(argv):
    logging.getLogger("transformers.tokenization_utils").setLevel(logging.ERROR)
    FLAGS = flags.FLAGS
    utils.print_flags(FLAGS)

    # Random seed initialization.
    random.seed(FLAGS.random_seed)
    np.random.seed(FLAGS.random_seed)
    torch.manual_seed(FLAGS.random_seed)
    # Configuration and paths.
    cfg = yaml.load(open(FLAGS.config, 'r'), Loader=yaml.BaseLoader)
    PATH_DATA = cfg['path_data'] 
    PATH_CORPUS = '{}/{}'.format(PATH_DATA, cfg['corpus'])
    PATH_DATA_PREFIX = '{}/{}'.format(PATH_DATA, cfg['data_prefix'])
    PATH_MODEL_PREFIX = '{}/{}'.format(cfg['path_model'], FLAGS.model_prefix)
    os.makedirs(PATH_MODEL_PREFIX, exist_ok=True)

    # Set up the experimental environment.
    exp = experiment.Experiment(FLAGS, cfg)
    
    # Change attention window size.
    for i, layer in enumerate(exp.model.base.encoder.layer):
        layer.attention.self.attention_window = FLAGS.window_size

    # Load the corpus.
    corpus = utils.Corpus(PATH_CORPUS, FLAGS)
    # Load train/dev/test data.
    train_data = utils.Data(PATH_DATA_PREFIX + 'train', corpus, FLAGS)
    dev_data = utils.Data(PATH_DATA_PREFIX + 'dev', corpus, FLAGS)
    test_data = utils.Data(PATH_DATA_PREFIX + 'test', corpus, FLAGS)
    
    for epoch in range(FLAGS.last_epoch, FLAGS.num_epochs):
        print('Epoch {}'.format(epoch + 1), file=sys.stderr)
        # Train the model.
        train_loss = exp.train(train_data, 
                eval_data=dev_data,
                test_data=test_data,
                num_sample_eval=FLAGS.num_sample_eval)
        print('Epoch {}, train_loss = {}'.format(
            epoch + 1, train_loss), file=sys.stderr)

        # Dump the model.
        print('Dump model for epoch {}.'.format(epoch + 1))
        exp.dump_model(PATH_MODEL_PREFIX, str(epoch + 1))
 
        # Evaluate test data.
        test_eval = exp.eval_dump(test_data, FLAGS.num_sample_eval,
                'Evaluating test queries')
        print('Test Evaluation', test_eval, file=sys.stderr)

        # Dump tensorboard results.
        if exp.tb:
            exp.tb_writer.add_scalar('Epoch_Eval_cut10/NDCG', test_eval['ndcg10'], epoch + 1)
            exp.tb_writer.add_scalar('Epoch_Eval_cut10/MRR', test_eval['mrr10'],  epoch + 1)
            exp.tb_writer.add_scalar('Epoch_Eval_cut10/MAP', test_eval['map10'], epoch + 1)
            exp.tb_writer.add_scalar('Epoch_Eval_overall/NDCG', test_eval['ndcg'], epoch + 1)
            exp.tb_writer.add_scalar('Epoch_Eval_overall/MRR', test_eval['mrr'], epoch + 1)
            exp.tb_writer.add_scalar('Epoch_Eval_overall/MAP', test_eval['map'], epoch + 1)
Example #3
def gen(args, model=None, max_len=15, top_p=True):
    device = init_device()
    corpus = utils.Corpus(args.data, args.persona_data)
    if model is None:
        vocab = len(corpus.dictionary)
        model = models.CVAE(vocab, args.embedding, args.hidden, args.latent)
        model.load_model()
        model = model.to(device)
    model.eval()

    generated_verses = {}
    for persona in corpus.personas:
        print("Artist {}".format(persona))
        persona_tokens = corpus.personas[persona]
        p_len = torch.tensor([len(persona_tokens)]).long().to(device)
        p = torch.tensor([persona_tokens]).long().to(device)
        # 50 verses per artist, arbitrary
        artist_verses = []
        for _ in range(50):
            generated_verse = []
            ctxt = [1]
            # 16 bars per verse
            for _ in range(16):
                print(ctxt)
                out_sequence = ["S"]
                out_tokens = []
                x_len = torch.tensor([len(ctxt)]).long().to(device)
                x = torch.tensor([ctxt]).long().to(device)
                hidden = model.infer_hidden(x, x_len, p, p_len)
                word = torch.ones([1, 1],
                                  dtype=torch.long,
                                  device=model.device())
                while out_sequence[-1] != "L" and len(out_sequence) < max_len:
                    word = model.embedding(word)
                    outputs, hidden = model.decoder(word, hidden)
                    outputs = F.log_softmax(model.out(outputs),
                                            dim=-1).squeeze()
                    if top_p:
                        outputs = top_p_filtering(outputs).unsqueeze(0)
                    else:
                        outputs = outputs.unsqueeze(0)
                    # Get a random sample from output
                    word = torch.multinomial(F.softmax(outputs, dim=-1), 1)
                    out_tokens.append(word.item())
                    out_sequence.append(
                        corpus.dictionary.idx2word[word.item()])
                ctxt.extend(out_tokens)
                generated_verse.extend(out_sequence)
            artist_verses.append(generated_verse)
        generated_verses[persona] = artist_verses
    with open("verses.json", 'w') as verses_file:
        json.dump(generated_verses, verses_file)
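Example #3 calls a top_p_filtering helper that is not shown on this page. Below is a minimal sketch of nucleus (top-p) filtering, assuming the helper takes a 1-D tensor of (log-)probabilities and masks everything outside the smallest set of tokens whose cumulative probability exceeds top_p; the threshold value and the function body are assumptions, not the repository's implementation.

import torch
import torch.nn.functional as F

def top_p_filtering(logits, top_p=0.9, filter_value=-float('inf')):
    # Sort scores and compute the cumulative probability mass.
    sorted_logits, sorted_indices = torch.sort(logits, descending=True)
    cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
    # Mark tokens that fall outside the nucleus (cumulative probability > top_p).
    sorted_indices_to_remove = cumulative_probs > top_p
    # Shift right so the first token crossing the threshold is still kept.
    sorted_indices_to_remove[1:] = sorted_indices_to_remove[:-1].clone()
    sorted_indices_to_remove[0] = False
    # Mask the removed tokens so multinomial sampling never picks them.
    filtered = logits.clone()
    filtered[sorted_indices[sorted_indices_to_remove]] = filter_value
    return filtered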
Example #4
def perplexity(args, model=None):
    device = init_device()
    corpus = utils.Corpus(args.data, args.persona_data)
    if model is None:
        vocab = len(corpus.dictionary)
        model = models.VAE(vocab, args.embedding, args.hidden, args.latent)
        model.load_model()
        model = model.to(device)
    model.eval()
    valid = utils.load_data(corpus.valid,
                            batch_size=args.batch_size,
                            num_workers=4)
    print("Beginning eval")
    norm_losses = []
    norm_lens = []
    avg_loss = 0
    num_examples = 0
    for x, x_len, p, p_len, y, y_len in valid:
        x, x_len = x.to(device), x_len.to(device)
        y, y_len = y.to(device), y_len.to(device)
        if model.name == "cvae":
            p, p_len = p.to(device), p_len.to(device)
            res = model(x, x_len, p, p_len, y, y_len)
        else:
            res = model(x, x_len, y, y_len)
        pred = res[0]

        eos_tensor = torch.empty(x.shape[0], 1).to(device)
        eos_tensor.fill_(corpus.dictionary.word2idx["L"])
        gold = torch.cat([y, eos_tensor], dim=1).long()
        pred = pred.permute(0, 2, 1)
        BCE = F.nll_loss(pred, gold, reduction="none", ignore_index=0)
        avg_loss += torch.sum(BCE).item()
        # Norm by length
        norm_losses.append(torch.sum(BCE, dim=-1))
        norm_lens.append(y_len)
        num_examples += y.shape[0]
    exp = torch.cat(norm_losses) / torch.cat(norm_lens)
    ppl = torch.mean(torch.exp(exp)).item()
    avg_loss = avg_loss / num_examples

    print(
        "Validation set had a perplexity of {} and an average NLL of {} per example"
        .format(ppl, avg_loss))
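For reference, Example #4 reports two numbers: the perplexity is the mean over validation examples of exp(NLL_i / len_i), where NLL_i is the summed token-level negative log-likelihood of example i and len_i its target length, while the average NLL is the total loss divided by the number of examples rather than the number of tokens.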
Example #5

batch_size = [20]
seq_lengths = [30]
learning_rate = [0.001]

#TODO: CHANGE THE MODEL NUMBER TO THE ONE YOU NEED
model_num = 4

# Load Penn Treebank Dataset
train_path = './data/train.txt'
valid_path = './data/valid.txt'
sample_path = './sample.txt'
save_dir = './saved models/'

for bs in batch_size:
    # Create corpuses
    train_corpus = utils.Corpus()
    ids_train = train_corpus.get_data(train_path, bs)
    ids_valid = train_corpus.get_data(valid_path, bs)
    train_vocab_size = len(train_corpus.dictionary)

    for seq_len in seq_lengths:
        num_train_batches = ids_train.size(1) // seq_len
        num_valid_batches = ids_valid.size(1) // seq_len

        for lr in learning_rate:
            model = utils.initialize_model(model_num, train_vocab_size,
                                           embed_size)
            model = utils.use_cuda(model)
            print('Training vocabulary size: {}'.format(train_vocab_size))
            print('Model: {}'.format(model.name))
            print('Number of parameters = {}'.format(
                sum(p.numel() for p in model.parameters())))
Example #6

import os

import torch

import utils

# Hyper Parameters
embed_size = 249
num_samples = 30  # number of words to be sampled
temperature = [0.5, 1.0, 2]

# Load Penn Treebank Dataset
train_path = './data/train.txt'
valid_path = './data/valid.txt'
sample_path = './sample.txt'
save_dir = './saved models/'
model_name = 'LSTM - 249 hidden cells, 1 layers'

# Create the corpus
corpus = utils.Corpus()
ids_train = corpus.get_data(train_path)
ids_valid = corpus.get_data(valid_path)
vocab_size = len(corpus.dictionary)

# Load best model
model_num = 4
model = utils.initialize_model(model_num, vocab_size, embed_size)
model = utils.use_cuda(model)
file_path = os.path.join(save_dir, model_name + '.pkl')
load_state = torch.load(file_path, lambda storage, loc: storage)
model.load_state_dict(load_state['state_dict'])
model.eval()  # Turn to eval mode - so there won't be any dropouts!

# Sampling
with open(sample_path, 'w') as f:
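    # The original snippet is truncated here. A minimal sketch of the sampling loop it
    # sets up follows; the model's forward signature, the hidden-state handling, and the
    # choice of start token are assumptions, not the repository's implementation.
    for temp in temperature:
        f.write('Temperature = {}\n'.format(temp))
        # Start from a random word id and sample num_samples continuations.
        word = torch.randint(vocab_size, (1, 1), device=next(model.parameters()).device)
        state = None
        for _ in range(num_samples):
            output, state = model(word, state)  # assumed to return (logits, hidden state)
            probs = torch.softmax(output.view(-1) / temp, dim=-1)
            word_id = torch.multinomial(probs, 1).item()
            f.write(corpus.dictionary.idx2word[word_id] + ' ')
            word.fill_(word_id)
        f.write('\n')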
Example #7

if __name__ == '__main__':
    for DATASET in ['movie', 'news', 'tweet']:
        WORD_DROP = 10
        MIN_LEN = 5
        MAX_LEN = 200

        data_path = '../../data/' + DATASET + '2020.txt'
        train_path = '../../data/train_' + DATASET
        test_path = '../../data/test_' + DATASET
        vocabulary = utils.Vocabulary(data_path,
                                      max_len=MAX_LEN,
                                      min_len=MIN_LEN,
                                      word_drop=WORD_DROP)

        test = utils.Corpus(test_path,
                            vocabulary,
                            max_len=MAX_LEN,
                            min_len=MIN_LEN)
        with open(test_path + '_nounk', 'w', encoding='utf8') as f:
            for sentence in test.corpus:
                if 1 not in sentence:
                    f.write(' '.join([
                        vocabulary.i2w[_]
                        for _ in sentence if _ not in [0, 2, 3]
                    ]) + '\n')

        train = utils.Corpus(train_path,
                             vocabulary,
                             max_len=MAX_LEN,
                             min_len=MIN_LEN)
        with open(train_path + '_nounk', 'w', encoding='utf8') as f:
            for sentence in train.corpus:
                if 1 not in sentence:
                    f.write(' '.join([
                        vocabulary.i2w[_]
                        for _ in sentence if _ not in [0, 2, 3]
                    ]) + '\n')
Example #8
def main():
    # ======================
    # Hyperparameters
    # ======================
    CELL = "lstm"  # rnn, gru, lstm
    DATASET = 'movie'
    RATIO = 0.9
    WORD_DROP = 10
    MIN_LEN = 5
    MAX_LEN = 200
    BATCH_SIZE = 32
    SEQUENCE_LEN = 50
    EMBED_SIZE = 128
    HIDDEN_DIM = 256
    NUM_LAYERS = 2
    DROPOUT_RATE = 0.0
    EPOCH = 300
    LEARNING_RATE = 0.01
    MAX_GENERATE_LENGTH = 20
    GENERATE_EVERY = 5
    SEED = 100

    all_var = locals()
    print()
    for var in all_var:
        if var != "var_name":
            print("{0:15}   ".format(var), all_var[var])
    print()

    # ======================
    # Data
    # ======================
    data_path = '../../__data/ROCStories.txt'
    train_path = 'train_roc'
    test_path = 'test_roc'
    vocabulary = utils.Vocabulary(data_path,
                                  max_len=MAX_LEN,
                                  min_len=MIN_LEN,
                                  word_drop=WORD_DROP)
    utils.split_corpus(data_path,
                       train_path,
                       test_path,
                       max_len=MAX_LEN,
                       min_len=MIN_LEN,
                       ratio=RATIO,
                       seed=SEED)
    train = utils.Corpus(train_path,
                         vocabulary,
                         max_len=MAX_LEN,
                         min_len=MIN_LEN)
    test = utils.Corpus(test_path,
                        vocabulary,
                        max_len=MAX_LEN,
                        min_len=MIN_LEN)
    train_generator = utils.Generator(train.corpus)
    test_generator = utils.Generator(test.corpus)

    # ======================
    # Build the model
    # ======================
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = lm.LM(cell=CELL,
                  vocab_size=vocabulary.vocab_size,
                  embed_size=EMBED_SIZE,
                  hidden_dim=HIDDEN_DIM,
                  num_layers=NUM_LAYERS,
                  dropout_rate=DROPOUT_RATE)
    model.to(device)
    summary(model, (20, ))
    criterion = nn.NLLLoss()
    optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)
    # optimizer = torch.optim.Adam(textRNN.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
    print()

    # ======================
    # Training and testing
    # ======================
    best_loss = 1000000
    for epoch in range(EPOCH):
        train_g = train_generator.build_generator(BATCH_SIZE, SEQUENCE_LEN)
        test_g = test_generator.build_generator(BATCH_SIZE, SEQUENCE_LEN)
        train_loss = []
        while True:
            try:
                text = next(train_g)
            except StopIteration:
                break
            optimizer.zero_grad()
            y = model(torch.from_numpy(text[:, :-1]).long().to(device))
            loss = criterion(
                y.reshape(-1, vocabulary.vocab_size),
                torch.from_numpy(text[:, 1:]).reshape(-1).long().to(device))
            loss.backward()
            optimizer.step()
            train_loss.append(loss.item())

        test_loss = []
        while True:
            with torch.no_grad():
                try:
                    text = next(test_g)
                except StopIteration:
                    break
                y = model(torch.from_numpy(text[:, :-1]).long().to(device))
                loss = criterion(
                    y.reshape(-1, vocabulary.vocab_size),
                    torch.from_numpy(text[:, 1:]).reshape(-1).long().to(device))
                test_loss.append(loss.item())

        print('epoch {:d}   training loss {:.4f}    test loss {:.4f}'.format(
            epoch + 1, np.mean(train_loss), np.mean(test_loss)))

        if np.mean(test_loss) < best_loss:
            best_loss = np.mean(test_loss)
            print('-----------------------------------------------------')
            print('saving parameters')
            os.makedirs('models', exist_ok=True)
            torch.save(model.state_dict(),
                       'models/' + DATASET + '-' + str(epoch) + '.pkl')
            print('-----------------------------------------------------')

        if (epoch + 1) % GENERATE_EVERY == 0:
            with torch.no_grad():
                # Generate text
                x = torch.LongTensor([[vocabulary.w2i['_BOS']]] * 3).to(device)
                for i in range(MAX_GENERATE_LENGTH):
                    samp = model.sample(x)
                    x = torch.cat([x, samp], dim=1)
                x = x.cpu().numpy()
            print('-----------------------------------------------------')
            for i in range(x.shape[0]):
                print(' '.join([
                    vocabulary.i2w[_] for _ in list(x[i, :]) if _ not in [
                        vocabulary.w2i['_BOS'], vocabulary.w2i['_EOS'],
                        vocabulary.w2i['_PAD']
                    ]
                ]))
            print('-----------------------------------------------------')
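Example #8 above (and Example #9 below) generates text through model.sample(x), which is not shown on this page. Below is a minimal sketch of such a method, assuming the model's forward pass returns per-position log-probabilities of shape (batch, seq_len, vocab_size) and that one next token is drawn per sequence from the distribution at the last position; the body is an assumption, not the repository's implementation.

import torch

def sample(self, x):
    # x: (batch, seq_len) tensor of token ids generated so far.
    log_probs = self.forward(x)                    # assumed shape: (batch, seq_len, vocab_size)
    next_dist = log_probs[:, -1, :].exp()          # distribution over the next token
    next_token = torch.multinomial(next_dist, 1)   # (batch, 1) sampled token ids
    return next_token

This matches the usage in the loops above, where the sampled column is concatenated onto x with torch.cat([x, samp], dim=1).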
Example #9
def main():
    # ======================
    # hyper-parameters
    # ======================
    CELL = "lstm"  # rnn, gru, lstm
    DATASET = 'tweet'  # movie, news, tweet
    RATIO = 0.9
    WORD_DROP = 10
    MIN_LEN = 5
    MAX_LEN = 200
    BATCH_SIZE = 32
    EMBED_SIZE = 350
    HIDDEN_DIM = 512
    NUM_LAYERS = 2
    DROPOUT_RATE = 0.0
    START_EPOCH = 0
    EPOCH = 30
    LEARNING_RATE = 0.001
    MAX_GENERATE_LENGTH = 20
    GENERATE_EVERY = 5
    PRINT_EVERY = 1
    SEED = 100

    all_var = locals()
    print()
    for var in all_var:
        if var != "var_name":
            print("{0:15}   ".format(var), all_var[var])
    print()

    # ======================
    # data
    # ======================
    data_path = 'data/' + DATASET + '2020.txt'
    train_path = 'data/train_' + DATASET
    test_path = 'data/test_' + DATASET
    vocabulary = utils.Vocabulary(data_path,
                                  max_len=MAX_LEN,
                                  min_len=MIN_LEN,
                                  word_drop=WORD_DROP)
    utils.split_corpus(data_path,
                       train_path,
                       test_path,
                       max_len=MAX_LEN,
                       min_len=MIN_LEN,
                       ratio=RATIO,
                       seed=SEED)
    train = utils.Corpus(train_path,
                         vocabulary,
                         max_len=MAX_LEN,
                         min_len=MIN_LEN)
    test = utils.Corpus(test_path,
                        vocabulary,
                        max_len=MAX_LEN,
                        min_len=MIN_LEN)
    train_generator = utils.Generator(train.corpus, vocabulary=vocabulary)
    test_generator = utils.Generator(test.corpus, vocabulary=vocabulary)

    # ======================
    # building model
    # ======================
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)
    model = lm.LM(cell=CELL,
                  vocab_size=vocabulary.vocab_size,
                  embed_size=EMBED_SIZE,
                  hidden_dim=HIDDEN_DIM,
                  num_layers=NUM_LAYERS,
                  dropout_rate=DROPOUT_RATE)
    model.to(device)
    total_params = sum(p.numel() for p in model.parameters())
    print("Total params: {:d}".format(total_params))
    total_trainable_params = sum(p.numel() for p in model.parameters()
                                 if p.requires_grad)
    print("Trainable params: {:d}".format(total_trainable_params))
    criterion = nn.NLLLoss(ignore_index=vocabulary.w2i["_PAD"])
    optimizer = optim.Adam(model.parameters(),
                           lr=LEARNING_RATE,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0,
                           amsgrad=False)
    print()

    # ======================
    # training and testing
    # ======================
    best_loss = 1000000
    step = 0
    if START_EPOCH > 0:
        model.load_state_dict(
            torch.load('models/' + DATASET + '-' + str(START_EPOCH) + '.pkl',
                       map_location=device))
    for epoch in range(START_EPOCH + 1, EPOCH + 1):
        train_g = train_generator.build_generator(BATCH_SIZE)
        test_g = test_generator.build_generator(BATCH_SIZE)
        train_loss = []
        model.train()
        while True:
            try:
                text = next(train_g)
            except StopIteration:
                break
            optimizer.zero_grad()
            text_in = text[:, :-1]
            text_target = text[:, 1:]
            y = model(torch.from_numpy(text_in).long().to(device))
            loss = criterion(
                y.reshape(-1, vocabulary.vocab_size),
                torch.from_numpy(text_target).reshape(-1).long().to(device))
            loss.backward()
            optimizer.step()
            train_loss.append(loss.item())
            step += 1
            torch.cuda.empty_cache()

            if step % PRINT_EVERY == 0:
                print('step {:d} training loss {:.4f}'.format(
                    step, loss.item()))

        test_loss = []
        model.eval()
        with torch.no_grad():
            while True:
                try:
                    text = next(test_g)
                except StopIteration:
                    break
                text_in = text[:, :-1]
                text_target = text[:, 1:]
                y = model(torch.from_numpy(text_in).long().to(device))
                loss = criterion(
                    y.reshape(-1, vocabulary.vocab_size),
                    torch.from_numpy(text_target).reshape(-1).long().to(
                        device))
                test_loss.append(loss.item())
                torch.cuda.empty_cache()

        print('epoch {:d}   training loss {:.4f}    test loss {:.4f}'.format(
            epoch, np.mean(train_loss), np.mean(test_loss)))

        if np.mean(test_loss) < best_loss:
            best_loss = np.mean(test_loss)
            print('-----------------------------------------------------')
            print('saving parameters')
            os.makedirs('models', exist_ok=True)
            torch.save(model.state_dict(),
                       'models/' + DATASET + '-' + str(epoch) + '.pkl')
            print('-----------------------------------------------------')

        if (epoch + 1) % GENERATE_EVERY == 0:
            model.eval()
            with torch.no_grad():
                # generating text
                x = torch.LongTensor([[vocabulary.w2i['_BOS']]] * 3).to(device)
                for i in range(MAX_GENERATE_LENGTH):
                    samp = model.sample(x)
                    x = torch.cat([x, samp], dim=1)
                x = x.cpu().numpy()
            print('-----------------------------------------------------')
            for i in range(x.shape[0]):
                print(' '.join([
                    vocabulary.i2w[_] for _ in list(x[i, :]) if _ not in [
                        vocabulary.w2i['_BOS'], vocabulary.w2i['_EOS'],
                        vocabulary.w2i['_PAD']
                    ]
                ]))
            print('-----------------------------------------------------')
Example #10
def vae_train(args):
    """
    trains a model as specified by args
    """
    seed_random(args.rand_seed)
    device = init_device()
    train_log, valid_log = init_logger(log_dir=args.log_dir)

    corpus = utils.Corpus(args.data, args.persona_data)
    train_data = utils.load_data(corpus.train,
                                 batch_size=args.batch_size,
                                 num_workers=4)
    test_data = utils.load_data(corpus.test,
                                batch_size=args.batch_size,
                                num_workers=4)

    vocab = len(corpus.dictionary)
    model = models.VAE(vocab, args.embedding, args.hidden, args.latent)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.learning_rate,
                                 weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[100, 150],
                                                     gamma=0.1)

    if args.continue_training:
        model.load_model()
    model = model.to(device)
    print("Training", model.name, "with #params:", model.num_params())
    loss = cvae_loss_function
    best = float("inf")
    global_step = 0
    for epoch in range(args.num_epoch):
        losses = []
        for x, x_len, p, p_len, y, y_len in train_data:
            # Now we need to make sure everything in the batch has same size
            x, x_len = x.to(device), x_len.to(device)
            y, y_len = y.to(device), y_len.to(device)
            # Should go from 1 to 0 over ~100k steps (after a good LM has been learned)
            teach = 1 if global_step < 200_000 else .9995
            res = model(x, x_len, y, y_len, teach)
            pred, bow_log, r_mu, r_log_var, p_mu, p_log_var = res

            eos_tensor = torch.empty(x.shape[0], 1).to(device)
            eos_tensor.fill_(corpus.dictionary.word2idx["L"])
            gold = torch.cat([y, eos_tensor], dim=1).long()
            alph = min(max(0, (global_step - 10_000) / 60_000), 1)
            pred = pred.permute(0, 2, 1)
            # Get loss, normalized by batch size
            loss_val = loss(pred,
                            gold,
                            bow_log,
                            r_mu,
                            r_log_var,
                            p_mu,
                            p_log_var,
                            alpha=alph)

            optimizer.zero_grad()
            loss_val.backward()
            if args.grad_clip > 0.0:
                nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
            optimizer.step()
            scheduler.step()
            global_step += 1

            losses.append(loss_val.detach().cpu().numpy())
            if train_log is not None:
                train_log.add_scalar("loss", losses[-1], global_step)

        with torch.no_grad():
            validation = eval_inference(model, corpus, test_data, valid_log,
                                        global_step)
        avg_l = np.mean(losses)
        print("epoch %-3d \t loss = %0.3f \t" % (epoch, avg_l))
        if validation < best:
            print("Saving model!")
            best = validation
            model.save_model()

    print("Finished training, best model got: {} NLL".format(best))
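Example #10 trains against cvae_loss_function, which is defined elsewhere in the project. Below is a minimal sketch of a CVAE-style objective matching the call signature used above: reconstruction NLL, a KL term between the recognition Gaussian (r_mu, r_log_var) and the prior Gaussian (p_mu, p_log_var) annealed by alpha, and an auxiliary bag-of-words term. It assumes pred has shape (batch, vocab, seq), bow_log has shape (batch, vocab), both are log-probabilities, and that index 0 is padding; the weighting and normalization are assumptions, not the repository's implementation.

import torch
import torch.nn.functional as F

def cvae_loss_function(pred, gold, bow_log, r_mu, r_log_var, p_mu, p_log_var, alpha=1.0):
    batch_size = gold.shape[0]
    # Reconstruction loss: token-level NLL against the gold sequence, ignoring padding.
    nll = F.nll_loss(pred, gold, ignore_index=0, reduction='sum')
    # KL(q(z|x,y) || p(z|x)) between two diagonal Gaussians.
    kl = 0.5 * torch.sum(
        p_log_var - r_log_var
        + (r_log_var.exp() + (r_mu - p_mu).pow(2)) / p_log_var.exp()
        - 1)
    # Bag-of-words loss: every gold token should be predictable from the latent code alone.
    bow_expanded = bow_log.unsqueeze(1).expand(-1, gold.shape[1], -1)  # (batch, seq, vocab)
    bow = F.nll_loss(bow_expanded.reshape(-1, bow_log.shape[-1]),
                     gold.reshape(-1), ignore_index=0, reduction='sum')
    # Anneal the KL term with alpha and normalize by batch size.
    return (nll + alpha * kl + bow) / batch_size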
Example #11
def twod_viz(args, model=None):
    device = init_device()
    corpus = utils.Corpus(args.data, args.persona_data)
    if model is None:
        vocab = len(corpus.dictionary)
        model = models.CVAE(vocab,
                            args.embedding,
                            args.hidden,
                            args.latent,
                            rnn=args.rnn)
        model.load_model()
        model = model.to(device)
    model.eval()
    artist_names = [
        "21 savage",
        '6ix9ine',
        'dr dre',
        'earl sweatshirt',
        'ice cube',
        'kanye west',
        'kendrick lamar',
        'kid cudi',
        'pusha t',
        'tyler the creator',
    ]
    artist_list = [2, 5, 23, 26, 36, 44, 46, 47, 67, 86]
    names = {}
    for name, id_ in zip(artist_names, artist_list):
        names[id_] = name
    latents = []
    labels = []
    for artist in artist_list:
        curr = []
        persona = corpus.personas[artist]
        print("Artist {}".format(artist))
        ctxt = ['S']
        ctxt = [corpus.dictionary.word2idx[word] for word in ctxt]
        p_len = torch.tensor([len(persona)]).long().to(device)
        p = torch.tensor([persona]).long().to(device)
        x_len = torch.tensor([len(ctxt)]).long().to(device)
        x = torch.tensor([ctxt]).long().to(device)
        x_emb = model.embedding(x)
        p_emb = model.embedding(p)

        c_enc = model.contextualize(x_emb, x_len, p_emb, p_len)
        out_prior = model.priorlnorm(model.tanh(model.prior(c_enc)))
        p = model.p_mu_log_var(out_prior)
        p_mu, p_log_var = torch.split(p, model.latent_dim, dim=-1)
        latents.append(p_mu.cpu().numpy().squeeze())
        labels.append(artist)
    latents = np.stack(latents)
    means = np.mean(latents, axis=0)
    print(means)
    latents = latents - means
    print(latents)
    print(latents.shape)
    labels = np.array(labels)
    print(labels.shape)
    # cm = plt.get_cmap('gist_rainbow')
    fig = plt.figure()
    # jet = cm = plt.get_cmap('gist_rainbow')
    # cNorm  = colors.Normalize(vmin=0, vmax=10)
    # scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)
    for idx, cl in enumerate(np.unique(labels)):
        plt.scatter(x=latents[idx, 0] * 1000,
                    y=latents[idx, 1] * 1000,
                    label=artist_names[idx])
        # plt.text(x=latents[idx, 0]*1000,
        #             y=latents[idx, 1]*1000,
        #             s=artist_names[idx],
        #             alpha=0.9,
        #             )
    plt.legend(loc='upper left')
    plt.xlim(-2.75, 2.75)
    plt.ylim(-2.75, 2.75)
    plt.xlabel("Dim 1")
    plt.ylabel("Dim 2")
    plt.title("Artist Embeddings with IDs")
    plt.show()
    fig.savefig('my_figure.png')
Example #12
# config logging
logging.basicConfig(filename='{}logging{}.log'.format(args.data, args.expt_id), level=logging.INFO)

# Set the random seed manually for reproducibility.
torch.manual_seed(args.seed)  # Seed the CPU RNG so that results are deterministic.
if args.gpu_device > 0:
    device = torch.device("cuda")
    torch.cuda.set_device(args.gpu_device)  # GPU ids start at 0 by default; this selects which GPU to use.
    torch.cuda.manual_seed(args.seed)
    random.seed(args.seed)
else:
    device = torch.device("cpu")


# load data
corpus = utils.Corpus(args.data)
eval_batch_size = 10

train_data = utils.create_batches(corpus.train, args.batch_size, order='random', device=device)  # why is the batch size different across the splits?
val_data = utils.create_batches(corpus.valid, eval_batch_size, order='random', device=device)
test_data = utils.create_batches(corpus.test, eval_batch_size, order='random', device=device)


# build model
vocab_size = corpus.vocab_size  # length of idx2word
model = model.RNNModel(args.model, vocab_size, args.emsize, args.nhid, args.nlayers, args.dropout, args.tied).to(device)
criterion = nn.CrossEntropyLoss()


# training code
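The snippet ends where the training code begins. Below is a minimal sketch of a language-model training loop over the batches built above; the batch format produced by utils.create_batches (input/target pairs), the model's forward signature, and the args.lr / args.epochs flags are assumptions, not the repository's implementation.

optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)  # args.lr is an assumed flag

for epoch in range(args.epochs):  # args.epochs is an assumed flag
    model.train()
    total_loss = 0.0
    num_batches = 0
    for inputs, targets in train_data:  # assumed: each batch is an (input, target) pair
        optimizer.zero_grad()
        output, _ = model(inputs)  # assumed to return (logits, hidden)
        loss = criterion(output.reshape(-1, vocab_size), targets.reshape(-1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1
    logging.info('epoch %d  train loss %.4f', epoch + 1, total_loss / max(num_batches, 1))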