Example #1
            # anneal the learning rate when validation perplexity stops improving
            if args.anneal_lr and (len(all_val_ppls) > args.nonmono and
                                   val_ppl > min(all_val_ppls[:-args.nonmono])
                                   and lr > 1e-5):
                optimizer.param_groups[0]['lr'] /= args.lr_factor
        if epoch % args.visualize_every == 0:
            visualize(model)
        all_val_ppls.append(val_ppl)
    # training done: reload the saved checkpoint and re-score it on the validation set
    with open(ckpt, 'rb') as f:
        model = torch.load(f)
    model = model.to(device)
    val_ppl = evaluate(model, 'val')
else:
    # evaluation-only mode: load the trained model from disk
    with open(ckpt, 'rb') as f:
        model = torch.load(f)
    model = model.to(device)
    model.eval()

    with torch.no_grad():
        ## get document completion perplexities
        test_ppl = evaluate(model, 'test', tc=args.tc, td=args.td)

        ## get most used topics
        indices = torch.tensor(range(args.num_docs_train))
        indices = torch.split(indices, args.batch_size)
        thetaAvg = torch.zeros(1, args.num_topics).to(device)
        thetaWeightedAvg = torch.zeros(1, args.num_topics).to(device)
        cnt = 0
        for idx, ind in enumerate(indices):
            data_batch = data.get_batch(train_tokens, train_counts, ind,
                                        args.vocab_size, device)
            sums = data_batch.sum(1).unsqueeze(1)
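
Example #1 breaks off right after computing the per-document token counts (sums). In the publicly released ETM training script, which this snippet closely mirrors, the loop typically continues by normalizing the bag-of-words batch, asking the model for the per-document topic proportions theta, and accumulating both the plain and the token-weighted averages. A minimal sketch of that continuation, assuming the model exposes a get_theta(normalized_bows) method and that an args.bow_norm flag controls normalization (neither appears in the snippet itself):

            cnt += sums.sum(0).squeeze().cpu().numpy()
            # length-normalize each document's bag-of-words if requested
            normalized_data_batch = data_batch / sums if args.bow_norm else data_batch
            # theta: (batch_size, num_topics) topic proportions per document
            theta, _ = model.get_theta(normalized_data_batch)
            thetaAvg += theta.sum(0).unsqueeze(0) / args.num_docs_train
            # weight each document's proportions by its length before averaging
            thetaWeightedAvg += (sums * theta).sum(0).unsqueeze(0)
        # rank topics by their length-weighted usage across the corpus
        thetaWeightedAvg = thetaWeightedAvg.squeeze().cpu().numpy() / cnt
        print('The 10 most used topics are {}'.format(thetaWeightedAvg.argsort()[::-1][:10]))
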
Example #2
        visualize(etm_model)

    # save perplexities
    all_val_ppls.append(val_ppl)

    print("Training finished.")

print("## -------------------------------------")
print("##\t TESTING THE MODEL ")
print("## -------------------------------------")

# load trained model and evaluate it
with open(ckpt, 'rb') as f:
    etm_model = torch.load(f)
etm_model = etm_model.to(device)
etm_model.eval()

with torch.no_grad():
    ## ---------------
    ## Idea: get document completion perplexities
    test_ppl = evaluate(etm_model, test_corpus, num_docs_test)

    ## ----------------
    ## Idea: get most used topics
    indices = torch.tensor(range(num_docs_test))  # test document indices
    indices = torch.split(indices, _training_batch_size)
    # accumulators for the plain and length-weighted average topic proportions
    thetaAvg = torch.zeros(1, num_topics).to(device)
    thetaWeightedAvg = torch.zeros(1, num_topics).to(device)
    cnt = 0
    for idx, ind in enumerate(indices):
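
Example #2 stops at the head of the same accumulation loop; its body would mirror the continuation sketched under Example #1, with etm_model in place of model and the test batches in place of the training batches. Once thetaWeightedAvg has been filled, the most used topics can be reported directly. A hypothetical post-loop snippet, assuming thetaWeightedAvg keeps shape (1, num_topics):

        ...  # loop body: fetch the batch and accumulate thetaAvg / thetaWeightedAvg as in Example #1

    # rank topics by their accumulated, length-weighted usage
    topic_usage = thetaWeightedAvg.squeeze()
    top_topics = torch.argsort(topic_usage, descending=True)[:10]
    print('10 most used topics:', top_topics.cpu().tolist())
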