コード例 #1
0
ファイル: main.py プロジェクト: dsi-idc/topic_models
        etm_model = etm_model.to(device)
        etm_model.eval()

        print('Visualizing model quality after training...')
        etm_model.eval()
        etm_model.print_words_per_topic(
            words_amount=config_dict['evaluation_params']['num_words'],
            vocab=vocab,
            lang='en')
        with torch.no_grad():
            # get document completion perplexities
            test_ppl = etm_model.evaluate(
                source='test',
                test_1_tokens=test_1_tokens,
                test_1_counts=test_1_counts,
                test_2_tokens=test_2_tokens,
                test_2_counts=test_2_counts,
                train_tokens=train_tokens,
                vocab=vocab,
                tc=config_dict['evaluation_params']['tc'],
                td=config_dict['evaluation_params']['td'])

            # get most used topics
            indices = torch.tensor(range(config_dict['num_docs_train']))
            indices = torch.split(indices, config_dict['batch_size'])
            thetaAvg = torch.zeros(
                1, config_dict['model_params']['num_topics']).to(device)
            thetaWeightedAvg = torch.zeros(
                1, config_dict['model_params']['num_topics']).to(device)
            cnt = 0
            for idx, ind in enumerate(indices):
                data_batch = data.get_batch(train_tokens, train_counts, ind,
コード例 #2
0
ファイル: main.py プロジェクト: adjidieng/ETM

# Start tracing Python memory allocations for the rest of the run.
tracemalloc.start()
if args.mode == 'train':
    # Training driver: run args.epochs epochs, score the model after each
    # one, checkpoint whenever the score improves, and otherwise consider
    # annealing the learning rate.
    best_epoch = 0
    best_val_ppl = 1e9  # sentinel; replaced by the first score below 1e9
    all_val_ppls = []  # one validation score per completed epoch
    print('\n')
    # NOTE: the message says "visualizing", but the visualize call below is
    # commented out; only the message and args.epochs are printed here.
    print('Visualizing model quality before training...', args.epochs)
    #model.visualize(args, vocabulary = vocab)
    print('\n')
    for epoch in range(0, args.epochs):
        print("I am training for epoch", epoch)
        model.train_for_epoch(epoch, args, training_set)
        # Evaluated with source 'val'; the name suggests the result is a
        # perplexity (presumably -- confirm against model.evaluate). The
        # comparisons below treat a lower value as better.
        val_ppl = model.evaluate(args, 'val', training_set, vocab,  test_1, test_2)
        print("The validation scores", val_ppl)
        if val_ppl < best_val_ppl:
            # New best score: overwrite the checkpoint file with the whole
            # model object and remember which epoch produced it.
            with open(ckpt, 'wb') as f:
                torch.save(model, f)
            best_epoch = epoch
            best_val_ppl = val_ppl
        else:
            # No improvement: check whether to anneal the learning rate.
            # Only the first optimizer param group is read and adjusted.
            lr = optimizer.param_groups[0]['lr']
            # Anneal only if all of the following hold: annealing is enabled,
            # more than args.nonmono earlier scores exist, the current score
            # is worse than the best earlier score outside the most recent
            # args.nonmono entries, and lr is still above the 1e-5 floor.
            # all_val_ppls does not yet contain this epoch's score (it is
            # appended at the end of the iteration).
            # NOTE(review): with args.nonmono == 0 the slice [:-0] is empty
            # and min() would raise ValueError -- confirm nonmono is always > 0.
            if args.anneal_lr and (len(all_val_ppls) > args.nonmono and val_ppl > min(all_val_ppls[:-args.nonmono]) and lr > 1e-5):
                optimizer.param_groups[0]['lr'] /= args.lr_factor
        # Periodic visualization, including epoch 0 (0 % k == 0).
        if epoch % args.visualize_every == 0:
            model.visualize(args, vocabulary = vocab)
        all_val_ppls.append(val_ppl)
    with open(ckpt, 'rb') as f: