Example #1
import torch

import lean_utils as lu
import lean_params as lp

# LeanModel, calculate_probs and device are assumed to be defined or imported
# elsewhere in this module.

def calculate_probs_for_set(training_set):
    training_label, training_epoch, _, ws_label, bidirectional = training_set

    vocab_filename = lp.gen_params_all[ws_label]['vocab_filename']
    lean_vocab = lu.load_vocab(vocab_filename)

    test_filename = lp.gen_params_all[ws_label]['tensors_filename']
    probs_filename = f'./cache/probs_{ws_label}_{training_label}_{training_epoch}.pt'

    net_params = lp.net_params
    net_params['bidirectional'] = bidirectional

    network = LeanModel(lean_vocab, net_params)
    lu.load_network(network, training_label, training_epoch)
    network = network.to(device)

    test_data = lu.load_and_prepare_data(test_filename,
                                         lean_vocab.stoi['<eos>'])

    probs = calculate_probs(network, test_data, training_label)
    print(f'Saving {probs_filename}')
    torch.save(probs, probs_filename)
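
A minimal driver sketch for the function above, assuming the surrounding project modules are importable; the workspace label 'test' is a hypothetical placeholder, while the training label and epoch mirror values used elsewhere in these examples.

# Hypothetical driver; each tuple is
# (training_label, training_epoch, _, ws_label, bidirectional).
training_sets = [
    ('baseline-Jul27_16-18-30', 4, None, 'test', False),
]

for training_set in training_sets:
    calculate_probs_for_set(training_set)
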
Example #2
import torch

import lean_utils as lu
import lean_params as lp

label = lu.get_cli_args().label

gen_params = lp.gen_params_all[label]

input_filename = gen_params['csv_filename']
vocab_filename = gen_params['vocab_filename']
tensors_filename = gen_params['tensors_filename']

line_size = 10

# import vocabulary
v = lu.load_vocab( vocab_filename )

def process_line( line ):
    # Encode one CSV row as a fixed-size tensor of vocabulary indices:
    # column 0 is skipped, columns 1 and 2 each hold two '@'-joined labels,
    # and the remaining columns are single tokens.
    ret = torch.zeros( line_size )
    c = 0
    for idx, col in enumerate(line):
        if idx == 0:
            continue
        if idx == 1 or idx == 2:
            labels = col.split( '@' )
            ret[c] = v.stoi[labels[0]]
            c+=1
            ret[c] = v.stoi[labels[1]]
            c+=1
        else:
            ret[c] = v.stoi[col]
            c += 1
    return ret
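
A toy call sketch for process_line, assuming every token in the row is present in the loaded vocabulary; the column values below are invented.

# Hypothetical CSV row: an identifier, two '@'-joined label pairs,
# then single-token columns (nine columns in total).
sample_row = ['id0', 'a@b', 'c@d', 'e', 'f', 'g', 'h', 'i', 'j']
encoded = process_line(sample_row)  # tensor of length line_size
print(encoded)
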
Example #3
from termcolor import colored as clr

import lean_utils as lu
import lean_params as lp

label = lu.get_cli_args().label

gen_params_test = lp.gen_params_all[label]

training_label = 'baseline-Jul27_16-18-30'
epoch = 4

lean_vocab = lu.load_vocab(gen_params_test['vocab_filename'])

for epoch_no in range(lp.trainer_params['epochs']):
    epoch_samples = lu.load_epoch_samples(epoch_no, training_label)

    for s_input, s_output, s_probs in epoch_samples:
        for token in s_input:
            print(lean_vocab.itos[token], end=",")
        print()
        for pos, token in enumerate(s_output):
            if s_probs[pos] > 0.7:
                col = 'green'
            elif s_probs[pos] > 0.3:
                col = 'yellow'
            else:
                col = 'red'
            print(clr(lean_vocab.itos[token], col), end=",")
        print(f'{s_probs.prod() * 100:.2f}')
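
The same colour-threshold idea in isolation, as a self-contained sketch with made-up tokens and probabilities (no project modules required).

from termcolor import colored as clr

tokens = ['intro', 'apply', 'exact']   # made-up tokens
probs = [0.92, 0.45, 0.12]             # made-up per-token probabilities

for token, p in zip(tokens, probs):
    col = 'green' if p > 0.7 else 'yellow' if p > 0.3 else 'red'
    print(clr(token, col), end=',')
print()
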
Example #4
import lean_utils as lu
import lean_params as lp

cli_args = lu.get_cli_args()

gen_params_train = lp.gen_params_all[cli_args.train]
gen_params_test = lp.gen_params_all[cli_args.test]

# input filenames
vocab_filename = gen_params_train['vocab_filename']
train_filename = gen_params_train['tensors_filename']
test_filename = gen_params_test['tensors_filename']

print(f'Vocab file: {vocab_filename}')
print(f'Training tensors: {train_filename}')
print(f'Testing tensors: {test_filename}')

# load vocabulary
lean_vocab = lu.load_vocab(vocab_filename)

# load training and test
train_data, test_data = lu.load_data(lean_vocab.stoi['<eos>'], train_filename,
                                     test_filename)

# setup tensorboard & friends
training_label = lu.create_training_label('unidir7')
# training_label = 'baseline-Jul26_10-06-31'
print(f'Training label: {training_label}')
tb_train_writer, tb_test_writer = lu.setup_tensorboard(training_label)
lu.output_hparams(tb_train_writer, training_label, lp.net_params,
                  lp.trainer_params, gen_params_train, gen_params_test,
                  lean_vocab)

# output vocabulary freqs