def calculate_probs_for_set(training_set):
    """Compute and cache token probabilities for one (training run, workspace) pair.

    Args:
        training_set: tuple of
            (training_label, training_epoch, _, ws_label, bidirectional).
            The third element is ignored.

    Side effects:
        Loads the vocabulary and test tensors named in
        ``lp.gen_params_all[ws_label]``, restores the network checkpoint for
        ``training_label``/``training_epoch``, runs ``calculate_probs``, and
        saves the result to ``./cache/probs_<ws>_<label>_<epoch>.pt``.
    """
    training_label, training_epoch, _, ws_label, bidirectional = training_set

    vocab_filename = lp.gen_params_all[ws_label]['vocab_filename']
    lean_vocab = lu.load_vocab(vocab_filename)
    test_filename = lp.gen_params_all[ws_label]['tensors_filename']
    probs_filename = f'./cache/probs_{ws_label}_{training_label}_{training_epoch}.pt'

    # BUGFIX: the original aliased the shared lp.net_params dict and mutated it,
    # so the 'bidirectional' flag from one call leaked into every later call.
    # Copy before mutating so each training_set gets an independent config.
    net_params = dict(lp.net_params)
    net_params['bidirectional'] = bidirectional

    network = LeanModel(lean_vocab, net_params)
    lu.load_network(network, training_label, training_epoch)
    network = network.to(device)

    test_data = lu.load_and_prepare_data(test_filename, lean_vocab.stoi['<eos>'])
    probs = calculate_probs(network, test_data, training_label)

    print(f'Saving {probs_filename}')
    torch.save(probs, probs_filename)
import lean_utils as lu
import lean_params as lp

# Tensor-generation script: resolve filenames for the workspace selected on the
# command line, load its vocabulary, and encode CSV rows into index tensors.
# NOTE(review): `torch` is used below but not imported in the visible chunk —
# presumably imported elsewhere in the file; verify.
label = lu.get_cli_args().label
gen_params = lp.gen_params_all[label]

input_filename = gen_params['csv_filename']
vocab_filename = gen_params['vocab_filename']
tensors_filename = gen_params['tensors_filename']

line_size = 10  # fixed number of token slots encoded per CSV row

# import vocabulary
v = lu.load_vocab( vocab_filename )

def process_line( line ):
    # Encode one CSV row into a tensor of vocabulary indices.
    # Column 0 is skipped entirely; columns 1 and 2 each hold two labels
    # joined by '@' and occupy two slots apiece; every other column maps
    # to a single slot via the vocab's string-to-index table.
    # NOTE(review): this chunk looks truncated — the else branch never
    # advances c (so later columns would overwrite the same slot) and there
    # is no `return ret`; confirm against the original source file.
    ret = torch.zeros( line_size )
    c = 0
    for idx, col in enumerate(line):
        if idx == 0:
            continue
        if idx == 1 or idx == 2:
            labels = col.split( '@' )
            ret[c] = v.stoi[labels[0]]
            c+=1
            ret[c] = v.stoi[labels[1]]
            c+=1
        else:
            ret[c] = v.stoi[col]
from termcolor import colored as clr
import lean_utils as lu
import lean_params as lp

# Visualization script: for every training epoch, print each sampled input
# sequence and its predicted output, coloring each output token by the model's
# confidence (green > 0.7, yellow > 0.3, red otherwise), followed by the
# product of the token probabilities as a percentage.

# BUGFIX: the original bound the whole argparse namespace (`lu.get_cli_args()`)
# to `label` and then used it as a dict key, which cannot match a string key in
# gen_params_all. Sibling scripts in this project use `.label`.
label = lu.get_cli_args().label
gen_params_test = lp.gen_params_all[label]

training_label = 'baseline-Jul27_16-18-30'
epoch = 4  # NOTE(review): unused in the visible chunk — kept for compatibility

lean_vocab = lu.load_vocab(gen_params_test['vocab_filename'])

# BUGFIX: `trainer_params` was referenced as a bare, undefined name; it is
# defined in lean_params (cf. lp.trainer_params usage elsewhere in the project).
for epoch_no in range(lp.trainer_params['epochs']):
    epoch_samples = lu.load_epoch_samples(epoch_no, training_label)
    for s_input, s_output, s_probs in epoch_samples:
        # Echo the raw input token sequence.
        for token in s_input:
            print(lean_vocab.itos[token], end=",")
        print()
        # Color each predicted token by its per-position probability.
        for pos, token in enumerate(s_output):
            if s_probs[pos] > 0.7:
                col = 'green'
            elif s_probs[pos] > 0.3:
                col = 'yellow'
            else:
                col = 'red'
            print(clr(lean_vocab.itos[token], col), end=",")
        print(f'{s_probs.prod() * 100:.2f}')
# Training setup script: resolve tensor filenames for the train/test workspaces
# named on the command line, load the vocabulary and both datasets, create a
# fresh training label, and wire up TensorBoard writers + hparam logging.
# NOTE(review): `lu` and `lp` are used but their imports are not in this
# visible chunk — presumably imported earlier in the file; verify.
cli_args = lu.get_cli_args()
gen_params_train = lp.gen_params_all[cli_args.train]
gen_params_test = lp.gen_params_all[cli_args.test]

# input filenames
vocab_filename = gen_params_train['vocab_filename']
train_filename = gen_params_train['tensors_filename']
test_filename = gen_params_test['tensors_filename']

print(f'Vocab tensors: {vocab_filename}')
print(f'Training tensors: {train_filename}')
print(f'Testing tensors: {test_filename}')

# load vocabulary
lean_vocab = lu.load_vocab(vocab_filename)

# load training and test
# The '<eos>' index is passed so the loader can split/terminate sequences.
train_data, test_data = lu.load_data(lean_vocab.stoi['<eos>'], train_filename, test_filename)

#setup tensorboard & friends
training_label = lu.create_training_label('unidir7')
# training_label = 'baseline-Jul26_10-06-31'
print(f'Training label: {training_label}')
tb_train_writer, tb_test_writer = lu.setup_tensorboard(training_label)
# Record all hyperparameters (network, trainer, data-generation) for this run.
lu.output_hparams(tb_train_writer, training_label, lp.net_params, lp.trainer_params, gen_params_train, gen_params_test, lean_vocab)
# output vocabulary freqs