Example 1
import pickle

import torch

# args, load_file_with_terms, prepare_mapping and AssembleMem come from the
# surrounding project. The opening `if` below is an assumption: the excerpt
# was cut off, and args.gpu is checked the same way later in the snippet.
if args.gpu == 1:
    state = torch.load(args.model)
else:
    state = torch.load(args.model, map_location=lambda storage, loc: storage)

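# Restore the hyper-parameters saved with the checkpoint.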
parameters = state['parameters']
# Data parameters
lower = parameters['lower']
parameters['gpu'] = args.gpu == 1

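# Load the cached corpus and the raw test split from disk.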
with open(args.data_path + '/dataset.pth', 'rb') as f:
    data = pickle.load(f)
words = data['words']
_, t_dataset = load_file_with_terms(args.data_path + '/test.txt')
try:
    mappings = state['mappings']
except KeyError:
    # Fall back to rebuilding the mappings when the checkpoint does not
    # store them.
    mappings, words_freq = prepare_mapping(words, lower, parameters['freq'])
state_dict = state['state_dict']

# print model parameters
print('Model parameters:')
for k, v in parameters.items():
    print('%s=%s' % (k, v))

# Index data
t_dataset = AssembleMem(t_dataset, mappings['word2id'], lower, 1, args.max_len,
                        parameters['gpu'])
print("Vocabulary size", t_dataset.vocab_size)
print("%i sentences in test." % (t_dataset.len))

word2id = mappings['word2id']
id2word = mappings['id2word']
Example 2
import os
import pickle

# Reuse the cached dataset if it exists; rebuild it from the raw text files
# otherwise. The opening `if` and the cache-loading lines are assumptions:
# the excerpt was cut off, and the fields mirror the `state` dict built in
# the `else` branch below. args, lower and parameters are defined earlier in
# the original script.
if os.path.exists(args.data_path + '/dataset.pth'):
    with open(args.data_path + '/dataset.pth', 'rb') as f:
        state = pickle.load(f)
    words = state['words']
    r_dataset = state['r_dataset']
    v_dataset = state['v_dataset']
    t_dataset = state['t_dataset']
else:
    words = []
    r_words, r_dataset = load_file_with_terms(args.data_path + '/train.txt')
    words.extend(r_words)
    v_words, v_dataset = load_file_with_terms(args.data_path + '/valid.txt')
    t_words, t_dataset = load_file_with_terms(args.data_path + '/test.txt')
    state = {
        'words': words,
        'r_dataset': r_dataset,
        'v_dataset': v_dataset,
        't_dataset': t_dataset
    }
    with open(args.data_path + '/dataset.pth', 'wb') as f:
        pickle.dump(state, f)

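# Build word/id mappings over the corpus vocabulary and record the ids of
# the special tokens.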
mappings, words_freq = prepare_mapping(words, lower, args.freq)
parameters['unk_id'] = mappings['word2id']['<unk>']
parameters['sos_id'] = mappings['word2id']['<sos>']
parameters['eos_id'] = mappings['word2id']['<eos>']

# Index data
r_dataset = AssembleMem(r_dataset, mappings['word2id'], lower, args.batch_size,
                        args.max_len, parameters['gpu'])
v_dataset = AssembleMem(v_dataset, mappings['word2id'], lower, args.batch_size,
                        args.max_len, parameters['gpu'])
print("%i / %i pairs in train / dev." % (r_dataset.len, v_dataset.len))

word2id = mappings['word2id']
id2word = mappings['id2word']
vocab_size = len(mappings['id2word'])
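
Since word2id and id2word are used as plain dictionaries above, a quick sanity check is to round-trip a few tokens through them. A minimal sketch, assuming dict-like mappings; the sample tokens are made up, everything else is defined above:

# Hypothetical sanity check: unknown words fall back to the <unk> id
# recorded in parameters above (assumes dict-like mappings).
sample = ['<sos>', 'some', 'word', '<eos>']
ids = [word2id.get(w, parameters['unk_id']) for w in sample]
print([id2word[i] for i in ids])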