def test():
    """Evaluate the model on the dev set, printing the loss and the mean
    absolute distance between predicted and true begin/end indices.

    Relies on module-level names: DataProvider, get_model_and_optimizer,
    batch_size.
    """
    provider = DataProvider()
    model, optimizer = get_model_and_optimizer()
    criterion = nn.CrossEntropyLoss()
    model.eval()  # fix: evaluation must not run in training mode
    with torch.no_grad():  # fix: no gradients needed during evaluation
        for docs, quests, begin_idxs, end_idxs in provider.dev_batch(
                batch_size=batch_size):
            if torch.cuda.is_available():
                docs = docs.cuda()
                quests = quests.cuda()
                begin_idxs = begin_idxs.cuda()
                end_idxs = end_idxs.cuda()
            # (removed pointless model.zero_grad(): no backward pass happens here)
            begin_idxs_out, end_idxs_out = model(docs, quests)
            # Mean absolute index error — a rough proxy for accuracy.
            # TODO How to calculate accuracy? (exact-match / F1 would be better)
            begin_idxs_diff = torch.mean(
                torch.abs(torch.argmax(begin_idxs_out, dim=1) - begin_idxs).double())
            end_idxs_diff = torch.mean(
                torch.abs(torch.argmax(end_idxs_out, dim=1) - end_idxs).double())
            loss = criterion(begin_idxs_out, begin_idxs) + criterion(
                end_idxs_out, end_idxs)
            print(f'Loss: {loss}')
            print(f'begin/end idx diff: {begin_idxs_diff}, {end_idxs_diff}')
def submit():
    # NOTE(review): this appears to be an abandoned draft of the submission
    # routine — a later, complete submit() exists in this file. Left
    # byte-identical; see the FIXME notes below before reviving it.
    data_provider = DataProvider()
    model, _ = get_model_and_optimizer()
    # NOTE(review): `id` shadows the builtin — rename if this code is revived.
    for quests, docs, raw_docs, idx_maps, id in data_provider.test_batch():
        if torch.cuda.is_available():
            quests = quests.cuda()
            docs = docs.cuda()
        begin_idxs_out, end_idxs_out = model(docs, quests)
        # Predicted begin/end token positions for each doc in the batch.
        begin_idxs = torch.argmax(begin_idxs_out, dim=1).tolist()
        end_idxs = torch.argmax(end_idxs_out, dim=1).tolist()
        answers = []
        for d, bi, ei, idx_map in zip(raw_docs, begin_idxs, end_idxs, idx_maps):
            # idx_map translates token indices back to raw-text offsets so the
            # answer span can be sliced out of the raw document.
            raw_ans = d[idx_map[bi]:idx_map[ei] + 1]
            print(f'doc: {d}')
            print(f'answer: {raw_ans}')
            answers.append(raw_ans)
        input()  # pause for manual inspection of each prediction
        # Construct json
        pred = {}
        pred['yesno_answers'] = []
        # FIXME(review): `raw_question` and `raw_json` are undefined in this
        # scope — reaching these lines raises NameError, and the dict is never
        # written anywhere. The working version builds these fields from the
        # batch's raw data.
        pred['question'] = raw_question
        pred['question_type'] = raw_json
def train(epochs):
    """Train the QA model for up to `epochs` epochs, saving a checkpoint every
    `save_per_steps` batches and resuming from `checkpoint_path` if present.

    Relies on module-level names: DataProvider, get_model_and_optimizer,
    checkpoint_path, batch_size, save_per_steps.
    """
    e = 0
    cnt = 0
    provider = DataProvider()
    model, optimizer = get_model_and_optimizer()
    criterion = nn.CrossEntropyLoss()
    try:
        checkpoint = torch.load(checkpoint_path)
        e = checkpoint['epoch']
        # fix: the checkpoint stores the model/optimizer state (see torch.save
        # below) but it was never restored — training silently restarted from
        # freshly initialized weights on every resume.
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    except (FileNotFoundError, KeyError):
        # fix: bare `except:` also swallowed KeyboardInterrupt/SystemExit.
        print('No checkpoint found.')
    while e < epochs:
        print(f'Epoch: {e}')
        for docs, quests, begin_idxs, end_idxs in provider.train_batch(
                batch_size=batch_size):
            if torch.cuda.is_available():
                docs = docs.cuda()
                quests = quests.cuda()
                begin_idxs = begin_idxs.cuda()
                end_idxs = end_idxs.cuda()
            model.zero_grad()
            print(f'docs len: {docs.shape[1]} ', end='')
            try:
                begin_idxs_out, end_idxs_out = model(docs, quests)
                # Joint loss over the begin-index and end-index heads.
                loss = criterion(begin_idxs_out, begin_idxs) + criterion(
                    end_idxs_out, end_idxs)
                loss.backward()
                optimizer.step()
            except RuntimeError:
                # fix: narrowed from a bare `except:` — best-effort skip of
                # batches the model cannot process (e.g. OOM / size mismatch).
                print('Error when feed into model.')
                continue
            print(f'Loss: {loss}')  # FIXME
            with open('loss.log', 'a') as f:
                f.write(f'{loss}\n')
            cnt += 1
            if cnt == save_per_steps:
                cnt = 0
                torch.save(
                    {
                        'epoch': e,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                    }, checkpoint_path)
                print(f'Save model at epoch {e}')
        e += 1
def test():
    """Run the dev set through the model(s) and write JSON-lines `pred` and
    (optionally) `ref` files for the external evaluation script.

    Relies on module-level names: DataProvider, get_model_and_optimizer,
    ref_file, pred_file, use_yesorno_model, yesorno_checkpoint_path.
    """
    new_ref = True
    if os.path.exists(ref_file):
        print('Find old ref file. Want to create a new one? (y/n)', end='')
        a = input()
        if a == 'y':
            os.remove(ref_file)
            print('Remove old ref file')
        elif a == 'n':
            new_ref = False  # keep the old ref file; only regenerate preds
        else:
            print('Invalid input')
            return
    if os.path.exists(pred_file):
        os.remove(pred_file)
        print('Remove old pred file.')
    data_provider = DataProvider()
    model, _ = get_model_and_optimizer()
    yesorno_model = None
    if use_yesorno_model:
        # fix: previously the yes/no model was loaded unconditionally even when
        # unused — now consistent with submit().
        yesorno_model, _ = get_model_and_optimizer(yesorno_checkpoint_path)
    model.eval()  # fix: evaluation must not run in training mode
    with torch.no_grad():  # fix: no gradients needed during evaluation
        for (docs, quests, begin_idxs, end_idxs, raw_docs, idx_maps,
             raw_datas) in data_provider.dev_batch(batch_size=2, get_raw=True):
            if torch.cuda.is_available():
                quests = quests.cuda()
                docs = docs.cuda()
            begin_idxs_out, end_idxs_out = model(docs, quests)
            begin_idxs_pred = torch.argmax(begin_idxs_out, dim=1).tolist()
            end_idxs_pred = torch.argmax(end_idxs_out, dim=1).tolist()
            answer_preds = []
            if use_yesorno_model:
                begin_idxs_yn_out, end_idxs_yn_out = yesorno_model(docs, quests)
                begin_idxs_yn_pred = torch.argmax(begin_idxs_yn_out, dim=1).tolist()
                end_idxs_yn_pred = torch.argmax(end_idxs_yn_out, dim=1).tolist()
                for d, bi, ei, bi_yn, ei_yn, idx_map, raw_data in zip(
                        raw_docs, begin_idxs_pred, end_idxs_pred,
                        begin_idxs_yn_pred, end_idxs_yn_pred, idx_maps,
                        raw_datas):
                    # use another model to answer yes or no questions
                    if raw_data['question_type'] == 'YES_NO':
                        raw_ans = d[idx_map[bi_yn]:idx_map[ei_yn] + 1]
                    else:
                        raw_ans = d[idx_map[bi]:idx_map[ei] + 1]
                    answer_preds.append(raw_ans)
            else:
                for d, bi, ei, idx_map in zip(raw_docs, begin_idxs_pred,
                                              end_idxs_pred, idx_maps):
                    # idx_map translates token indices to raw-text offsets.
                    answer_preds.append(d[idx_map[bi]:idx_map[ei] + 1])
            # TODO select the best answer according to softmax
            for answer, data in zip(answer_preds, raw_datas):
                # Construct pred.json; skip empty spans.
                if answer == '':
                    continue
                pred = {
                    'yesno_answers': [],
                    'question': data['question'],
                    'question_type': data['question_type'],
                    'answers': [answer],
                    'question_id': data['question_id'],
                }
                with open(pred_file, 'a') as f:
                    f.write(json.dumps(pred, ensure_ascii=False) + '\n')
                # Construct ref.json
                if new_ref:
                    ref = {
                        'yesno_answers': [],
                        'entity_answers': [[]],
                        'source': 'search',
                        'question': data['question'],
                        'question_type': data['question_type'],
                        'answers': data['answers'],
                        'question_id': data['question_id'],
                    }
                    with open(ref_file, 'a') as f:
                        f.write(json.dumps(ref, ensure_ascii=False) + '\n')
# --- Hyper-parameters and script-level setup for the char-RNN trainer ---
learning_rate = 0.01
embedding_dim = 256
hidden_size = 128
num_layers = 1
num_epochs = 10000
train_size = 0.8  # NOTE(review): presumably the train/dev split ratio — confirm DataProvider uses it
batch_size = 128
save_per_num_steps = 20  # checkpoint frequency, in steps
included_extensions = ['.json']
# files = [directory + '/' + fn for fn in os.listdir(directory) if any(fn.endswith(ext) for ext in included_extensions)]
files = ['raw_data/out.json']
random.shuffle(files)
print('Preparing data...')
provider = DataProvider(files, batch_size=batch_size, padding_value=0)
vocab = provider.vocab
vocab_size = len(vocab)
print('Vocab size: ', vocab_size)
# target_size == vocab_size: the model predicts a distribution over the
# vocabulary (presumably next-character prediction — confirm in CharRNN).
model = CharRNN(vocab_size=vocab_size, target_size=vocab_size, embedding_dim=embedding_dim, hidden_size=hidden_size, num_layers=num_layers)
# Padded positions are excluded from the loss via ignore_index.
criterion = nn.NLLLoss(ignore_index=vocab.padding_idx)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
# optimizer = optim.Adadelta(model.parameters(), lr=learning_rate)
def submit():
    """Run the test set through the model(s) and append one prediction JSON
    object per question to `submit_file`.

    Relies on module-level names: DataProvider, get_model_and_optimizer,
    submit_file, use_yesorno_model, yesorno_checkpoint_path.
    """
    if os.path.exists(submit_file):
        print('Find old submit file. Want to create a new one? (y/n) ', end='')
        a = input()
        if a == 'y':
            os.remove(submit_file)
            print('Remove old submit file')
        elif a == 'n':
            # fix: the old code set `new_ref = False` here — a dead copy-paste
            # vestige from test(); the variable was never read. Answering 'n'
            # simply keeps appending to the existing file.
            pass
        else:
            print('Invalid input')
            return
    data_provider = DataProvider()
    model, _ = get_model_and_optimizer()
    yesorno_model = None
    if use_yesorno_model:
        yesorno_model, _ = get_model_and_optimizer(yesorno_checkpoint_path)
    cnt = 0
    model.eval()  # fix: inference must not run in training mode
    with torch.no_grad():  # fix: no gradients needed at inference time
        # TODO Actually test_batch batch size is 1
        for quests, docs, raw_docs, idx_maps, raw_data in data_provider.test_batch():
            if torch.cuda.is_available():
                quests = quests.cuda()
                docs = docs.cuda()
            # Yes/no questions are routed to the dedicated model when enabled.
            if use_yesorno_model and raw_data['question_type'] == 'YES_NO':
                begin_idxs_out, end_idxs_out = yesorno_model(docs, quests)
            else:
                begin_idxs_out, end_idxs_out = model(docs, quests)
            begin_idxs = torch.argmax(begin_idxs_out, dim=1).tolist()
            end_idxs = torch.argmax(end_idxs_out, dim=1).tolist()
            answers = []
            for d, bi, ei, idx_map in zip(raw_docs, begin_idxs, end_idxs, idx_maps):
                # idx_map translates token indices to raw-text offsets so the
                # answer span can be sliced out of the raw document.
                answers.append(d[idx_map[bi]:idx_map[ei] + 1])
            # TODO select the best answer according to softmax
            # Construct json
            pred = {
                'yesno_answers': [],
                'question': raw_data['question'],
                'question_type': raw_data['question_type'],
                'answers': answers,
                'question_id': raw_data['question_id'],
            }
            pred_s = json.dumps(pred, ensure_ascii=False)
            with open(submit_file, 'a') as f:
                f.write(pred_s + '\n')
            cnt += 1
            print(f'\r {cnt} ', end='')
# TensorFlow (v1) graph construction + training driver, collapsed onto one line.
# - thresholded_logits: sigmoid outputs rounded to 0/1 via the `+ 0.5` uint8
#   cast trick, then cast back to float for comparison with the labels.
# - loss: sigmoid cross-entropy between label_placeholder and the raw logits
#   (label_placeholder / logits / eta / num_epochs are defined elsewhere).
# - accuracy: fraction of thresholded predictions equal to the labels.
# NOTE(review): the `with tf.Session()` body is truncated at the end of this
# chunk (the epoch/batch loop continues beyond view), so the statement is left
# byte-identical.
sigmoid_logits = tf.squeeze(tf.nn.sigmoid(logits), -1) thresholded_logits = tf.cast(tf.cast(sigmoid_logits + 0.5, tf.uint8), tf.float32) loss = tf.losses.sigmoid_cross_entropy(label_placeholder, logits) accuracy = tf.reduce_sum( tf.cast(tf.equal(thresholded_logits, label_placeholder), tf.float32)) / batch_size tf.summary.scalar('loss', loss) tf.summary.scalar('accuracy', accuracy) # optimizer train_op = tf.train.AdamOptimizer(eta).minimize(loss) data_provider = DataProvider(batch_size, [1, 5]) full_num_batches = data_provider.num_batches() if not os.path.isdir('summaries'): os.mkdir('summaries') merged = tf.summary.merge_all() with tf.Session() as sess: train_writer = tf.summary.FileWriter('summaries/model', sess.graph) sess.run(tf.global_variables_initializer()) # full training for epoch in range(num_epochs): for batch in range(full_num_batches): data, labels = data_provider.get_full_data()