def getData(recordId):
    # RemoteModel here is assumed to be a google.appengine.ext.db.Model
    # defined elsewhere in this repo.
    q = RemoteModel.all()
    q.filter("rec_id =", recordId)
    results = q.fetch(1)  # list with at most one matching entity
    return results
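
# Design note: putData (below) keys each entity by '<recordId>_<direction>',
# so a direct key lookup is also possible and skips the datastore query
# entirely. A minimal sketch; getDataByKey is a hypothetical helper, not
# part of the original code:
def getDataByKey(recordId, direction):
    return RemoteModel.get_by_key_name('%s_%s' % (recordId, direction))
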
import copy
import random
import time

import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

# Module-level names used below (opts, device, debugging, testing_phase,
# predict_remote, use_embedding, labels, label2index, unroll, logger, writer)
# and the model/helper definitions (RNNModel, AModel, LabelModel, SubModel,
# RemoteModel -- here a torch.nn.Module, apparently distinct from the
# datastore model of the same name above -- train_f_passage,
# get_validation_accuracy, save_test_model, get_pos_tensor, get_ent_tensor,
# get_case_tensor) are assumed to be defined elsewhere in the repo.


def passage_train_iters(n_words, t_text_tensor, t_text, t_sent_ids, t_pos, t_passages,
                        pos_vocab, t_ent, ent_vocab, t_case):
    n_epoch = opts.epochs
    criterion = nn.NLLLoss()
    using_sub_model = True

    # main encoder: skip pretrained embeddings while debugging
    if debugging:
        model = RNNModel(n_words, pos_vocab.n_words, ent_vocab.n_words,
                         use_pretrain=False).to(device)
    else:
        model = RNNModel(n_words, pos_vocab.n_words, ent_vocab.n_words,
                         use_pretrain=use_embedding).to(device)
    a_model = AModel().to(device)
    label_model = LabelModel(labels).to(device)
    model_optimizer = optim.Adam(model.parameters(), betas=(0.9, 0.9))
    a_model_optimizer = optim.Adam(a_model.parameters(), betas=(0.9, 0.9))
    label_model_optimizer = optim.Adam(label_model.parameters(), betas=(0.9, 0.9))

    if using_sub_model:
        s_model = SubModel(pos_vocab.n_words, ent_vocab.n_words).to(device)
        s_model_optimizer = optim.Adam(s_model.parameters(), betas=(0.9, 0.9))
    else:
        # placeholder strings keep the downstream call signatures uniform
        s_model = s_model_optimizer = "sub_lstm_model"

    if predict_remote:
        rm_model = RemoteModel().to(device)
        rm_model_optimizer = optim.Adam(rm_model.parameters(), betas=(0.9, 0.9))
        # the remote-edge LSTM starts as a copy of the main encoder;
        # flatten_parameters() re-compacts the copied LSTM weights in memory
        rm_lstm_model = copy.deepcopy(model)
        rm_lstm_model.lstm.flatten_parameters()
        rm_lstm_optimizer = optim.Adam(rm_lstm_model.parameters(), betas=(0.9, 0.9))
    else:
        rm_model = rm_model_optimizer = "remote_model"
        rm_lstm_model = rm_lstm_optimizer = "rm_lstm_model"

    best_score = 0
    # dataset-specific split points
    split_num = 3701
    # split_num = 52
    train_dev_split = 4113

    training_data = list(zip(t_sent_ids, t_text_tensor, t_text, t_passages, t_pos, t_ent, t_case))
    if testing_phase:
        cr_training = training_data[:train_dev_split]
        cr_validation = training_data[train_dev_split:]
        logger.info("num of training: %d" % len(cr_training))
        logger.info("num of dev: %d" % len(cr_validation))
    elif not debugging:
        if opts.shuffle_val:
            random.shuffle(training_data)
        # validation
        cr_training = training_data[:split_num]
        cr_validation = training_data[split_num:]
        logger.info("num of training: %d" % len(cr_training))
        logger.info("num of validation: %d" % len(cr_validation))
    else:
        # debugging
        if opts.do_val:
            debugging_split = int(len(t_passages) * 0.9)
            cr_training = training_data[:debugging_split]
            # was sliced from cr_training, which is empty past
            # debugging_split; take the tail of the full list instead
            cr_validation = training_data[debugging_split:]
        else:
            cr_training = training_data[:]
            cr_validation = cr_training
        logger.info("num of training: %d" % len(cr_training))
        logger.info("num of validation: %d" % len(cr_validation))

    sent_ids, train_text_tensor, train_text, train_passages, train_pos, train_ent, train_case = \
        zip(*cr_training)
    val_ids, val_text_tensor, val_text, val_passages, val_pos, val_ent, val_case = \
        zip(*cr_validation)

    # prepare pos tagging data
    train_pos_tensor = get_pos_tensor(pos_vocab, train_pos)
    val_pos_tensor = get_pos_tensor(pos_vocab, val_pos)
    train_ent_tensor = get_ent_tensor(ent_vocab, train_ent)
    val_ent_tensor = get_ent_tensor(ent_vocab, val_ent)
    train_case_tensor = get_case_tensor(train_case)
    val_case_tensor = get_case_tensor(val_case)

    for epoch in range(1, n_epoch + 1):
        start_i = time.time()
        # TODO: add batch
        total_loss = 0
        num = 0
        training_data = list(zip(sent_ids, train_text_tensor, train_text, train_passages,
                                 train_pos, train_pos_tensor, train_ent, train_ent_tensor,
                                 train_case_tensor))
        if not debugging:
            random.shuffle(training_data)
        sent_ids, train_text_tensor, train_text, train_passages, train_pos, \
            train_pos_tensor, train_ent, train_ent_tensor, train_case_tensor = zip(*training_data)

        model.train()
        a_model.train()
        label_model.train()
        if using_sub_model:
            s_model.train()
        if predict_remote:
            rm_model.train()
            rm_lstm_model.train()

        for sent_id, sent_tensor, train_passage, ori_sent, pos, pos_tensor, ent, ent_tensor, case_tensor in \
                tqdm(zip(sent_ids, train_text_tensor, train_passages, train_text, train_pos,
                         train_pos_tensor, train_ent, train_ent_tensor, train_case_tensor),
                     total=len(train_passages)):
            # debugging
            # print(train_passage.layers)
            # print(sent_id)
            if testing_phase:
                assert int(sent_id) < 672010, "training data only"
            if not debugging or opts.ignore_error:
                try:
                    loss = train_f_passage(train_passage, sent_tensor, model, model_optimizer,
                                           a_model, a_model_optimizer, label_model,
                                           label_model_optimizer, s_model, s_model_optimizer,
                                           rm_model, rm_model_optimizer, rm_lstm_model,
                                           rm_lstm_optimizer, criterion, ori_sent, pos,
                                           pos_tensor, ent, ent_tensor, case_tensor, unroll)
                    total_loss += loss
                    num += 1
                except Exception as e:
                    # logger.info("sent: %s has training error: %s" % (str(sent_id), e))
                    pass
            else:
                loss = train_f_passage(train_passage, sent_tensor, model, model_optimizer,
                                       a_model, a_model_optimizer, label_model,
                                       label_model_optimizer, s_model, s_model_optimizer,
                                       rm_model, rm_model_optimizer, rm_lstm_model,
                                       rm_lstm_optimizer, criterion, ori_sent, pos,
                                       pos_tensor, ent, ent_tensor, case_tensor, unroll)
                total_loss += loss
                num += 1
            # if num % 1000 == 0:
            #     logger.info("%d finished" % num)

        logger.info("Loss for epoch %d: %.4f" % (epoch, total_loss / num))
        end_i = time.time()
        logger.info("training time elapsed: %.2fs" % (end_i - start_i))
        writer.add_scalar('loss', total_loss / num, epoch)

        model.eval()
        a_model.eval()
        label_model.eval()
        if using_sub_model:
            s_model.eval()
        if predict_remote:
            rm_model.eval()
            rm_lstm_model.eval()

        labeled_f1, unlabeled_f1, labeled_f1_remote, unlabeled_f1_remote = \
            get_validation_accuracy(val_text_tensor, model, a_model, label_model, s_model,
                                    rm_model, rm_lstm_model, val_text, val_passages, val_pos,
                                    val_pos_tensor, labels, label2index, val_ent,
                                    val_ent_tensor, val_case_tensor, unroll,
                                    eval_type="labeled")
        logger.info("validation f1 labeled: %.4f" % labeled_f1)
        logger.info("validation f1 unlabeled: %.4f" % unlabeled_f1)
        logger.info("validation f1 labeled_remote: %.4f" % labeled_f1_remote)
        logger.info("validation f1 unlabeled_remote: %.4f" % unlabeled_f1_remote)
        logger.info("")
        writer.add_scalar('labeled_f1', labeled_f1 * 100, epoch)
        writer.add_scalar('unlabeled_f1', unlabeled_f1 * 100, epoch)
        writer.add_scalar('labeled_f1_remote', labeled_f1_remote * 100, epoch)
        writer.add_scalar('unlabeled_f1_remote', unlabeled_f1_remote * 100, epoch)

        if not opts.not_save:
            # checkpoint only when labeled F1 improves (the original compared
            # against 0, which saved every epoch and left best_score unused)
            if labeled_f1 > best_score:
                best_score = labeled_f1
                save_test_model(model, a_model, label_model, s_model, rm_model, rm_lstm_model,
                                n_words, pos_vocab.n_words, ent_vocab.n_words, epoch,
                                labeled_f1, opts.save_dir)
        # # save every 10 epochs
        # if testing_phase:
        #     if epoch % 10 == 0:
        #         save_test_model(model, a_model, label_model, s_model, rm_model, n_words,
        #                         pos_vocab.n_words, ent_vocab.n_words, epoch, labeled_f1,
        #                         opts.save_dir)

    writer.close()
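
# A self-contained sketch of the per-epoch train/eval pattern that
# passage_train_iters follows, with a toy linear classifier standing in for
# RNNModel and random tensors standing in for the preprocessed passages.
# Everything below is illustrative only, not part of the original pipeline.
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


def _epoch_pattern_sketch(n_epoch=3):
    toy_model = nn.Linear(8, 4)
    optimizer = optim.Adam(toy_model.parameters(), betas=(0.9, 0.9))
    nll = nn.NLLLoss()
    data = [(torch.randn(8), torch.tensor(i % 4)) for i in range(16)]
    for epoch in range(1, n_epoch + 1):
        toy_model.train()  # training mode, as with model.train() above
        total_loss, num = 0.0, 0
        random.shuffle(data)  # per-epoch shuffle, as above
        for x, y in data:
            optimizer.zero_grad()
            log_probs = F.log_softmax(toy_model(x), dim=-1)  # NLLLoss expects log-probs
            loss = nll(log_probs.unsqueeze(0), y.unsqueeze(0))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num += 1
        toy_model.eval()  # eval mode before scoring, as above
        print("epoch %d loss %.4f" % (epoch, total_loss / num))
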
def putData(recordId, direction):
    # upsert keyed on '<recordId>_<direction>' so repeated puts for the same
    # record/direction pair overwrite rather than duplicate
    d = RemoteModel(key_name='%s_%s' % (recordId, direction))
    d.rec_id = recordId
    d.rec_direction = direction
    d.put()
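
# A minimal usage sketch of the two datastore helpers (hypothetical record
# id and direction; assumes an App Engine runtime where RemoteModel, a
# db.Model with rec_id and rec_direction properties, is available):
def _example_put_get():
    putData('r42', 'inbound')   # writes/overwrites the keyed entity
    rows = getData('r42')       # queries by rec_id; at most one result
    if rows:
        print(rows[0].rec_direction)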