Example no. 1
def getData(recordId):
	# Query the datastore for at most one entity whose rec_id matches.
	q = RemoteModel.all()
	q.filter("rec_id =", recordId)
	results = q.fetch(1)
	return results
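Both getData above and putData in Example no. 3 use the old Google App Engine datastore API (google.appengine.ext.db), but the RemoteModel class itself is not shown in these examples. A minimal sketch of what it might look like, assuming both fields are stored as strings:

from google.appengine.ext import db

class RemoteModel(db.Model):
	# Hypothetical property types: the real model could use IntegerProperty
	# or another type, depending on what recordId and direction hold.
	rec_id = db.StringProperty()
	rec_direction = db.StringProperty()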
Example no. 2
def passage_train_iters(n_words, t_text_tensor, t_text, t_sent_ids, t_pos, t_passages, pos_vocab, t_ent, ent_vocab,
                        t_case):
    n_epoch = opts.epochs
    criterion = nn.NLLLoss()

    using_sub_model = True

    if debugging:
        model = RNNModel(n_words, pos_vocab.n_words, ent_vocab.n_words, use_pretrain=False).to(device)
    else:
        model = RNNModel(n_words, pos_vocab.n_words, ent_vocab.n_words, use_pretrain=use_embedding).to(device)
    a_model = AModel().to(device)
    label_model = LabelModel(labels).to(device)

    model_optimizer = optim.Adam(model.parameters(), betas=(0.9, 0.9))
    a_model_optimizer = optim.Adam(a_model.parameters(), betas=(0.9, 0.9))
    label_model_optimizer = optim.Adam(label_model.parameters(), betas=(0.9, 0.9))

    if using_sub_model:
        s_model = SubModel(pos_vocab.n_words, ent_vocab.n_words).to(device)
        s_model_optimizer = optim.Adam(s_model.parameters(), betas=(0.9, 0.9))
    else:
        # placeholder so the same arguments can still be passed to train_f_passage below
        s_model = s_model_optimizer = "sub_lstm_model"

    if predict_remote:
        rm_model = RemoteModel().to(device)
        rm_model_optimizer = optim.Adam(rm_model.parameters(), betas=(0.9, 0.9))
        rm_lstm_model = copy.deepcopy(model)
        rm_lstm_model.lstm.flatten_parameters()
        rm_lstm_optimizer = optim.Adam(rm_lstm_model.parameters(), betas=(0.9, 0.9))
    else:
        rm_model = rm_model_optimizer = "remote_model"
        rm_lstm_model = rm_lstm_optimizer = "rm_lstm_model"

    best_score = 0

    split_num = 3701
    # split_num = 52
    train_dev_split = 4113

    training_data = list(zip(t_sent_ids, t_text_tensor, t_text, t_passages, t_pos, t_ent, t_case))

    if testing_phase:
        cr_training = training_data[:train_dev_split]
        cr_validation = training_data[train_dev_split:]
        logger.info("num of training: %d" % len(cr_training))
        logger.info("num of dev: %d" % len(cr_validation))
    elif not debugging:
        if opts.shuffle_val:
            random.shuffle(training_data)
        # validation
        cr_training = training_data[:split_num]
        cr_validation = training_data[split_num:]
        logger.info("num of training: %d" % len(cr_training))
        logger.info("num of validation: %d" % len(cr_validation))
    else:
        # debugging
        if opts.do_val:
            debugging_split = int(len(t_passages) * 0.9)
            cr_training = training_data[:debugging_split]
            cr_validation = training_data[debugging_split:]
        else:
            cr_training = training_data[:]
            cr_validation = cr_training
        logger.info("num of training: %d" % len(cr_training))
        logger.info("num of validation: %d" % len(cr_validation))

    sent_ids, train_text_tensor, train_text, train_passages, train_pos, train_ent, train_case = zip(*cr_training)
    val_ids, val_text_tensor, val_text, val_passages, val_pos, val_ent, val_case = zip(*cr_validation)

    # prepare pos tagging data
    train_pos_tensor = get_pos_tensor(pos_vocab, train_pos)
    val_pos_tensor = get_pos_tensor(pos_vocab, val_pos)

    train_ent_tensor = get_ent_tensor(ent_vocab, train_ent)
    val_ent_tensor = get_ent_tensor(ent_vocab, val_ent)

    train_case_tensor = get_case_tensor(train_case)
    val_case_tensor = get_case_tensor(val_case)

    for epoch in range(1, n_epoch + 1):
        start_i = time.time()

        # TODO: add batch
        total_loss = 0
        num = 0

        training_data = list(zip(sent_ids, train_text_tensor,
                                 train_text, train_passages, train_pos, train_pos_tensor, train_ent,
                                 train_ent_tensor, train_case_tensor))

        if not debugging:
            random.shuffle(training_data)

        sent_ids, train_text_tensor, train_text, train_passages, train_pos,\
            train_pos_tensor, train_ent, train_ent_tensor, train_case_tensor = zip(*training_data)

        model.train()
        a_model.train()
        label_model.train()
        if using_sub_model:
            s_model.train()
        if predict_remote:
            rm_model.train()
            rm_lstm_model.train()

        for sent_id, sent_tensor, train_passage, ori_sent, pos, pos_tensor, ent, ent_tensor, case_tensor in \
                tqdm(zip(sent_ids, train_text_tensor, train_passages, train_text, train_pos, train_pos_tensor,
                    train_ent, train_ent_tensor, train_case_tensor), total=len(train_passages)):

            # debugging
            # print(train_passage.layers)
            # print(sent_id)
            if testing_phase:
                assert int(sent_id) < 672010, "training data only"

            if not debugging or opts.ignore_error:
                try:
                    loss = train_f_passage(train_passage, sent_tensor, model, model_optimizer, a_model,
                                           a_model_optimizer, label_model, label_model_optimizer, s_model,
                                           s_model_optimizer, rm_model, rm_model_optimizer, rm_lstm_model,
                                           rm_lstm_optimizer, criterion, ori_sent,
                                           pos, pos_tensor, ent, ent_tensor, case_tensor, unroll)
                    total_loss += loss
                    num += 1
                except Exception as e:
                    # skip sentences that raise during training; uncomment to log them
                    # logger.info("sent: %s has training error: %s" % (str(sent_id), e))
                    pass
            else:
                loss = train_f_passage(train_passage, sent_tensor, model, model_optimizer, a_model,
                                       a_model_optimizer, label_model, label_model_optimizer, s_model,
                                       s_model_optimizer, rm_model, rm_model_optimizer, rm_lstm_model,
                                       rm_lstm_optimizer, criterion, ori_sent,
                                       pos, pos_tensor, ent, ent_tensor, case_tensor, unroll)
                total_loss += loss
                num += 1

            # if num % 1000 == 0:
            #     logger.info("%d finished" % num)

        logger.info("Loss for epoch %d: %.4f" % (epoch, total_loss / num))
        end_i = time.time()
        logger.info("training time elapsed: %.2fs" % (end_i - start_i))

        writer.add_scalar('loss', total_loss / num, epoch)
        # writer.add_text('loss', 'loss at epoch %d: %d' % (total_loss / num, epoch))

        model.eval()
        a_model.eval()
        label_model.eval()
        if using_sub_model:
            s_model.eval()
        if predict_remote:
            rm_model.eval()
            rm_lstm_model.eval()

        labeled_f1, unlabeled_f1, labeled_f1_remote, unlabeled_f1_remote = \
            get_validation_accuracy(val_text_tensor, model, a_model, label_model, s_model,
                                    rm_model, rm_lstm_model, val_text, val_passages, val_pos, val_pos_tensor,
                                    labels, label2index, val_ent, val_ent_tensor,
                                    val_case_tensor, unroll, eval_type="labeled")

        logger.info("validation f1 labeled: %.4f" % labeled_f1)
        logger.info("validation f1 unlabeled: %.4f" % unlabeled_f1)
        logger.info("validation f1 labeled_remote: %.4f" % labeled_f1_remote)
        logger.info("validation f1 unlabeled_remote: %.4f" % unlabeled_f1_remote)
        logger.info("")

        writer.add_scalar('labeled_f1', labeled_f1 * 100, epoch)
        # writer.add_text('labeled_f1', 'labeled_f1 at epoch %d: %d' % (labeled_f1, epoch))
        writer.add_scalar('unlabeled_f1', unlabeled_f1 * 100, epoch)
        # writer.add_text('unlabeled_f1', 'unlabeled_f1 at epoch %d: %d' % (unlabeled_f1, epoch))
        writer.add_scalar('labeled_f1_remote', labeled_f1_remote * 100, epoch)
        # writer.add_text('labeled_f1_remote', 'labeled_f1_remote at epoch %d: %d' % (labeled_f1_remote, epoch))
        writer.add_scalar('unlabeled_f1_remote', unlabeled_f1_remote * 100, epoch)
        # writer.add_text('unlabeled_f1_remote', 'unlabeled_f1_remote at epoch %d: %d' % (unlabeled_f1_remote, epoch))

        if not opts.not_save:
            if labeled_f1 > best_score:
                best_score = labeled_f1
                save_test_model(model, a_model, label_model, s_model, rm_model, rm_lstm_model, n_words,
                                pos_vocab.n_words, ent_vocab.n_words, epoch, labeled_f1, opts.save_dir)

            # # save every 10 epochs
            # if testing_phase:
            #     if epoch % 10 == 0:
            #         save_test_model(model, a_model, label_model, s_model, rm_model, n_words, pos_vocab.n_words,
            #                         ent_vocab.n_words, epoch, labeled_f1, opts.save_dir)
    writer.close()
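The function above follows the usual PyTorch epoch loop: put the models in train mode, accumulate a per-sample loss, switch to eval mode for validation, and log scalars through a TensorBoard SummaryWriter. A stripped-down, self-contained sketch of that pattern, with a toy linear model and random tensors standing in for the real RNNModel/AModel stack and passage data:

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter

def train_iters_sketch(n_epoch=3):
    # Toy regression model and synthetic data, standing in for the real models and passages.
    model = nn.Linear(10, 1)
    optimizer = optim.Adam(model.parameters(), betas=(0.9, 0.9))
    criterion = nn.MSELoss()
    writer = SummaryWriter()

    train_x, train_y = torch.randn(100, 10), torch.randn(100, 1)
    val_x, val_y = torch.randn(20, 10), torch.randn(20, 1)

    for epoch in range(1, n_epoch + 1):
        model.train()
        total_loss, num = 0.0, 0
        for x, y in zip(train_x, train_y):   # per-sample loop, as in the function above
            optimizer.zero_grad()
            loss = criterion(model(x), y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num += 1
        writer.add_scalar('loss', total_loss / num, epoch)

        model.eval()
        with torch.no_grad():                # validation pass without gradients
            val_loss = criterion(model(val_x), val_y).item()
        writer.add_scalar('val_loss', val_loss, epoch)

    writer.close()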
Example no. 3
def putData(recordId, direction):
	# Store (or overwrite) an entity keyed by '<recordId>_<direction>'.
	d = RemoteModel(key_name='%s_%s' % (recordId, direction))
	d.rec_id = recordId
	d.rec_direction = direction
	d.put()
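A hypothetical round trip with these two helpers, assuming the RemoteModel sketch after Example no. 1 and an App Engine environment:

putData('42', 'outbound')    # stores/overwrites the entity keyed '42_outbound'
matches = getData('42')      # list with at most one entity whose rec_id is '42'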