Example #1
    def run(cls, eval_flag=True):
        """
        @param eval_flag: if eval_flag is True, the evaluation output is given.
        """
        ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        wsd = cls()
        TRAIN_DIR = os.path.join(ROOT, 'train/')
        TEST_DIR = os.path.join(ROOT, 'test/')
        TEST_NAME_FILE = os.path.join(ROOT, 'test/namefile')
        RESULT_PATH = os.path.join(ROOT, 'result/%s_result.txt' % cls.__name__)
        cls.result_path = RESULT_PATH

        # clear the file RESULT_PATH
        with open(RESULT_PATH, 'wb') as f:
            pass

        result_obj = open(RESULT_PATH, 'ab')

        count = 0
        test_words = cls.get_words(TEST_NAME_FILE)
        for word in test_words:
            test_path = os.path.join(TEST_DIR, word)
            train_path = os.path.join(TRAIN_DIR, word)
            features_label = wsd.load_features(train_path)
            wsd.train(features_label)
            result = wsd.classify(test_path)
            wsd.dump_result(result, result_obj)
            count += 1
            print 'Finish %d of %d: %s' % (count, len(test_words), word)
        result_obj.close()
        print 'Write testing results to %s' % RESULT_PATH
        if eval_flag:
            answerfile = os.path.join(ROOT, 'result/test_answer')
            evaluate(RESULT_PATH, answerfile)
        return None
Example #2
def main():

    HiddenNum = 15
    learningRate = 0.3
    itNum = 40

    bpt = bpTest()
    outfile = bpt.predict("../train/","../test/","../result/",HiddenNum,itNum,learningRate)
    util.evaluate(outfile, "../result/test_answer")
Example #3
def main():
    #trainfile = "../train/中医"
    #testfile = "../test/中医"
    #bpt = bpTest(trainfile, testfile)


    bpt = bpTest()
    outfile = bpt.predict("../train/","../test/","../result/")
    util.evaluate(outfile, "../result/test_answer")
Example #4
def evaluate_batch(data_source, model, max_batches, eval_file):
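    # Run the model over up to max_batches batches, accumulate the span loss,
    # collect the predicted answers via convert_tokens, and return the
    # evaluate() metrics with the average loss attached.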
    answer_dict = {}
    total_loss, step_cnt = 0, 0
    for step, data in enumerate(data_source):
        if step >= max_batches and max_batches > 0: break

        context_idxs = Variable(data['context_idxs'], volatile=True)
        ques_idxs = Variable(data['ques_idxs'], volatile=True)
        context_char_idxs = Variable(data['context_char_idxs'], volatile=True)
        ques_char_idxs = Variable(data['ques_char_idxs'], volatile=True)
        context_lens = Variable(data['context_lens'], volatile=True)
        y1 = Variable(data['y1'], volatile=True)
        y2 = Variable(data['y2'], volatile=True)

        graph = data['graph']
        graph_q = data['graph_q']

        elmo = data['elmo']
        elmo_q = data['elmo_q']
        if elmo is not None:
            elmo.volatile = True
            elmo_q.volatile = True

        logit1, logit2, yp1, yp2 = model(context_idxs, ques_idxs, context_char_idxs, ques_char_idxs, context_lens, return_yp=True, pre_att=graph, pre_att_q=graph_q, elmo=elmo, elmo_q=elmo_q)
        loss = criterion(logit1, y1) + criterion(logit2, y2)
        answer_dict_, _ = convert_tokens(eval_file, data['ids'], yp1.data.cpu().numpy().tolist(), yp2.data.cpu().numpy().tolist())
        answer_dict.update(answer_dict_)

        total_loss += loss.data[0]
        step_cnt += 1
    loss = total_loss / step_cnt
    metrics = evaluate(eval_file, answer_dict)
    metrics['loss'] = loss
    return metrics
Example #5
def evaluate_batch(model, num_batches, eval_file, sess, data_type, handle,
                   str_handle):
    answer_dict = {}
    losses = []
    for _ in tqdm(range(1, num_batches + 1)):
        qa_id, loss, yp1, yp2, = sess.run(
            [model.qa_id, model.loss, model.yp1, model.yp2],
            feed_dict={handle: str_handle})
        answer_dict_, _ = convert_tokens(eval_file, qa_id.tolist(),
                                         yp1.tolist(), yp2.tolist())
        answer_dict.update(answer_dict_)
        losses.append(loss)
    loss = np.mean(losses)
    metrics = evaluate(eval_file, answer_dict)
    metrics["loss"] = loss
    loss_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/loss".format(data_type),
                         simple_value=metrics["loss"]),
    ])
    f1_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/f1".format(data_type),
                         simple_value=metrics["f1"]),
    ])
    em_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/em".format(data_type),
                         simple_value=metrics["exact_match"]),
    ])
    return metrics, [loss_sum, f1_sum, em_sum]
Example #6
def test(config):
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    print("Loading model...")
    test_batch = get_dataset(config.test_record_file,
                             get_record_parser(config, is_test=True),
                             config).make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)
    graph_handler = GraphHandler(config, model)
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        #saver = tf.train.Saver()
        graph_handler.initialize(sess)
        #saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        remapped_dict = {}
        ensember_dict = {}
        for step in tqdm(range(total // config.batch_size + 1)):
            start_logits, stop_logits, qa_id, loss, yp1, yp2 = sess.run([
                model.start_logits, model.stop_logits, model.qa_id, model.loss,
                model.yp1, model.yp2
            ])
            answer_dict_, remapped_dict_ = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)
            losses.append(loss)
            start_logits = start_logits.tolist()
            stop_logits = stop_logits.tolist()
            for id, start, stop in zip(qa_id, start_logits, stop_logits):
                ensember_dict[str(id)] = {'yp1': start, 'yp2': stop}
        loss = np.mean(losses)
        metrics = evaluate(eval_file, answer_dict)
        answer_path = config.answer_file + "_" + str(config.load_step)
        with open(answer_path, "w") as fh:
            json.dump(remapped_dict, fh)
        print("Exact Match: {}, F1: {}".format(metrics['exact_match'],
                                               metrics['f1']))
        ensember_dict['loss'] = loss
        ensember_dict['exact_match'] = metrics['exact_match']
        ensember_dict['f1'] = metrics['f1']
        file_name = config.model_name + '_' + config.run_id + '.pklz'
        save_path = os.path.join(config.result_path, file_name)
        with gzip.open(save_path, 'wb', compresslevel=3) as fh:
            pickle.dump(ensember_dict, fh)
Example #7
def test(config, dataset="test"):
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)

    if dataset == "test":
        test_eval_file = config.test_eval_file
        test_meta = config.test_meta
        test_record_file = config.test_record_file
    elif dataset == "addsent":
        print('HELLO')
        test_eval_file = config.addsent_eval_file
        test_meta = config.addsent_meta
        test_record_file = config.addsent_record_file
    elif dataset == "addonesent":
        test_eval_file = config.addonesent_eval_file
        test_meta = config.addonesent_meta
        test_record_file = config.addonesent_record_file

    with open(test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    print("Loading model...")
    test_batch = get_dataset(test_record_file,
                             get_record_parser(config, is_test=True),
                             config).make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        remapped_dict = {}
        for step in tqdm(range(total // config.batch_size + 1)):
            qa_id, loss, yp1, yp2 = sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2])
            answer_dict_, remapped_dict_ = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)
            losses.append(loss)
        loss = np.mean(losses)
        metrics = evaluate(eval_file, answer_dict)
        with open(config.answer_file, "w") as fh:
            json.dump(remapped_dict, fh)
        print("Exact Match: {}, F1: {}".format(metrics['exact_match'],
                                               metrics['f1']))
Example #8
def evaluate_batch(data_source, model, max_batches, eval_file, config):
    answer_dict = {}
    sp_dict = {}
    total_loss, step_cnt = 0, 0
    iter = data_source
    for step, data in enumerate(iter):
        if step >= max_batches and max_batches > 0: break

        context_idxs = Variable(data['context_idxs'], volatile=True)
        ques_idxs = Variable(data['ques_idxs'], volatile=True)
        context_char_idxs = Variable(data['context_char_idxs'], volatile=True)
        ques_char_idxs = Variable(data['ques_char_idxs'], volatile=True)
        context_lens = Variable(data['context_lens'], volatile=True)
        y1 = Variable(data['y1'], volatile=True)
        y2 = Variable(data['y2'], volatile=True)
        q_type = Variable(data['q_type'], volatile=True)
        is_support = Variable(data['is_support'], volatile=True)
        start_mapping = Variable(data['start_mapping'], volatile=True)
        end_mapping = Variable(data['end_mapping'], volatile=True)
        all_mapping = Variable(data['all_mapping'], volatile=True)
        #
        subject_y1 = Variable(data['subject_y1'])
        subject_y2 = Variable(data['subject_y2'])
        object_y1 = Variable(data['object_y1'])
        object_y2 = Variable(data['object_y2'])
        relations = Variable(data['relations'])
        #
        #
        model_results = model(context_idxs, ques_idxs, context_char_idxs, ques_char_idxs, relations, \
            context_lens, start_mapping, end_mapping, all_mapping, return_yp=True)

        (logit1, logit2, predict_type, predict_support, logit_subject_start, logit_subject_end, \
            logit_object_start, logit_object_end, k_relations, loss_relation, yp1, yp2, sy1, sy2, oy1, oy2) = model_results
        loss_1 = (nll_sum(predict_type, q_type) + nll_sum(logit1, y1) +
                  nll_sum(logit2, y2)) / context_idxs.size(0)
        loss_2 = nll_average(predict_support.view(-1, 2), is_support.view(-1))
        loss_3_r = torch.sum(loss_relation)
        loss_3_s = (nll_sum(logit_subject_start, subject_y1) + nll_sum(
            logit_subject_end, subject_y2)) / context_idxs.size(0)
        loss_3_o = (nll_sum(logit_object_start, object_y1) + nll_sum(
            logit_object_end, object_y2)) / context_idxs.size(0)

        loss = loss_1 + config.sp_lambda * loss_2 + config.evi_lambda * (
            loss_3_s + loss_3_r + loss_3_o)

        answer_dict_ = convert_tokens(
            eval_file, data['ids'],
            yp1.data.cpu().numpy().tolist(),
            yp2.data.cpu().numpy().tolist(),
            np.argmax(predict_type.data.cpu().numpy(), 1))
        answer_dict.update(answer_dict_)

        total_loss += loss.item()  # total_loss += loss.data[0]
        step_cnt += 1
    loss = total_loss / step_cnt
    metrics = evaluate(eval_file, answer_dict)
    metrics['loss'] = loss

    return metrics
Example #9
def test(config):

    gpu_options = tf.GPUOptions(visible_device_list="2")
    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 gpu_options=gpu_options)
    sess_config.gpu_options.allow_growth = True

    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    print("Loading model...")
    test_batch = get_dataset(config.test_record_file,
                             get_record_parser(config, is_test=True),
                             config).make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        remapped_dict = {}

        # tqdm
        for step in tqdm(range(total // config.batch_size + 1)):
            qa_id, loss, yp1, yp2 = sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2])
            answer_dict_, remapped_dict_, outlier = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)
            losses.append(loss)
            print("\n", loss)
            if (loss > 50):
                for i, j, k in zip(qa_id.tolist(), yp1.tolist(), yp2.tolist()):
                    print(answer_dict[str(i)], j, k)
                #print("IDs: {} Losses: {} Yp1: {} Yp2: {}".format(qa_id.tolist(),\
                #	loss.tolist(), yp1.tolist(), yp2.tolist()))
        loss = np.mean(losses)

        # evaluate with answer_dict, but in evaluate-v1.1.py, evaluate with remapped_dict
        # since only that is saved. Both dict are a little bit different, check evaluate-v1.1.py
        metrics = evaluate(eval_file, answer_dict)
        with open(config.answer_file, "w") as fh:
            json.dump(remapped_dict, fh)
        print("Exact Match: {}, F1: {} Rouge-l-f: {} Rouge-l-p: {} Rouge-l-r: {}".format(\
         metrics['exact_match'], metrics['f1'], metrics['rouge-l-f'], metrics['rouge-l-p'],\
         metrics['rouge-l-r']))
Example #10
def train_model():
    rnn_clf = RNNSequenceClassifier(num_classes=2,
                                    embedding_dim=300 + 1024 + 50,
                                    hidden_size=300,
                                    num_layers=1,
                                    bidir=True,
                                    dropout1=0.3,
                                    dropout2=0.2,
                                    dropout3=0.2)
    # Move the model to the GPU if available
    if using_GPU:
        rnn_clf = rnn_clf.cuda()
    # Set up criterion for calculating loss
    nll_criterion = nn.NLLLoss()
    # Set up an optimizer for updating the parameters of the rnn_clf
    rnn_clf_optimizer = optim.SGD(rnn_clf.parameters(), lr=0.01, momentum=0.9)
    # Number of epochs (passes through the dataset) to train the model for.
    num_epochs = 20
    '''
    3. 2
    train model
    '''
    training_loss = []
    val_loss = []
    training_f1 = []
    val_f1 = []
    # A counter for the number of gradient updates
    num_iter = 0
    for epoch in tqdm(range(num_epochs)):
        # print("Starting epoch {}".format(epoch + 1))
        for (example_text, example_lengths, labels) in train_dataloader_vua:
            example_text = Variable(example_text)
            example_lengths = Variable(example_lengths)
            labels = Variable(labels)
            if using_GPU:
                example_text = example_text.cuda()
                example_lengths = example_lengths.cuda()
                labels = labels.cuda()
            # predicted shape: (batch_size, 2)
            predicted = rnn_clf(example_text, example_lengths)
            batch_loss = nll_criterion(predicted, labels)
            rnn_clf_optimizer.zero_grad()
            batch_loss.backward()
            rnn_clf_optimizer.step()
            num_iter += 1
            # Calculate validation and training set loss and accuracy every 200 gradient updates
            if num_iter % 200 == 0:
                avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1 = evaluate(
                    val_dataloader_vua, rnn_clf, nll_criterion, using_GPU)
                val_loss.append(avg_eval_loss)
                val_f1.append(f1)
                print(
                    "Iteration {}. Validation Loss {}. Accuracy {}. Precision {}. Recall {}. F1 {}. class-wise F1 {}."
                    .format(num_iter, avg_eval_loss, eval_accuracy, precision,
                            recall, f1, fus_f1))
                filename = f'../models/classification/VUA_iter_{str(num_iter)}.pt'
                torch.save(rnn_clf.state_dict(), filename)
    # print("Training done!")
    return rnn_clf, nll_criterion
Example #11
def evaluate_batch(model, num_batches, eval_file, sess, data_type, handle, str_handle, config):
	answer_dict = {}
	losses_esp, losses_pr, losses_ee = [], [], []
	outlier_count = 0
	for _ in tqdm(range(1, num_batches + 1)):
		if config.with_passage_ranking:
			qa_id, loss_esp, loss_pr, loss_ee, yp1, yp2, = sess.run(
				[model.qa_id, model.loss, model.pr_loss, model.e_loss, model.yp1, model.yp2],
				feed_dict={handle: str_handle})
		else:
			qa_id, loss_esp, yp1, yp2, = sess.run(
				[model.qa_id, model.loss, model.yp1, model.yp2],
				feed_dict={handle: str_handle})
		answer_dict_, _, outlier = convert_tokens(
			config, eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
		if outlier:
			outlier_count += 1
			continue
		answer_dict.update(answer_dict_)
		if loss_esp<100:
			losses_esp.append(loss_esp)
		print(loss_esp)
		if config.with_passage_ranking:
			losses_pr.append(loss_pr)
			losses_ee.append(loss_ee)
	#print("outlier_count:",outlier_count)
	loss_esp = np.mean(losses_esp)
	print("dev_loss:",loss_esp)
	if config.with_passage_ranking:
		loss_pr = np.mean(losses_pr)
		loss_ee = np.mean(losses_ee)
	metrics = evaluate(eval_file, answer_dict)
	metrics["loss_esp"] = loss_esp
	metrics["loss_ee"] = loss_esp
	if config.with_passage_ranking:
		metrics["loss_pr"] = loss_pr
		metrics["loss_ee"] = loss_ee
	loss_sum1 = tf.Summary(value=[tf.Summary.Value(
		tag="{}/loss_esp".format(data_type), simple_value=metrics["loss_esp"]), ])
	if config.with_passage_ranking:
		loss_sum2 = tf.Summary(value=[tf.Summary.Value(
			tag="{}/loss_pr".format(data_type), simple_value=metrics["loss_pr"]), ])
		loss_sum3 = tf.Summary(value=[tf.Summary.Value(
			tag="{}/loss_ee".format(data_type), simple_value=metrics["loss_ee"]), ])
	f1_sum = tf.Summary(value=[tf.Summary.Value(
		tag="{}/f1".format(data_type), simple_value=metrics["f1"]), ])
	em_sum = tf.Summary(value=[tf.Summary.Value(
		tag="{}/em".format(data_type), simple_value=metrics["exact_match"]), ])
	rouge_l_f = tf.Summary(value=[tf.Summary.Value(
		tag="{}/ROUGE-L".format(data_type), simple_value=metrics["rouge-l-f"]), ])
	rouge_l_p = tf.Summary(value=[tf.Summary.Value(
		tag="{}/rouge-l-p".format(data_type), simple_value=metrics["rouge-l-p"]), ])
	rouge_l_r = tf.Summary(value=[tf.Summary.Value(
		tag="{}/rouge-l-r".format(data_type), simple_value=metrics["rouge-l-r"]), ])
	outlier_c = tf.Summary(value=[tf.Summary.Value(
		tag="{}/outlier_count".format(data_type), simple_value=outlier_count), ])
	if config.with_passage_ranking:
		return metrics, [loss_sum1, loss_sum2, loss_sum3, rouge_l_f]
	return metrics, [loss_sum1, rouge_l_f]
Example #12
def main(args: argparse.Namespace) -> None:
    classifier = model.BorrowingsClassifier(args.modeltype)
    model_path = args.modelpath if args.modelpath else "model"
    if args.train:
        classifier.train(args.train)
        with open(model_path, "wb") as sink:
            pickle.dump(classifier, sink)
    else:
        with open(model_path, "rb") as source:
            classifier = pickle.load(source)
    if args.dev or args.test:
        eval_path = args.dev if args.dev else args.test
        predictions, gold = classifier.predict(eval_path)
    if args.dev:
        util.evaluate(gold, predictions)
    if args.test:
        util.write_file(predictions, args.test)
Example #13
 def train(self,
           X,
           Y,
           use_attention,
           att_context,
           bidirectional,
           cv=True,
           folds=5):
     if cv:
         cv_folds = make_folds(X, Y, folds)
         accuracies = []
         fscores = []
         for fold_num, ((train_fold_X, train_fold_Y),
                        (test_fold_X, test_fold_Y)) in enumerate(cv_folds):
             tagger = self.fit_model(train_fold_X, train_fold_Y,
                                     use_attention, att_context,
                                     bidirectional)
             pred_probs, pred_label_seqs, x_lens = self.predict(
                 test_fold_X, bidirectional, tagger=tagger)
             pred_inds = numpy.argmax(pred_probs, axis=2)
             flattened_preds = []
             flattened_targets = []
             for x_len, pred_ind, test_target in zip(
                     x_lens, pred_inds, test_fold_Y):
                 flattened_preds.extend(pred_ind[-x_len:])
                 flattened_targets.extend(
                     [list(tt).index(1) for tt in test_target[-x_len:]])
             assert len(flattened_preds) == len(flattened_targets)
             accuracy, weighted_fscore, all_fscores = evaluate(
                 flattened_targets, flattened_preds)
             print >> sys.stderr, "Finished fold %d. Accuracy: %f, Weighted F-score: %f" % (
                 fold_num, accuracy, weighted_fscore)
             print >> sys.stderr, "Individual f-scores:"
             for cat in all_fscores:
                 print >> sys.stderr, "%s: %f" % (self.rev_label_ind[cat],
                                                  all_fscores[cat])
             accuracies.append(accuracy)
             fscores.append(weighted_fscore)
         accuracies = numpy.asarray(accuracies)
         fscores = numpy.asarray(fscores)
         print >> sys.stderr, "Accuracies:", accuracies
         print >> sys.stderr, "Average: %0.4f (+/- %0.4f)" % (
             accuracies.mean(), accuracies.std() * 2)
         print >> sys.stderr, "Fscores:", fscores
         print >> sys.stderr, "Average: %0.4f (+/- %0.4f)" % (
             fscores.mean(), fscores.std() * 2)
     self.tagger = self.fit_model(X, Y, use_attention, att_context,
                                  bidirectional)
     model_ext = "att=%s_cont=%s_bi=%s" % (str(use_attention), att_context,
                                           str(bidirectional))
     model_config_file = open("model_%s_config.json" % model_ext, "w")
     model_weights_file_name = "model_%s_weights" % model_ext
     model_label_ind = "model_%s_label_ind.json" % model_ext
     model_rep_reader = "model_%s_rep_reader.pkl" % model_ext
     print >> model_config_file, self.tagger.to_json()
     self.tagger.save_weights(model_weights_file_name, overwrite=True)
     json.dump(self.label_ind, open(model_label_ind, "w"))
     pickle.dump(self.rep_reader, open(model_rep_reader, "wb"))
Example #14
    def client_update(self, global_model, global_init_model, round_index):
        self.elapsed_comm_rounds += 1
        print(f'***** Client #{self.client_id} *****', flush=True)
        self.model = copy_model(global_model,
                                self.args.dataset, self.args.arch,
                                dict(self.model.named_buffers()))

        num_pruned, num_params = get_prune_summary(self.model)
        cur_prune_rate = num_pruned / num_params
        #prune_step = math.floor(num_params * self.args.prune_step)

        eval_score = evaluate(self.model,
                              self.test_loader,
                              verbose=self.args.test_verbosity)

        if eval_score['Accuracy'][
                0] > self.args.acc_thresh and cur_prune_rate < self.args.prune_percent:
            # I'm adding 0.001 just to ensure we go clear the target prune_percent. This may not be needed
            prune_fraction = min(
                self.args.prune_step,
                0.001 + self.args.prune_percent - cur_prune_rate)
            prune_fixed_amount(self.model,
                               prune_fraction,
                               verbose=self.args.prune_verbosity,
                               glob=True)
            self.model = copy_model(global_init_model, self.args.dataset,
                                    self.args.arch,
                                    dict(self.model.named_buffers()))
        losses = []
        accuracies = []
        for i in range(self.args.client_epoch):
            train_score = train(round_index,
                                self.client_id,
                                i,
                                self.model,
                                self.train_loader,
                                lr=self.args.lr,
                                verbose=self.args.train_verbosity)

            losses.append(train_score['Loss'][-1].data.item())
            accuracies.append(train_score['Accuracy'][-1])

        mask_log_path = f'{self.args.log_folder}/round{round_index}/c{self.client_id}.mask'
        client_mask = dict(self.model.named_buffers())
        log_obj(mask_log_path, client_mask)

        num_pruned, num_params = get_prune_summary(self.model)
        cur_prune_rate = num_pruned / num_params
        prune_step = math.floor(num_params * self.args.prune_step)
        print(
            f"num_pruned {num_pruned}, num_params {num_params}, cur_prune_rate {cur_prune_rate}, prune_step: {prune_step}"
        )

        self.losses[round_index:] = np.array(losses)
        self.accuracies[round_index:] = np.array(accuracies)
        self.prune_rates[round_index:] = cur_prune_rate

        return copy_model(self.model, self.args.dataset, self.args.arch)
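Example #15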
def evaluate(lang='pt'):
    X, Y = util.get_X_Y(data_type='keras_tokenized_tri', lang=lang, file_type="dump")
    X = np.asarray(X)
    data_generator = DataGenerator(X,Y, lang=lang, process_x=process_x, batch_size=PARAMS['batch_size'])
    model, epoch = load_lastest(lang=lang)
    x_val, y_val = data_generator.get_validation_data()
    y_pred = model.predict(x_val)
    y_pred = y_pred.argmax(axis=-1)
    print('Model '+NAME+' val score on '+lang+': ', util.evaluate(y_val, y_pred))
Example #16
def evaluate_batch(config, model, num_batches, eval_file, sess, data_type, handle, str_handle):
    answer_dict = {}
    remapped_dict = {}
    losses = []
    for _ in tqdm(range(1, num_batches+1 )):
        try:    
            qa_id, loss, yp1, yp2 , y1, y2, is_select_p, is_select= sess.run(
                    [model.qa_id, model.loss, model.yp1, model.yp2, model.y1, model.y2, model.is_select_p, model.is_select], feed_dict={ handle:str_handle })
        except tf.errors.OutOfRangeError:
            break

        y1 = np.argmax(y1, axis=-1)
        y2 = np.argmax(y2, axis=-1)
        sp = np.argmax(is_select_p, axis=-1)
        s = np.argmax(is_select, axis=-1)
        sp = [ n+i*config.passage_num for i,n in enumerate(sp.tolist()) ]
        s = [ m+i*config.passage_num for i,m in enumerate(s.tolist()) ]

        answer_dict_, remapped_dict_ = convert_tokens(
            eval_file, [qa_id[n] for n in sp], [yp1[n] for n in sp], [yp2[n] for n in sp], [y1[n] for n in sp], [y2[n] for n in sp], sp, s)

        answer_dict.update(answer_dict_)
        remapped_dict.update(remapped_dict_)
        losses.append(loss)
    loss = np.mean(losses)
    metrics = evaluate(eval_file, answer_dict, filter=False)
    sp_metrics = evaluate(eval_file, remapped_dict, filter=False)

    metrics["loss"] = loss

    loss_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/loss".format(data_type), simple_value=metrics["loss"]), ])

    f1_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/f1".format(data_type), simple_value=metrics["f1"]), ])
    em_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/em".format(data_type), simple_value=metrics["exact_match"]), ])
    
    sp_f1_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/sp_f1".format(data_type), simple_value=sp_metrics["f1"]), ])
    sp_em_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/sp_em".format(data_type), simple_value=sp_metrics["exact_match"]), ])
   
    return metrics, [loss_sum, f1_sum, em_sum, sp_f1_sum, sp_em_sum]
Example #17
def evaluate_batch(data_source, model, max_batches, eval_file, config):
    answer_dict = {}
    sp_dict = {}
    total_loss, step_cnt = 0, 0
    iter = data_source
    for step, data in enumerate(iter):
        if step >= max_batches and max_batches > 0: break

        with torch.no_grad():
            if config.cuda:
                data = {
                    k: (data[k].cuda() if k != 'ids' else data[k])
                    for k in data
                }
            context_idxs = data['context_idxs']
            ques_idxs = data['ques_idxs']
            context_char_idxs = data['context_char_idxs']
            ques_char_idxs = data['ques_char_idxs']
            context_lens = data['context_lens']
            y1 = data['y1']
            y2 = data['y2']
            q_type = data['q_type']
            is_support = data['is_support']
            start_mapping = data['start_mapping']
            end_mapping = data['end_mapping']
            all_mapping = data['all_mapping']

            logit1, logit2, predict_type, predict_support, yp1, yp2 = model(
                context_idxs,
                ques_idxs,
                context_char_idxs,
                ques_char_idxs,
                context_lens,
                start_mapping,
                end_mapping,
                all_mapping,
                context_lens.sum(1).max().item(),
                return_yp=True)
            loss = (nll_sum(predict_type, q_type) + nll_sum(logit1, y1) +
                    nll_sum(logit2, y2)
                    ) / context_idxs.size(0) + config.sp_lambda * nll_average(
                        predict_support.view(-1, 2), is_support.view(-1))
            answer_dict_ = convert_tokens(
                eval_file, data['ids'],
                yp1.data.cpu().numpy().tolist(),
                yp2.data.cpu().numpy().tolist(),
                np.argmax(predict_type.data.cpu().numpy(), 1))
            answer_dict.update(answer_dict_)

            total_loss += loss.item()
        step_cnt += 1
    loss = total_loss / step_cnt
    metrics = evaluate(eval_file, answer_dict)
    metrics['loss'] = loss

    return metrics
Example #18
 def pipeline(self, df):
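     # Preprocess the frame, build per-id train/test splits, fit the model,
     # predict on the test split, and score the predictions with util.evaluate().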
     df_preprocess = self.preprocess(df)
     df_train = df_preprocess.groupby('id').apply(
         util.get_trainset).reset_index().drop('level_1', axis=1)
     df_test = df_preprocess.groupby('id').apply(
         util.get_testset).reset_index().drop('level_1', axis=1)
     model = self.model(df_train)
     df_pred = self.prediction(model, df_test)
     eval_scores = util.evaluate(df_pred)
     return df_pred, eval_scores
Example #19
 def test_epoch_end(self, test_step_outputs):
     pred_tags, tags = zip(*test_step_outputs)
     result = evaluate(self._to_ix, self.test_data, list(chain(*pred_tags)),
                       self.m_type)
     self.log('char precision', result['char_level']['precision'])
     self.log('char recall', result['char_level']['recall'])
     self.log('char f1', result['char_level']['f1'])
     self.log('word precision', result['word_level']['precision'])
     self.log('word recall', result['word_level']['recall'])
     self.log('word f1', result['word_level']['f1'])
Example #20
def evaluate_batch(data_source, model, max_batches, eval_file, config):
    answer_dict = {}
    sp_dict = {}
    total_loss, total_ans_loss, total_sp_loss, step_cnt = 0, 0, 0, 0
    iter = data_source
    for step, data in enumerate(iter):
        if step >= max_batches and max_batches > 0: break

        context_idxs = Variable(data['context_idxs'], volatile=True)
        ques_idxs = Variable(data['ques_idxs'], volatile=True)
        context_char_idxs = Variable(data['context_char_idxs'], volatile=True)
        ques_char_idxs = Variable(data['ques_char_idxs'], volatile=True)
        context_lens = Variable(data['context_lens'], volatile=True)
        y1 = Variable(data['y1'], volatile=True)
        y2 = Variable(data['y2'], volatile=True)
        q_type = Variable(data['q_type'], volatile=True)
        is_support = Variable(data['is_support'], volatile=True)
        start_mapping = Variable(data['start_mapping'], volatile=True)
        end_mapping = Variable(data['end_mapping'], volatile=True)
        all_mapping = Variable(data['all_mapping'], volatile=True)

        logit1, logit2, predict_type, predict_support, yp1, yp2 = model(
            context_idxs,
            ques_idxs,
            context_char_idxs,
            ques_char_idxs,
            context_lens,
            start_mapping,
            end_mapping,
            all_mapping,
            return_yp=True)
        loss_1 = (nll_sum(predict_type, q_type) + nll_sum(logit1, y1) +
                  nll_sum(logit2, y2)) / context_idxs.size(0)
        loss_2 = nll_average(predict_support.view(-1, 2), is_support.view(-1))
        loss = loss_1 + config.sp_lambda * loss_2
        answer_dict_ = convert_tokens(
            eval_file, data['ids'],
            yp1.data.cpu().numpy().tolist(),
            yp2.data.cpu().numpy().tolist(),
            np.argmax(predict_type.data.cpu().numpy(), 1))
        answer_dict.update(answer_dict_)

        total_loss += loss.data[0]
        total_ans_loss += loss_1.data[0]
        total_sp_loss += loss_2.data[0]
        step_cnt += 1
    loss = total_loss / step_cnt
    ans_loss = total_ans_loss / step_cnt
    sp_loss = total_sp_loss / step_cnt
    metrics = evaluate(eval_file, answer_dict)
    metrics['loss'] = loss
    metrics['ans_loss'] = ans_loss
    metrics['sp_loss'] = sp_loss

    return metrics
Example #21
def test(config):
    os.environ["CUDA_VISIBLE_DEVICES"] = config.choose_gpu
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    graph = tf.Graph()
    print("Loading model...")
    with graph.as_default() as g:
        test_batch = get_dataset(config.test_record_file,
                                 get_record_parser(config, is_test=True),
                                 config).make_one_shot_iterator()

        model = QANet(config,
                      test_batch,
                      word_mat,
                      char_mat,
                      trainable=False,
                      graph=g)

        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True
        sess_config.gpu_options.per_process_gpu_memory_fraction = config.gpu_memory_fraction

        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
            if config.decay < 1.0:
                sess.run(model.assign_vars)
            losses = []
            answer_dict = {}
            remapped_dict = {}
            for step in tqdm(range(total // config.batch_size + 1)):
                qa_id, loss, yp1, yp2 = sess.run(
                    [model.qa_id, model.loss, model.yp1, model.yp2])
                answer_dict_, remapped_dict_ = convert_tokens(
                    eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
                answer_dict.update(answer_dict_)
                remapped_dict.update(remapped_dict_)
                losses.append(loss)
            loss = np.mean(losses)
            metrics = evaluate(eval_file, answer_dict)
            with open(config.answer_file, "w") as fh:
                json.dump(remapped_dict, fh)
            print("Exact Match: {}, F1: {}".format(metrics['exact_match'],
                                                   metrics['f1']))
Example #22
    def SelfEvaluate(self,
                     batches,
                     eval_file=None,
                     answer_file=None,
                     drop_file=None,
                     dev=None):
        print('Starting evaluation')

        with open(eval_file, 'r', encoding='utf-8') as f:
            eval_file = json.load(f)
        with open(dev, 'r', encoding='utf-8') as f:
            dev = json.load(f)

        answer_dict = {}
        mapped_dict = {}

        for batch in batches:
            data = prepare_data(batch)
            full_p_states, p_mask, full_q_states, q_mask = self.encode(data)
            logits1, logits2, ans_log = self.decode(full_p_states, p_mask,
                                                    full_q_states, q_mask)
            y1, y2, has_ans = get_predictions(logits1, logits2, ans_log)
            qa_id = data['id']
            answer_dict_, mapped_dict_ = convert_tokens(
                eval_file, qa_id, y1, y2, has_ans)
            answer_dict.update(answer_dict_)
            mapped_dict.update(mapped_dict_)

            del full_p_states, p_mask, full_q_states, q_mask, y1, y2, answer_dict_, mapped_dict_, has_ans, ans_log, logits1, logits2

        with open(drop_file, 'r', encoding='utf-8') as f:
            drop = json.load(f)
        for i in drop['drop_ids']:
            uuid = eval_file[str(i)]["uuid"]
            answer_dict[str(i)] = ''
            mapped_dict[uuid] = ''

        with open(answer_file, 'w', encoding='utf-8') as f:
            json.dump(mapped_dict, f)
        metrics = evaluate(dev, mapped_dict)

        # sub_path = join('./result/', "submit.csv")
        # #log.info('Writing submission file to {}...'.format(sub_path))
        # with open(sub_path, 'w') as csv_fh:
        #     csv_writer = csv.writer(csv_fh, delimiter=',')
        #     csv_writer.writerow(['Id', 'Predicted'])
        #     for uuid in sorted(mapped_dict):
        #         csv_writer.writerow([uuid, mapped_dict[uuid]])

        print("EM: {}, F1: {}, Has answer: {}, No answer: {}".format(
            metrics['exact'], metrics['f1'], metrics['HasAns_f1'],
            metrics['NoAns_f1']))

        return metrics['exact'], metrics['f1']
Example #23
def run_itemknn(X_train, X_test, test_dict):
    # Compute item-item matrix with cosine similarities
    S_cosine = util.compute_cosine(X_train)

    # Compute prediction scores for all test users - subtract already seen items
    test_users = list(test_dict.keys())
    test_scores = X_test[test_users, :] @ S_cosine - 987654321 * X_test[
        test_users, :]

    # Evaluate and pretty print
    results_cosine = util.evaluate(X_test, test_scores, test_dict)
    return results_cosine
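Example #24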
def test_epoch(model, loader, criterion):
    model.eval()
    test_losses = []
    outs = []
    gts = []
    for data in loader:
        for label in data[1].numpy().tolist():
            gts.append(label)
        inputs, labels = transform_data(data, True)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        test_losses.append(loss.data[0])
        out = torch.sigmoid(outputs).data.cpu().numpy()
        outs.extend(out)
    avg_loss = np.mean(test_losses)
    print('Validation Loss: {:.6f}'.format(avg_loss))

    outs = np.array(outs)
    gts = np.array(gts)
    util.evaluate(gts, outs)
    return avg_loss
Example #25
def evaluate_batch(model, num_batches, eval_file, sess, data_type, handle,
                   str_handle):
    answer_dict = {}
    losses = []
    outlier_count = 0
    for _ in tqdm(range(1, num_batches + 1)):
        qa_id, loss, yp1, yp2, is_selected = sess.run(
            [model.qa_id, model.loss, model.yp1, model.yp2, model.logits_s],
            feed_dict={handle: str_handle})

        if is_selected > 0.5:
            answer_dict_, _, outlier = convert_tokens(eval_file,
                                                      qa_id.tolist(),
                                                      yp1.tolist(),
                                                      yp2.tolist())
            if outlier:
                outlier_count += 1
            answer_dict.update(answer_dict_)

        losses.append(loss)
    loss = np.mean(losses)
    metrics = evaluate(eval_file, answer_dict)
    metrics["loss"] = loss
    loss_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/loss".format(data_type),
                         simple_value=metrics["loss"]),
    ])
    f1_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/f1".format(data_type),
                         simple_value=metrics["f1"]),
    ])
    em_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/em".format(data_type),
                         simple_value=metrics["exact_match"]),
    ])
    rouge_l_f = tf.Summary(value=[
        tf.Summary.Value(tag="{}/rouge-l-f".format(data_type),
                         simple_value=metrics["rouge-l-f"]),
    ])
    rouge_l_p = tf.Summary(value=[
        tf.Summary.Value(tag="{}/rouge-l-p".format(data_type),
                         simple_value=metrics["rouge-l-p"]),
    ])
    rouge_l_r = tf.Summary(value=[
        tf.Summary.Value(tag="{}/rouge-l-r".format(data_type),
                         simple_value=metrics["rouge-l-r"]),
    ])
    outlier_c = tf.Summary(value=[
        tf.Summary.Value(tag="{}/outlier_count".format(data_type),
                         simple_value=outlier_count),
    ])
    return metrics, [
        loss_sum, f1_sum, em_sum, rouge_l_f, rouge_l_p, rouge_l_r, outlier_c
    ]
Example #26
    def end_of_epoch_hook(trainer):
        nonlocal i_epoch, best_dev_eer

        logger.info(f"EPOCH\t{i_epoch}")

        if i_epoch % args.eval_freq == 0:
            train_eer, train_eer_std = evaluate(args, trainer.models["trunk"],
                                                trainer.models["embedder"],
                                                eval_train_dataloaders)
            dev_eer, dev_eer_std = evaluate(args, trainer.models["trunk"],
                                            trainer.models["embedder"],
                                            eval_dev_dataloaders)
            logger.info("Eval EER (mean, std):\t{}\t{}".format(
                train_eer, train_eer_std))
            logger.info("Eval EER (mean, std):\t{}\t{}".format(
                dev_eer, dev_eer_std))
            if dev_eer < best_dev_eer:
                logger.info("New best model!")
                best_dev_eer = dev_eer

        i_epoch += 1
Example #27
def test(config):
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["num_batches"]

    print("Loading model...")
    test_batch = get_batch_dataset(config.test_record_file, get_record_parser(
        config, is_test=True), config, is_test=True).make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        select_right = []
        for step in tqdm(range(1, total + 1)):
            qa_id, loss, yp1, yp2 , y1, y2, is_select_p, is_select= sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2, model.y1, model.y2, model.is_select_p, model.is_select])
            y1 = np.argmax(y1, axis=-1)
            y2 = np.argmax(y2, axis=-1)
            sp = np.argmax(is_select_p, axis=-1)
            s = np.argmax(is_select, axis=-1)
            sp = [ n+i*config.passage_num for i,n in enumerate(sp.tolist()) ]
            s = [ m+i*config.passage_num for i,m in enumerate(s.tolist()) ]
            select_right.append(len(set(s).intersection(set(sp))))

            answer_dict_, _ = convert_tokens(
                eval_file, [qa_id[n] for n in sp], [yp1[n] for n in sp], [yp2[n] for n in sp], [y1[n] for n in sp], [y2[n] for n in sp], sp, s)
            answer_dict.update(answer_dict_)
            losses.append(loss)
        loss = np.mean(losses)
        select_accu = sum(select_right)/ (len(select_right)*(config.batch_size/config.passage_num))
        write_prediction(eval_file, answer_dict, 'answer_for_evl.json', config)
        metrics = evaluate(eval_file, answer_dict, filter=False)
        metrics['Selection Accuracy'] = select_accu
        
        print("Exact Match: {}, F1: {}, selection accuracy: {}".format(
            metrics['exact_match'], metrics['f1'], metrics['Selection Accuracy']))
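Example #28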
def prediction(features_test, labels_test, model_prediction):

    # print ("features test are ",features_test)
    predictions = np.array([])

    # load the model from disk
    clf = joblib.load(model_prediction)

    for i in range(features_test.shape[0]):
        # X_test = scalingFactor.transform(featureMatrix[i, :])
        X_test = [features_test[i, :]]
        # print("x test is", X_test)
        predictions = np.append(predictions, clf.predict(X_test))
        #print predictions
    # print("prediction is",predictions)

    # print("y_test is",labels_test)
    # print ("Classification Report:")
    # print (metrics.classification_report(labels_test, predictions))
    # print ("Confusion Matrix:")
    # print(metrics.confusion_matrix(labels_test, predictions))
    util.evaluate(labels_test, predictions)
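Example #29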
def predict():
    if request.method == 'POST':
        ocr_text = request.get_json()
        print(ocr_text)

        original_sent = ocr_text['ocr_text']
        output_sent = decode(original_sent)[0][0]
        evals = evaluate(original_sent, output_sent)
        result = {'predicted': output_sent, 'evaluated': evals}

        print(result)
        return jsonify(result)
    else:
        return '<h1>Error</h1>'
Example #30
def train(show_baseline=False, continue_train=False, \
    learn_freq= 5, memory_size = 20000, total_time=20,\
    memory_warmup_size = 2000, batch_size = 32, critic_lr = 0.001, \
    encoder_lr=0.0001, gamma = 0.9, alpha = 0.9, max_episode=1000, \
    critic_path='dqn_critic', encoder_path='dqn_encoder',\
    evaluate_env_list_path = 'env_list_set1'):

    if show_baseline:
        print(evaluate_reject_when_full(evaluate_env_list_path))
        print(evaluate_totally_random(evaluate_env_list_path))
    env = produce_env(total_time=total_time)
    action_dim = 4
    obs_dim_1 = 45
    request_dim = 17
    obs_dim_2 = 10
    obs_dim = obs_dim_1 + obs_dim_2 * 7
    encoder = Encoder(input_size=request_dim, output_size=obs_dim_2, \
        use_rnn=False, use_gru=True, use_lstm=False)
    rpm = ReplayMemory(memory_size)  # experience replay buffer for DQN
    critic = Critic(obs_dim=obs_dim, action_dim=action_dim)
    critic.to(device)
    agent = Agent(critic=critic,
                  encoder=encoder,
                  obs_dim=obs_dim,
                  action_dim=action_dim,
                  critic_lr=critic_lr,
                  encoder_lr=encoder_lr,
                  gamma=gamma,
                  alpha=alpha)

    if continue_train:
        agent.load(critic_path=critic_path, encoder_path=encoder_path)

    # Pre-fill the replay buffer with some transitions so the earliest training batches have enough sample diversity
    while len(rpm) < memory_warmup_size:
        run_episode(env, agent, rpm, memory_warmup_size, learn_freq,
                    batch_size)

    # start train
    episode = 0
    while episode < max_episode:  # train for max_episode episodes; evaluation episodes do not count toward the total
        # train part
        for i in range(0, 100):
            total_reward = run_episode(env, agent, rpm, memory_warmup_size,
                                       learn_freq, batch_size)
            episode += 1
        eval_reward = evaluate(evaluate_env_list_path, agent, render=False)
        print('episode:{}  Test reward:{}'.format(episode, eval_reward))
    agent.save(critic_path=critic_path, encoder_path=encoder_path)
Example #31
def model_evaluation(model, dataset, device):
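    # Collect start/end span predictions for every example in the dataset, then
    # score them with evaluate(): once with entity_refine=False and once with defaults.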
    examples, predicted_s, predicted_e, token_to_orig_maps = [], [], [], []
    for batch in dataset.get_tqdm(device, shuffle=False):
        input_ids, input_mask, segment_ids, _, _, token_to_orig_map, example = batch

        inputs = {
            'input_ids': input_ids,
            'attention_mask': input_mask,
            'token_type_ids': segment_ids
        }
        outputs = model(**inputs)

        examples.extend(example)
        predicted_s.extend(outputs[0].cpu().numpy())
        predicted_e.extend(outputs[1].cpu().numpy())
        token_to_orig_maps.extend(token_to_orig_map)

    evaluate(examples,
             predicted_s,
             predicted_e,
             token_to_orig_maps,
             entity_refine=False)
    evaluate(examples, predicted_s, predicted_e, token_to_orig_maps)
Example #32
def main():

    smooth_rate = 0.0001

    '''
    #------------------------------For Feature Extractor------------------------------
    extractor = Extractor()
    extractor.extract("../corpus/train_corpus.xml", "../train/", "train", 7, 3, 2, " | ")
    extractor.extract("../corpus/test_corpus.xml", "../test/", "test", 7, 3, 2," | ")
    '''
    names = util.readNames("../test/namefile")
    

    '''
    #------------------------------For Random Validation-------------------------------
    fout = open("../result/Tune_Result "+str(time.ctime())+".csv","a")
    results = []
    for name in names:
        infile = "../train/"+name

        nb = NaiveBayes(infile)
        print "---------",name,"----------"
        result = nb.Random_Cross_Validation(20,4,smooth_rate)
        results.append(result)
        reStr = name + "," + str(result) + "\n"
        fout.write(reStr)
    fout.close()
    print "Macro AVG:",sum(results)/len(results)

    '''
    #------------------------------------For Test--------------------------------------
    for name in names:
        print name
    nb2 = NaiveBayes()
    resultfile = nb2.predict("../train/", "../test/", "../result/",smooth_rate)
    util.evaluate(resultfile, "../result/test_answer")
Example #33
def evaluate_batch(model, num_batches, eval_file, sess, data_type, handle,
                   str_handle):
    """
    Evaluate a 
    """

    all_yp3 = []
    conter_high = 0

    answer_dict = {}
    losses = []
    for numb_b in (range(1, num_batches + 1)):

        qa_id, loss, yp1, yp2, yp3, y1, y2, y3, logging, logging2, q = sess.run(
            [
                model.qa_id, model.loss, model.yp1, model.yp2, model.yp3,
                model.y1, model.y2, model.y3, model.logging, model.logging2,
                model.q
            ],
            feed_dict={handle: str_handle})

        answer_dict_, _ = convert_tokens(eval_file, qa_id.tolist(),
                                         yp1.tolist(), yp2.tolist(),
                                         yp3.tolist())

        answer_dict.update(answer_dict_)
        losses.append(loss)

    loss = np.mean(losses)
    metrics = evaluate(eval_file, answer_dict)
    print(metrics)
    metrics["loss"] = loss

    loss_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/loss".format(data_type),
                         simple_value=metrics["loss"]),
    ])
    f1_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/f1".format(data_type),
                         simple_value=metrics["f1"]),
    ])
    em_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/em".format(data_type),
                         simple_value=metrics["exact_match"]),
    ])

    return metrics, [loss_sum, f1_sum, em_sum]
Example #34
    def train(self, train_data, valid_data):
        for epoch in range(self.epoch_num):
            self.model.train()
            train_loss = 0
            train_wmae, train_nae = 0, 0
            for step, (train_x, train_y) in enumerate(train_data):
                train_x = train_x.to(self.device)
                train_y = train_y.to(self.device)
                out = self.model(train_x)
                self.optimizer.zero_grad()
                loss = self.loss_func(out, train_y)
                loss.backward()
                self.optimizer.step()
                train_loss += loss.item()

                out = out.detach().cpu().numpy()
                train_y = train_y.detach().cpu().numpy()
                wmae, nae = evaluate(out, train_y)
                train_wmae += wmae 
                train_nae += nae
            
            train_loss /= (step + 1)
            train_wmae /= (step + 1)
            train_nae /= (step + 1)
            valid_loss, valid_wmae, valid_nae = self.validate(valid_data)

            best_info = ''
            if valid_loss < self.best['loss']:
                self.best['loss'] = valid_loss
                best_info += ' Loss '
            if valid_wmae < self.best['wmae']:
                self.best['wmae'] = valid_wmae
                best_info += ' WMAE '
            if valid_nae < self.best['nae']:
                self.best['nae'] = valid_nae
                best_info += ' NAE '

            self.record('\n------------  Epoch {} ----------- Best: {}'.format(epoch, best_info))
            self.record('Train => Loss: {:.5f} | WMAE: {:.5f} | NAE: {:.5f}'.format(train_loss, train_wmae, train_nae))
            self.record('Valid => Loss: {:.5f} | WMAE: {:.5f} | NAE: {:.5f}'.format(valid_loss, valid_wmae, valid_nae))
            
            if self.save and 'NAE' in best_info:
                torch.save(self.model.state_dict(), self.save)

        self.record('\n========== Best record ==========')
        self.record('Loss: {:.5f} | WMAE: {:.5f} | NAE: {:.5f}'.format(self.best['loss'], self.best['wmae'], self.best['nae']))
Example #35
def test(config):
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    graph = tf.Graph()
    print("Loading model...")
    with graph.as_default() as g:
        test_batch = get_dataset(config.test_record_file, get_record_parser(
            config, is_test=True), config).make_one_shot_iterator()

        model = Model(config, test_batch, word_mat, char_mat, trainable=False, graph = g)

        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True

        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
            if config.decay < 1.0:
                sess.run(model.assign_vars)
            losses = []
            answer_dict = {}
            remapped_dict = {}
            for step in tqdm(range(total // config.batch_size + 1)):
                qa_id, loss, yp1, yp2 = sess.run(
                    [model.qa_id, model.loss, model.yp1, model.yp2])
                answer_dict_, remapped_dict_ = convert_tokens(
                    eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
                answer_dict.update(answer_dict_)
                remapped_dict.update(remapped_dict_)
                losses.append(loss)
            loss = np.mean(losses)
            metrics = evaluate(eval_file, answer_dict)
            with open(config.answer_file, "w") as fh:
                json.dump(remapped_dict, fh)
            print("Exact Match: {}, F1: {}".format(
                metrics['exact_match'], metrics['f1']))
Exemple #36
0
def test(prototypes, network_path = 'network_eyes.np'):
    """
    Tests a saved network
    :param prototypes:
    :param network_path:
    :return:
    """
    network = load_network(prototypes)    
    
    input_var = T.fmatrix()
    target_var = T.fmatrix()
    val_prediction = layers.get_output(network, inputs = input_var, deterministic = True)
    val_loss = lasagne.objectives.squared_error(val_prediction, target_var)
    val_loss = val_loss.mean()
    val_fn = theano.function([input_var, target_var], [val_prediction, val_loss])    
    
    total_mean = 0
    total_std = 0
    n = 0
    print "validation data in test"
    for inp_val, out_val in tqdm(iterate_data(data_file = 'validation_data_eyes.p')):
        predictions, loss = val_fn(inp_val, out_val)
        dist, mean, std = evaluate(predictions, out_val)
        n+=1
        total_mean+=mean
        total_std+= std
    
    #eyes = Eyes(origin = 0, visualize = True)
    #eyes.set_dominance(0)
    for i, [left, right] in enumerate(predictions):
        #print left, right
        x, y = calc_intersect(left, right)
        print "predicted \t x: {} y: {}".format(x, y)
        #eyes.redraw()
        #point_target = eyes.move_eyes(out_val[i][0], out_val[i][1])
        
        x1, y1 = calc_intersect(out_val[i][0], out_val[i][1])
        print "target \t\t x: {} y: {}".format(x1,y1)
        print "should be \t x: {} y: {}".format(inp_val[i][0], inp_val[i][1])
        #eyes.redraw()
        embed()
Exemple #37
0
def evaluate_batch(model, num_batches, eval_file, sess, data_type, handle, str_handle):
    answer_dict = {}
    losses = []
    for _ in tqdm(range(1, num_batches + 1)):
        qa_id, loss, yp1, yp2 = sess.run(
            [model.qa_id, model.loss, model.yp1, model.yp2], feed_dict={handle: str_handle})
        answer_dict_, _ = convert_tokens(
            eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
        answer_dict.update(answer_dict_)
        losses.append(loss)
    loss = np.mean(losses)
    metrics = evaluate(eval_file, answer_dict)
    metrics["loss"] = loss
    loss_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/loss".format(data_type), simple_value=metrics["loss"]), ])
    f1_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/f1".format(data_type), simple_value=metrics["f1"]), ])
    em_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/em".format(data_type), simple_value=metrics["exact_match"]), ])
    return metrics, [loss_sum, f1_sum, em_sum]
Exemple #38
0
 def train(self, X, Y, use_attention, att_context, bidirectional, cv=True, folds=5):
   if cv:
     cv_folds = make_folds(X, Y, folds)
     accuracies = []
     fscores = []
     for fold_num, ((train_fold_X, train_fold_Y), (test_fold_X, test_fold_Y)) in enumerate(cv_folds):
       tagger = self.fit_model(train_fold_X, train_fold_Y, use_attention, att_context, bidirectional)
       pred_probs, pred_label_seqs, x_lens = self.predict(test_fold_X, bidirectional, tagger=tagger)
       pred_inds = numpy.argmax(pred_probs, axis=2)
       flattened_preds = []
       flattened_targets = []
       for x_len, pred_ind, test_target in zip(x_lens, pred_inds, test_fold_Y):
         flattened_preds.extend(pred_ind[-x_len:])
         flattened_targets.extend([list(tt).index(1) for tt in test_target[-x_len:]])
       assert len(flattened_preds) == len(flattened_targets)
       accuracy, weighted_fscore, all_fscores = evaluate(flattened_targets, flattened_preds)
       print >>sys.stderr, "Finished fold %d. Accuracy: %f, Weighted F-score: %f"%(fold_num, accuracy, weighted_fscore)
       print >>sys.stderr, "Individual f-scores:"
       for cat in all_fscores:
         print >>sys.stderr, "%s: %f"%(self.rev_label_ind[cat], all_fscores[cat])
       accuracies.append(accuracy)
       fscores.append(weighted_fscore)
     accuracies = numpy.asarray(accuracies)
     fscores = numpy.asarray(fscores)
     print >>sys.stderr, "Accuracies:", accuracies
     print >>sys.stderr, "Average: %0.4f (+/- %0.4f)"%(accuracies.mean(), accuracies.std() * 2)
     print >>sys.stderr, "Fscores:", fscores
     print >>sys.stderr, "Average: %0.4f (+/- %0.4f)"%(fscores.mean(), fscores.std() * 2)
   self.tagger = self.fit_model(X, Y, use_attention, att_context, bidirectional)
   model_ext = "att=%s_cont=%s_bi=%s"%(str(use_attention), att_context, str(bidirectional))
   model_config_file = open("model_%s_config.json"%model_ext, "w")
   model_weights_file_name = "model_%s_weights"%model_ext
   model_label_ind = "model_%s_label_ind.json"%model_ext
   model_rep_reader = "model_%s_rep_reader.pkl"%model_ext
   print >>model_config_file, self.tagger.to_json()
   self.tagger.save_weights(model_weights_file_name, overwrite=True)
   json.dump(self.label_ind, open(model_label_ind, "w"))
   pickle.dump(self.rep_reader, open(model_rep_reader, "wb"))
Exemple #39
0
 def train(self, trainfile_name):
   train_X, train_Y, num_classes = self.make_data(trainfile_name)
   accuracies = []
   fscores = []
   if self.cv:
     cv_folds = make_folds(train_X, train_Y, self.folds)
     for i, ((train_fold_X, train_fold_Y), (test_fold_X, test_fold_Y)) in enumerate(cv_folds):
       classifier = self.fit_model(train_fold_X, train_fold_Y, num_classes)
       predictions = self.classify(classifier, test_fold_X)
       accuracy, weighted_fscore, _ = evaluate(test_fold_Y, predictions)
       print >>sys.stderr, "Finished fold %d. Accuracy: %f, F-score: %f"%(i, accuracy, weighted_fscore)
       accuracies.append(accuracy)
       fscores.append(weighted_fscore)
     accuracies = numpy.asarray(accuracies)
     fscores = numpy.asarray(fscores)
     print >>sys.stderr, "Accuracies:", accuracies
     print >>sys.stderr, "Average: %0.4f (+/- %0.4f)"%(accuracies.mean(), accuracies.std() * 2)
     print >>sys.stderr, "Fscores:", fscores
     print >>sys.stderr, "Average: %0.4f (+/- %0.4f)"%(fscores.mean(), fscores.std() * 2)
   #self.classifier = self.fit_model(train_X, train_Y, num_classes)
   #cPickle.dump(classifier, open(self.trained_model_name, "wb"))
   #pickle.dump(tagset, open(self.stored_tagset, "wb"))
   print >>sys.stderr, "Done"
Exemple #40
0
 def test(self, shared_x, data_y, out_path=None):
     pred_y = self.predict(shared_x)
     if out_path:
         with codecs.open(out_path, 'wb') as f:
             f.writelines(['%s\t%s\n' % (x, y) for x, y in zip(data_y, pred_y)])
     return evaluate(data_y, pred_y)
Exemple #41
0
def train_network(prototypes, train_data = 'train_data.p', val_data = 'validation_data.p'):
    """
    Legacy Code, doesnt work anymore
    Trains a single network (eithers arm of eye model)
    Also plots some of information about loss and accuracy.
    Input: prototypes of the model, train data, validation data
    Output: -
    """
    
    network, train_fn, val_fn = create_network(prototypes)

    epochs = 150
    means = np.zeros(epochs)
    stds = np.zeros(epochs)
    train_losses = np.zeros(epochs)
    val_losses = np.zeros(epochs)
    dists = np.zeros(epochs)

    arm = Arm(origin=0, visualize=False)

    print "Train network"

    for e in tqdm(range(epochs)):
        #Train epoch

        for input_batch, output_batch in iterate_data(data_file = train_data):
            pred, train_loss = train_fn(input_batch, output_batch)


        total_mean = 0
        total_std = 0
        total_dist = 0
        n = 0
        for inp_val, out_val in iterate_data(data_file = val_data):

            #validation epoch
            predictions, loss = val_fn(inp_val, out_val)
            dist, mean, std = evaluate(predictions, out_val)
            arm_positions = np.array([arm.move_arm(shoulder, elbow) for [shoulder, elbow] in predictions])
            eye_error_dist, mean_eye_error, std_eye_error = evaluate(arm_positions, inp_val)

            n += 1
            total_mean += mean
            total_std += std
            total_dist += mean_eye_error


        means[e] = total_mean/n
        stds[e] = total_std/n
        train_losses[e] = train_loss
        val_losses[e] = loss
        dists[e] = total_dist/n
        np.save('network_epoch' + str(e), layers.get_all_param_values(network))   

    #Plots
    plt.figure()
    distplot, = plt.plot(dists, label = 'arm distance error')
    plt.legend(handles = [distplot])
    plt.savefig('../images/arm_error.png')
    plt.show()

    plt.figure()
    meanplot, = plt.plot(means, label = 'mean')
    stdplot, = plt.plot(stds, label = 'std')
    plt.legend(handles = [meanplot, stdplot])
    plt.savefig('../images/arm_angles.png')
    plt.show()

    plt.figure()
    trainplot, = plt.plot(train_losses, label = 'train loss')
    valplot, = plt.plot(val_losses, label = 'val loss')
    plt.legend(handles = [trainplot, valplot])
    plt.savefig('../images/arm_losses.png')
    plt.show()
    
    print "saving network"
    np.save('network_arm', layers.get_all_param_values(network))
    print "done saving"


    
    return network, predictions
Exemple #42
0
def train_network_scenario1(prototypes1, prototypes2, origin, train_data='train_data.p', val_data='validation_data.p'):
    """
    Combines the networks for the arm and the eye. Arm is dominant over the eye, as in scenario 2, so the eye recieves its input from the arm and its target.
    Also plots a lot of information about loss and accuracy.
    Saves the weights of the network
    Input: prototypes for the arm, prototypes for the eye, point of origin of both models, trainingdata for the arm, validationdata for the eye
    Output: -
    """

    epochs = 150  # number of epochs

    print 'network1'
    network1, train_fn1, val_fn1 = create_network(prototypes1)
    print 'network2'
    network2, train_fn2, val_fn2 = create_network(prototypes2, n_inputs=4)

    print 'Networks done'
    eyes = Eyes(origin=origin, visualize=False)
    arm = Arm(origin=origin, visualize=False)

    print 'more stuff'
    # Arrays for saving performance after each epoch
    means_arm = np.zeros(epochs)
    stds_arm = np.zeros(epochs)
    train_losses_arm = np.zeros(epochs)
    val_losses_arm = np.zeros(epochs)
    means_eye = np.zeros(epochs)
    stds_eye = np.zeros(epochs)
    train_losses_eye = np.zeros(epochs)
    val_losses_eye = np.zeros(epochs)
    dists_eye = np.zeros(epochs)
    dists_arm = np.zeros(epochs)

    print "Train network"

    for e in tqdm(range(epochs)):

        total_mean_arm = 0
        total_std_arm = 0
        total_mean_eye = 0
        total_std_eye = 0
        total_error_arm = 0
        total_error_eye = 0
        train_loss_arm = 0
        val_loss_arm = 0
        train_loss_eye = 0
        val_loss_eye = 0

        # training epoch
        i = 0
        for input_batch, output_batch in iterate_data(data_file=train_data):
            pred1, train_loss1 = train_fn1(input_batch, output_batch)
            arm_angles = np.array([arm.calculate_angles(x, y) for [x, y] in input_batch], dtype='float32')  # same targets as arm
            eye_positions = [calc_intersect(left, right) for [left, right] in pred1]  # get x,y from predicted eye angles
            arm_input = np.hstack((input_batch, eye_positions)).astype('float32')  # first the eye coordinates, take care when combining prototypes
            pred2, train_loss2 = train_fn2(arm_input, arm_angles)

            train_loss_arm += train_loss2
            train_loss_eye += train_loss1
            i += 1
        # Take average loss of this epoch
        train_loss_arm = train_loss_arm / i
        train_loss_eye = train_loss_eye / i

        n = 0
        # Validation Epoch
        for inp_val, out_val in iterate_data(data_file=val_data):
            predictions_eye, loss_eye = val_fn1(inp_val, out_val)
            dist_eye, mean_eye, std_eye = evaluate(predictions_eye, out_val)  # dist_eye is for debugging

            arm_angles = np.array([arm.calculate_angles(x, y) for [x, y] in inp_val], dtype='float32')
            eye_positions = [calc_intersect(left, right) for [left, right] in predictions_eye]

            arm_input = np.hstack((inp_val, eye_positions)).astype('float32')
            prediction_arm, loss_arm = val_fn2(arm_input, arm_angles)
            dist_arm, mean_arm, std_arm = evaluate(prediction_arm, inp_val)
            arm_positions = np.array([arm.move_arm(shoulder, elbow) for [shoulder, elbow] in prediction_arm])

            arm_error_dist, mean_arm_error, std_arm_error = evaluate(arm_positions, inp_val)
            eye_error_dist, mean_eye_error, std_eye_error = evaluate(eye_positions, inp_val)

            total_error_arm += mean_arm_error
            total_error_eye += mean_eye_error

            n += 1
            total_mean_arm += mean_arm
            total_std_arm += std_arm
            total_mean_eye += mean_eye
            total_std_eye += std_eye
            val_loss_arm += loss_arm
            val_loss_eye += loss_eye


        # Save epoch data
        means_arm[e] = total_mean_arm / n
        stds_arm[e] = total_std_arm / n
        train_losses_arm[e] = train_loss_arm
        val_losses_arm[e] = val_loss_arm / n
        means_eye[e] = total_mean_eye / n
        stds_eye[e] = total_std_eye / n
        train_losses_eye[e] = train_loss_eye
        val_losses_eye[e] = val_loss_eye / n
        dists_eye[e] = total_error_eye / n
        dists_arm[e] = total_error_arm / n

    # Plots
    # Plot mean and std
    plt.figure()
    meanplot_arm, = plt.plot(means_arm, label='mean arm')
    stdplot_arm, = plt.plot(stds_arm, label='std arm')
    meanplot_eye, = plt.plot(means_eye, label='mean eye')
    stdplot_eye, = plt.plot(stds_eye, label='std eye')

    plt.legend(handles=[meanplot_arm, stdplot_arm, meanplot_eye, stdplot_eye])
    plt.savefig('../images/scenario1/accuracy_combined.png')
    plt.show()

    # Plot just the means
    plt.figure()
    meanplot_arm, = plt.plot(means_arm, label='mean arm')
    meanplot_eye, = plt.plot(means_eye, label='mean eye')
    plt.legend(handles=[meanplot_arm, meanplot_eye])
    plt.savefig('../images/scenario1/accuracy_combined_arm.png')

    # Plot the train and validation losses
    plt.figure()
    trainplot_arm, = plt.plot(train_losses_arm, label='train loss arm')
    valplot_arm, = plt.plot(val_losses_arm, label='val loss arm')
    trainplot_eye, = plt.plot(train_losses_eye, label='train loss eye')
    valplot_eye, = plt.plot(val_losses_eye, label='val loss eye')

    plt.legend(handles=[trainplot_arm, valplot_arm, trainplot_eye, valplot_eye])
    plt.savefig('../images/scenario1/loss_combined.png')
    plt.show()

    # Plot distance errors
    plt.figure()
    distsplot_arm, = plt.plot(dists_arm, label='Distance Error arm')
    plt.legend(handles=[distsplot_arm])
    plt.savefig('../images/scenario1/distance_error_arm.png')
    plt.show()
    np.save('../images/scenario1/distance_arm', dists_arm)

    # Plot distance error of the eye
    plt.figure()
    distsplot_eye, = plt.plot(dists_eye, label='Distance Error eye')
    plt.legend(handles=[distsplot_eye])
    plt.savefig('../images/scenario1/distance_error_eye.png')
    plt.show()
    np.save('../images/scenario1/distance_eye', dists_eye)

    # Save the weights
    np.save('network_arm_s1', layers.get_all_param_values(network1))
    np.save('network_eye_s1', layers.get_all_param_values(network2))

    return  # network, predictions
Exemple #43
0
      denominator = freqs[n-1][history] + absSigma * Lambda
    else:
      denominator = absSigma * Lambda

  return float(numerator)/denominator


## --- EVALUATION ---
##

# Evaluating unsmoothed maximum likelihood estimates will show you instances
# in which the model fails due to data sparseness. We don't want to see those
# anymore since we have already fixed this by adding smoothing.
# (A sketch of the add-lambda formula these calls rely on follows after this example.)
# evaluate(MLE, POSfreqs[0:4], NEGfreqs[0:4], POStest, NEGtest, "MLE with 3-grams", validate=True)

evaluate(AddOne, POSfreqs[0:2], NEGfreqs[0:2], POStest, NEGtest, "1-grams with add-1 smoothing", validate=True)

evaluate(AddOne, POSfreqs[0:3], NEGfreqs[0:3], POStest, NEGtest, "2-grams with add-1 smoothing", validate=True)

evaluate(AddOne, POSfreqs[0:4], NEGfreqs[0:4], POStest, NEGtest, "3-grams with add-1 smoothing", validate=True)

evaluate(AddOne, POSfreqs[0:5], NEGfreqs[0:5], POStest, NEGtest, "4-grams with add-1 smoothing", validate=True)

evaluate(AddOne, POSfreqs[0:6], NEGfreqs[0:6], POStest, NEGtest, "5-grams with add-1 smoothing", validate=True)

Lambda = 6
evaluate(AddLambda, POSfreqs[0:6], NEGfreqs[0:6], POStest, NEGtest, "5-grams with add-lambda smoothing", validate=True)

Lambda = 2
evaluate(AddLambda, POSfreqs[0:6], NEGfreqs[0:6], POStest, NEGtest, "5-grams with add-lambda smoothing", validate=True)
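For reference, the add-lambda estimate that the calls above rely on has the standard form P(w | h) = (count(h, w) + lambda) / (count(h) + lambda * |V|); add-1 smoothing is the special case lambda = 1. A minimal, self-contained sketch of the formula (illustrative only -- the actual AddOne / AddLambda definitions appear earlier in this file and are not shown in this excerpt):

def add_lambda_estimate(count_hw, count_h, vocab_size, lam):
    # P(w | h) = (count(h, w) + lam) / (count(h) + lam * |V|)
    return (count_hw + lam) / float(count_h + lam * vocab_size)

# e.g. an unseen n-gram after a history seen 42 times, 10,000-word vocabulary, add-1:
# add_lambda_estimate(0, 42, 10000, 1.0)  ->  1 / 10042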
Exemple #44
0
import util
from rbfn import RBFN

if __name__ == '__main__':
    # dataset_file = raw_input('Training dataset location: ')
    dataset_file = 'dataset.csv'
    training_dataset, test_dataset = util.split_dataset(dataset_file)

    training_inputs, training_outputs = util.separate_dataset_io(
        training_dataset, is_training=True)
    rbfn = RBFN(n_centroids=8)
    rbfn.train(training_inputs, training_outputs)

    test_inputs, test_outputs = util.separate_dataset_io(
        test_dataset, is_training=True)
    results = rbfn.predict(test_inputs)
    print util.evaluate(test_outputs, results)
Exemple #45
0
def main():
    result, answer = parse_cmd_args()
    util.evaluate(result, answer)
    return None
Exemple #46
0
def main():
    wsd = NaiveBayesWSD()
    wsd.run()
    util.evaluate("../result/NaiveBayesWSD_result.txt", "../result/test_answer")
def main():
    hl = HingeLossClassifier()
    hl.learn_boundary(pickled_training_file)
    util.evaluate(pickled_testing_file, hl.predict)
    print hl.weights 
# we want to weight the distances appropriately
# See the time to predict for 1 element in the testing set.
# Then, take a call on speeding it up.
# Options include:
# 1. k-d trees (difficult because of the high possible dimensionality, but low real dimensionality) -- see the sketch below
# 2. Cache the test set and, if a new test element is within some small distance of a cached one, reuse the previous prediction
# 3. Run the test queries in parallel.
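# A minimal k-d tree sketch for option 1 above (an assumption, not part of the
# original NearestNeighbors class): build the tree once from the pickled training
# examples, then answer each query in roughly O(log n) instead of a linear scan.
def kdtree_predict(pickled_train_file, feature, k=5):
    import pickle
    import numpy as np
    from scipy.spatial import cKDTree
    examples = pickle.load(open(pickled_train_file, 'rb'))   # assumed format: [(feature, label), ...]
    features = np.array([f for f, _ in examples])
    labels = np.array([l for _, l in examples])
    tree = cKDTree(features)                                  # built once; reuse for many queries
    _, idxs = tree.query(np.asarray(feature), k=k)            # indices of the k nearest training examples
    # unweighted majority vote over the k nearest labels (labels assumed to be +1/-1)
    return 1 if labels[idxs].sum() > 0 else -1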

#def evaluate(pickled_test_file,predictor):
#    test_examples = pickle.load(open(pickled_test_file, 'rb'))
#    errors = 0 
#    false_positive = 0 
#    false_negative = 0 
#    N = len(test_examples) 
#    for feature,label in test_examples:
#        print "about to begin running the predictor"
#        result = predictor(feature)
#        print "finished"
#        if result != label:
#            errors += 1
#        if result == 1 and label == -1 :
#            false_positive += 1 
#        if result == -1  and label == 1:
#            false_negative += 1
#        print "label = %d and result = %d" %(label,result)
#        break 

if __name__ == "__main__":
  nn = NearestNeighbors(5,"trainingSet.p")
  util.evaluate("testingSet.p", nn.predict)