Example #1
    def __init__(self, training_properties, train_iter, dev_iter, test_iter,
                 device):
        self.optimizer_type = training_properties["optimizer"]
        self.learning_rate = training_properties["learning_rate"]
        self.weight_decay = training_properties["weight_decay"]
        self.momentum = training_properties["momentum"]
        self.norm_ratio = training_properties["norm_ratio"]
        self.epoch = training_properties["epoch"]
        self.topk = training_properties["topk"]
        self.print_every = training_properties["print_every_batch_step"]
        self.save_every = training_properties["save_every_epoch"]
        self.eval_every = training_properties["eval_every"]
        self.save_path = training_properties["save_path"]

        self.openAIAdamSchedulerType = training_properties["scheduler_type"]
        self.amsgrad = training_properties["amsgrad"]
        self.partial_adam = training_properties["partial_adam"]

        self.train_iter = train_iter
        self.dev_iter = dev_iter
        self.test_iter = test_iter

        self.device = device

        self.dev_evaluator, self.test_evaluator = Evaluator(
        ).evaluator_factory("single_model_evaluator", self.device)
Example #2
    def test_evaluate(self):
        evaluator = Evaluator(model_dir=self.model_dir,
                              input_shape=self.input_shape)
        BAR, EAR = evaluator.evaluate(basic_model=self.basic_model,
                                      evaluate_model=self.evaluate_model,
                                      valid_stocks=self.valid_stocks,
                                      rounds=3)
        print BAR, EAR
        self.assertNotEqual(BAR, EAR)
Example #3
def prec_recall(data, gt):
    search_engine = SearchEngine(data)

    print('\n> Running Evaluation...\n', end='')
    evaluator = Evaluator(search_engine, gt)
    prec, avg_prec_recall = evaluator.evaluate()

    mkdir(EVALUATION_PATH)
    save_to_csv(prec, os.path.join(EVALUATION_PATH, 'precision.csv'))
    save_to_csv(avg_prec_recall, os.path.join(EVALUATION_PATH, 'avg_prec_recall.csv'), index=True)
    print('\n Results of evaluation saved to directory "%s"' % os.path.relpath(EVALUATION_PATH, PROJ_ROOT))
Example #4
def eval_run_func(params):
    from evaluation.evaluator import Evaluator

    # get input parameters
    model_dir = params['model_dir']
    basic_model = params['basic_model']
    evaluate_model = params['evaluate_model']
    input_shape = params['input_shape']
    rounds = params['rounds']
    valid_stocks = params['valid_stocks']
    _evaluator = Evaluator(model_dir=model_dir, input_shape=input_shape)
    BAR, EAR = _evaluator.evaluate(basic_model, evaluate_model, valid_stocks,
                                   rounds)
    return BAR, EAR
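
eval_run_func above is driven entirely by a single params dictionary. A minimal sketch of one possible call follows; every concrete value (directory, model names, shape, tickers) is a placeholder chosen for illustration, not taken from the original project:

# Hypothetical call to eval_run_func; all values below are illustrative placeholders.
params = {
    'model_dir': 'models/',               # assumed checkpoint directory
    'basic_model': 'baseline_model',      # placeholder model identifier
    'evaluate_model': 'candidate_model',  # placeholder model identifier
    'input_shape': (30, 5),               # placeholder input shape
    'rounds': 3,                          # number of evaluation rounds
    'valid_stocks': ['AAPL', 'MSFT'],     # placeholder stock symbols
}
BAR, EAR = eval_run_func(params)
print(BAR, EAR)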
Example #5
    def __init__(self, output_dir):
        if cfg.TRAIN.FLAG:
            self.model_dir = os.path.join(output_dir, 'Model')
            self.image_dir = os.path.join(output_dir, 'Image')
            self.log_dir = os.path.join(output_dir, 'Log')
            mkdir_p(self.model_dir)
            mkdir_p(self.image_dir)
            mkdir_p(self.log_dir)
            self.summary_writer = FileWriter(self.log_dir)

        self.max_epoch = cfg.TRAIN.MAX_EPOCH
        self.snapshot_interval = cfg.TRAIN.SNAPSHOT_INTERVAL

        s_gpus = cfg.GPU_ID.split(',')
        self.gpus = [int(ix) for ix in s_gpus]
        self.num_gpus = len(self.gpus)
        self.batch_size = cfg.TRAIN.BATCH_SIZE * self.num_gpus
        torch.cuda.set_device(self.gpus[0])
        cudnn.benchmark = True
        
        # load fasttext embeddings (e.g., birds.en.vec)
        path = os.path.join(cfg.DATA_DIR, cfg.DATASET_NAME + ".en.vec")
        txt_dico, _txt_emb = load_external_embeddings(path)
        txt_emb = nn.Embedding(len(txt_dico), 300, sparse=False)
        txt_emb.weight.data.copy_(_txt_emb)
        txt_emb.weight.requires_grad = False
        self.txt_dico = txt_dico
        self.txt_emb = txt_emb
        
        # load networks and evaluator
        self.networks = self.load_network()
        self.evaluator = Evaluator(self.networks, self.txt_emb)
        
        # visualizer to visdom server
        self.vis = Visualizer(cfg.VISDOM_HOST, cfg.VISDOM_PORT, output_dir)
        self.vis.make_img_window("real_im")
        self.vis.make_img_window("fake_im")
        self.vis.make_txt_window("real_captions")
        self.vis.make_txt_window("genr_captions")        
        self.vis.make_plot_window("G_loss", num=7, 
                                  legend=["errG", "uncond", "cond", "latent", "cycltxt", "autoimg", "autotxt"])
        self.vis.make_plot_window("D_loss", num=4, 
                                  legend=["errD", "uncond", "cond", "latent"])
        self.vis.make_plot_window("KL_loss", num=4, 
                                  legend=["kl", "img", "txt", "fakeimg"])
        
        self.vis.make_plot_window("inception_score", num=2,
                                 legend=["real", "fake"])
        self.vis.make_plot_window("r_precision", num=1)
Example #6
def evaluate_multiple_experiments(name_pattern, config_base_dir,
                                  user_dir_override):
    for config_path in glob(path.join(config_base_dir, name_pattern)):
        print('Evaluating experiment ',
              path.basename(config_path).replace('.json', ''))
        params = get_params_from_config(config_path, user_dir_override)
        model = Model(checkpoint_interval=10, model_params=params)
        evaluator = Evaluator(model)
        all_flows = evaluator.flows_over_epochs(every_nth=10)

        result_path = path.join(
            user_dir_override, 'training', 'results',
            path.basename(config_path).replace('.json', '.pkl'))
        with open(result_path, 'wb') as f:
            pkl.dump(all_flows, f)
Example #7
    def __init__(self, use_old_model, use_time, port, python_port, train, evaluate):
        self._use_old_model = use_old_model
        self._use_time = use_time
        self._port = port
        self._python_port = python_port
        self._train = train
        self._evaluate = evaluate

        if use_old_model:
            self._models_folder = 'old_model'
        else:
            self._models_folder = 'new_model'

        self._evaluator = Evaluator(self._port, self._python_port)
        print self._models_folder
Example #8
    def run_nrt_experiment(self):

        self.history_logs = edict()
        self.history_logs['Train'] = []
        self.history_logs['Val'] = []

        for dataName in sorted(os.listdir(Path(self.cfg.nrt_data_folder) / self.cfg.input_folder)):
            self.dataName = dataName
            print(f"\n==> {self.dataName}")

            self.dataloaders = self.train_val_loader(num=self.cfg.size_of_train)
    
            self.optimizer = torch.optim.Adam([dict(params=self.model.parameters(), lr=self.cfg.learning_rate, weight_decay=self.cfg.weight_decay)])
            # self.history_logs = {'Train': np.zeros((len(metrics)+1, self.cfg.max_epoch)), 
                            # 'Val': np.zeros((len(metrics)+1, self.cfg.max_epoch))}

            # --------------------------------- Train -------------------------------------------
            for epoch in range(0, self.cfg.max_epoch):
                print(f"\n==> train epoch: {epoch}/{self.cfg.max_epoch}")
                valid_logs = self.train_one_epoch(epoch)
                    
                # do something (save model, change lr, etc.)
                if self.cfg.max_score < valid_logs['iou_score']:
                    self.cfg.max_score = valid_logs['iou_score']  # update the tracked best score
                    torch.save(self.model, self.model_url)
                    print('Model saved!')
                    
                if epoch == 10:
                    self.optimizer.param_groups[0]['lr'] = self.cfg.learning_rate * 0.1
                    print(f"Decrease decoder learning rate to {self.optimizer.param_groups[0]['lr']}!")

                # save learning history
                self.plot_and_save_learnHistory()

            self.cfg.data_folder = self.cfg.nrt_data_folder
            self.cfg.modelPath = self.savePath
            self.evaluator = Evaluator(self.cfg)

            url = Path(self.cfg.nrt_data_folder) / self.cfg.input_folder / self.dataName
            print(url)
            predMap = self.evaluator.inference(url, self.savePath)
Example #9
def test(cls, X_train, y_train, X_test, y_test, X_syn, y_syn, train_on,
         title, repeated, save_path=None):
    """
    Train and test classification model for inner-corpus evaluation
    @param cls: Initialized classifier given hyperparameters
    @param X_train: Real training data
    @param y_train: Labels for real training data
    @param X_test: Test data
    @param y_test: Labels for test data
    @param X_syn: Synthetic training data
    @param y_syn: Labels for synthetic training data
    @param train_on: Which training data will be used
    @param title: Title for generated image of confusion matrix 
    @param repeated: Number of repeated times for each classification
    @param save_path: File path to save the generated image
    @return: Mean and standard deviation of test recall
    """
    start = time.time()
    eva = Evaluator(X_train, y_train, X_test, y_test, X_syn, y_syn, cls=cls, 
                    repeated=repeated)
    if train_on == 'real':
        mean_recall, std_recall, mean_cm = eva.real()
    elif train_on == 'syn':
        mean_recall, std_recall, mean_cm = eva.syn()
    elif train_on == 'real+syn':
        mean_recall, std_recall, mean_cm = eva.real_plus_syn()    
    end = time.time()
    print("time used: {} s".format(time_converter(start, end)))
    print("inner-corpus test - mean: {}, std: {}".format(mean_recall, 
                                                         std_recall))
    plot_confusion_matrix(mean_cm, title=title)
    if save_path:
        plt.savefig(save_path, bbox_inches='tight')
        print("Successfully generated {}".format(save_path))
    else:
        plt.show() 
    plt.close()
    return mean_recall, std_recall
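
The docstring above spells out what test() expects. The sketch below shows one possible call, assuming a scikit-learn classifier and randomly generated placeholder data; none of these values come from the original project:

import numpy as np
from sklearn.linear_model import LogisticRegression

# Placeholder data; shapes and class counts are illustrative only.
rng = np.random.default_rng(0)
X_train, y_train = rng.normal(size=(100, 20)), rng.integers(0, 2, 100)
X_test, y_test = rng.normal(size=(40, 20)), rng.integers(0, 2, 40)
X_syn, y_syn = rng.normal(size=(100, 20)), rng.integers(0, 2, 100)

# Train on both real and synthetic data, repeating each run 5 times.
mean_recall, std_recall = test(LogisticRegression(max_iter=1000),
                               X_train, y_train, X_test, y_test, X_syn, y_syn,
                               train_on='real+syn',
                               title='Real + synthetic training data',
                               repeated=5)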
Example #10
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--file_name', type=str, help='File name in manual_src to run.', default=None)
    parser.add_argument('-g', '--gen_src_count', type=int, help='Number of automatically generated source to run.', default=0)
    parser.add_argument('-n', '--num_seeds', type=int, help='Number of seeds that will run per source file.', default=2)
    args = parser.parse_args()

    seeds = [10 * s for s in range(args.num_seeds)]
    evaluator = Evaluator(seeds, 'evaluation/manual_src', 'evaluation/__genned')

    if args.file_name is not None:
        evaluator.eval_with_manual_src_file(args.file_name)
    elif args.gen_src_count > 0:
        for _ in range(args.gen_src_count):
            evaluator.eval_with_gen_src()
    else:
        evaluator.eval_with_manual_src_all()

    dnn_approx_result, dnn_result, vanila_result = evaluator.get_all_results()

    print('All results.')
    print('DNN + Approx')
    print_result(dnn_approx_result)
    print('DNN')
    print_result(dnn_result)
    print('Vanila')
    print_result(vanila_result)

    dnn_approx_avg, dnn_avg, vanila_avg = evaluator.get_all_avg_results()

    print(print_bar_double)
    print('Overall avg results.')
    print('DNN + Approx')
    print(dnn_approx_avg)
    print('DNN')
    print(dnn_avg)
    print('Vanila')
    print(vanila_avg)
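
Given the argparse definitions above, main() is meant to be launched from the command line. The snippet below reproduces one such invocation programmatically; the script name is hypothetical:

import sys

# Equivalent to running:  python run_evaluation.py -g 2 -n 3   (script name is a placeholder)
sys.argv = ['run_evaluation.py', '--gen_src_count', '2', '--num_seeds', '3']
main()  # evaluates two generated sources, each with seeds [0, 10, 20]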
Example #11
def evaluate_labeling(dir_path, labeling: Dict[str, Dict[str, int]], key_path: str = None, maxLabels=2) \
        -> Dict[str, Dict[str, float]]:  # RL: maxLabels added
    """
    labeling example : {'become.v.3': {'become.sense.1':3,'become.sense.5':17} ... }
    means instance 'become.v.3' is 17/20 in sense 'become.sense.5' and 3/20 in sense 'become.sense.1'
    :param key_path: write produced key to this file
    :param dir_path: SemEval dir
    :param labeling: instance id labeling
    :return: FNMI, FBC as calculated by SemEval provided code
    """
    logging.info('starting evaluation key_path: %s' % key_path)

    def get_scores(gold_key, eval_key):

        ret = {}
        # for metric, jar, column in [
        #     #         ('jaccard-index','SemEval-2013-Task-13-test-data/scoring/jaccard-index.jar'),
        #     #         ('pos-tau', 'SemEval-2013-Task-13-test-data/scoring/positional-tau.jar'),
        #     #         ('WNDC', 'SemEval-2013-Task-13-test-data/scoring/weighted-ndcg.jar'),
        #     ('FNMI', os.path.join(dir_path, 'scoring/fuzzy-nmi.jar'), 1),
        #     ('FBC', os.path.join(dir_path, 'scoring/fuzzy-bcubed.jar'), 3),
        # ]:
        #     logging.info('calculating metric %s' % metric)
        #     res = subprocess.Popen(['java', '-jar', jar, gold_key, eval_key], stdout=subprocess.PIPE).stdout.readlines()
        #     # columns = []
        #     for line in res:
        #         line = line.decode().strip()
        #         if line.startswith('term'):
        #             # columns = line.split('\t')
        #             pass
        #         else:
        #             split = line.split('\t')
        #             if len(split) > column:
        #                 word = split[0]
        #                 # results = list(zip(columns[1:], map(float, split[1:])))
        #                 result = split[column]
        #                 if word not in ret:
        #                     ret[word] = {}
        #                 ret[word][metric] = float(result)

        #+RL
        script = [
            "python2.7", "./spanish-lex-sample/score/score", eval_key,
            gold_key, './spanish-lex-sample/test/emptysensemap'
        ]
        res = subprocess.Popen(" ".join(script),
                               shell=True,
                               env={
                                   "PYTHONPATH": "."
                               },
                               stdout=subprocess.PIPE).stdout.readlines()

        ret['all'] = {}
        splitted = res[2].strip().split()
        ret['all']['precision'] = float(splitted[1])
        ret['all']['correct'] = float(
            str(splitted[2].decode()).replace('(', ''))
        ret['all']['attempted'] = float(splitted[5])
        splitted = res[3].strip().split()
        ret['all']['recall'] = float(splitted[1])
        ret['all']['total'] = float(splitted[5])
        splitted = res[4].strip().split()
        ret['all']['attemptedPct'] = float(splitted[1])
        #-
        return ret

    def getGoldKeySENSEVAL2(goldPath):  #+RL
        with open(os.path.join(dir_path, goldPath), 'r') as fgold:
            goldKey = dict()
            for line in fgold.readlines():
                splitted = line.strip().split()
                #if splitted[0] == lemma:
                instance = dict()
                graded = dict()
                rest = splitted[2:]
                for index in rest:
                    graded[splitted[0] + '.' + index] = 1.0 / len(rest)
                instance[splitted[1]] = graded
                if not splitted[0] in goldKey:
                    goldKey[splitted[0]] = instance
                else:
                    goldKey[splitted[0]].update(instance)
        return goldKey

    def dictToJ(dictionary):  #+RL
        HashMap = autoclass('java.util.HashMap')
        String = autoclass('java.lang.String')
        Double = autoclass('java.lang.Double')
        map = HashMap()
        for token, instances in dictionary.items():
            jToken = String(token)
            instanceMap = HashMap()
            for instance, labels in instances.items():
                jInstance = String(instance)
                labelMap = HashMap()
                sum_applicabilities = sum([a for _, a in labels.items()])
                for label, applicability in labels.items():
                    if sum_applicabilities > 1:
                        applicability /= sum_applicabilities
                    jLabel = String(label)
                    jApplicability = Double(applicability)
                    labelMap.put(jLabel, jApplicability)
                instanceMap.put(jInstance, labelMap)
            map.put(jToken, instanceMap)
        return map

    def getTrainingInstances(trainingSets):  #+RL

        HashSet = autoclass('java.util.HashSet')
        String = autoclass('java.lang.String')
        listJTrainingSets = []
        for trainingSet in trainingSets:
            jTrainingSet = HashSet()
            for instance in trainingSet:
                jInstance = String(instance)
                jTrainingSet.add(jInstance)
            listJTrainingSets.append(jTrainingSet)
        return listJTrainingSets

    def printTrainingSets(listJTrainingSets):  #+RL
        trainingSet = 1

        for trainingInstances in listJTrainingSets:
            print(
                '---------------------------------------------Training set %d \n'
                % trainingSet)
            entrySetIterator = trainingInstances.iterator()
            string = ''
            while entrySetIterator.hasNext():
                e = entrySetIterator.next()
                string += e + ', '
            print(string)
            trainingSet += 1

    def mapSenses(trainingInstances, goldMap, labelingMap, maxLabels):  #+RL
        GradedReweightedKeyMapper = autoclass(
            'edu.ucla.clustercomparison.GradedReweightedKeyMapper')
        mapper = GradedReweightedKeyMapper()
        allRemappedTestKey = {}
        remappedTestKey = mapper.convert(goldMap, labelingMap,
                                         trainingInstances)
        #print(remappedTestKey)
        convertedSet = remappedTestKey.entrySet()
        convertedIterator = convertedSet.iterator()
        while convertedIterator.hasNext():
            e = convertedIterator.next()
            doc = e.getKey()
            instRatings = e.getValue()
            instanceIterator = instRatings.entrySet().iterator()
            while instanceIterator.hasNext():
                i = instanceIterator.next()
                instance = i.getKey()
                labelIterator = i.getValue().entrySet().iterator()
                labelList = []
                while labelIterator.hasNext():
                    l = labelIterator.next()
                    label = l.getKey()
                    applicability = l.getValue()
                    # print(f'{label} -----{applicability}')
                    labelList.append((label, applicability))
                labelList.sort(key=lambda x: x[1], reverse=True)
                allRemappedTestKey[instance] = labelList[0:maxLabels]
        return allRemappedTestKey

    with tempfile.NamedTemporaryFile('wt') as fout:
        lines = []
        #+RL
        goldPath = 'key'
        goldKey = getGoldKeySENSEVAL2(goldPath)
        allInstances = []
        for _, v in goldKey.items():
            for k1, _ in v.items():
                allInstances.append(k1)
        indices = list(range(0, len(allInstances)))
        random.seed(18)
        random.shuffle(indices)
        trainingSets = [set() for _ in range(0, 5)]
        for i in range(0, len(allInstances)):
            instance = allInstances[i]
            toExclude = i % len(trainingSets)
            for j in range(0, len(trainingSets)):
                if j != toExclude:
                    trainingSets[j].add(instance)
        #print(trainingSets)
        # termToNumberSenses = {}
        # for e in goldKey.items():
        #     term = e[0]

        #     senses = set()
        #     for ratings in goldKey[term].values():
        #         for sense in ratings.keys():
        #             senses.update(sense)

        #     termToNumberSenses[term] = len(senses)

        listJTrainingInstances = getTrainingInstances(trainingSets)
        # printTrainingSets(listJTrainingInstances)
        goldMap = dictToJ(goldKey)
        lemmaLabeling = {}
        # print(labeling)
        for k, v in labeling.items():
            lemma = k.split('.')[0]
            if not lemma in lemmaLabeling:
                lemmaLabeling[lemma] = {k: v}
            else:
                lemmaLabeling[lemma][k] = v
        labelingMap = dictToJ(lemmaLabeling)

        lines = []
        global_test_key = {}
        for jTrainingInstances in listJTrainingInstances:
            testKey = mapSenses(jTrainingInstances, goldMap, labelingMap,
                                maxLabels)
            # print(sorted(testKey.items(), key= lambda x: x[0]))
            global_test_key.update(testKey)
            for instance, label in testKey.items():

                clusters_str = ' '.join(x[0].split('.')[1]
                                        for x in label[0:maxLabels])

                lines.append('%s %s %s' %
                             (instance.split('.')[0], instance, clusters_str))
        evaluator = Evaluator(goldKey, global_test_key)
        evals = evaluator.semeval_2013_task_13_metrics()
        evalKey = key_path
        logging.info('writing key to file %s' % evalKey)

        with open(evalKey, 'w', encoding="utf-8") as fout2:
            lines = sorted(lines)
            fout2.write('\n'.join(lines))
        scores = get_scores(
            os.path.join(dir_path,
                         goldPath),  #'keys/gold/all.key'), RL goldPath added
            evalKey)  #RL  task added
        scores['all'].update(evals)
        print(scores)
        #-
        # goldPath = 'keys/gold/all.key'
        # for instance_id, clusters_dict in labeling.items():
        #     clusters = sorted(clusters_dict.items(), key=lambda x: x[1])
        #     clusters_str = ' '.join([('%s/%d' % (cluster_name, count)) for cluster_name, count in clusters])
        #     lemma_pos = instance_id.rsplit('.', 1)[0]
        #     lines.append('%s %s %s' % (lemma_pos, instance_id, clusters_str))
        # fout.write('\n'.join(lines))
        # fout.flush()

        # scores = get_scores(os.path.join(dir_path, goldPath), #'keys/gold/all.key'), RL goldPath added
        #                 fout.name,task) #RL  task added

        # if key_path:
        #     logging.info('writing key to file %s' % key_path)
        #     with open(key_path, 'w', encoding="utf-8") as fout2:
        #         fout2.write('\n'.join(lines))

    return scores
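
The docstring at the top of evaluate_labeling describes the shape of the labeling argument, and dictToJ rescales each instance's sense counts so the applicabilities sum to 1. The standalone sketch below (plain Python, no Java interop, not part of the original code) illustrates that normalization on the docstring's own example:

labeling = {'become.v.3': {'become.sense.1': 3, 'become.sense.5': 17}}

for instance, sense_counts in labeling.items():
    total = sum(sense_counts.values())
    # Rescale raw counts into per-instance applicabilities that sum to 1.
    applicabilities = {sense: count / total for sense, count in sense_counts.items()}
    print(instance, applicabilities)
    # become.v.3 {'become.sense.1': 0.15, 'become.sense.5': 0.85}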
Example #12
    def __init__(self, training_properties, datasetloader, device):
        super(SingleModelNerTrainer, self).__init__(training_properties, datasetloader, device)

        self.scorer = NerScorer(datasetloader.ner_vocab)
        self.dev_evaluator, self.test_evaluator = Evaluator().evaluator_factory("single_model_ner_evaluator",
                                                                                self.device)
Example #13
import json

from evaluation.evaluator import Evaluator

if __name__ == '__main__':
    with open('dataset/dev-predictions-final-it4.json', 'r') as f:
        bad_format_predictions = json.loads(f.read())
        predictions = {}
        for question_id, predictions_list in bad_format_predictions.iteritems():
            predictions[question_id] = predictions_list[0]

    evaluator = Evaluator('dataset/dev.json')
    print evaluator.ExactMatch(predictions)
    print evaluator.F1(predictions)
Example #14
    datasets = {}

    dataset_splits = DatasetSplitter.generate_splits(config)
    transformations = TransformsGenerator.get_final_transforms(config)

    for key in dataset_splits:
        path, batching_config, split = dataset_splits[key]
        transform = transformations[key]

        datasets[key] = VideoDataset(path, batching_config, transform, split)

    trainer = Trainer(config, model, datasets["train"], logger)
    evaluator = Evaluator(config,
                          datasets["validation"],
                          logger,
                          action_sampler=None,
                          logger_prefix="validation")

    # Resume training
    try:
        trainer.load_checkpoint(model)
    except Exception as e:
        logger.print(e)
        logger.print("Cannot play without loading checkpoint")
        exit(1)

    model.eval()
    dataloader = evaluator.dataloader  # Uses validation dataloader
    #dataset_index = int(input(f"- Insert start sample index in [0, {len(dataloader)}): "))
    dataset_index = 0
Example #15
    # Used to compute the number of weights to use.
    feature_counter = FeatureCounter()

    training_titles = set()
    training_examples = ReadExamples(FLAGS.input_train_features,
                                     feature_counter, FLAGS.max_train_articles,
                                     training_titles)
    random.shuffle(training_examples)

    dev_titles = set()
    dev_examples = ReadExamples(FLAGS.input_dev_features, feature_counter,
                                FLAGS.max_dev_articles, dev_titles)
    dev_question_annotations = ReadQuestionAnnotations(
        FLAGS.input_dev_articles)
    dev_evaluator = Evaluator(path=FLAGS.input_dev,
                              restrict_to_titles=dev_titles)

    # Use a small set of articles for computing the metrics on the training set.
    training_metric_titles = set(
        random.sample(training_titles, len(dev_titles))
    ) if len(training_titles) > len(dev_titles) else training_titles
    training_metric_examples = [
        example for example in training_examples
        if example.article_title in training_metric_titles
    ]
    training_question_annotations = ReadQuestionAnnotations(
        FLAGS.input_train_articles)
    training_evaluator = Evaluator(path=FLAGS.input_train,
                                   restrict_to_titles=training_metric_titles)

    logger.info('Using %d features.', feature_counter.NumFeatures())
Example #16
    # raw_input("done")

    editDistGroup = dict()
    for dist, qaIdByDist in groupby(sorted(editDist.iteritems(),
                                           key=lambda x: x[1]),
                                    key=lambda x: x[1]):
        editDistGroup[dist] = list(qaIdByDist)

    predFile = "./output/dev-predictions-it3.json"
    jsonDataFile = "./dataset/json/dev.json"
    # predFile = "./dev-predictions-it3.json"
    # jsonDataFile = "./dev.json"
    with open(predFile, "r") as fp:
        predDict = json.load(fp)

    evaluator = Evaluator(jsonDataFile)
    exactMatchRateList = list()
    F1List = list()
    for dist in sorted(editDistGroup.keys()):
        predSubDict = dict()
        for qaId, _ in editDistGroup[dist]:
            predSubDict[qaId] = predDict[qaId]
        exactMatchRate = evaluator.ExactMatch(predSubDict)
        F1 = evaluator.F1(predSubDict)
        exactMatchRateList.append(exactMatchRate)
        F1List.append(F1)
        print "edit dist ", dist
        print "number of sample ", len(editDistGroup[dist])
        print "exact match ", exactMatchRate
        print "F1 ", F1
        print
Example #17
    # raw_input("done")

    editDistGroup = dict()
    for dist, qaIdByDist in groupby(sorted(editDist.iteritems(),
                                           key=lambda x: x[1]),
                                    key=lambda x: x[1]):
        editDistGroup[dist] = list(qaIdByDist)

    predFile = "./output/dev-predictions-it3.json"
    jsonDataFile = "./dataset/json/dev.json"
    # predFile = "./dev-predictions-it3.json"
    # jsonDataFile = "./dev.json"
    with open(predFile, "r") as fp:
        predDict = json.load(fp)

    evaluator = Evaluator(jsonDataFile)
    exactMatchRateList = list()
    F1List = list()
    HumanF1List = list()

    human_predictions = {}
    with open(jsonDataFile, "r") as fp:
        human_articles = json.load(fp)['data']
    for article in human_articles:
        for paragraph in article['paragraphs']:
            for qa in paragraph['qas']:
                if len(qa['answers']) > 1:
                    human_predictions[qa['id']] = qa['answers'].pop(1)['text']
    human_evaluator = Evaluator(articles=human_articles)

    for dist in sorted(editDistGroup.keys()):
Example #18
        dictionary = Dictionary(
            FLAGS.input_featuredict,
            FLAGS.ablate_features.split(',') if FLAGS.ablate_features else [])
        logger.info('Using %d features.', dictionary.NumFeatures())

    training_titles = set()
    training_examples = ReadExamples(FLAGS.input_train_features, dictionary,
                                     FLAGS.max_train_articles, training_titles)
    random.shuffle(training_examples)

    dev_titles = set()
    dev_examples = ReadExamples(FLAGS.input_dev_features, dictionary,
                                FLAGS.max_dev_articles, dev_titles)
    dev_question_annotations = ReadQuestionAnnotations(
        FLAGS.input_dev_articles)
    dev_evaluator = Evaluator(FLAGS.input_dev, dev_titles)

    # Use a small set of articles for computing the metrics on the training set.
    training_metric_titles = set(
        random.sample(training_titles, len(dev_titles))
    ) if len(training_titles) > len(dev_titles) else training_titles
    training_metric_examples = [
        example for example in training_examples
        if example.article_title in training_metric_titles
    ]
    training_question_annotations = ReadQuestionAnnotations(
        FLAGS.input_train_articles)
    training_evaluator = Evaluator(FLAGS.input_train, training_metric_titles)

    # Filter the training questions for the learning curve.
    num_training_questions = 0
Example #19
                    '')
flags.DEFINE_string('input-features', 'dataset/test-featuresbucketized.proto',
                    '')
flags.DEFINE_integer('num-features', 186194776, '')
flags.DEFINE_string('input-model', 'dataset/model', '')
flags.DEFINE_integer('min-articles', None, '')

if __name__ == '__main__':
    feature_counter = FeatureCounter(num_features=FLAGS.num_features)

    titles = set()
    examples = ReadExamples(FLAGS.input_features, feature_counter,
                            FLAGS.min_articles, titles)
    random.shuffle(examples)
    question_annotations = ReadQuestionAnnotations(FLAGS.input_articles)
    evaluator = Evaluator(path=FLAGS.input, restrict_to_titles=titles)

    inputs = GetInputPlaceholders()
    variables = GetVariables(feature_counter)
    logits = GetLogits(inputs, variables)
    _, predict_op = tf.nn.top_k(logits, 1)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, FLAGS.input_model)

        ComputeAndDisplayMetrics(sess, inputs, None, predict_op, examples,
                                 question_annotations, evaluator, '')

        # if FLAGS.print_errors:
        #     for example in examples:
Example #20
                        default=None,
                        help="Number of sequences to evaluate (default: all).")
    parser.add_argument("-mp",
                        action="store_true",
                        help="Use multiprocessing.")
    parser.add_argument(
        "--out",
        required=False,
        type=str,
        help="Evaluation output file (file endings will be attached).")
    args = parser.parse_args()

    words = set(read_lines(args.words))
    print(len(words))

    evaluator = Evaluator(words)

    start = time.monotonic()

    correct_sequences = read_file(args.correct)[:args.n]
    corrupt_sequences = read_file(args.misspelled)[:args.n]
    predicted_sequences = read_file(args.predictions)[:args.n]

    n_cpus = mp.cpu_count() if args.mp else 1

    with mp.Pool(n_cpus) as pool:
        results = pool.starmap(
            evaluator.evaluate_sample,
            list(zip(correct_sequences, corrupt_sequences,
                     predicted_sequences)))
Example #21
                    '')
flags.DEFINE_string('input-featuredict', 'dataset/featuredictbucketized.proto',
                    '')
flags.DEFINE_string('input-model', 'dataset/model13-it3', '')
flags.DEFINE_integer('min-articles', None, '')
flags.DEFINE_boolean('print-errors', False, '')

if __name__ == '__main__':
    dictionary = Dictionary(FLAGS.input_featuredict, [])

    titles = set()
    examples = ReadExamples(FLAGS.input_features, dictionary,
                            FLAGS.min_articles, titles)
    random.shuffle(examples)
    question_annotations = ReadQuestionAnnotations(FLAGS.input_articles)
    evaluator = Evaluator(FLAGS.input, titles)

    inputs = GetInputPlaceholders()
    variables = GetVariables(dictionary)
    logits = GetLogits(inputs, variables)
    _, predict_op_top_1 = tf.nn.top_k(logits, 1)
    _, predict_op_top_3 = tf.nn.top_k(logits, 3)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, FLAGS.input_model)

        ComputeAndDisplayMetrics(sess, inputs, None, predict_op_top_3,
                                 examples, question_annotations, evaluator, '')

        if FLAGS.print_errors:
Example #22
    predictions = {}
    num_same_counts = Counter()
    for article in articles:
        for paragraph in article['paragraphs']:
            for qa in paragraph['qas']:
                if len(qa['answers']) >= 3:
                    num_same_counts[3 - len(
                        set([
                            Evaluator.CleanAnswer(answer['text'])
                            for answer in qa['answers'][0:3]
                        ])) + 1] += 1

                if len(qa['answers']) > 1:
                    predictions[qa['id']] = qa['answers'].pop(1)['text']

    evaluator = Evaluator(articles=articles)
    print 'Exact match:', round(evaluator.ExactMatch(predictions), 1)
    print 'F1:', round(evaluator.F1(predictions), 1)
    total_num_same_count = sum(num_same_counts.values())
    for num_same, count in sorted(num_same_counts.items()):
        print num_same, 'same:', round(100.0 * count / total_num_same_count, 1)

    with open('dataset/dev-answertypetags.json') as fileobj:
        tags = json.loads(fileobj.read())

    print len(tags), 'tagged questions'
    for tag, _ in Counter(tags.values()).most_common():
        num_correct = 0
        total_f1 = 0
        num_total = 0
        for question_id, _ in filter(lambda x: x[1] == tag, tags.items()):
Example #23
    if extractions is not None:
        for i, extraction in enumerate(extractions):
            points = str(extraction.points)
            correct_points = str(extraction.correct_points)
            file.write('Segment ' + str(i + 1) + '\n')
            file.write('Points: ' + points + '\n')
            file.write('Given points: ' + correct_points + '\n')
            file.write('------------\n')

    file.close()


stop = False
roads = get_roads_from_xml_file(path_roads)
evaluator = Evaluator()
total_statistics = {}
total_len_extracted_previous = 0
total_len_reference_previous = 0
evaluation_count = 0

for image_name in os.listdir(path_images):
    if not stop and image_name[-3:] == 'png':
        image_name_list = image_name.split('_')

        if len(image_name_list) < 3:
            continue

        road_name = image_name_list[0]
        segment_number = int(image_name_list[1])
        zoom_level = int(image_name_list[-2][1:])