Example #1
0
    def train_or_dev_or_test_2_eachworld_splitter(self, train_dev_testflag):
        """Split one dataset split's mentions into per-world mention files.

        Loads the preprocessed mentions for the given split, converts each
        one via ``self.mentionConverter``, groups them by their gold world,
        and dumps one ``mentions.json`` per world under
        ``self.args.mentions_splitbyworld_dir``.

        :param train_dev_testflag: one of ``"train"``, ``"dev"``, ``"test"``.
        """
        assert train_dev_testflag in ["train", "dev", "test"]
        # On disk the dev split is stored under the name "val".
        split_name = "val" if train_dev_testflag == "dev" else train_dev_testflag
        mention_path = self.args.mentions_dir + split_name + '.json'
        mentions = mentions_in_train_dev_test_loader(mention_path)
        print('\n{0} mentions are now preprocessed...\n'.format(
            train_dev_testflag))

        world_2_idx2mention = {}
        skipped = 0
        for mention_data in tqdm(mentions):
            try:
                mention_json = self.mentionConverter(
                    one_line_mention=mention_data)
            # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
            # are not swallowed; converter failures are still skipped.
            except Exception:
                skipped += 1
                print("mention id", mention_data["mention_id"],
                      "is skipped because gold cannot be found")
                continue
            world_belongingto = mention_json["gold_world"]
            # Insert under the next sequential index for that world.
            idx2mention = world_2_idx2mention.setdefault(world_belongingto, {})
            idx2mention[len(idx2mention)] = mention_json

        for world, its_preprocesseddata in world_2_idx2mention.items():
            jdump(
                its_preprocesseddata,
                self.args.mentions_splitbyworld_dir + world + "/mentions.json")
def devEvalExperimentEntireDevWorldLog(experiment_logdir,
                                       t_entire_h1c,
                                       t_entire_h10c,
                                       t_entire_h50c,
                                       t_entire_h64c,
                                       t_entire_h100c,
                                       t_entire_h500c,
                                       t_entire_datapoints,
                                       epoch=0):
    """Log entire-dev hits@k percentages for one epoch to a JSON file.

    Converts raw hits@{1,10,50,64,100,500} counts to percentages of
    ``t_entire_datapoints``, prints them, and dumps them under
    ``experiment_logdir`` as ``ep<epoch>devEntireEvalResult.json``.
    """
    # Renamed from ambiguous ``l`` (PEP 8 / E741).
    hit_counts = [
        t_entire_h1c, t_entire_h10c, t_entire_h50c, t_entire_h64c,
        t_entire_h100c, t_entire_h500c
    ]
    devEvalResultWithPercent = [
        round(hits_c / t_entire_datapoints * 100, 4) for hits_c in hit_counts
    ]
    print('\nt_h1c, h10c, h50c, h64c, h100c, h500c @ Percent:\n',
          devEvalResultWithPercent)
    dump_dir = experiment_logdir + DevEvalDuringTrainDirForEachExperiment + '/'
    # makedirs(exist_ok=True) avoids the exists()/mkdir() race of the original.
    os.makedirs(dump_dir, exist_ok=True)
    jpath = dump_dir + 'ep' + str(epoch) + 'devEntireEvalResult.json'
    j = {
        't_h1c_Dev': devEvalResultWithPercent[0],
        'h10c_Dev': devEvalResultWithPercent[1],
        'h50c_Dev': devEvalResultWithPercent[2],
        'h64c_Dev': devEvalResultWithPercent[3],
        'h100c_Dev': devEvalResultWithPercent[4],
        'h500c_Dev': devEvalResultWithPercent[5]
    }
    jdump(j=j, path=jpath)
    def logDevEvaluationOfOneWorldDuringTrain(self, h1count, h10count,
                                              h50count, h64count, h100count,
                                              h500count, data_points,
                                              trainEpoch):
        """Dump one world's dev hits@k percentages for one training epoch.

        Writes ``devEval_ep<trainEpoch>.json`` under
        ``<experiment_logdir>/<DevEvalDuringTrainDirForEachExperiment>/<world_name>/``,
        creating the directories as needed.
        """
        # Build the per-world directory once; makedirs creates intermediate
        # directories and replaces the original's two exists()/mkdir() pairs
        # (which were also racy between the check and the create).
        world_dir = (self.experiment_logdir +
                     DevEvalDuringTrainDirForEachExperiment + '/' +
                     self.world_name + '/')
        os.makedirs(world_dir, exist_ok=True)
        dumped_jsonpath = world_dir + 'devEval_ep' + str(trainEpoch) + '.json'

        jdump(j={
            'h1_percent': h1count / data_points * 100,
            'h10_percent': h10count / data_points * 100,
            'h50_percent': h50count / data_points * 100,
            'h64_percent': h64count / data_points * 100,
            'h100_percent': h100count / data_points * 100,
            'h500_percent': h500count / data_points * 100,
            'data_points': data_points,
            'world_name': self.world_name,
            'train_ep': trainEpoch
        },
              path=dumped_jsonpath)
Example #4
0
    def log_one_world(self, h1count, h10count, h50count, h64count, h100count, h500count, data_points):
        """Dump one world's final dev/test hits@k percentages to JSON.

        Writes ``<world_name>_eval.json`` under
        ``<experiment_logdir>/final_<dev_or_test>/``, creating the directory
        as needed.
        """
        # makedirs(exist_ok=True) replaces the racy exists()/mkdir() pair.
        final_dir = self.experiment_logdir + 'final_' + self.dev_or_test
        os.makedirs(final_dir, exist_ok=True)
        dumped_jsonpath = final_dir + '/' + self.world_name + '_eval.json'

        jdump(j={'h1_percent': h1count / data_points * 100,
                 'h10_percent': h10count / data_points * 100,
                 'h50_percent': h50count / data_points * 100,
                 'h64_percent': h64count / data_points * 100,
                 'h100_percent': h100count / data_points * 100,
                 'h500_percent': h500count / data_points * 100,
                 'data_points': data_points
                 }, path=dumped_jsonpath)
Example #5
0
    def from_oneworld_dump_preprocessed_world(self, world_name):
        '''
        Preprocess one world's documents and dump the lookup tables.

        :param world_name: world identifier; must be a member of ALL_WORLDS
        :return: dump dui2title, dui2desc, dui2idx, idx2dui into ./data/worlds/$world_name/
        :raises AssertionError: if world_name is not in ALL_WORLDS
        '''
        # Bug fix: the original built ``AssertionError(...)`` inside a bare
        # ``except:`` without raising it, so an unknown world was silently
        # accepted. Raise explicitly instead (also survives ``python -O``).
        if world_name not in ALL_WORLDS:
            raise AssertionError(world_name + " is not included in datasets")

        each_world_dir = self.args.dataset_dir + 'worlds/' + world_name + '/'

        if not os.path.exists(each_world_dir):
            os.mkdir(each_world_dir)

        one_world_json_path = self.args.documents_dir + world_name + '.json'
        one_world_json = oneworld_opener(one_world_json_path)

        # Build the title/index lookups, then trim titles and descriptions
        # to the configured maximum token counts.
        dui2title, idx2dui, dui2idx = self.oneworld_json_2_docid2title(
            one_world_json=one_world_json)
        dui2title = self.dui2tokens_tokentrimmer(
            dui2tokens=dui2title,
            max_token=self.args.extracted_first_token_for_title)
        dui2desc_raw = self.oneworld_json_2_dui2desc_raw(
            one_world_json=one_world_json)
        dui2desc = self.dui2tokens_tokentrimmer(
            dui2tokens=dui2desc_raw,
            max_token=self.args.extracted_first_token_for_description)

        jdump(j=dui2title, path=each_world_dir + 'dui2title.json')
        jdump(j=idx2dui, path=each_world_dir + 'idx2dui.json')
        jdump(j=dui2idx, path=each_world_dir + 'dui2idx.json')
        jdump(j=dui2desc, path=each_world_dir + 'dui2desc.json')
        jdump(j=dui2desc_raw, path=each_world_dir + 'dui2desc_raw.json')

        print('\n==={0} was preprocessed==='.format(world_name))
        print('total entities in {0}:'.format(world_name), len(dui2title))
Example #6
0
    def on_get(self, req, resp, page_id):
        """Get request

           Keyword arguments:
           req - HTTP request
           resp - HTTP response
           page_id - Faceboook page ID
        """

        # Refresh the cached events if the last access is stale.
        if self._check_last_access(page_id):
            self._request_events(page_id)
            self._save_last_access(page_id)

        # Bug fix: the original assigned ``data`` only in the else branch,
        # so a cache refresh hit ``UnboundLocalError`` below. Read from the
        # DB unconditionally after any refresh.
        # NOTE(review): assumes _request_events persists into self.table —
        # confirm against the request pipeline.
        data = self.db.get_items(page_id, self.table)

        # Remove pagination dict
        del data['pages']

        # Add expiration cache time in seconds
        data['cache_expiration_time'] = self.cache_expiration_time

        resp.body = u.jdump(data)
Example #7
0
 def dump(self):
     """Print this object's internal data serialized with utils.jdump."""
     serialized = utils.jdump(self._data)
     print(serialized)