Esempio n. 1
0
    def gen_train_set(gt, query_file, train_set):
        """Builds the LTR training set for entity linking and writes it out.

        Ground-truth annotations are added first as positive instances
        (target 1). Then every candidate instance generated for the queries
        in `query_file` is added as a negative instance (target 0), unless
        its (qid, en_id) pair is already a positive annotation.

        :param gt: iterable of (qid, query, en_id, mention) tuples
        :param query_file: path to a JSON file mapping qid to query
        :param train_set: destination handed to `Instances.to_json`
        """
        entity = Entity()
        elastic = ElasticCache(ELASTIC_INDICES[0])
        fcache = FeatureCache()
        inss = Instances()
        positive_annots = set()

        # Adds groundtruth instances (positive instances)
        PLOGGER.info("Adding groundtruth instances (positive instances) ....")
        for item in sorted(gt):  # qid, query, en_id, mention
            ltr = LTR(Query(item[1], item[0]), entity, elastic, fcache)
            ins = ltr.__gen_raw_ins(item[2], item[3])
            ins.features = ltr.get_features(ins)
            ins.target = 1
            inss.add_instance(ins)
            positive_annots.add((item[0], item[2]))

        # Adds all other instances
        PLOGGER.info("Adding all other instances (negative instances) ...")
        # Read the queries inside a context manager so the file handle is
        # closed deterministically instead of being left to the GC.
        with open(query_file, "r") as query_fp:
            queries = json.load(query_fp)
        for qid, q in sorted(queries.items()):
            PLOGGER.info("Query [" + qid + "]")
            ltr = LTR(Query(q, qid), entity, elastic, fcache)
            q_inss = ltr.get_candidate_inss()
            for ins in q_inss.get_all():
                # Positive annotations were already added above; skip them.
                if (qid, ins.get_property("en_id")) in positive_annots:
                    continue
                ins.target = 0
                inss.add_instance(ins)
        inss.to_json(train_set)
Esempio n. 2
0
    def batch_linking(self):
        """Scores queries in a batch and writes the results to the output file.

        The configured ``step`` selects what is run:

        - ``"linking"``: links every query of the query file and dumps the
          results as JSON.
        - ``"ranking"``: ranks entities for every query and writes all ranked
          instances via `Instances.to_json`.
        - ``"disambiguation"``: loads instances from the configured
          ``test_set``, groups them by ``qid``, disambiguates each group and
          writes the results via `to_elq_eval`.
        """
        results = {}
        step = self.__config["step"]

        if step == "linking":
            # Context managers guarantee both files are closed (and the
            # output flushed) instead of relying on garbage collection.
            with open(self.__query_file) as query_fp:
                queries = json.load(query_fp)
            for qid in sorted(queries):
                results[qid] = self.link(queries[qid], qid)
            with open(self.__output_file, "w") as out_fp:
                json.dump(results, out_fp, indent=4, sort_keys=True)

        # only ranking step
        if step == "ranking":
            with open(self.__query_file) as query_fp:
                queries = json.load(query_fp)
            for qid in sorted(queries):
                linker = self.__get_linker(Query(queries[qid], qid))
                results[qid] = linker.rank_ens()
            # Flatten the per-query instances in one pass; repeatedly
            # concatenating lists with sum(..., []) is quadratic.
            ranked_inss = Instances(
                [ins for q_inss in results.values()
                 for ins in q_inss.get_all()])
            ranked_inss.to_json(self.__output_file)

        # only disambiguation step
        if step == "disambiguation":
            inss = Instances.from_json(self.__config["test_set"])
            inss_by_query = inss.group_by_property("qid")
            for qid, q_inss in sorted(inss_by_query.items()):
                linker = self.__get_linker("")
                results[qid] = linker.disambiguate(Instances(q_inss))
            to_elq_eval(results, self.__output_file)

        PLOGGER.info("Output file: " + self.__output_file)
Esempio n. 3
0
 def tem(self):
     """Tells whether the entity title equals the mention.

     The title is read from the loaded entity document (empty string when
     absent) and passed through `Query` before the comparison.

     :return: 1 if the title equals the mention, 0 otherwise
     """
     self.__load_en()
     title = Query(self.__en_doc.get(TITLE, [""])[0]).query
     return 1 if self.__mention == title else 0
Esempio n. 4
0
 def tcm(self):
     """Tells whether the entity title contains the mention.

     The title is read from the loaded entity document (empty string when
     absent) and passed through `Query` before the containment check.

     :return: 1 if the mention occurs in the title, 0 otherwise
     """
     self.__load_en()
     title = Query(self.__en_doc.get(TITLE, [""])[0]).query
     return 1 if self.__mention in title else 0
Esempio n. 5
0
 def mct(self):
     """Tells whether the mention contains the entity title.

     The title is read from the loaded entity document (empty string when
     absent) and passed through `Query` before the containment check.

     :return: 1 if the title occurs in the mention, 0 otherwise
     """
     self.__load_en()
     title = Query(self.__en_doc.get(TITLE, [""])[0]).query
     return 1 if title in self.__mention else 0
Esempio n. 6
0
    def load_yerd(gt_file):
        """
        Reads the Y-ERD collection and returns its annotations as a set.

        The file is parsed as tab-separated values with a header row; the
        columns "qid", "query", "entity" and "mention" are used. Rows without
        an entity are skipped. Query and mention are passed through `Query`.

        :param gt_file: Path to the Y-ERD collection
        :return: set of tuples {(qid, query, en_id, mention), ...}
        """
        PLOGGER.info("Loading the ground truth ...")
        gt = set()
        # newline="" lets the csv module do its own newline handling, as the
        # csv documentation requires for file objects passed to a reader.
        with open(gt_file, "r", newline="") as tsvfile:
            reader = csv.DictReader(tsvfile,
                                    delimiter="\t",
                                    quoting=csv.QUOTE_NONE)
            for line in reader:
                # DictReader fills missing trailing fields with None, so test
                # for falsiness rather than only for the empty string.
                if not line["entity"]:
                    continue
                query = Query(line["query"]).query
                mention = Query(line["mention"]).query
                gt.add((line["qid"], query, line["entity"], mention))
        return gt
Esempio n. 7
0
    def link(self, query):
        """Runs entity linking on a single query.

        :param query: query string
        :return: dict with the raw query ("query"), the processed query
            ("processed_query") and the linker output ("results")
        """
        q = Query(query)
        linked_ens = self.__get_linker(q).link()
        return {
            "query": q.raw_query,
            "processed_query": q.query,
            "results": linked_ens,
        }
Esempio n. 8
0
    def link(self, query, qid=""):
        """Runs entity linking (or only ranking) on a single query.

        :param query: query string
        :param qid: query ID, used for logging and passed to `Query`
        :return: the linker's ranking output when the configured step is
            "ranking"; otherwise a dict with the raw query ("query"), the
            processed query ("processed_query") and the linker output
            ("results")
        """
        PLOGGER.info("Linking query " + qid + " [" + query + "] ")
        q = Query(query, qid)
        linker = self.__get_linker(q)

        # Ranking-only mode returns the ranker output as is.
        if self.__config["step"] == "ranking":
            return linker.rank_ens()

        linked_ens = linker.link()
        return {
            "query": q.raw_query,
            "processed_query": q.query,
            "results": linked_ens,
        }
Esempio n. 9
0
def main(args):
    """Links the query given as the first argument with Cmns and prints it.

    :param args: sequence whose first item is the query string
    """
    en = Entity()
    linker = Cmns(Query(args[0]), en, cmns_th=0.1)
    print(linker.link())