Exemple #1
0
                frame["document"] = document.frame
                for parse in parses:
                    span_array = store.array(len(parse))
                    for i, span in enumerate(parse):
                        span_array[i] = store.frame({
                            "begin": span.begin,
                            "end": span.end,
                            "qid": span.qid,
                            "prior": span.prior,
                            "pids": list(span.pids),
                            "count": span.count
                        })
                    parse_frame = store.frame({"spans": span_array})
                    frame.append("parse", parse_frame)
                writer.write(key, frame.data(binary=True))
                task.increment("categories_accepted")

                # Compute histogram over number of parses.
                for b in self.num_parses_bins:
                    if len(parses) <= b:
                        task.increment("#parses <= %d" % b)
                if self.num_parses_bins[-1] < len(parses):
                    task.increment("#parses > %d" % self.num_parses_bins[-1])

            reader.close()
        writer.close()
        rejected.close()


register_task("category-parse-generator", CategoryParseGenerator)
Exemple #2
0
            frame_cache = {
            }  # (pids, qid) -> frame containing their match statistics
            for parse, parse_match in zip(category("parse"), matches):
                for span, span_match in zip(parse.spans, parse_match):
                    span_key = (span.pids, span.qid)
                    if span_key not in frame_cache:
                        match_frame = span_match.as_frame(store)
                        frame_cache[span_key] = match_frame
                    span["fact_matches"] = frame_cache[span_key]
            writer.write(key, category.data(binary=True))
            task.increment("fact-matcher/categories-processed")
        reader.close()
        writer.close()


register_task("category-parse-fact-matcher", FactMatcherTask)


# Loads a KB and brings up a shell to compute and debug match statistics.
def shell():
    """Run an interactive prompt for inspecting fact-match statistics.

    Loads the knowledge base from a fixed local path, builds a fact
    extractor and a FactMatcher on top of it, opens the parses record file
    as a record database, and then repeatedly prompts for an item or
    category QID. Takes no arguments; the visible loop has no exit
    condition.
    """
    # Paths are hard-coded relative to the working directory.
    kb = load_kb("local/data/e/wiki/kb.sling")
    extractor = sling.api.FactExtractor(kb)
    matcher = FactMatcher(kb, extractor)

    # Record file of category parses, opened for lookups.
    parses = "local/data/e/wikicat/filtered-parses.rec"
    db = sling.RecordDatabase(parses)

    while True:
        # NOTE(review): raw_input() only exists in Python 2; under Python 3
        # the equivalent call is input() -- confirm the target interpreter.
        item = raw_input("Enter item or category QID:")

        # See if a category QID was entered, if so, compute and output match
Exemple #3
0
            # Score each parse.
            parse_with_score = self.score(category)

            # Keep only the top-k parses.
            ranked_parses = sorted(parse_with_score, key=lambda x: -x[1])
            if len(ranked_parses) > max_parses:
                dropped = len(ranked_parses) - max_parses
                ranked_parses = ranked_parses[0:max_parses]
                task.increment("parses-dropped", dropped)
                task.increment("categories-with-too-many-parses")

            # Compute signature for each parse and store it in the parse.
            for parse, _ in ranked_parses:
                tokens, span_signature = self.signature(document, parse)
                parse["signature"] = tokens
                for span in parse.spans:
                    if span in span_signature:
                        span["signature"] = span_signature[span]

            # Replace the current set of parses with the ranked list.
            del category["parse"]
            for parse, _ in ranked_parses:
                category.append("parse", parse)
            task.increment("parses-kept", len(ranked_parses))
            writer.write(key, category.data(binary=True))
        reader.close()
        writer.close()


register_task("prelim-category-parse-ranker", PrelimCategoryParseRanker)