Esempio n. 1
0
 def __init__(self, signature):
     """Set up an empty aggregate record for the given signature.

     Args:
       signature: the signature this record is kept for; stored unchanged.
     """
     self.signature = signature

     # Running aggregates; both start out empty.
     self.score = 0.0  # combined score, computed using the user's weights
     self.counts = util.MatchCounts()  # combined match counts

     # Representative category for the parse; none picked yet.
     self.example_category = None

     # Number of parses the formula turned away / let through.
     self.rejected = 0
     self.selected = 0
Esempio n. 2
0
 def counts_for(self, span_subset=None, counts=None):
     """Merge the per-span counts into one accumulator and return it.

     Args:
       span_subset: optional filter; when given, only span indices for which
         `span_subset.is_allowed(index)` is true are merged. When None, every
         span is merged.
       counts: optional accumulator to merge into. A new `util.MatchCounts()`
         is created when this is None.

     Returns:
       The accumulator (either `counts` or the newly created one).
     """
     total = util.MatchCounts() if counts is None else counts
     for position, span_count in enumerate(self.span_counts):
         # Skip spans that the subset filter excludes.
         if span_subset is not None and not span_subset.is_allowed(position):
             continue
         total.merge(span_count)
     return total
Esempio n. 3
0
    def __init__(self, request):
        """Prepare empty result state for one request.

        Args:
          request: the incoming request; its `query` attribute is parsed into
            span signatures.
        """
        self.request = request
        self.error_message = None
        self.span_signatures = util.parse_to_span_signatures(request.query)

        # Every tally below is taken after the parse selector has been applied.
        self.counts_across_spans = util.MatchCounts()
        self.counts_across_selected_spans = util.MatchCounts()
        # One separate counter per span signature.
        self.counts_by_span = [util.MatchCounts() for _ in self.span_signatures]

        # Top selected parses that are shown.
        self.top_parses = []

        # Statistics of selected parses that are not shown.
        self.top_parses_not_shown = dict(num=0, counts=util.MatchCounts())

        # Statistics of parses that were not selected.
        self.unselected_parses = dict(num=0, counts=util.MatchCounts())
Esempio n. 4
0
  def __init__(self):
    """Create an empty aggregate for one signature."""
    # Exemplar for this signature: category QID, category, and parse.
    self.example_qid = self.example_category = self.example_parse = None

    # The totals below range over every (category, parse) pair that carries
    # this signature. When a single category has two or more such parses, only
    # its highest-scoring parse is taken into account.

    # Sum of scores over all parses.
    self.score = 0.0

    # Summed fact-matching statistics.
    self.fact_matches = util.MatchCounts()

    # Number of member items, counted without de-duplication.
    self.members = 0

    # Number of categories.
    self.num = 0
Esempio n. 5
0
  def handle_signature(self, signature, categories, form):
    """Writes the report for all parses that share one signature.

    The output consists of, in order: a summary line, a table of span-level
    fact-matching statistics, an optional form for generating a recordio file
    (only when the signature type is "full"), and a table with one row per
    category (at most 200 rows) showing its best-scoring parse.

    Args:
      signature: the signature string being reported on.
      categories: sized collection of (qid, category, parse) tuples whose
        parses have this signature.
      form: the submitted form; the sort metric and signature type are read
        from it via getvalue(), and it is passed to fact_match_weights().
    """
    # NOTE(review): `signature` and `category.name` are interpolated into the
    # markup below as is -- confirm the _tag/_text/_cell helpers escape HTML.
    score_type = form.getvalue("main_form_sort_metric")
    fact_weights = self.fact_match_weights(form)
    signature_type = form.getvalue("main_form_signature_type")

    # Sort all parses with this signature, highest score first.
    output = []
    for (qid, category, parse) in categories:
      score = self.parse_score(category, parse, score_type, fact_weights)
      output.append((qid, category, parse, score))
    output.sort(key=lambda x: -x[3])

    # Get fact-matching statistics. Consider only the top parse for a category
    # if it has >1 parses with the same signature. Since `output` is sorted by
    # descending score, the first parse seen for a QID is its top parse.
    category_count = defaultdict(int)
    match_counts = util.MatchCounts()
    span_match_counts = defaultdict(util.MatchCounts)
    num_members = 0
    for qid, category, parse, _score in output:
      category_count[qid] += 1
      if category_count[qid] > 1:
        continue
      num_members += len(category.members)
      util.fact_matches_for_parse(parse, match_counts)
      for span in parse.spans:
        span_signature = util.span_signature(span, signature_type)
        util.fact_matches_for_span(span, span_match_counts[span_signature])

    # Write a summary.
    self._tag("div",
      "<b>%s</b>: in %d (category, parse) pairs across %d categories" %
      (signature, len(categories), len(category_count)))
    self._tag("b", "#Items across categories: ")
    self._text("%d (incl. possible duplicates)" % num_members)

    self._br()
    self._tag("b", "Span-level fact-matching statistics")
    self.write_main_table_header(
      ["Span Signature"],
      [t.name for t in FactMatchType])
    # items() rather than iteritems(): the latter does not exist on Python 3
    # dictionaries, while items() works on both Python 2 and Python 3.
    for span_signature, span_matches in span_match_counts.items():
      self._begin("tr")
      self._cell(span_signature)
      self._separator(header=False)
      self.write_fact_match_counts(span_matches)
      self._end("tr")
    # Final row: statistics aggregated over whole parses.
    self._begin("tr")
    self._cell("All")
    self._separator(header=False)
    self.write_fact_match_counts(match_counts)
    self._end("tr")
    self._end("table")

    # Give an option to generate a recordio file.
    if signature_type == "full":
      self._br()
      self._begin("table class='main_table'")
      self._begin("tr")
      self._begin("td")
      self._begin("form", id="recordio_form", method="POST", action="",
                  target="_blank")
      self._begin_end("input", type="hidden", name="main_form_functionality",
                      value="recordio")
      self._begin_end("input", type="hidden", name="recordio_signature",
                      id="recordio_signature", value=signature)
      self._tag("b", "Generate recordio for this signature")
      self._br()
      self._br()
      self._text("Filename: ")
      filename = "local/data/e/wikicat/" + signature.replace("$", "_") + ".rec"
      self._begin_end("input", type="text", size=100,
                      name="recordio_filename", value=filename)
      self._br()
      self._text("Category QIDs ('ALL' for all): ")
      self._begin_end("input", type="text", size=100, value="ALL",
                      name="recordio_categories")
      self._br()
      self._text("Generate facts for these types:")
      self._begin_end("input", type="text", size=100,
                      value="NEW,ADDITIONAL,SUBSUMED_BY_EXISTING",
                      name="recordio_match_types")
      self._br()
      self._text("Generate the following facts:")
      self._br()

      # One pre-checked checkbox per signature token that starts with '$' and
      # contains at least one more '$' after it.
      count = 0
      for token in signature.split():
        if token.startswith("$") and "$" in token[1:]:
          name = "recordio_span%d" % count
          self._text("&nbsp;&nbsp;" + token + " ")
          self._begin_end("input", type="checkbox", name=name, id=name,
                          checked="on")
          self._br()
          count += 1
      self._begin_end("input", type="hidden", name="recordio_num_spans",
                      id="recordio_total_spans", value=count)
      self._begin_end("input", type="submit")
      self._end(["form", "td", "tr", "table"])

    # Write the individual parses in a tabular form: one row per category,
    # using the first (i.e. highest-scoring) parse seen for it.
    self._br()
    self._tag("b", "Categories with parses matching '" + signature + "'")
    seen = set()
    max_rows = 200
    self.write_main_table_header(
      ["Category", "Prelim parse score", "#Members", "Fact-matching score"],
      [t.name for t in FactMatchType])
    row_count = 0
    for qid, category, parse, _score in output:
      if row_count >= max_rows:
        break
      if qid in seen:
        continue
      row_count += 1
      seen.add(qid)
      self._begin("tr")
      self._begin("td")
      self._form_anchor(qid + ": " + category.name, qid)
      if category_count[qid] > 1:
        # Mention how many lower-ranked parses of this category were skipped.
        more = category_count[qid] - 1
        self._text(" (%d more parse%s)" % (more, "" if more == 1 else "s"))
      self._end("td")
      self._cell(parse.score, numeric=True)
      self._cell(len(category.members), numeric=True)
      self._cell("%.4f" % self.parse_fact_score(parse, fact_weights), True)

      self._separator(header=False)
      counts = util.fact_matches_for_parse(parse)
      self.write_fact_match_counts(counts)
      self._end("tr")
    self._end("table")