def aggregateResults(results):
  """Merges crosswikis results whose anchors differ only by case.

  Every anchor is lowercased and rows are grouped by (anchor, entity). For
  each row, the numerators of the (num, denom) pairs returned by
  getLabelCounts(info) are summed; that sum and sum * cprob are then fed to
  myutils.addToDict for the row's group (presumably an additive
  accumulation -- confirm against myutils).

  Args:
    results: an iterable of (anchor, entity, info, cprob) tuples, where info
      is whatever getLabelCounts accepts.

  Returns: a list of (anchor, entity, cprob, num, denom) tuples, one per
    (lowercased anchor, entity) group. num is the group's accumulated count,
    denom is the sum of the counts of all groups, and cprob is the group's
    accumulated num * cprob divided by denom.
  """
  countsByLink = {}
  weightedCprobByLink = {}
  for rawAnchor, entity, info, cprob in results:
    linkKey = (rawAnchor.lower(), entity)
    rowCount = sum(n for (n, _d) in getLabelCounts(info).values())
    myutils.addToDict(countsByLink, linkKey, rowCount)
    myutils.addToDict(weightedCprobByLink, linkKey, rowCount * cprob)

  total = sum(countsByLink.values())
  # NOTE(review): the divisor is the total over all groups, not the group's
  # own count, and a zero total with at least one group makes the division
  # fail -- confirm both are intended.
  return [
      (anchor, entity, weightedCprobByLink[(anchor, entity)] / total,
       groupCount, total)
      for ((anchor, entity), groupCount) in countsByLink.items()
  ]
Example #2
0
def getNumInstances(instancesFile):
  """Reads tab-separated "string<TAB>count" lines into a dict of counts.

  Each line is stripped of surrounding whitespace and split on tabs; the
  first field is used as the key and the second is parsed as an int. The
  pair is passed to myutils.addToDict (presumably summing counts for
  repeated keys -- confirm against myutils).

  Args:
    instancesFile: an iterable of text lines, e.g. an open file.

  Returns: the dict populated by myutils.addToDict.
  """
  totals = {}
  for rawLine in instancesFile:
    fields = rawLine.strip().split('\t')
    myutils.addToDict(totals, fields[0], int(fields[1]))
  return totals
Example #3
0
def getNumInstances(instancesFile):
    """Builds a dict of instance counts from tab-separated lines.

    For every line, surrounding whitespace is stripped and the remainder is
    split on tabs. Field 0 is the key and field 1 is converted with int();
    both are handed to myutils.addToDict (presumably accumulating the count
    under the key -- confirm against myutils).

    Args:
        instancesFile: an iterable yielding text lines, e.g. an open file.

    Returns: the dict filled in by myutils.addToDict.
    """
    countsByString = {}
    for entry in instancesFile:
        columns = entry.strip().split('\t')
        key, amount = columns[0], int(columns[1])
        myutils.addToDict(countsByString, key, amount)
    return countsByString
def dedupePr():
    """Writes one line per distinct (precision, recall) pair to PR_DEDUPED_PATH.

    Rows are read from PR_OUTPUT_PATH through readPrFile as
    (p, c, t, precision, recall) tuples. For each distinct
    (precision, recall) pair the output holds a tab-separated line:
    parameter string, precision, recall, count. The parameter string is the
    one from the last row seen with that pair; the count is whatever
    myutils.addToDict accumulates when given 1 per row (presumably the
    number of rows -- confirm against myutils).

    Both files are handled by a `with` statement so they are closed (and the
    output flushed) even if reading or formatting raises.
    """
    prCounts = {}
    prParams = {}
    # Open input first, then output, matching the original open order; the
    # rows are consumed inside the block in case readPrFile is lazy.
    with open(PR_OUTPUT_PATH) as prFile, \
            open(PR_DEDUPED_PATH, 'w') as dedupedPrFile:
        for p, c, t, precision, recall in readPrFile(prFile):
            myutils.addToDict(prCounts, (precision, recall), 1)
            # Later rows overwrite earlier ones: only the last parameter
            # combination per (precision, recall) is kept.
            prParams[(precision, recall)] = 'p={}, c={}, t={}'.format(p, c, t)
        for ((precision, recall), count) in prCounts.items():
            paramString = prParams[(precision, recall)]
            print('{}\t{}\t{}\t{}'.format(paramString, precision, recall, count),
                  file=dedupedPrFile)
def dedupePr():
  """Collapses duplicate (precision, recall) rows of the PR output file.

  Reads (p, c, t, precision, recall) rows from PR_OUTPUT_PATH via readPrFile
  and writes to PR_DEDUPED_PATH one tab-separated line per distinct
  (precision, recall) pair: parameter string, precision, recall, count. The
  parameter string comes from the last row seen with that pair; the count is
  what myutils.addToDict accumulates when given 1 per row (presumably the
  number of rows -- confirm against myutils).

  Both files are managed by a `with` statement so they are closed (and the
  output flushed) even when an exception is raised part-way through.
  """
  prCounts = {}
  prParams = {}
  # Input is opened before output, as in the original; rows are consumed
  # inside the block in case readPrFile yields lazily.
  with open(PR_OUTPUT_PATH) as prFile, \
      open(PR_DEDUPED_PATH, 'w') as dedupedPrFile:
    for p, c, t, precision, recall in readPrFile(prFile):
      myutils.addToDict(prCounts, (precision, recall), 1)
      # The last parameter combination seen for a pair wins.
      prParams[(precision, recall)] = 'p={}, c={}, t={}'.format(p, c, t)
    for ((precision, recall), count) in prCounts.items():
      paramString = prParams[(precision, recall)]
      print('{}\t{}\t{}\t{}'.format(
          paramString,
          precision,
          recall,
          count
        ),
        file=dedupedPrFile
      )