def getTagTermAffinityScores(questions, includeCounts=True, frequentWords=None,
                             minTagCount=50):
  """Build the tag-term affinity table from a collection of questions.

  For a frequent word w and a tag t the affinity is

      (number of questions tagged t whose body contains w)
      ----------------------------------------------------
             (number of questions tagged t)

  A word is counted at most once per question, because the words of each
  body are de-duplicated before counting.

  Args:
    questions: mapping whose values are question objects. Each question must
      expose `tags` (iterable of tag IDs) and `bodyByte` (the position handed
      to seek() on `posts_body_file`; the body is the single line read from
      that position).
    includeCounts: when true, the per-tag question counts are returned
      alongside the affinity table.
    frequentWords: container of the words to keep. When it is None or empty,
      it is replaced by the first element of
      wordvectors.getFrequentWords(questions), converted to a set.
    minTagCount: minimum number of questions a tag must occur in to be listed
      in the returned counts. It filters only the counts, never the affinity
      table. Defaults to 50.

  Returns:
    `ttas` when includeCounts is false, otherwise `(ttas, finalTagCounts)`.
    `ttas` maps word -> {tagID: affinity}; `finalTagCounts` maps
    tagID -> number of questions, restricted to tags with at least
    `minTagCount` questions.
  """
  if not frequentWords:
    frequentWords = set(wordvectors.getFrequentWords(questions)[0])
  ttas = {}
  tagCounts = {}
  # The context manager closes the body file even when reading or tokenizing
  # a post raises, which the previous manual close() did not guarantee.
  with codecs.open(posts_body_file, 'r', 'utf-8') as infile_body:
    for question in questions.values():
      for tagID in question.tags:
        tagCounts[tagID] = tagCounts.get(tagID, 0) + 1
      infile_body.seek(question.bodyByte)
      postWords = wordvectors.getWordsFromPost(infile_body.readline())
      # set() ensures a word contributes once per question, however often it
      # is repeated in the body.
      for word in set(postWords):
        if word not in frequentWords:
          continue
        inner_dict = ttas.setdefault(word, {})
        for tagID in question.tags:
          inner_dict[tagID] = inner_dict.get(tagID, 0) + 1

  # Turn raw co-occurrence counts into fractions of each tag's questions.
  # Only existing keys are re-assigned, so the dict size never changes while
  # it is being iterated.
  for inner_dict in ttas.values():
    for (tagID, freq) in inner_dict.items():
      inner_dict[tagID] = float(freq) / tagCounts[tagID]

  if not includeCounts:
    return ttas
  finalTagCounts = {}
  for (tagID, count) in tagCounts.items():
    if count >= minTagCount:
      finalTagCounts[tagID] = count
  return (ttas, finalTagCounts)
def getTagTermBasedRankingScores(questionBody, ttas, tagCounts):
  """Score every tag in `tagCounts` against the words of a question body.

  The raw score of a tag is

      1 - product over body words found in `ttas` of (1 - ttas[word][tag])

  where a tag missing from ttas[word] contributes an affinity of 0. Every
  entry of the word list returned by wordvectors.getWordsFromPost adds one
  factor; the list is not de-duplicated here.

  The raw scores are then rescaled as (score - low) / (high - low). Both
  `low` and `high` start at 0.0, so `low` is never above zero and `high` is
  never below zero. When high == low the divisor is 1.

  Args:
    questionBody: text handed to wordvectors.getWordsFromPost.
    ttas: mapping word -> {tagID: affinity}.
    tagCounts: mapping whose keys are the candidate tag IDs.

  Returns:
    dict mapping each candidate tagID to its rescaled score.
  """
  postWords = wordvectors.getWordsFromPost(questionBody)

  # Running product of (1 - affinity) per candidate tag.
  complement = dict.fromkeys(tagCounts.keys(), 1.0)
  for word in postWords:
    if word not in ttas:
      continue
    affinities = ttas[word]
    for tagID in complement:
      complement[tagID] *= (1 - affinities.get(tagID, 0))

  # Convert the products into raw scores and track the observed extremes,
  # both seeded with 0.0.
  rawScores = {}
  high = 0.0
  low = 0.0
  for (tagID, product) in complement.items():
    value = 1 - product
    rawScores[tagID] = value
    if value > high:
      high = value
    if value < low:
      low = value

  # A zero range would divide by zero; fall back to a divisor of 1.
  span = (high - low) or 1
  scaled = {}
  for (tagID, value) in rawScores.items():
    scaled[tagID] = (value - low) / span
  return scaled
def getTagTermBasedRankingScores(questionBody, ttas, tagCounts):
    """Rank the tags in `tagCounts` by their affinity to a question body.

    For each candidate tag the raw score is one minus the product of
    (1 - affinity) over the body words that appear in `ttas`; a tag absent
    from a word's affinity dict counts as affinity 0. The word list comes
    from wordvectors.getWordsFromPost and is used as returned, so each entry
    contributes its own factor.

    Raw scores are finally rescaled with (score - low) / (high - low), where
    `low` and `high` are the smallest and largest raw scores after seeding
    both with 0.0. A zero range is replaced by a divisor of 1.

    Args:
        questionBody: text handed to wordvectors.getWordsFromPost.
        ttas: mapping word -> {tagID: affinity}.
        tagCounts: mapping whose keys are the candidate tag IDs.

    Returns:
        dict mapping each candidate tagID to its rescaled score.
    """
    postWords = wordvectors.getWordsFromPost(questionBody)

    # Per-tag running product of (1 - affinity), initialised to 1.0.
    remaining = dict.fromkeys(tagCounts.keys(), 1.0)
    for word in postWords:
        if word not in ttas:
            continue
        wordAffinity = ttas[word]
        for tagID in remaining:
            remaining[tagID] *= (1 - wordAffinity.get(tagID, 0))

    # Raw scores plus their extremes; both extremes start from 0.0.
    raw = {}
    upper = 0.0
    lower = 0.0
    for (tagID, product) in remaining.items():
        current = 1 - product
        raw[tagID] = current
        if current > upper:
            upper = current
        if current < lower:
            lower = current

    # Guard against dividing by a zero range.
    width = (upper - lower) or 1
    normalised = {}
    for (tagID, current) in raw.items():
        normalised[tagID] = (current - lower) / width
    return normalised
def getTagTermAffinityScores(questions,
                             includeCounts=True,
                             frequentWords=None,
                             minTagCount=50):
    """Build the tag-term affinity table from a collection of questions.

    For a frequent word w and a tag t the affinity is

        (number of questions tagged t whose body contains w)
        ----------------------------------------------------
               (number of questions tagged t)

    A word is counted at most once per question, because the words of each
    body are de-duplicated before counting.

    Args:
        questions: mapping whose values are question objects. Each question
            must expose `tags` (iterable of tag IDs) and `bodyByte` (the
            position handed to seek() on `posts_body_file`; the body is the
            single line read from that position).
        includeCounts: when true, the per-tag question counts are returned
            alongside the affinity table.
        frequentWords: container of the words to keep. When it is None or
            empty, it is replaced by the first element of
            wordvectors.getFrequentWords(questions), converted to a set.
        minTagCount: minimum number of questions a tag must occur in to be
            listed in the returned counts. It filters only the counts, never
            the affinity table. Defaults to 50.

    Returns:
        `ttas` when includeCounts is false, otherwise
        `(ttas, finalTagCounts)`. `ttas` maps word -> {tagID: affinity};
        `finalTagCounts` maps tagID -> number of questions, restricted to
        tags with at least `minTagCount` questions.
    """
    if not frequentWords:
        frequentWords = set(wordvectors.getFrequentWords(questions)[0])
    ttas = {}
    tagCounts = {}
    # The context manager closes the body file even when reading or
    # tokenizing a post raises, which the previous manual close() did not
    # guarantee.
    with codecs.open(posts_body_file, 'r', 'utf-8') as infile_body:
        for question in questions.values():
            for tagID in question.tags:
                tagCounts[tagID] = tagCounts.get(tagID, 0) + 1
            infile_body.seek(question.bodyByte)
            postWords = wordvectors.getWordsFromPost(infile_body.readline())
            # set() ensures a word contributes once per question, however
            # often it is repeated in the body.
            for word in set(postWords):
                if word not in frequentWords:
                    continue
                inner_dict = ttas.setdefault(word, {})
                for tagID in question.tags:
                    inner_dict[tagID] = inner_dict.get(tagID, 0) + 1

    # Turn raw co-occurrence counts into fractions of each tag's questions.
    # Only existing keys are re-assigned, so the dict size never changes
    # while it is being iterated.
    for inner_dict in ttas.values():
        for (tagID, freq) in inner_dict.items():
            inner_dict[tagID] = float(freq) / tagCounts[tagID]

    if not includeCounts:
        return ttas
    finalTagCounts = {}
    for (tagID, count) in tagCounts.items():
        if count >= minTagCount:
            finalTagCounts[tagID] = count
    return (ttas, finalTagCounts)