def __init__(self, catVect, catDir, cclass):
    """Set up category state for this object.

    catVect -- pre-loaded category vectors, stored as-is on catVectors
    catDir  -- directory handed to getCats() to build catFileDict
    cclass  -- category classifier/class object, stored on catClass
    """
    # Map of category -> file, discovered from the category directory.
    self.catFileDict = getCats(catDir)
    self.catVectors = catVect
    # Cache for per-category objects, filled lazily elsewhere.
    self.catObjDict = {}
    self.catClass = cclass
def getStatsPerQuery(argv): tagURL = 'http://localhost:8080/rest/annotate' catURL = 'http://localhost:8080/rest/graph/get-entity-categories' catVector = loadCategoryVector(argv[3]) f1Dict = getCats(argv[2]) sFound = 0.0 sTotal = 0.0 eTotal = set() eRemov = set() catFoundNoTerm = set() catNotFound = set() catTermFound = set() catEntity = set() outfile = open('match_session_dom.txt', 'w') #categoryVectors = {} for session in getSessionWithNL(argv[1]): catCount = {} entCount = {} querySpotList = {} for query in session: #find the entities in query try: spotDict = None #tagQueryWithDexter(query, tagURL,catURL) querySpotList[query] = spotDict for text in spotDict.keys(): for entry in spotDict[text]['cat'].split(): catCount[entry] = catCount.setdefault(entry, 1) + 1 entCount[text] = entCount.setdefault(text, 1) + 1 except Exception as err: print err #print 'SESSION', session, 'CATCOUNT', catCount, 'ENTCOUNT',entCount found = False if len(catCount) > 0: #find the dominant entity maxEnt = max(entCount.values()) #sessionQueryMapping = {} for query, spotList in querySpotList.iteritems(): matchl = spotList.keys() for entry in matchl: eTotal.add(entry) if entCount[entry] < maxEnt: spotList.pop(entry, None) print 'Removing spot', query, entry eRemov.add(entry) else: #get the categories #catTermMatch = {} rquery = query.replace(entry, '') queryTerms = set(rquery.split()) for cat in spotList[entry]['cat'].lower().split(): catEntity.add(entry + '_' + cat) if cat in f1Dict: phrase1 = loadPhrasesWithScore(argv[2] + '/' + f1Dict[cat]) pVector = catVector[cat] queryDict = getDictFromSet(queryTerms) pTotal = sum(phrase1.values()) pset = set(phrase1.keys()) sint = pset & queryTerms score = 0.0 cscore = get_cosine(queryDict, pVector) for iphrase in sint: score += phrase1[iphrase] / pTotal if len(queryTerms) > 0: score *= (1.0 * len(sint)) / len(queryTerms) if sint: outfile.write(query + '\t' + entry + '\t' + cat + '\t' + str(cscore) + '\t' + ', '.join(sint) + '\n') found = True 
catTermFound.add(entry + '_' + cat) else: outfile.write(query + '\t' + entry + '\t' + cat + '\t0\t0\n') catFoundNoTerm.add(cat + '_' + entry) else: outfile.write( query + '\t' + entry + '\t' + cat + '\t0\tNOT FOUND\n') catNotFound.add(cat + '_' + entry) #load the terms for category #check if these terms match if found: sFound += 1 sTotal += 1 outfile.write('\n') print 'Total Sessions ', sTotal print 'Sessions with dominant entity in AOL', sFound print '# Unique Entities', len(eTotal) print '# Removed Entities (non dominant)', len(eRemov) print '# no of entity types', len(catEntity) print '# no of entity types with terms match ', len(catTermFound) print '# no of entity types with no term match', len(catFoundNoTerm) print '# no of entity types with no match in AOL', len(catNotFound)