def generate_token_vectors(people):
    """Build a per-person record holding the name and the generated tokens.

    NOTE(review): a later definition of ``generate_token_vectors`` in this
    file rebinds the name, so this version is shadowed once the module has
    been fully loaded.

    Args:
        people: Mapping of ID -> record. Each record is indexed with the
            keys ``'data'`` and ``'name'``.

    Returns:
        dict mapping each ID to ``{'name': ..., 'tokens': ...}``, where
        ``'tokens'`` is whatever ``generate`` returned for that record's
        ``'data'`` (stored as-is, not split).

    Raises:
        KeyError: If a record lacks ``'data'`` or ``'name'``.
    """
    vectors = {}
    for ID in people:
        # 'data' is a string key; the bare name ``data`` was a NameError.
        vector = generate(people[ID]['data'])
        # Create the inner dict explicitly: a plain dict has no entry for
        # ID yet, so ``vectors[ID]['name'] = ...`` raised KeyError.
        vectors[ID] = {
            'name': people[ID]['name'],
            'tokens': vector,
        }
    return vectors
def generate_token_vectors(people):
    """Build a per-person record holding the name and a list of tokens.

    Records for which tokenising raises ``AttributeError`` are skipped
    entirely; per the original comment this covers a blob that is None.
    They get no entry in the result, not even a partial one.

    Args:
        people: Mapping of ID -> record. Each record is indexed with the
            keys ``'data'`` and ``'name'``.

    Returns:
        ``defaultdict(dict)`` mapping each successfully processed ID to
        ``{'name': ..., 'tokens': [...]}``, where ``'tokens'`` is the
        whitespace-split output of ``generate``.

    Raises:
        KeyError: If a record lacks ``'data'`` or ``'name'``.
    """
    vectors = defaultdict(dict)
    for ID in people:
        try:
            # .split() needs a string; a None here raises AttributeError.
            # NOTE(review): an AttributeError raised inside generate itself
            # is also swallowed, as in the original -- confirm intended.
            tokens = generate(people[ID]['data']).split()
        except AttributeError:
            # Deliberate best-effort: skip the record that cannot be
            # tokenised.
            continue
        # Only touch ``vectors`` after tokenising succeeded, so a skipped
        # record never leaves a half-filled entry ('name' without 'tokens').
        vectors[ID]['name'] = people[ID]['name']
        vectors[ID]['tokens'] = tokens
    return vectors
def queryVector(rawQuery):
    """Compute a TF-IDF score for every distinct token of a query.

    The query is passed through ``generate`` and split on whitespace. Each
    distinct token is scored as its ``idfIndex`` weight multiplied by the
    number of times it occurs in the query. Tokens missing from
    ``idfIndex`` score 0.

    Args:
        rawQuery: Query text handed to ``generate``.

    Returns:
        dict mapping each distinct query token to its score. Empty when
        the cleaned query contains no tokens.
    """
    # NOTE(review): assumes generate returns a str -- confirm.
    # split() with no argument drops empty strings produced by repeated
    # spaces or an empty query, matching how generate_token_vectors
    # tokenises.
    tokens = generate(rawQuery).split()

    # Term frequency: occurrences of each token within the query.
    termCounts = {}
    for token in tokens:
        termCounts[token] = termCounts.get(token, 0) + 1

    # Score each token with ITS OWN idf and tf. The previous loop looked up
    # the stale ``token`` variable left over from the counting loop, so
    # every entry received the last token's score.
    queryTfidf = {}
    for tok, tokenTf in termCounts.items():
        tokenIdf = idfIndex.get(tok, 0)
        queryTfidf[tok] = tokenIdf * tokenTf
    return queryTfidf