def weightvectors(terms, documentvectors, progress=False):
    """Replace raw term frequencies with rounded tf-idf weights, in place.

    For every term in ``terms``, the entry at ``t.position`` of each document
    vector is overwritten with ``int(round(term.tfidf(tf, n, df), 0))``,
    where ``tf`` is the value currently stored at that position, ``n`` is the
    number of document vectors and ``df`` is the term's ``documentcount``.

    Args:
        terms: Mapping whose values expose ``position`` and ``documentcount``.
        documentvectors: Sized, re-iterable collection of vectors supporting
            index read and assignment; its members are mutated.
        progress: When true, write a "." to stdout for each processed term.

    Returns:
        None; ``documentvectors`` is modified in place.
    """
    n = len(documentvectors)
    for t in terms.values():
        i = t.position
        df = t.documentcount
        for dv in documentvectors:
            dv[i] = int(round(term.tfidf(dv[i], n, df), 0))
        # NOTE(review): one dot per term -- confirm this is the intended
        # granularity for the progress indicator.
        if progress:
            # write() emits the bare dot (no separator, no newline) and is
            # valid on both Python 2 and 3, unlike the print statement.
            sys.stdout.write(".")
def makevectors(documents, terms, progress=False):
    """Build tf-idf weighted vectors for ``documents``.

    Each document is turned into a vector via
    ``vec.Vec().fromlist(list(d.vector))`` and tagged with
    ``v.reference(d.id)``. Vectors with a non-zero norm have the entry at
    every term's ``position`` replaced by
    ``int(round(term.tfidf(tf, n, df)))``, where ``n`` is ``len(documents)``
    (null vectors included) and ``df`` is the term's ``documentcount``.
    Vectors whose norm is zero are collected separately and left unweighted.

    Args:
        documents: Sized iterable of objects exposing ``vector`` and ``id``.
        terms: Mapping whose values expose ``position`` and ``documentcount``.
        progress: When true, write a "." to stdout for each document.

    Returns:
        A ``(documentvectors, nullvectors)`` tuple of lists.
    """
    documentvectors = []
    nullvectors = []
    # Loop invariants: neither depends on the document being processed.
    n = len(documents)
    values = terms.values()
    for d in documents:
        v = vec.Vec().fromlist(list(d.vector))
        v.reference(d.id)
        if v.norm() != 0:
            for t in values:
                position = t.position
                v[position] = int(round(term.tfidf(v[position], n, t.documentcount)))
            documentvectors.append(v)
        else:
            nullvectors.append(v)
        if progress:
            # write() emits the bare dot (no separator, no newline) and is
            # valid on both Python 2 and 3, unlike the print statement.
            sys.stdout.write(".")
    return documentvectors, nullvectors