Ejemplo n.º 1
0
def weightvectors(terms, documentvectors, progress=False):
    """Replace term frequencies in the document vectors with weights, in place.

    For every term, the entry at the term's ``position`` in each document
    vector is read as a term frequency and handed to ``term.tfidf`` together
    with the number of document vectors and the term's ``documentcount``.
    The result is rounded, converted to ``int`` and written back to the same
    slot.  Nothing is returned; the vectors themselves are mutated.

    Args:
        terms: Mapping whose values expose ``position`` (the index of the
            term inside each document vector) and ``documentcount``.
        documentvectors: Sized iterable of vectors supporting item read and
            item assignment by integer index.
        progress: When true, one "." is written to stdout per processed
            term, with no separator and no trailing newline.
    """
    n = len(documentvectors)
    for entry in terms.values():
        position = entry.position
        df = entry.documentcount
        for dv in documentvectors:
            tf = dv[position]
            dv[position] = int(round(term.tfidf(tf, n, df), 0))
        if progress:
            # Plain write() emits exactly one character (no space, no
            # newline) and is valid on both Python 2 and Python 3, unlike
            # the print statement with a softspace reset.
            sys.stdout.write(".")
Ejemplo n.º 2
0
def makevectors(documents, terms, progress=False):
    """Build weighted vectors for the documents, separating zero-norm ones.

    For each document a new ``vec.Vec`` is created from a copy of the
    document's ``vector`` and the document's ``id`` is passed to the
    vector's ``reference`` method.  If the vector's ``norm()`` is non-zero,
    every term entry is replaced by the rounded integer result of
    ``term.tfidf(tf, n, df)``, where ``tf`` is the current entry, ``n`` is
    the number of documents and ``df`` is the term's ``documentcount``.
    Vectors whose norm is zero are left unweighted and collected separately.

    Args:
        documents: Sized iterable of objects exposing ``vector`` and ``id``.
        terms: Mapping whose values expose ``position`` (index into the
            vector) and ``documentcount``.
        progress: When true, one "." is written to stdout per document,
            with no separator and no trailing newline.

    Returns:
        A ``(documentvectors, nullvectors)`` tuple of lists: the weighted
        non-zero vectors and the zero-norm vectors, each in document order.
    """
    # Both values are the same for every document, so compute them once
    # instead of once per term / per document.
    n = len(documents)
    values = terms.values()

    documentvectors = []
    nullvectors = []
    for d in documents:
        v = vec.Vec().fromlist(list(d.vector))
        v.reference(d.id)
        if v.norm() != 0:
            for t in values:
                tf = v[t.position]
                df = t.documentcount
                v[t.position] = int(round(term.tfidf(tf, n, df)))
            documentvectors.append(v)
        else:
            nullvectors.append(v)
        if progress:
            # Plain write() emits exactly one character (no space, no
            # newline) and is valid on both Python 2 and Python 3, unlike
            # the print statement with a softspace reset.
            sys.stdout.write(".")
    return documentvectors, nullvectors