Beispiel #1
0
        if not os.path.exists(sourcedir + docid + '.tsv'):
            continue
        docs.append(row['volid'])
        logistic.append(float(row['logistic']))
        dates.append(float(row['dateused']))

# Turn the accumulated per-document value lists into NumPy arrays.
logistic, dates = np.array(logistic), np.array(dates)

numdocs = len(docs)

# One zero-filled array per field, with one slot for every document.
categories = {field: np.zeros(numdocs) for field in fields}

# Load word frequencies for the selected documents; each entry is consumed
# below as (word, count) pairs via .items().
wordcounts = filecab.get_wordfreqs(sourcedir, '.tsv', docs)

# For every document, record each field's share of the document's total
# word count in slot i of that field's array.
for i, doc in enumerate(docs):
    freqs = wordcounts[doc]

    # Total count plus one: a document with no words yields 0 for every
    # field instead of raising a division-by-zero error.
    denominator = sum(count for _word, count in freqs.items()) + 1

    for field in fields:
        # Summed counts of the words that belong to this field's word set.
        matched = sum(
            count
            for word, count in freqs.items()
            if word in inquirer[field]
        )
        categories[field][i] = matched / denominator

# Two independent, initially empty result lists (filled outside this view).
logresults, dateresults = [], []