Beispiel #1
0
def compute_vector_for_DOI(DOI):
    """Build, normalize and pickle the keyword vector of one paper.

    The database at ``db_loc`` is searched for the entry whose ``ownDOI``
    equals ``DOI``. The entry's ``filename`` selects the text file
    ``text_dir + filename + '.txt'``, which is passed to
    ``keyExtract.getRakeKeywords``. The result is run through
    ``normVectorGen`` and pickled to ``vector_dir + filename + '.pkl'``.

    Args:
        DOI: value compared against the ``ownDOI`` field of the stored entries.

    Raises:
        IndexError: if no entry in the database has the given ``ownDOI``.
    """
    db = TinyDB(db_loc)
    paper = Query()
    matches = db.search(paper.ownDOI == DOI)
    if not matches:
        # Same exception type the bare ``[0]`` lookup used to raise, but with
        # a message that says which DOI was missing.
        raise IndexError('no paper with ownDOI %r found in %s' % (DOI, db_loc))
    this_paper_dict = matches[0]  # first matching entry, as a dictionary
    name = this_paper_dict['filename']

    # Keyword extraction (Marcello's code).
    raw_vector = keyExtract.getRakeKeywords(text_dir + name + '.txt')
    # The normalized result used to be bound to a misspelled name
    # ("vectore") and dropped, so the raw vector was written instead.
    vector = normVectorGen(raw_vector)

    out_path = vector_dir + name + '.pkl'
    # ``with`` guarantees the pickle file is flushed and closed.
    with open(out_path, 'wb') as output:
        pickle.dump(vector, output)
Beispiel #2
0
import sys
import os.path
import keyExtract
from pprint import pprint

#import RAKE

# Input document handed to every keyExtract call below.
test = "05629128.txt"

# Print what keyWords returns, then pretty-print what smartWords returns.
print(keyExtract.keyWords(test))
pprint(keyExtract.smartWords(test))

# Each entry is indexed below as entry[0] (kept as-is) and entry[1] (rescaled).
rakeList = keyExtract.getRakeKeywords(test)

# Total of all entry[1] values; serves as the normalization denominator.
normSum = sum(entry[1] for entry in rakeList)

# Same entries, with entry[1] divided by the total.
newList = [(entry[0], entry[1] / normSum) for entry in rakeList]

# Add up the second element of every entry in newList.
# NOTE(review): nSum is not read anywhere in the visible part of this script;
# presumably a sanity check that the rescaled values sum to ~1 -- confirm.
nSum = 0

for i in newList:
    nSum = nSum + i[1]