Esempio n. 1
0
def similiar():
    """Store, for every note, the ids of its nearest TF-IDF neighbours.

    Loads the notes via ``get_notes_data()`` (which yields ``data`` and a
    positionally matching ``ids`` sequence), vectorises ``data`` with
    TF-IDF, finds each note's nearest neighbours and writes the neighbour
    ids, as strings, into the ``related`` field of the matching document
    in the ``rss.notes`` MongoDB collection.

    Intermediate results are printed to stdout for inspection.
    """
    data, ids = get_notes_data()
    vector = TfidfVectorizer(tokenizer=tokenize)
    X = vector.fit_transform(data)

    # Never request more neighbours than there are notes: kneighbors()
    # rejects n_neighbors larger than the number of fitted samples.
    n_neighbors = min(3, X.shape[0])
    # The TF-IDF matrix is sparse and scikit-learn always uses brute force
    # for sparse input, so ask for it directly instead of triggering the
    # "cannot use tree with sparse input" warning.
    nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm='brute').fit(X)
    distances, indices = nbrs.kneighbors(X)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("%s %s" % (distances, indices))
    print(type(indices))

    # Build [[own_id, related_id, ...], ...] as plain lists. The NumPy index
    # array can only hold integers, so ids are not written back into it.
    related_by_note = []
    for row, neighbours in enumerate(indices):
        # The note itself is normally its own nearest neighbour, but with
        # duplicate notes (distance ties) it is not guaranteed to come
        # first, so key on the row position and filter the note out.
        others = [ids[k] for k in neighbours if k != row]
        related_by_note.append([ids[row]] + others[:n_neighbors - 1])
    print(related_by_note)

    client = MongoClient()
    try:
        notes = client.rss.notes
        for entry in related_by_note:
            notes.update(
                {'_id': str(entry[0])},
                {'$set': {'related': [str(i) for i in entry[1:]]}},
            )
    finally:
        # Release the connection even if one of the updates fails.
        client.close()
        


if __name__ == '__main__':
    # Real entry points, currently disabled:
    #   update_notes_key_words()
    #   similiar()

    # Scratch experiment: after the loop ``x`` holds the last value produced
    # (9) and ``a`` is 10.
    a = 10
    for x in range(10):
        pass
    print(x)

    # Scratch experiment: set a plain attribute on a MongoClient instance
    # and read it back.
    client = MongoClient()
    setattr(client, 'id11', 1)
    print(client.id11)