def _related_ids(ids, indices, n_related):
    """Translate neighbour row indices into (note_id, related_ids) pairs.

    ``indices[row]`` holds the positions (into ``ids``) of the nearest
    neighbours of the note at position ``row``.  The note's own position is
    removed from its neighbour list, and at most ``n_related`` ids are kept.
    """
    pairs = []
    for row, neighbours in enumerate(indices):
        # Do not rely on the first neighbour being the note itself: with
        # duplicate/equidistant documents another note can come first.
        others = [ids[j] for j in neighbours if j != row]
        pairs.append((ids[row], others[:n_related]))
    return pairs


def similiar(n_related=2):
    """Store, for every note, the ids of its most similar notes.

    Notes are vectorised with TF-IDF (using the module's ``tokenize``), the
    nearest neighbours of each note are looked up, and the ``related`` field
    of each document in the ``rss.notes`` collection is set to the
    stringified ids of those neighbours.

    Args:
        n_related: maximum number of related note ids stored per note.
            Defaults to 2, matching the previous hard-coded behaviour
            (3 neighbours including the note itself).

    Returns:
        None.  The result is written to MongoDB.
    """
    data, ids = get_notes_data()
    n_notes = len(ids)
    if n_notes == 0:
        # Nothing to vectorise or update.
        return

    vectorizer = TfidfVectorizer(tokenizer=tokenize)
    tfidf = vectorizer.fit_transform(data)

    # One extra neighbour because a note is normally its own nearest
    # neighbour; never ask for more neighbours than there are notes.
    n_neighbors = min(n_related + 1, n_notes)
    # NOTE(review): TF-IDF output is sparse; scikit-learn appears to fall
    # back to brute force for sparse input even when 'kd_tree' is requested
    # -- confirm against the installed version.
    nbrs = NearestNeighbors(n_neighbors=n_neighbors,
                            algorithm='kd_tree').fit(tfidf)
    _, indices = nbrs.kneighbors(tfidf)

    client = MongoClient()
    try:
        notes = client.rss.notes
        for note_id, related in _related_ids(ids, indices, n_related):
            # NOTE(review): Collection.update is the legacy PyMongo API;
            # switch to update_one once the PyMongo version in use is
            # confirmed to provide it.
            notes.update(
                {'_id': str(note_id)},
                {'$set': {'related': [str(r) for r in related]}}
            )
    finally:
        client.close()


if __name__ == '__main__':
    # Real entry points, currently disabled:
    # update_notes_key_words()
    # similiar()

    # Scratch code kept as found.
    # NOTE(review): assumes the print sat after the loop (prints 9), not
    # inside it -- the original indentation was lost; confirm.
    for x in range(10):
        a = 10
    print(x)
    client = MongoClient()
    client.id11 = 1
    print(client.id11)