Esempio n. 1
0
"""run lda/plsa"""
# LDA: run the model, then derive the per-user topic data from the
# per-document topics.
topic_word, doc_topic = ldaAdd.runLda(lda_m, dic)
user_topic, users, users_pic_num = ldaAdd.userTopic(
    USER_FILE, points, doc_topic)

# pLSA: load the stored result when both ZW_FILE and DZ_FILE exist on disk;
# otherwise run pLSA, which is handed those same two paths.
if os.path.isfile(ZW_FILE) and os.path.isfile(DZ_FILE):
    plsa_topic_word, plsa_doc_topic = plsaAdd.loadPlsa(
        ZW_FILE, DZ_FILE, clus_num, len(dic))
else:
    plsa_topic_word, plsa_doc_topic = plsaAdd.runPlsa(
        plsa_m, dic, CLUS_WORD_ZERO_FILE, ZW_FILE, DZ_FILE)

# NOTE: this rebinds `users` and `users_pic_num` from the LDA step with the
# values returned for the pLSA document topics.
plsa_user_topic, users, users_pic_num = ldaAdd.userTopic(
    USER_FILE, points, plsa_doc_topic)
"""trans/clus time, order score"""
sc.estTransOrder(points, users, cluster_centers)

# Keep the rows of `points` whose second-to-last column is below sc.clus_k
# and whose column-1 value is found in `users` (presumably the cluster label
# and the user id respectively -- confirm against the code building `points`).
# NOTE(review): if `users` is a list, the membership test is linear per row;
# a set would be faster -- confirm its type before changing it.
rm_by_clus = points[:, -2] < sc.clus_k
somepoints = np.array([x for x in points[rm_by_clus] if x[1] in users])
# A parenthesised single-argument print emits the same text under Python 2's
# print statement and Python 3's print function.
print('somepoints:')
print(len(somepoints))

# Draw a reproducible random subset of the filtered points: with the fixed
# seed, the same input yields the same selection on every run.
random.seed(100)
# random.sample raises ValueError when asked for more items than the
# population holds, so cap the request at the number of available points.
# With at least 15000 points the result is unchanged.
randpoints = np.array(
    random.sample(somepoints, min(15000, len(somepoints))))

clus_hr_sort = sc.lmsOfClusHr(users, user_topic, doc_topic, somepoints, [],
Esempio n. 2
0
    midclus.write(str(points[i,-2])+' ')
midclus.close()
#drawGmap.drawLayer(labels2, cluster_centers2, n_clusters_2, loc, 2)
clus_num = n_clusters_2

# --- run lda ---
# Read the model back when LDA_FILE already exists; otherwise build it with
# saveLda, which is handed the same LDA_FILE / LDA_ZERO_FILE paths.
if os.path.isfile(LDA_FILE):
    lda_m = ldaAdd.readLda(LDA_FILE, LDA_ZERO_FILE)
else:
    lda_m = ldaAdd.saveLda(clus_num, dic, points, LDA_FILE, LDA_ZERO_FILE)

topic_word, doc_topic = ldaAdd.runLda(lda_m, dic)
# The third value returned by userTopic is bound to `t`.
user_topic, users, t = ldaAdd.userTopic(
    USER_FILE, points, doc_topic)

"""trans/clus time, order score"""
sc.estTransOrder(points, users, cluster_centers)

# Keep the rows of `points` whose second-to-last column is below sc.clus_k
# and whose column-1 value is found in `users` (presumably the cluster label
# and the user id respectively -- confirm against the code building `points`).
# NOTE(review): if `users` is a list, the membership test is linear per row;
# a set would be faster -- confirm its type before changing it.
rm_by_clus = points[:, -2] < sc.clus_k
somepoints = np.array([x for x in points[rm_by_clus] if x[1] in users])
# A parenthesised single-argument print emits the same text under Python 2's
# print statement and Python 3's print function.
print('somepoints:')
print(len(somepoints))

# Draw a reproducible random subset of the filtered points: with the fixed
# seed, the same input yields the same selection on every run.
random.seed(100)
# random.sample raises ValueError when asked for more items than the
# population holds, so cap the request at the number of available points.
# With at least 15000 points the result is unchanged.
randpoints = np.array(
    random.sample(somepoints, min(15000, len(somepoints))))

trainpoints = []