def maketextveccluster2(doc):
	"""Build per-document cluster-centroid representations.

	For each document id in *doc* (id -> raw word list), the words are
	reduced to a frequency table, their word vectors are clustered with
	k-means (log(|vocab|)+1 clusters), and three dicts are returned, each
	mapping the document id to a matrix with one row per cluster:

	  1. the L2-normalized cluster centroids,
	  2. centroids scaled by log(cluster size + 1) / log(N), where N is
	     the add-one-smoothed total token count,
	  3. centroids scaled by cluster size / total token count.

	Relies on module-level helpers ``morphological_analysis`` and
	``wordvec`` and on ``sklearn.cluster.KMeans``.
	"""
	centroid_vecs = {}
	logweighted_vecs = {}
	relfreq_vecs = {}
	for docid in doc.keys():
		freq = morphological_analysis(doc[docid])
		vocab = freq.keys()
		# Add-one smoothed token total, and the raw total used below.
		smoothed_total = sum(np.array(freq.values()) + 1)
		raw_total = sum(np.array(freq.values()))
		mat = np.array([wordvec(word) for word in vocab])
		n_clusters = int((int(np.log(len(vocab))) + 1))
		fitted = KMeans(n_clusters=n_clusters, random_state=1).fit(mat)
		centers = fitted.cluster_centers_
		assignments = np.array(fitted.labels_)
		plain, weighted, relative = [], [], []
		for ci in xrange(len(centers)):
			unit = centers[ci] / np.linalg.norm(centers[ci])
			members = len(assignments[assignments == ci])
			plain.append(unit)
			weighted.append(unit * (np.log(members + 1) / np.log(smoothed_total)))
			relative.append(unit * (float(members) / float(raw_total)))
		centroid_vecs[docid] = np.array(plain)
		logweighted_vecs[docid] = np.array(weighted)
		relfreq_vecs[docid] = np.array(relative)
	return centroid_vecs, logweighted_vecs, relfreq_vecs
#data = word2vec.Text8Corpus('alltopsnews.txt')
#model= word2vec.Word2Vec(data, size=200)
#model.save("Topnewsmodel200_2.model")
# NOTE(review): the model loaded on the next line is immediately overwritten
# two statements later — only "Topnewsmodel200_2.model" is actually used.
# Confirm whether the first load can be removed.
model = word2vec.Word2Vec.load("Topnewsmodel200.model")
#model = word2vec.Word2Vec.load("alltext200.model")
model = word2vec.Word2Vec.load("Topnewsmodel200_2.model")


# L2-normalize every vocabulary vector so clustering is on direction only.
voclist = model.vocab.keys()
veclist = {}
for k in voclist:
	veclist[k] = np.array(model[k]/np.linalg.norm(model[k]))
	#veclist[k] = np.array(model[k])
M = np.array(veclist.values())
features = M
# NOTE(review): DimentionN is not defined before this point — it is only
# assigned (500) much further down in the file, so running this module
# top-to-bottom raises NameError here. Confirm the intended cluster count
# (the commented alternative below suggests 500).
kmeans = KMeans(n_clusters=DimentionN, random_state=100)
#kmeans = KMeans(n_clusters=500, random_state=100)
#kmeans_model1000 = kmeans.fit(features)
kmeans_model1000 = kmeans.fit(features)
# when using xmeans instead of k-means:
#from xmeans import XMeans
#x_means = XMeans(random_state = 1).fit(M)
#labels = x_means.labels_
#features = features500
kmeans_model = kmeans_model1000
labels = kmeans_model.labels_
d = zip(labels, features)
voclist = veclist.keys()
# Map each vocabulary word to the id of the k-means cluster it fell into.
word2vecdic = dict(zip(voclist, labels))

def maketext(textid,Folda = "businesstexts"):
	"""Placeholder: build a text representation for document *textid*.

	BUG FIX: the original ``def`` had its entire body commented out at
	column 0, leaving the function with no indented suite — an
	IndentationError that prevents the whole module from being parsed.
	A ``pass`` body restores syntactic validity; the commented lines are
	kept below as the candidate sources for the word->cluster lookup
	table the body was presumably meant to load.

	:param textid: document identifier (exact type unknown from here —
	    TODO confirm against callers).
	:param Folda: corpus folder name; default "businesstexts".
	:returns: None (not yet implemented).
	"""
	# Candidate word2vecdic sources (all disabled in the original):
	#word2vecdic = pickle.load(open("yahookanjokaiseki/word2vecdic" + str(DimentionN) + ".dump","r"))
	#word2vecdic = pickle.load(open("yahookanjokaiseki/word2vecdic.dump"))
	#word2vecdic = pickle.load(open("word2vecdic_500_2013_2014_2015.dump","r"))
	#word2vecdic = pickle.load(open("word2vecdic_500_2013_2014_2015_normed.dump","r"))
	pass

# When using the polarity-aware word2vec model:
model = word2vec.Word2Vec.load_word2vec_format("encow14a_200_antsyn_noun_adj_verb.bin",binary = True)
# L2-normalize every vocabulary vector so k-means clusters on direction only.
voclist = model.vocab.keys()
veclist = {}
for k in voclist:
    veclist[k] = np.array(model[k]/np.linalg.norm(model[k]))

M = np.array(veclist.values())
random_state = 10
# Cluster count; also the dimensionality of the resulting text vectors.
DimentionN = 500
kmeans = KMeans(n_clusters=DimentionN, random_state=random_state)
kmeans_model = kmeans.fit(M)
labels = kmeans_model.labels_
#kmeans_model.predict(M[0])
d = zip(labels, M)
voclist_ID = veclist.keys()
# NOTE(review): pickle.load(open(...)) never closes the file handle, and
# pickle is unsafe on untrusted data — confirm the dump's provenance.
wordIDdic = pickle.load(open("../../AntSynDistinction/corpus/wordIDdic.dump"))
# Invert wordIDdic: numeric id -> original word.
IDwordIDdic = {}
for key in wordIDdic.keys():
    IDwordIDdic[wordIDdic[key]] = key

voclist = []
for ID in voclist_ID:
    if ID != u"</s>":
        voclist.append(IDwordIDdic[int(ID)])
    else: