Ejemplo n.º 1
0
def init():
    """Build the initial cluster state from the module-level tweet list ``M``.

    Steps, in order:

    1. Reset ``Tweet.dim`` to 0 and call ``setupTweets()``
       (presumably this populates ``M`` and ``Tweet.dim`` -- confirm).
    2. Seed centroids with ``canopy(M, T1, T2)`` and run ``k_means_init``
       on the array forms of the tweets and the seeds.
    3. Create one ``Cluster`` per returned center.
    4. Attach every tweet to the cluster its label points at, keeping the
       cluster's ``ft_set`` at no more than ``size_ftset`` entries by
       delegating to ``get_ftset`` once it is full.
    5. Accumulate, per cluster, ``sum_v`` (tweet vectors divided by their
       norm) and ``wsum_v`` (tweet vectors multiplied by the tweet's ``w``).

    Reads the globals ``M``, ``T1``, ``T2`` and ``size_ftset``; rebinds the
    globals ``clusters`` and ``code``. Returns ``None``.
    """
    global clusters, code

    Tweet.dim = 0
    setupTweets()

    print("computing initial centroids...")
    seeds = canopy(M, T1, T2)
    tweet_matrix = convert_to_array(M)
    seed_matrix = convert_to_array(seeds)

    print("performing k-means...")
    code, center = k_means_init(tweet_matrix, seed_matrix)

    # One empty cluster per k-means center, in the same order as `center`.
    clusters = [
        Cluster(TCV(0, 0, 0, 0, 0, []), [], row.tolist()) for row in center
    ]

    print("computing ft-set...")
    for idx, tweet in enumerate(M):
        label = code[idx]
        tweet.cluster = label

        target = clusters[label]
        target.tweets.append(tweet)

        tcv = target.tcv
        # Both accumulators are re-zeroed on every assignment; the actual
        # sums are only built in the pass further down.
        tcv.sum_v = [0] * Tweet.dim
        tcv.wsum_v = [0] * Tweet.dim
        tcv.n = len(target.tweets)

        if len(tcv.ft_set) < size_ftset:
            tcv.ft_set.append(tweet)
        else:
            # ft_set is already at capacity: let get_ftset handle this tweet.
            get_ftset(label, tweet)

    print("computing tweet cluster vectors...")
    for cluster in clusters:
        tcv = cluster.tcv
        for tweet in cluster.tweets:
            scale = norm(tweet.tv)
            if scale == 0:
                # A zero norm is replaced by 1 so the division below is safe.
                scale = 1
            tweet.normtv = scale

            unit_vec = [x / scale for x in tweet.tv]
            weighted_vec = [x * tweet.w for x in tweet.tv]
            tcv.sum_v = [a + b for a, b in zip(tcv.sum_v, unit_vec)]
            tcv.wsum_v = [a + b for a, b in zip(tcv.wsum_v, weighted_vec)]
Ejemplo n.º 2
0
def init():
    """Initialise the global ``clusters`` and ``code`` from the tweets in ``M``.

    The function resets ``Tweet.dim``, calls ``setupTweets()``, derives seed
    centroids with ``canopy(M, T1, T2)``, runs ``k_means_init`` on the array
    forms of the tweets and seeds, and creates one ``Cluster`` per center.
    Each tweet is then appended to the cluster selected by its label; the
    cluster's ``ft_set`` grows until it holds ``size_ftset`` tweets, after
    which ``get_ftset`` is called for each further tweet. A final pass fills
    every cluster's ``sum_v`` with the norm-divided tweet vectors and
    ``wsum_v`` with the ``w``-scaled tweet vectors.

    Uses the globals ``M``, ``T1``, ``T2`` and ``size_ftset``. Returns ``None``.
    """
    global clusters
    global code

    Tweet.dim = 0
    setupTweets()

    print("computing initial centroids...")
    canopy_centers = canopy(M, T1, T2)
    data = convert_to_array(M)
    start = convert_to_array(canopy_centers)

    print("performing k-means...")
    code, center = k_means_init(data, start)

    # Build the cluster list directly instead of filling placeholders.
    clusters = [
        Cluster(TCV(0, 0, 0, 0, 0, []), [], c.tolist()) for c in center
    ]

    print("computing ft-set...")
    for pos, member in enumerate(M):
        cid = code[pos]
        member.cluster = cid

        owner = clusters[cid]
        owner.tweets.append(member)

        # The accumulators are reset to zero vectors each time a tweet is
        # added; they are populated in the second pass.
        owner.tcv.sum_v = [0] * Tweet.dim
        owner.tcv.wsum_v = [0] * Tweet.dim
        owner.tcv.n = len(owner.tweets)

        has_room = not (len(owner.tcv.ft_set) >= size_ftset)
        if has_room:
            owner.tcv.ft_set.append(member)
        else:
            get_ftset(cid, member)

    print("computing tweet cluster vectors...")
    for owner in clusters:
        for member in owner.tweets:
            raw_norm = norm(member.tv)
            # Substitute 1 for a zero norm so the division is defined.
            divisor = 1 if raw_norm == 0 else raw_norm
            member.normtv = divisor

            normalised = [v / divisor for v in member.tv]
            scaled = [v * member.w for v in member.tv]

            totals = owner.tcv
            totals.sum_v = [p + q for p, q in zip(totals.sum_v, normalised)]
            totals.wsum_v = [p + q for p, q in zip(totals.wsum_v, scaled)]
Ejemplo n.º 3
0
def init(M):
    """Run canopy seeding followed by ``k_means_init`` on the items in ``M``.

    ``canopy`` is called with thresholds 1 and 10. The value produced by
    ``k_means_init`` is not kept, so the function returns ``None``.
    """
    # canopy() runs before M is converted, matching the original order
    # (NOTE(review): canopy may modify M -- confirm before reordering).
    seeds = canopy(M, 1, 10)
    points = convert_to_array(M)
    seed_points = convert_to_array(seeds)
    k_means_init(points, seed_points)
Ejemplo n.º 4
0
def init(M):
    """Seed k-means for ``M`` from canopy centroids.

    Calls ``canopy(M, 1, 10)``, converts both ``M`` and the resulting
    centroids with ``convert_to_array``, and passes the two arrays to
    ``k_means_init``. Nothing is returned; the k-means result is dropped.
    """
    initial_centers = canopy(M, 1, 10)
    # Conversion of M happens only after canopy() has run, as in the original.
    as_arrays = (convert_to_array(M), convert_to_array(initial_centers))
    k_means_init(*as_arrays)