def init():
    """Build the initial clusters from the module-level tweet collection ``M``.

    Steps, in order:

    1. Reset ``Tweet.dim`` to 0 and call ``setupTweets()`` (presumably this
       populates ``M`` and ``Tweet.dim`` -- confirm against its definition).
    2. Seed centroids with ``canopy(M, T1, T2)`` and refine them with
       ``k_means_init``, which yields a per-tweet label array and the centers.
    3. Create one ``Cluster`` per center, then attach every tweet to the
       cluster named by its label and maintain that cluster's ``ft_set``.
    4. Accumulate ``sum_v`` (unit-normalised vectors) and ``wsum_v``
       (weight-scaled vectors) for every cluster.

    Rebinds the module-level names ``clusters`` and ``code``; returns nothing.
    """
    global clusters, code

    Tweet.dim = 0
    setupTweets()

    print("computing initial centroids...")
    seeds = canopy(M, T1, T2)
    tweet_matrix = convert_to_array(M)
    seed_matrix = convert_to_array(seeds)

    print("performing k-means...")
    code, center = k_means_init(tweet_matrix, seed_matrix)
    clusters = [
        Cluster(TCV(0, 0, 0, 0, 0, []), [], centroid.tolist())
        for centroid in center
    ]

    print("computing ft-set...")
    for idx, tweet in enumerate(M):
        label = code[idx]
        tweet.cluster = label

        owner = clusters[label]
        owner.tweets.append(tweet)

        tcv = owner.tcv
        # The accumulators are re-zeroed on every assignment; they are only
        # filled in by the final pass below.
        tcv.sum_v = [0] * Tweet.dim
        tcv.wsum_v = [0] * Tweet.dim
        tcv.n = len(owner.tweets)

        if len(tcv.ft_set) < size_ftset:
            tcv.ft_set.append(tweet)
        else:
            # ft_set is already at capacity: get_ftset decides what to do
            # with this tweet.
            get_ftset(label, tweet)

    print("computing tweet cluster vectors...")
    for cluster in clusters:
        tcv = cluster.tcv
        for tweet in cluster.tweets:
            vector = tweet.tv
            magnitude = norm(vector)
            if magnitude == 0:
                # Zero vectors are given norm 1 so the division is defined.
                magnitude = 1
            tweet.normtv = magnitude

            weight = tweet.w
            unit_part = [component / magnitude for component in vector]
            weighted_part = [component * weight for component in vector]
            tcv.sum_v = [acc + inc for acc, inc in zip(tcv.sum_v, unit_part)]
            tcv.wsum_v = [acc + inc for acc, inc in zip(tcv.wsum_v, weighted_part)]
def init():
    """Build the initial clusters from the module-level tweet collection ``M``.

    Steps, in order:

    1. Reset ``Tweet.dim`` to 0 and call ``setupTweets()`` (presumably this
       populates ``M`` and ``Tweet.dim`` -- confirm against its definition).
    2. Seed centroids with ``canopy(M, T1, T2)`` and refine them with
       ``k_means_init``, which yields a per-tweet label array and the centers.
    3. Create one ``Cluster`` per center, then attach every tweet to the
       cluster named by its label and maintain that cluster's ``ft_set``.
    4. Accumulate ``sum_v`` (unit-normalised vectors) and ``wsum_v``
       (weight-scaled vectors) for every cluster.

    Rebinds the module-level names ``clusters`` and ``code``; returns nothing.
    """
    global clusters, code

    Tweet.dim = 0
    setupTweets()

    print("computing initial centroids...")
    seeds = canopy(M, T1, T2)
    tweet_matrix = convert_to_array(M)
    seed_matrix = convert_to_array(seeds)

    print("performing k-means...")
    code, center = k_means_init(tweet_matrix, seed_matrix)
    clusters = [
        Cluster(TCV(0, 0, 0, 0, 0, []), [], centroid.tolist())
        for centroid in center
    ]

    print("computing ft-set...")
    for idx, tweet in enumerate(M):
        label = code[idx]
        tweet.cluster = label

        owner = clusters[label]
        owner.tweets.append(tweet)

        tcv = owner.tcv
        # The accumulators are re-zeroed on every assignment; they are only
        # filled in by the final pass below.
        tcv.sum_v = [0] * Tweet.dim
        tcv.wsum_v = [0] * Tweet.dim
        tcv.n = len(owner.tweets)

        if len(tcv.ft_set) < size_ftset:
            tcv.ft_set.append(tweet)
        else:
            # ft_set is already at capacity: get_ftset decides what to do
            # with this tweet.
            get_ftset(label, tweet)

    print("computing tweet cluster vectors...")
    for cluster in clusters:
        tcv = cluster.tcv
        for tweet in cluster.tweets:
            vector = tweet.tv
            magnitude = norm(vector)
            if magnitude == 0:
                # Zero vectors are given norm 1 so the division is defined.
                magnitude = 1
            tweet.normtv = magnitude

            weight = tweet.w
            unit_part = [component / magnitude for component in vector]
            weighted_part = [component * weight for component in vector]
            tcv.sum_v = [acc + inc for acc, inc in zip(tcv.sum_v, unit_part)]
            tcv.wsum_v = [acc + inc for acc, inc in zip(tcv.wsum_v, weighted_part)]
def init(M, t1=1, t2=10):
    """Seed centroids with canopy clustering and run the initial k-means.

    Args:
        M: Collection of items to cluster; passed to ``canopy`` and to
            ``convert_to_array`` unchanged.
        t1: Second positional argument forwarded to ``canopy``. Defaults to
            1, the value that used to be hard-coded here.
        t2: Third positional argument forwarded to ``canopy``. Defaults to
            10, the value that used to be hard-coded here.
            NOTE(review): presumably the two canopy distance thresholds --
            confirm against ``canopy``'s definition.

    Returns:
        Whatever ``k_means_init`` returns for the converted data and seed
        centroids. Previously this value was bound to an unused local and
        dropped, so the function always returned ``None``.
    """
    # canopy() is still called before M is converted, exactly as before, in
    # case it has side effects on M.
    seeds = canopy(M, t1, t2)
    data_matrix = convert_to_array(M)
    seed_matrix = convert_to_array(seeds)
    return k_means_init(data_matrix, seed_matrix)
def init(M, t1=1, t2=10):
    """Seed centroids with canopy clustering and run the initial k-means.

    Args:
        M: Collection of items to cluster; passed to ``canopy`` and to
            ``convert_to_array`` unchanged.
        t1: Second positional argument forwarded to ``canopy``. Defaults to
            1, the value that used to be hard-coded here.
        t2: Third positional argument forwarded to ``canopy``. Defaults to
            10, the value that used to be hard-coded here.
            NOTE(review): presumably the two canopy distance thresholds --
            confirm against ``canopy``'s definition.

    Returns:
        Whatever ``k_means_init`` returns for the converted data and seed
        centroids. Previously this value was bound to an unused local and
        dropped, so the function always returned ``None``.
    """
    # canopy() is still called before M is converted, exactly as before, in
    # case it has side effects on M.
    seeds = canopy(M, t1, t2)
    data_matrix = convert_to_array(M)
    seed_matrix = convert_to_array(seeds)
    return k_means_init(data_matrix, seed_matrix)