Example #1
def batch_em_cluster(read_directory, write_directory1, write_directory2):
    
    file_number = sum([len(files) for root, dirs, files in os.walk(read_directory)])
    
    cluster_number = 8
    init_mu = 0.1
    init_sigma = 1.0
    
    for i in range(file_number):
        vsm = np.loadtxt(read_directory + '/' + str(i + 1) + '.txt')
        data_dimension = vsm.shape[1]
        
        init_means = []
        for j in range(cluster_number):
            init_means.append(init_sigma * np.random.randn(data_dimension) + init_mu)
        
        cluster_model = cluster.EMClusterer(init_means, bias=0.1)
        
        cluster_tag = cluster_model.cluster(vsm, True, trace=False)
        
        cluster_tag_to_string = [str(x) for x in cluster_tag]
        center_data = cluster_model._means
        
        quick_write_list_to_text(cluster_tag_to_string, write_directory1 + '/' + str(i + 1) + '.txt')
        write_matrix_to_text(center_data, write_directory2 + '/' + str(i + 1) + '.txt')
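A minimal usage sketch for batch_em_cluster above, assuming the `cluster.EMClusterer` it references is NLTK's EM clusterer and that `quick_write_list_to_text` / `write_matrix_to_text` are the project's own text I/O helpers (not shown). The directory names are placeholders.

import os
import numpy as np
from nltk import cluster  # provides cluster.EMClusterer

# Hypothetical layout: read_dir holds 1.txt, 2.txt, ... with one feature
# vector per row; the two output directories receive the cluster tags and
# the learned cluster means for each input file.
read_dir = 'data/vsm'
tag_dir = 'results/tags'
mean_dir = 'results/means'

for d in (tag_dir, mean_dir):
    os.makedirs(d, exist_ok=True)

batch_em_cluster(read_dir, tag_dir, mean_dir)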
Example #2
def batch_em_cluster(read_directory, write_directory1, write_directory2):

    file_number = sum(
        [len(files) for root, dirs, files in os.walk(read_directory)])

    cluster_number = 8
    init_mu = 0.1
    init_sigma = 1.0

    for i in range(file_number):
        vsm = np.loadtxt(read_directory + '/' + str(i + 1) + '.txt')
        data_dimension = vsm.shape[1]

        init_means = []
        for j in range(cluster_number):
            init_means.append(init_sigma * np.random.randn(data_dimension) +
                              init_mu)

        cluster_model = cluster.EMClusterer(init_means, bias=0.1)

        cluster_tag = cluster_model.cluster(vsm, True, trace=False)

        cluster_tag_to_string = [str(x) for x in cluster_tag]
        center_data = cluster_model._means

        quick_write_list_to_text(cluster_tag_to_string,
                                 write_directory1 + '/' + str(i + 1) + '.txt')
        write_matrix_to_text(center_data,
                             write_directory2 + '/' + str(i + 1) + '.txt')
Example #3
def SP_CT_LDA(read_directory1, read_directory2, write_directory1, write_directory2, write_directory3):
    
    file_number = sum([len(files) for root, dirs, files in os.walk(read_directory1)])
    
    for i in range(file_number):
        
        THETA = np.loadtxt(read_directory1 + '/' + str(i + 1) + '.txt')
        PHAI = np.loadtxt(read_directory2 + '/' + str(i + 1) + '.txt')
        
        # View 1: compute similarity between latent topics from their word distributions
        W1 = np.zeros((len(PHAI), len(PHAI)))
        for j in range(len(PHAI)):
            for k in range(j, len(PHAI)):
                W1[j, k] = 1.0 / (SKLD(PHAI[j], PHAI[k]) + 1.0)
                W1[k, j] = W1[j, k]

        # Estimate the number of clusters
        cluster_number = get_cluster_number(W1)
        
        print(cluster_number)
        cluster_tag = spectral_cluster2(W1, cluster_number)
        
        # Cluster analysis
        center_topic = np.zeros((cluster_number, len(PHAI[0])))
        each_cluster_number = np.zeros(cluster_number, int)
        
        weibo_topic_similarity = np.zeros((cluster_number, len(THETA)))
        THETA = THETA.transpose()
        
        for j in range(len(cluster_tag)):
            center_topic[cluster_tag[j]] += PHAI[j]
            each_cluster_number[cluster_tag[j]] += 1
            
            weibo_topic_similarity[cluster_tag[j]] += THETA[j]
        
        #
        for j in range(cluster_number):
            center_topic[j] = center_topic[j] / each_cluster_number[j]
            #weibo_topic_similarity[j] = weibo_topic_similarity[j] / each_cluster_number[j]
        
        weibo_topic_similarity = weibo_topic_similarity.transpose()
        
        ecn_to_string = [str(x) for x in each_cluster_number]
        
        write_matrix_to_text(weibo_topic_similarity, write_directory1 + '/' + str(i + 1) + '.txt')
        write_matrix_to_text(center_topic, write_directory2 + '/' + str(i + 1) + '.txt')
        quick_write_list_to_text(ecn_to_string, write_directory3 + '/' + str(i + 1) + '.txt')
        
        print "Segment %d Completed." % (i + 1)
Example #4
def pattern_cluster(read_filename1, read_filename2, read_filename3, write_filename1, write_filename2):
    pattern_list = []
    f = open(read_filename1, 'r')
    line = f.readline()
    while line:
        if len(line.split()) > 1:
            pattern_list.append(line.split())
        line = f.readline()
    f.close()
    
    word_weight_dict = {}
    f = open(read_filename2, 'r')
    line = f.readline()
    while line:
        word_weight_dict[line.split()[0]] = float(line.split()[1])
        line = f.readline()
    f.close()
    
    # Call compute_similarity to build the similarity matrix and estimate the number of clusters
    similarity_matrix, cluster_number = compute_similarity(pattern_list, read_filename3, word_weight_dict)
    
    write_matrix_to_text(similarity_matrix, write_filename1)
    quick_write_list_to_text([str(cluster_number)], write_filename2)
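A hedged usage sketch for pattern_cluster: the first file is parsed as whitespace-separated patterns (one per line; single-token lines are skipped), the second as "word weight" pairs, and the third is passed straight to `compute_similarity`, which, like the two write helpers, is a project function not shown here. The file names below are placeholders.

# Hypothetical input files (formats inferred from the parsing above):
#   patterns.txt      one pattern per line, whitespace-separated words
#   word_weights.txt  "<word> <weight>" per line
#   aux_input.txt     whatever compute_similarity expects as its second argument
pattern_cluster('patterns.txt', 'word_weights.txt', 'aux_input.txt',
                'similarity_matrix.txt', 'cluster_number.txt')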
Example #5
def pattern_cluster(read_filename1, read_filename2, read_filename3, write_filename1, write_filename2):
    pattern_list = []
    f = open(read_filename1, "r")
    line = f.readline()
    while line:
        if len(line.split()) > 1:
            pattern_list.append(line.split())
        line = f.readline()
    f.close()

    word_weight_dict = {}
    f = open(read_filename2, "r")
    line = f.readline()
    while line:
        word_weight_dict[line.split()[0]] = float(line.split()[1])
        line = f.readline()
    f.close()

    # Call compute_similarity to build the similarity matrix and estimate the number of clusters
    similarity_matrix, cluster_number = compute_similarity(pattern_list, read_filename3, word_weight_dict)

    write_matrix_to_text(similarity_matrix, write_filename1)
    quick_write_list_to_text([str(cluster_number)], write_filename2)
Example #6
def CT_LDA(read_directory1, read_directory2, write_directory1, write_directory2, write_directory3, write_filename):
    
    gamma = 0.1
    run_time = []
    file_number = sum([len(files) for root, dirs, files in os.walk(read_directory1)])
    
    for i in range(file_number):
        
        start = time.perf_counter()  # time.clock() was removed in Python 3.8
        
        THETA = np.loadtxt(read_directory1 + '/' + str(i + 1) + '.txt')
        PHAI = np.loadtxt(read_directory2 + '/' + str(i + 1) + '.txt')
        
        
        # View 1: compute similarity between latent topics from their word distributions
        W1 = np.zeros((len(PHAI), len(PHAI)))
        for j in range(len(PHAI)):
            for k in range(j, len(PHAI)):
                W1[j, k] = 1.0 / (SKLD(PHAI[j], PHAI[k]) + 1.0)
                W1[k, j] = W1[j, k]
        
        # View 2: compute similarity between latent topics from their sets of related Weibo posts
        W2 = np.zeros((len(PHAI), len(PHAI)))
        
        related_weibo_list = []
        for j in range(len(PHAI)):
            related_weibo_list.append([])
            
        for j in range(len(THETA)):
            for k in range(len(THETA[0])):
                if THETA[j, k] >= gamma:
                    related_weibo_list[k].append(j)
        
        for j in range(len(PHAI)):
            for k in range(j, len(PHAI)):
                numerator = len(set(related_weibo_list[j]) & set(related_weibo_list[k]))
                denominator = len(set(related_weibo_list[j]) | set(related_weibo_list[k]))
                if j == k:
                    W2[j, k] = 1.0
                    W2[k, j] = 1.0
                elif denominator == 0.0:
                    W2[j, k] = 0.0
                    W2[k, j] = 0.0
                else:
                    W2[j, k] = np.true_divide(numerator, denominator)
                    W2[k, j] = W2[j, k]
        
        
        # Estimate the number of clusters
        cluster_number = get_cluster_number(W1)
        max_iter = 3
        print(cluster_number)
        cluster_tag = co_training_spectral_cluster(W1, W2, cluster_number, iter=max_iter)
        
        
        
        # Cluster analysis
        center_topic = np.zeros((cluster_number, len(PHAI[0])))
        each_cluster_number = np.zeros(cluster_number, int)
        
        weibo_topic_similarity = np.zeros((cluster_number, len(THETA)))
        THETA = THETA.transpose()
        
        for j in range(len(cluster_tag)):
            center_topic[cluster_tag[j]] += PHAI[j]
            each_cluster_number[cluster_tag[j]] += 1
            
            weibo_topic_similarity[cluster_tag[j]] += THETA[j]
        
        #
        for j in range(cluster_number):
            center_topic[j] = center_topic[j] / each_cluster_number[j]
            #weibo_topic_similarity[j] = weibo_topic_similarity[j] / each_cluster_number[j]
        
        weibo_topic_similarity = weibo_topic_similarity.transpose()
        
        ecn_to_string = [str(x) for x in each_cluster_number]
        #time.sleep(5)
        elapsed = time.perf_counter() - start  # time.clock() was removed in Python 3.8
        run_time.append(str(elapsed))
        print("This time:", elapsed)
        
        write_matrix_to_text(weibo_topic_similarity, write_directory1 + '/' + str(i + 1) + '.txt')
        write_matrix_to_text(center_topic, write_directory2 + '/' + str(i + 1) + '.txt')
        quick_write_list_to_text(ecn_to_string, write_directory3 + '/' + str(i + 1) + '.txt')
        
        print "Segment %d Completed." % (i + 1)
    quick_write_list_to_text(run_time, write_filename)
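`get_cluster_number` estimates the number of clusters from the affinity matrix `W1` but is not defined in these examples. One common way to do that is the eigengap heuristic on the normalized graph Laplacian; the sketch below is an assumption about the helper's intent, not its actual implementation.

import numpy as np

def get_cluster_number(W, max_k=None):
    """Estimate a cluster count from an affinity matrix (eigengap heuristic, sketch)."""
    W = np.asarray(W, dtype=float)
    d_inv_sqrt = 1.0 / np.sqrt(np.maximum(W.sum(axis=1), 1e-12))
    # Normalized Laplacian: L = I - D^{-1/2} W D^{-1/2}
    L = np.eye(len(W)) - d_inv_sqrt[:, None] * W * d_inv_sqrt[None, :]
    eigvals = np.sort(np.linalg.eigvalsh(L))
    if max_k is None:
        max_k = len(W) - 1
    # The largest gap between consecutive small eigenvalues suggests k.
    gaps = np.diff(eigvals[:max_k + 1])
    return int(np.argmax(gaps)) + 1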
Example #7
def stream_CT_LDA(read_directory1, read_directory2, read_directory3, write_directory1, write_directory2, write_directory3, write_filename):
    
    gamma = 0.1
    s_lambda = 0.7
    
    # 时间窗口
    q = 4
    
    ct_window = []
    ct_num_window = []
    ct_wordlist_window = []
    
    run_time = []
    
    file_number = sum([len(files) for root, dirs, files in os.walk(read_directory1)])
    
    for i in range(file_number):
        
        THETA = np.loadtxt(read_directory1 + '/' + str(i + 1) + '.txt')
        PHAI = np.loadtxt(read_directory2 + '/' + str(i + 1) + '.txt')
        
        # View 1: compute similarity between latent topics from their word distributions
        W1 = np.zeros((len(PHAI), len(PHAI)))
        for j in range(len(PHAI)):
            for k in range(j, len(PHAI)):
                W1[j, k] = 1.0 / (SKLD(PHAI[j], PHAI[k]) + 1.0)
                W1[k, j] = W1[j, k]
        
        # Estimate the number of clusters
        cluster_number = get_cluster_number(W1)
        print(cluster_number)
        
        # Word list of this data segment
        this_word_list = []
        f1 = open(read_directory3 + '/' + str(i + 1) + '.txt', 'r')  # text mode so split() yields str, not bytes
        line = f1.readline()
        while line:
            this_word_list.append(line.split()[0])
            line = f1.readline()
        
        f1.close()
        
        start = time.perf_counter()  # time.clock() was removed in Python 3.8
        if i < q or np.mod(i, q) == 0: 
        
            # View 2: compute similarity between latent topics from their sets of related Weibo posts
            W2 = np.zeros((len(PHAI), len(PHAI)))
        
            related_weibo_list = []
            for j in range(len(PHAI)):
                related_weibo_list.append([])
            
            for j in range(len(THETA)):
                for k in range(len(THETA[0])):
                    if THETA[j, k] >= gamma:
                        related_weibo_list[k].append(j)
        
            for j in range(len(PHAI)):
                for k in range(j, len(PHAI)):
                    numerator = len(set(related_weibo_list[j]) & set(related_weibo_list[k]))
                    denominator = len(set(related_weibo_list[j]) | set(related_weibo_list[k]))
                    if j == k:
                        W2[j, k] = 1.0
                        W2[k, j] = 1.0
                    elif denominator == 0.0:
                        W2[j, k] = 0.0
                        W2[k, j] = 0.0
                    else:
                        W2[j, k] = np.true_divide(numerator, denominator)
                        W2[k, j] = W2[j, k]

            max_iter = 20
            
            cluster_tag = co_training_spectral_cluster(W1, W2, cluster_number, max_iter)
            
            # Cluster analysis
            center_topic = np.zeros((cluster_number, len(PHAI[0])))
            each_cluster_number = np.zeros(cluster_number, int)
        
            weibo_topic_similarity = np.zeros((cluster_number, len(THETA)))
            THETA = THETA.transpose()
            
            for j in range(len(cluster_tag)):
                center_topic[cluster_tag[j]] += PHAI[j]
                each_cluster_number[cluster_tag[j]] += 1
            
                weibo_topic_similarity[cluster_tag[j]] += THETA[j]
            
            #
            for j in range(cluster_number):
                center_topic[j] = center_topic[j] / each_cluster_number[j]
        
            weibo_topic_similarity = weibo_topic_similarity.transpose()
        
        else:
            # Look back one data segment
            temp_ct = np.zeros((cluster_number, len(PHAI[0])))
            
            if len(ct_window[-1]) >= cluster_number:
                idx = ct_num_window[-1].argsort()
                idx = idx[::-1]
                
                temp_ct = ct_window[-1][idx][0 : cluster_number, :]
            else:
                temp_ct[0 : len(ct_window[-1]), :] = ct_window[-1]
                
            # Merge the vector spaces
            new_temp_ct, new_this_lt, new_word_list = merge_space(ct_wordlist_window[-1], this_word_list, temp_ct, PHAI)
            
            
            
            # Compute similarity between the current latent topics and the previous segment's center topics
            lt_ct_similarity = np.zeros((len(new_this_lt), len(new_temp_ct)))
            for j in range(len(new_this_lt)):
                for k in range(len(new_temp_ct)):
                    lt_ct_similarity[j, k] = 1.0 / (SKLD(new_this_lt[j], new_temp_ct[k]) + 1.0)
            
            #print lt_ct_similarity
            
            cluster_tag = []
            
            new_part_lt = []  # latent topics newly appearing in this segment, in the original (500-dim) space
            last_part_lt = []  # latent topics (original space) similar to the previous segment's center topics; 2-D list
            for j in range(len(new_temp_ct)):
                last_part_lt.append([])
                
            for j in range(len(new_this_lt)):
                if np.max(lt_ct_similarity[j]) < s_lambda:
                    new_part_lt.append(PHAI[j])
                    cluster_tag.append(-1)  # label for a new cluster
                else:
                    max_index = np.argmax(lt_ct_similarity[j])
                    last_part_lt[max_index].append(PHAI[j])
                    cluster_tag.append(max_index)
            
            empty_count = 0
            this_last_ct = []
            this_last_ct_count = []
            #for j in range(len(new_this_lt)):
            for j in range(len(last_part_lt)):
                
                if len(last_part_lt[j]) == 0:
                    empty_count += 1
                else:
                    temp_this_ct = np.zeros(len(PHAI[0]))
                    temp_this_ct_count = 0
                    for k in range(len(last_part_lt[j])):
                        temp_this_ct += last_part_lt[j][k]
                        temp_this_ct_count += 1
                    
                    this_last_ct.append(temp_this_ct / temp_this_ct_count)
                    this_last_ct_count.append(temp_this_ct_count)
            
            
            center_topic = np.zeros((cluster_number, len(PHAI[0])))
            each_cluster_number = np.zeros(cluster_number, int)
            
            print "empty_number" , empty_count
            
            '''
            Update the center topics case by case
            '''
            #new_part_it_number = cluster_number - (len(new_this_lt) - empty_count)
            new_part_it_number = cluster_number - (len(last_part_lt) - empty_count)
            print "new_part_it_number" , new_part_it_number
            if new_part_it_number == 0 and len(new_part_lt) == 0:
                # Use the previous segment's topics directly as this segment's topics
                # Take the mean per segment
                for j in range(len(this_last_ct)):
                    center_topic[j] = this_last_ct[j]
                    each_cluster_number[j] = this_last_ct_count[j]
            # This case rarely happens; if it does, s_lambda is set too small
            elif new_part_it_number > 0 and len(new_part_lt) == 0:
                # Use the previous segment's topics directly as this segment's topics
                # Take the mean per segment
                for j in range(len(this_last_ct)):
                    center_topic[j] = this_last_ct[j]
                    each_cluster_number[j] = this_last_ct_count[j]
                
                center_topic = center_topic[0 : len(this_last_ct), :]
                each_cluster_number = each_cluster_number[0 : len(this_last_ct)]
                cluster_tag = cluster_tag[0 : len(this_last_ct)]
                
            elif new_part_it_number == 0 and len(new_part_lt) > 0:    
                # Replace one center topic
                
                new_part_ct = np.zeros((1, len(PHAI[0])))
                for j in range(len(new_part_lt)):
                    new_part_ct += new_part_lt[j]
                
                new_part_ct = new_part_ct / len(new_part_lt)
                
                min_index = np.argmin(this_last_ct_count)
                
                # Find which topic the removed one is most similar to, and merge them
                merge_si = np.zeros(len(this_last_ct), float)
                for j in range(len(this_last_ct)):
                    if j == min_index:
                        merge_si[j] = -1
                    else:
                        merge_si[j] = 1.0 / (SKLD(this_last_ct[min_index], this_last_ct[j]) + 1.0)
                
                merge_des = np.argmax(merge_si)
                
                this_last_ct[min_index] = new_part_ct
                this_last_ct_count[min_index] = len(new_part_lt)
                this_last_ct[merge_des] = (this_last_ct[merge_des] + this_last_ct[min_index]) / 2.0
                # Add the cluster element counts together
                this_last_ct_count[merge_des] = this_last_ct_count[merge_des] + this_last_ct_count[min_index]
                
                for j in range(len(this_last_ct)):
                    center_topic[j] = this_last_ct[j]
                    each_cluster_number[j] = this_last_ct_count[j]
                
                for j in range(len(cluster_tag)):
                    # -1 becomes min_index
                    # min_index becomes merge_des
                    if cluster_tag[j] == -1:
                        cluster_tag[j] = min_index
                    elif cluster_tag[j] == min_index:
                        cluster_tag[j] = merge_des
            else:
                # Update the carried-over part first
                for j in range(len(this_last_ct)):
                    center_topic[j] = this_last_ct[j]
                    each_cluster_number[j] = this_last_ct_count[j]
                
                # Add one new topic
                if new_part_it_number == 1:
                    new_part_ct = np.zeros((1, len(PHAI[0])))
                    for j in range(len(new_part_lt)):
                        new_part_ct += new_part_lt[j]
                
                    new_part_ct = new_part_ct / len(new_part_lt)

                    center_topic[-1] = new_part_ct
                    each_cluster_number[-1] = len(new_part_lt)
                    for j in range(len(cluster_tag)):
                        if cluster_tag[j] == -1:
                            cluster_tag[j] = cluster_number - 1
                
                # An exception could occur here
                #elif len(new_part_lt) == 1:
                    
                # Add several new topics
                else:
                    # Spectral clustering
                    #print new_part_lt
                    sp_label = spectral_cluster(new_part_lt, new_part_it_number)
                    new_part_ct = np.zeros((new_part_it_number, len(PHAI[0])))
                    new_part_ct_number = np.zeros(new_part_it_number, int)
                    for j in range(len(sp_label)):
                        new_part_ct[sp_label[j]] += new_part_lt[j]
                        new_part_ct_number[sp_label[j]] += 1
                    
                    for j in range(new_part_it_number):
                        new_part_ct[j] = new_part_ct[j] / new_part_ct_number[j]
                        center_topic[len(this_last_ct) + j] = new_part_ct[j]
                        each_cluster_number[len(this_last_ct) + j] = new_part_ct_number[j]
                    
                    new_count = 0
                    for j in range(len(cluster_tag)):
                        if cluster_tag[j] == -1:
                            cluster_tag[j] = cluster_number - new_part_it_number + sp_label[new_count]
                            new_count += 1
                
            # Compute document-topic similarity
            weibo_topic_similarity = np.zeros((cluster_number, len(THETA)))
            THETA = THETA.transpose()
            
            for j in range(len(cluster_tag)):
                weibo_topic_similarity[cluster_tag[j]] += THETA[j]
        
            weibo_topic_similarity = weibo_topic_similarity.transpose()

        elapsed = time.perf_counter() - start  # time.clock() was removed in Python 3.8
        run_time.append(str(elapsed))
        print("This time:", elapsed)
        # Common part
        
        # Add to the time window
        ecn_to_string = [str(x) for x in each_cluster_number]    
        ct_window.append(center_topic)
        ct_num_window.append(each_cluster_number)
        ct_wordlist_window.append(this_word_list)
        
        # Remove the oldest data
        if len(ct_window) > q:
            ct_window.remove(ct_window[0])
            ct_num_window.remove(ct_num_window[0])
            ct_wordlist_window.remove(ct_wordlist_window[0])

        
        write_matrix_to_text(weibo_topic_similarity, write_directory1 + '/' + str(i + 1) + '.txt')
        write_matrix_to_text(center_topic, write_directory2 + '/' + str(i + 1) + '.txt')
        quick_write_list_to_text(ecn_to_string, write_directory3 + '/' + str(i + 1) + '.txt')
        
        print "Segment %d Completed." % (i + 1)
    
    quick_write_list_to_text(run_time, write_filename)
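The three parallel window lists above (`ct_window`, `ct_num_window`, `ct_wordlist_window`) are appended to and then trimmed by hand once they hold more than `q` segments. A `collections.deque` with `maxlen` gives the same sliding-window behaviour with less bookkeeping; this is only an alternative sketch, not the author's code.

from collections import deque

q = 4  # window length, as in the example above

# One entry per segment: (center_topic, each_cluster_number, this_word_list).
# deque(maxlen=q) discards the oldest entry automatically on append.
window = deque(maxlen=q)

def push_segment(center_topic, each_cluster_number, this_word_list):
    window.append((center_topic, each_cluster_number, this_word_list))

def last_segment():
    # Equivalent to (ct_window[-1], ct_num_window[-1], ct_wordlist_window[-1]).
    return window[-1]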
Example #8
def topic_life(read_directory1, read_directory2, read_directory3, write_directory1):
    
    gamma = 0.65
    delta = 0.80
    
    #file_number = sum([len(files) for root, dirs, files in os.walk(read_directory1)])
    q = 4
    start_batch = 46
    interval = 7
    end_batch = start_batch + interval
    
    all_topic_batch, new_word_list, all_count = merge_all_center(read_directory1, read_directory2, start_batch, end_batch)
    
    evolution_matrix = np.zeros((all_count, all_count), int)
    
    previous_topics = []
    previous_num = []
    previous_intensity = []
    
    start_index = 0
    end_index = 0
    
    for i in range(len(all_topic_batch)):
        this_topic_intensity = []
        get_text_to_single_list(this_topic_intensity, read_directory3 + '/' + str(start_batch + i) + '.txt')
        this_topic_intensity = [int(x) for x in this_topic_intensity]
        print(this_topic_intensity)
        
        if i == 0:
            for j in range(len(all_topic_batch[i])):
                evolution_matrix[j, j] = 1
                previous_topics.append(all_topic_batch[i][j])
                previous_intensity.append(this_topic_intensity[j])
            
            start_index = 0
            end_index += len(all_topic_batch[i])
            
            previous_num.append(len(all_topic_batch[i]))

        else:
            kl_matrix = np.zeros((len(all_topic_batch[i]), len(previous_topics)))
            
            for j in range(len(all_topic_batch[i])):
                for k in range(len(previous_topics)):
                    kl_matrix[j, k] = 1.0 / (SKLD(all_topic_batch[i][j], previous_topics[k]) + 1.0)
            
            # Detect emergence
            for j in range(len(kl_matrix)):
                #if np.max(kl_matrix[j]) < gamma:
                evolution_matrix[end_index + j, end_index + j] = 1
            
            # Detect disappearance
            for j in range(len(kl_matrix[0])):
                if np.max(kl_matrix[:, j]) < gamma:
                    evolution_matrix[start_index + j, start_index + j] = -1
            
            # Detect continuation
            for j in range(len(kl_matrix)):
                for k in range(len(kl_matrix[j])):
                    if kl_matrix[j][k] >= delta:
                        evolution_matrix[start_index + k, end_index + j] = 2
                        evolution_matrix[end_index + j, start_index + k] = 2
            
            # Detect merging
            for j in range(len(kl_matrix)):
                latent_merge_index = []
                si_value = []
                for k in range(len(kl_matrix[j])):
                    if kl_matrix[j][k] >= gamma and kl_matrix[j][k] < delta:
                        latent_merge_index.append(k)
                        si_value.append(kl_matrix[j][k])
                
                
                
                if len(latent_merge_index) >= 2:
                    sl = zip(latent_merge_index, si_value)
                    sl = sorted(sl, key = itemgetter(1), reverse=True)
                    latent_merge_index = []
                
                    m_count = 0
                    for each in sl:
                        latent_merge_index.append(each[0])
                        m_count += 1
                    
                        if m_count >= 3:
                            break
                    
                    Z = np.zeros(len(all_topic_batch[i][0]))
                    all_intensity = 0
                    for each in latent_merge_index:
                        Z += previous_topics[each] * previous_intensity[each]
                        all_intensity += previous_intensity[each]
                    
                    Z = Z / all_intensity
                    related = 1.0 / (SKLD(all_topic_batch[i][j], Z) + 1.0)
                    
                    if related > delta:
                        for each in latent_merge_index:
                            evolution_matrix[start_index + each, end_index + j] = 3
                            evolution_matrix[end_index + j, start_index + each] = 3
            # Detect splitting
            if len(kl_matrix) > 1: 
                for j in range(len(kl_matrix[0])):
                    latent_split_index = []
                    for k in range(len(kl_matrix)):
                        if kl_matrix[k][j] >= gamma and kl_matrix[k][j] < delta:
                            latent_split_index.append(k)
                
                    if len(latent_split_index) >= 2:
                        Z = np.zeros(len(all_topic_batch[i][0]))
                        all_intensity = 0
                        for each in latent_split_index:
                            Z += all_topic_batch[i][each] * this_topic_intensity[each]
                            all_intensity += this_topic_intensity[each]
                    
                        Z = Z / all_intensity
                        related = 1.0 / (SKLD(previous_topics[j], Z) + 1.0)
                    
                        if related > delta:
                            for each in latent_split_index:
                                evolution_matrix[start_index + j, end_index + each] = 4
                                evolution_matrix[end_index + each, start_index + j] = 4     
            
            for j in range(len(all_topic_batch[i])):
                previous_topics.append(all_topic_batch[i][j])
                previous_intensity.append(this_topic_intensity[j])
            
            previous_num.append(len(all_topic_batch[i]))
            
            if len(previous_num) > q:
                start_index += previous_num[0]
                for l in range(previous_num[0]):
                    previous_topics.remove(previous_topics[0])
                    previous_intensity.remove(previous_intensity[0])
                
                previous_num.remove(previous_num[0])
                
            
            end_index += len(all_topic_batch[i])
        
        write_matrix_to_text(evolution_matrix, write_directory1 + '/' + str(i + 1) + '.txt')        
        print "Evolution %d Completed." % (i + 1)