def test():
    """Time the Greedy search for several sizes and dump each run to a file.

    For every value in (40, 60, 80, 100) a ``Greedy`` instance is built from
    ``datapre.Features()`` and ``datapre.CategoriesDistribution()`` with the
    fixed fourth argument 0.1555, and ``SearchWithReplace()`` is timed.
    The value is passed as the first constructor argument (presumably the
    number of profiles to select -- confirm against ``Greedy``).

    Each run writes ``<value>GB_results`` containing the elapsed seconds, the
    value of ``AttributeRepresentative(profiles)`` and the tab-terminated
    profiles.
    """
    for size in (40, 60, 80, 100):
        began = time.time()
        searcher = Greedy(size, datapre.Features(),
                          datapre.CategoriesDistribution(), 0.1555)
        # SearchWithoutConstraints() and SearchWithConstraints() were left in
        # the original as commented-out alternatives to this call.
        profiles = searcher.SearchWithReplace()
        finished = time.time()

        # Write the results to a file.
        with open("%dGB_results" % size, "wb") as out:
            out.write("cost %f s" % (finished - began))
            out.write("\n")
            out.write("Attribute Representativeness is:")
            out.write(str(searcher.AttributeRepresentative(profiles)))
            out.write("\n")
            out.writelines(profile + "\t" for profile in profiles)


# test()
def ProfileDomainDistribution(profiles):
    """Count how many of ``profiles`` belong to each of nine fixed categories.

    The category of a profile is read from ``datapre.Features()[profile][5]``
    (index 5 is presumably the domain field -- confirm against ``datapre``).

    Returns a list of integer counts in this order: Politics, Religion,
    Military, Education, Economy, Technology, Agriculture, Sports,
    Entertainment.  Profiles whose value matches none of them are not counted.
    """
    features = datapre.Features()
    categories = [
        'Politics', 'Religion', 'Military', 'Education', 'Economy',
        'Technology', 'Agriculture', 'Sports', 'Entertainment'
    ]
    # Look up every profile's category once, then tally per category.
    domains = [features[profile][5] for profile in profiles]
    return [
        sum(1 for domain in domains if domain == category)
        for category in categories
    ]
Example #3
0
def test():
    """Compare GB, k-medoids and SA representatives as classifiers.

    The features are split into a training and a test set with
    ``Classifier.Split()``.  For every threshold and every subset size, each
    of the three methods extracts representative profiles from the training
    set, and ``Classifier.Classify`` scores them against the test set.

    One report file per threshold (named ``"%.4f" % threshold``) receives the
    three accuracy lines of every subset size; the same lines are echoed to
    stdout afterwards.
    """
    # A parenthesised single-argument print is still a valid Python 2 print
    # statement, so the console output is unchanged.
    classifier = Classifier(datapre.Features())
    train_set, test_set = classifier.Split()
    print("数据集分割完成")
    print("训练集和测试集数量为:%d,%d" % (len(train_set), len(test_set)))

    # Each method searches train_set for representative users, which are then
    # used to classify test_set.
    thresholds = [0.1560, 0.1556, 0.1555]

    # Disabled baseline: also classify with the 100 users ranked highest by
    # PageRank.
    # PageRank_method = pr.PageRank(40, train_set, datapre.GetUserCategory())
    # # In/out-degree matrix
    # uMatrix = PageRank_method.GetUserMatrix()
    # # Transition matrix
    # fMatrix = mat([(1 - 0.85) / len(train_set.keys()) for i in range(len(train_set.keys()))]).T
    # # Initial matrix
    # initPRMatrix = mat([1 for i in range(len(train_set.keys()))]).T
    # # PRMatrix holds the resulting influence scores
    # PRMatrix = PageRank_method.PageRank(uMatrix, fMatrix, 0.85, initPRMatrix, 0.01, 120)
    # user_ids = train_set.keys()
    # uPR = {}
    # for i, id in zip(range(len(user_ids)), user_ids):
    #     uPR[id] = PRMatrix[i]
    # # Sort uPR by score
    # uPR = sorted(uPR.items(), key=lambda dic: dic[1], reverse=True)
    # profiles = [u[0] for u in uPR[:100]]
    # print "PageRank的分类准确性为%.3f" % classifier.Classify(profiles, test_set)
    # return
    # thresholds = [0.1556, 0.1555]
    # thresholds = [0.1560]
    # init.InitialMatrix(train_set)

    subset_sizes = [40, 60, 80, 100]
    print("开始抽取代表性用户")
    for threshold in thresholds:
        with open("%.4f" % threshold, "wb") as report:
            for size in subset_sizes:
                gb_profiles = greedy.Greedy(
                    size, train_set, datapre.CategoriesDistribution(),
                    threshold).SearchWithReplace()
                print("GB方法计算完成")
                kmedoids_profiles = kmediods.KMedoids(
                    size, train_set, datapre.CategoriesDistribution(),
                    threshold).Search()
                print("kmedoids方法计算完成")
                sa_profiles = sa.SAalgo(
                    size, train_set, datapre.CategoriesDistribution(),
                    threshold, 0.3, 10, 0.9).Search()
                print("sa方法计算完成")

                # Classify and record one method at a time, so a line is on
                # file before the next classification starts.
                gb_line = "方法:GB; 典型阈值:%f; 代表性子集数量:%d; 准确率:%.3f \n" % (
                    threshold, size,
                    classifier.Classify(gb_profiles, test_set))
                report.write(gb_line)
                kmedoids_line = "方法:kmedoids; 典型阈值:%f; 代表性子集数量:%d; 准确率:%.3f \n" % (
                    threshold, size,
                    classifier.Classify(kmedoids_profiles, test_set))
                report.write(kmedoids_line)
                sa_line = "方法:SA; 典型阈值:%f; 代表性子集数量:%d; 准确率:%.3f \n" % (
                    threshold, size,
                    classifier.Classify(sa_profiles, test_set))
                report.write(sa_line)

                # Echo the very same lines to the console.
                print(gb_line)
                print(kmedoids_line)
                print(sa_line)
def test():
    """Time the KMedoids search for several sizes; print and save each run.

    For every value in (40, 60, 80, 100) a ``KMedoids`` instance is built from
    ``datapre.Features()`` and ``datapre.CategoriesDistribution()`` with the
    fixed fourth argument 0.1555, and ``Search()`` is timed.

    The value of ``metric.AttributeRepresentative``, the profiles and the
    elapsed time are printed, then written to ``<value>clustering_result``.
    """
    # A parenthesised single-argument print is still a valid Python 2 print
    # statement, so the console output is unchanged.
    features = datapre.Features()
    for size in (40, 60, 80, 100):
        began = time.time()
        clusterer = KMedoids(size, datapre.Features(),
                             datapre.CategoriesDistribution(), 0.1555)
        profiles = clusterer.Search()
        # Format the timing once; it is shown on screen and stored on disk.
        cost_text = "cost %f s" % (time.time() - began)

        print(metric.AttributeRepresentative(features, profiles))
        print(profiles)
        print(cost_text)

        with open("%dclustering_result" % size, "wb") as out:
            out.write(cost_text)
            out.write("\n")
            out.write("Attribute Representativeness is:")
            out.write(str(metric.AttributeRepresentative(features, profiles)))
            out.write("\n")
            out.writelines(profile + "\t" for profile in profiles)


# test()
def _write_run_report(path, elapsed, features, profiles):
    """Write one search result to ``path``.

    The file holds the elapsed time on the first line, the value of
    ``metric.AttributeRepresentative(features, profiles)`` on the second, and
    the tab-terminated profiles on the third.

    Args:
        path: Name of the file to create (opened with mode "wb").
        elapsed: Search duration in seconds.
        features: Feature data handed to ``metric.AttributeRepresentative``.
        profiles: Iterable of profile strings returned by the search.
    """
    with open(path, "wb") as f:
        f.write("cost %f s" % elapsed)
        # Terminate the cost line so the metric starts on its own line, as in
        # the result files written by the test() helpers.
        f.write("\n")
        f.write("Attribute Representativeness is:")
        f.write(str(metric.AttributeRepresentative(features, profiles)))
        f.write("\n")
        for profile in profiles:
            f.write(profile + "\t")


def Run():
    """Run the GB, k-medoids and SA searches for every threshold/size pair.

    For each epsilon in (0.1560, 0.1556, 0.1555) and each n in
    (40, 60, 80, 100) the three searches are timed one after another on the
    same ``datapre.Features()`` data.  Every search writes its own report
    file: ``GB<n>_<epsilon>``, ``kmedoids<n>_<epsilon>`` and
    ``sa<n>_<epsilon>``, with epsilon formatted to four decimals.
    """
    features = datapre.Features()
    epsilons = [0.1560, 0.1556, 0.1555]
    number = [40, 60, 80, 100]
    for epsilon in epsilons:
        for n in number:
            # Timing covers constructing the searcher and running it; the
            # clock stops before the report file is opened.
            start_time = time.time()
            profile1 = greedy.Greedy(n, features,
                                     datapre.CategoriesDistribution(),
                                     epsilon).SearchWithReplace()
            elapsed = time.time() - start_time
            _write_run_report("GB%d_%.4f" % (n, epsilon), elapsed,
                              features, profile1)

            start_time = time.time()
            profile2 = kmedoids.KMedoids(n, features,
                                         datapre.CategoriesDistribution(),
                                         epsilon).Search()
            elapsed = time.time() - start_time
            _write_run_report("kmedoids%d_%.4f" % (n, epsilon), elapsed,
                              features, profile2)

            start_time = time.time()
            profile3 = sa.SAalgo(n, features, datapre.CategoriesDistribution(),
                                 epsilon, 0.3, 10, 0.9).Search()
            elapsed = time.time() - start_time
            _write_run_report("sa%d_%.4f" % (n, epsilon), elapsed,
                              features, profile3)