def test():
    """Time the greedy replace-search for several subset sizes and save the results.

    For each size in (40, 60, 80, 100) a ``Greedy`` instance is built with
    threshold 0.1555 and ``SearchWithReplace`` is run.  The elapsed wall-clock
    time, the value of ``AttributeRepresentative`` for the selected profiles,
    and the tab-separated profiles are written to ``<size>GB_results``.
    """
    for subset_size in (40, 60, 80, 100):
        started = time.time()
        method = Greedy(
            subset_size,
            datapre.Features(),
            datapre.CategoriesDistribution(),
            0.1555,
        )
        # Alternative search strategies, currently disabled:
        # profiles = method.SearchWithoutConstraints()
        # profiles = method.SearchWithConstraints()
        profiles = method.SearchWithReplace()
        # print len(profiles)
        elapsed = time.time() - started

        # Write the results to a file.
        with open("%dGB_results" % subset_size, "wb") as out:
            out.write("cost %f s" % elapsed)
            out.write("\n")
            out.write("Attribute Representativeness is:")
            out.write(str(method.AttributeRepresentative(profiles)))
            out.write("\n")
            # Every profile is followed by a tab, including the last one.
            out.writelines(profile + "\t" for profile in profiles)

# test()
def ProfileDomainDistribution(profiles):
    """Count how many of the given profiles belong to each domain category.

    The category of a profile is read from position 5 of its entry in
    ``datapre.Features()``.

    :param profiles: iterable of keys into the mapping returned by
        ``datapre.Features()``.
    :return: list of nine counts, in the order Politics, Religion, Military,
        Education, Economy, Technology, Agriculture, Sports, Entertainment.
        Profiles whose category matches none of these are not counted.
    """
    features = datapre.Features()
    categories = [
        'Politics', 'Religion', 'Military', 'Education', 'Economy',
        'Technology', 'Agriculture', 'Sports', 'Entertainment'
    ]
    # Look up each profile's category once, then tally per category.
    domains = [features[profile][5] for profile in profiles]
    return [
        sum(1 for domain in domains if domain == category)
        for category in categories
    ]
def test():
    """Compare GB, k-medoids and SA subsets by classification accuracy.

    The data returned by ``datapre.Features()`` is split into a training and
    a test set by ``Classifier.Split``.  For every threshold in
    (0.1560, 0.1556, 0.1555) and every subset size in (40, 60, 80, 100), each
    of the three methods selects a subset from the training set, and the
    accuracy returned by ``Classifier.Classify`` on the test set is written
    to a file named after the threshold (``"%.4f"``) and echoed to stdout.
    """
    method = Classifier(datapre.Features())
    train_set, test_set = method.Split()
    print("数据集分割完成")
    print("训练集和测试集数量为:%d,%d" % (len(train_set), len(test_set)))

    # Each of the three methods picks representative users from train_set;
    # those representatives are then used to classify test_set.
    epsilons = [0.1560, 0.1556, 0.1555]

    # Disabled baseline: also classify with the top 100 users found by PageRank.
    # PageRank_method = pr.PageRank(40,train_set,datapre.GetUserCategory())
    # # Get the in/out-degree matrix
    # uMatrix = PageRank_method.GetUserMatrix()
    # # Transition matrix
    # fMatrix = mat([(1 - 0.85) / len(train_set.keys()) for i in range(len(train_set.keys()))]).T
    # # Initial matrix
    # initPRMatrix = mat([1 for i in range(len(train_set.keys()))]).T
    # # The result holds the influence scores
    # PRMatrix = PageRank_method.PageRank(uMatrix,fMatrix,0.85,initPRMatrix,0.01,120)
    # user_ids = train_set.keys()
    # uPR = {}
    # for i,id in zip(range(len(user_ids)),user_ids):
    #     uPR[id] = PRMatrix[i]
    # # Sort uPR
    # uPR = sorted(uPR.items(),key = lambda dic:dic[1],reverse=True)
    # profiles = [u[0] for u in uPR[:100]]
    # print "PageRank的分类准确性为%.3f" % method.Classify(profiles,test_set)
    # return

    # epsilons = [0.1556,0.1555]
    # epsilons = [0.1560]
    # init.InitialMatrix(train_set)
    subset_sizes = [40, 60, 80, 100]
    print("开始抽取代表性用户")
    for epsilon in epsilons:
        with open("%.4f" % epsilon, "wb") as report:
            for k in subset_sizes:
                # Run all three searches first, announcing each as it finishes.
                gb_profiles = greedy.Greedy(
                    k, train_set, datapre.CategoriesDistribution(), epsilon
                ).SearchWithReplace()
                print("GB方法计算完成")
                kmedoids_profiles = kmediods.KMedoids(
                    k, train_set, datapre.CategoriesDistribution(), epsilon
                ).Search()
                print("kmedoids方法计算完成")
                sa_profiles = sa.SAalgo(
                    k, train_set, datapre.CategoriesDistribution(), epsilon,
                    0.3, 10, 0.9
                ).Search()
                print("sa方法计算完成")

                # Classify with each subset and record the accuracy; every
                # report line is formatted once and reused for the echo below.
                gb_accuracy = method.Classify(gb_profiles, test_set)
                gb_line = "方法:GB; 典型阈值:%f; 代表性子集数量:%d; 准确率:%.3f \n" % (epsilon, k, gb_accuracy)
                report.write(gb_line)

                kmedoids_accuracy = method.Classify(kmedoids_profiles, test_set)
                kmedoids_line = "方法:kmedoids; 典型阈值:%f; 代表性子集数量:%d; 准确率:%.3f \n" % (epsilon, k, kmedoids_accuracy)
                report.write(kmedoids_line)

                sa_accuracy = method.Classify(sa_profiles, test_set)
                sa_line = "方法:SA; 典型阈值:%f; 代表性子集数量:%d; 准确率:%.3f \n" % (epsilon, k, sa_accuracy)
                report.write(sa_line)

                print(gb_line)
                print(kmedoids_line)
                print(sa_line)
def test():
    """Time the k-medoids search for several subset sizes and save the results.

    For each size in (40, 60, 80, 100) a ``KMedoids`` instance is built with
    threshold 0.1555 and ``Search`` is run.  The attribute representativeness
    (``metric.AttributeRepresentative``), the selected profiles and the
    elapsed time are printed, then written to ``<size>clustering_result``.
    """
    features = datapre.Features()
    for cluster_count in (40, 60, 80, 100):
        started = time.time()
        method = KMedoids(
            cluster_count,
            datapre.Features(),
            datapre.CategoriesDistribution(),
            0.1555,
        )
        profiles = method.Search()
        finished = time.time()

        print(metric.AttributeRepresentative(features, profiles))
        print(profiles)
        cost_text = "cost %f s" % (finished - started)
        print(cost_text)

        with open("%dclustering_result" % cluster_count, "wb") as out:
            out.write(cost_text)
            out.write("\n")
            out.write("Attribute Representativeness is:")
            out.write(str(metric.AttributeRepresentative(features, profiles)))
            out.write("\n")
            # Every profile is followed by a tab, including the last one.
            out.writelines(profile + "\t" for profile in profiles)

# test()
def _timed_search_to_file(path, features, search):
    """Run one search, time it, and write its summary to *path*.

    :param path: name of the result file to create (opened with mode "wb").
    :param features: feature mapping passed to
        ``metric.AttributeRepresentative`` together with the search result.
    :param search: zero-argument callable that builds the method object and
        returns the selected profiles; the whole call is timed.
    :return: the profiles returned by *search*.
    """
    start_time = time.time()
    profiles = search()
    elapsed = time.time() - start_time

    with open(path, "wb") as f:
        f.write("cost %f s" % elapsed)
        # Separate the timing from the next field with a newline, as the
        # stand-alone test() drivers in this file do; without it the two
        # fields run together on one line.
        f.write("\n")
        f.write("Attribute Representativeness is:")
        f.write(str(metric.AttributeRepresentative(features, profiles)))
        f.write("\n")
        # Every profile is followed by a tab, including the last one.
        for profile in profiles:
            f.write(profile + "\t")
    return profiles


def Run():
    """Run the GB, k-medoids and SA searches over all thresholds and sizes.

    For every threshold in (0.1560, 0.1556, 0.1555) and every subset size in
    (40, 60, 80, 100), each method is run on ``datapre.Features()`` and its
    elapsed time, attribute representativeness and tab-separated profiles are
    written to ``GB<n>_<eps>``, ``kmedoids<n>_<eps>`` and ``sa<n>_<eps>``
    respectively, where ``<eps>`` is the threshold formatted with ``%.4f``.
    """
    features = datapre.Features()
    epsilons = [0.1560, 0.1556, 0.1555]
    number = [40, 60, 80, 100]

    # (file-name prefix, factory) pairs; each factory builds the method object
    # and runs its search so that construction is included in the timing.
    searches = [
        ("GB", lambda n, epsilon: greedy.Greedy(
            n, features, datapre.CategoriesDistribution(), epsilon
        ).SearchWithReplace()),
        ("kmedoids", lambda n, epsilon: kmedoids.KMedoids(
            n, features, datapre.CategoriesDistribution(), epsilon
        ).Search()),
        ("sa", lambda n, epsilon: sa.SAalgo(
            n, features, datapre.CategoriesDistribution(), epsilon,
            0.3, 10, 0.9
        ).Search()),
    ]

    for epsilon in epsilons:
        for n in number:
            for prefix, factory in searches:
                path = prefix + "%d_%.4f" % (n, epsilon)
                # n/epsilon/factory are bound as defaults so the callable
                # does not depend on the loop variables after this iteration.
                _timed_search_to_file(
                    path,
                    features,
                    lambda n=n, epsilon=epsilon, factory=factory: factory(n, epsilon),
                )