def get_paperinfo():
    """Build a fixture dataset: four Clustering records plus two Reads records.

    The Clustering rows are papers paper_1..paper_4 with constant 5-dim
    vectors i/10; the Reads rows are two users' read histories.
    """
    records = [
        Clustering(bibcode='paper_%s' % str(idx), vector_low=[float(idx) / 10] * 5)
        for idx in range(1, 5)
    ]
    records.append(Reads(cookie='u1', reads=['ppr1'] * 13 + ['ppr2'] * 7))
    records.append(Reads(cookie='u2', reads=['ppr2'] * 53 + ['ppr3'] * 17))
    return records
def clusterFromData(dataset):
    """Cluster each valid row's points into 5 groups and prepend the row label.

    Rows with 5 or fewer entries are skipped as malformed.  Each output entry
    is [label, c0, ..., c4] where the five cluster centres are sorted
    ascending and label is the row's first element.
    """
    results = []
    for row in dataset:
        if len(row) <= 5:  # drop rows that do not satisfy the format
            continue
        centres, _belong = Clustering(row[1:], 5)
        centres.sort()
        centres.append(row[0])  # grow the list by one slot for the label
        # shift everything right so the label can occupy index 0
        for pos in range(5, 0, -1):
            centres[pos] = centres[pos - 1]
        centres[0] = row[0]
        results.append(centres)
    return results
# Hyper Parameter settings cf.use_cuda = torch.cuda.is_available() trainloader, testloader, validloader, encoded_trainloader, encoded_testloader, encoded_validloader = get_all_dataLoders( args, valid=True, one_hot=True) args.num_classes = 10 if args.dataset == 'cifar-10' else 100 # Data class Preparation Class_generator = {} for i in range(100): Class_generator[i] = i # Model print('\nModel setup') net = Expert_gate(args) function = Clustering(args) Expert = {} Autoencoder = {} Superclass = {} Old_superclass = {} def save_and_load(Superclass, Autoencoder, Old_superclass, num=0, load=False): save_point = cf.var_dir + args.dataset if not load: if (num + 1) % 10 == 0 and (num + 1) != 0: torch.save(Superclass, save_point + '/classes' + str(num) + '_Superclass.pkl') torch.save( Old_superclass, save_point + '/classes' + str(num) + '_old_Superclass.pkl')
from models import Clustering,get_dataset,criterion
from django.test import TestCase

# Gather every record stored under the key (4, 2018, 2), then drop the
# key tuple itself so only the data points remain.
testdata = []
for item in get_dataset():
    if item[0] == (4, 2018, 2):
        testdata.extend(item)
        testdata.remove((4, 2018, 2))

# Find the best cluster count with the elbow method: evaluate the WSS
# error for k = 1..maxk and pick the k with the largest drop.
maxk = 9  # largest cluster count tested
WSSarray = []  # WSS error for each k, length maxk
for k in range(1, maxk + 1):
    centres, belong = Clustering(testdata, k)
    WSSarray.append(criterion(testdata, centres, belong))

# NOTE(review): `np` is not imported in this chunk — presumably imported
# elsewhere in the file; verify.
WSSDelta = list(np.ones(maxk))  # per-k decrease of WSS
WSSDelta[0] = 0
maxDelta = -1
indexDelta = 0
for k in range(1, maxk):
    WSSDelta[k] = WSSarray[k - 1] - WSSarray[k]
    if WSSDelta[k] > maxDelta:
        maxDelta = WSSDelta[k]
        indexDelta = k + 1  # candidate best cluster count
# Fixed label -> color mapping for the 5 score clusters.
_CLUSTER_COLORS = ('grey', 'gold', 'turquoise', 'plum', 'lawngreen')


def _year_data(user, year):
    """Collect the (score, count) records for (province, year, track).

    Returns (flag, testdata, xs, ys):
      flag     -- 1 if any data exists for the year, else 0
      testdata -- raw records with the key tuple removed (used for clustering)
      xs, ys   -- scores / student counts with negative-count rows dropped
                  pairwise.  This fixes the original delete-while-iterating
                  bug: iterating `y` while `del`-ing from it skipped adjacent
                  negatives, and `y.index(items)` could delete the wrong
                  position when counts repeat.
    """
    key = (user[0], year, user[1])
    testdata = []
    for item in get_dataset():
        if item[0] == key:
            for rec in item:
                testdata.append(rec)
            testdata.remove(key)
    if len(testdata) == 0:
        return 0, testdata, [], []
    pairs = [(rec[0], rec[1]) for rec in testdata if rec[1] >= 0]
    return 1, testdata, [p[0] for p in pairs], [p[1] for p in pairs]


def _colorlist(labels):
    """Map cluster labels onto the fixed palette; labels outside 0..4 are skipped (original behavior)."""
    return [_CLUSTER_COLORS[lab] for lab in labels if 0 <= lab <= 4]


def _plot_year(xs, ys, colors, score, title, path):
    """Save a color-coded bar chart of one year's score distribution."""
    plt.figure()
    plt.title(title)
    plt.xlabel('scores')
    plt.ylabel('numbers of students')
    plt.bar(xs, ys, color=colors, alpha=0.8)
    # NOTE(review): kept from the original — draws *all* counts at the single
    # x position `score` to highlight the user's score; confirm this is the
    # intended highlight rather than a single bar.
    plt.bar(score, ys, color='red', alpha=0.8)
    plt.savefig(path)


def _save_text_image(text, path, width):
    """Render `text` onto a white image of the given pixel width and save it."""
    font = ImageFont.truetype("font.ttf", 18)
    line_height = font.getsize(text)[1]
    im = Image.new("RGB", (width, line_height * 2), (255, 255, 255))
    ImageDraw.Draw(im).text((5, 5), text, font=font, fill="#000000")
    im.save(path)


def application1(aa, bb, cc):
    """Interactive pipeline: per-year score charts plus a college recommendation.

    Reads province id, subject-track id and score from stdin.  For each of
    2017-2019 it pulls that year's (score, count) records, clusters them
    into 5 color-coded groups and saves a bar chart as picN.jpg (or a
    "data missing" placeholder image when the year has no records).  It
    then matches the score against the newcen1 segment centres and renders
    the recommended colleges from datasetCOLLEGE into commandtest.jpg.

    aa/bb/cc are accepted for interface compatibility but unused; all
    input comes from input().  Returns None; side effects are prints and
    image files.
    """
    # province id, track (1 science / 2 arts / 3 mixed), score
    user = [int(input('输入省份id:')), int(input('文理id:')), int(input('分数:'))]
    print(user)
    myinput = user

    years = (2017, 2018, 2019)
    per_year = [_year_data(user, yr) for yr in years]

    # Cluster each available year and derive bar colors from the labels.
    colorlists = []
    for (flag, testdata, _xs, _ys), yr in zip(per_year, years):
        if flag == 1:
            belong = Clustering(testdata, 5)
            # NOTE(review): colors follow the *unfiltered* testdata while the
            # bars use the negative-filtered xs/ys — lengths can diverge when
            # negatives were dropped; confirm against Clustering's contract.
            colorlists.append(_colorlist(belong[1]))
        else:
            colorlists.append(None)
            print("%d年数据缺失" % yr)

    # Emit pic1.jpg / pic2.jpg / pic3.jpg (chart or "missing" placeholder).
    for idx, ((flag, _td, xs, ys), colors, yr) in enumerate(
            zip(per_year, colorlists, years), start=1):
        path = 'pic%d.jpg' % idx
        if flag == 1:
            _plot_year(xs, ys, colors, user[2], '%d scores distribution' % yr, path)
        else:
            _save_text_image("%d年数据缺失" % yr, path, 200)

    # ----- college recommendation (2020.9) -----
    # datasetCOLLEGE / newcen1 are module-level tables defined elsewhere.
    collegelist = [row for row in datasetCOLLEGE if row[0:2] == myinput[0:2]]
    for cen in newcen1:
        if cen[0] == myinput[0] and cen[1] == myinput[1]:
            # distance from the user's score to each of the 5 segment centres
            distances = [abs(myinput[2] - cen[i]) for i in range(2, 7)]
            # argmin (replaces the original minnum=1000 sentinel, which
            # silently picked segment 0 when every distance was >= 1000)
            minj = min(range(5), key=distances.__getitem__)
            target = cen[minj + 2]
            str1 = "你属于第" + str(minj + 1) + "分段,"
            # rank colleges by closeness of their score (row[3]) to the target
            # NOTE(review): appends a sort key per matching centre row; if
            # newcen1 can match more than once, rows accumulate stale keys —
            # confirm matches are unique.
            for row in collegelist:
                row.append(abs(row[3] - target))
            collegelist.sort(key=lambda row: row[4])
            strlist = [str1, "为你推荐的大学是:"]
            for i in range(1, 7, 2):
                strlist.append(str(collegelist[i][2]) + ' ')
            strsend = ''.join(strlist)
            print(strsend)
            _save_text_image(strsend, "commandtest.jpg", 900)
# Plot the accuracy curves collected during training.
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.show()

# Model
print('\nModel setup')
net = HD_CNN(args)  # hierarchical coarse/fine CNN
if cf.use_cuda:
    net.cuda()
    # the fine-category heads are separate modules; move each to the GPU
    for i in range(args.num_superclasses):
        net.fines[i].cuda()
    cudnn.benchmark = True  # let cuDNN auto-tune conv algorithms
function = Clustering(args)  # clustering helper


# Pre-Training
def pretrain_coarse(epoch):
    """Run one epoch of coarse-classifier pre-training (shared trunk + coarse head only).

    NOTE(review): this definition is truncated in this chunk — the forward
    pass, loss and optimizer.step() continue past the visible text.
    """
    net.share.train()
    net.coarse.train()
    # optimize only the shared trunk and the coarse head
    param = list(net.share.parameters())+list(net.coarse.parameters())
    optimizer, lr = get_optim(param, args, mode='preTrain', epoch=epoch)
    print('\n==> Epoch #%d, LR=%.4f' % (epoch, lr))
    for batch_idx, (inputs, targets) in enumerate(pretrainloader):
        if cf.use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()  # GPU setting
        optimizer.zero_grad()
        # legacy (pre-0.4 PyTorch) Variable wrapper
        inputs, targets = Variable(inputs), Variable(targets)