def get_paperinfo():
    """Build a fixture dataset: four Clustering records plus two Reads records.

    The Clustering rows are papers paper_1..paper_4 with constant 5-dim
    vectors i/10; the Reads rows are two users' read histories.
    """
    records = [
        Clustering(bibcode='paper_%s' % str(idx), vector_low=[float(idx) / 10] * 5)
        for idx in range(1, 5)
    ]
    records.append(Reads(cookie='u1', reads=['ppr1'] * 13 + ['ppr2'] * 7))
    records.append(Reads(cookie='u2', reads=['ppr2'] * 53 + ['ppr3'] * 17))
    return records
def clusterFromData(dataset):
    """Cluster each valid row's points into 5 groups and prepend the row label.

    Rows with 5 or fewer entries are skipped as malformed.  Each output entry
    is [label, c0, ..., c4] where the five cluster centres are sorted
    ascending and label is the row's first element.
    """
    results = []
    for row in dataset:
        if len(row) <= 5:  # drop rows that do not satisfy the format
            continue
        centres, _belong = Clustering(row[1:], 5)
        centres.sort()
        centres.append(row[0])  # grow the list by one slot for the label
        # shift everything right so the label can occupy index 0
        for pos in range(5, 0, -1):
            centres[pos] = centres[pos - 1]
        centres[0] = row[0]
        results.append(centres)
    return results
# Hyper Parameter settings cf.use_cuda = torch.cuda.is_available() trainloader, testloader, validloader, encoded_trainloader, encoded_testloader, encoded_validloader = get_all_dataLoders( args, valid=True, one_hot=True) args.num_classes = 10 if args.dataset == 'cifar-10' else 100 # Data class Preparation Class_generator = {} for i in range(100): Class_generator[i] = i # Model print('\nModel setup') net = Expert_gate(args) function = Clustering(args) Expert = {} Autoencoder = {} Superclass = {} Old_superclass = {} def save_and_load(Superclass, Autoencoder, Old_superclass, num=0, load=False): save_point = cf.var_dir + args.dataset if not load: if (num + 1) % 10 == 0 and (num + 1) != 0: torch.save(Superclass, save_point + '/classes' + str(num) + '_Superclass.pkl') torch.save( Old_superclass, save_point + '/classes' + str(num) + '_old_Superclass.pkl')
from models import Clustering,get_dataset,criterion
from django.test import TestCase

# Gather every record stored under the key (4, 2018, 2), then drop the
# key tuple itself so only the data points remain.
testdata = []
for item in get_dataset():
    if item[0] == (4, 2018, 2):
        testdata.extend(item)
        testdata.remove((4, 2018, 2))

# Find the best cluster count with the elbow method: evaluate the WSS
# error for k = 1..maxk and pick the k with the largest drop.
maxk = 9  # largest cluster count tested
WSSarray = []  # WSS error for each k, length maxk
for k in range(1, maxk + 1):
    centres, belong = Clustering(testdata, k)
    WSSarray.append(criterion(testdata, centres, belong))

# NOTE(review): `np` is not imported in this chunk — presumably imported
# elsewhere in the file; verify.
WSSDelta = list(np.ones(maxk))  # per-k decrease of WSS
WSSDelta[0] = 0
maxDelta = -1
indexDelta = 0
for k in range(1, maxk):
    WSSDelta[k] = WSSarray[k - 1] - WSSarray[k]
    if WSSDelta[k] > maxDelta:
        maxDelta = WSSDelta[k]
        indexDelta = k + 1  # candidate best cluster count
# Fixed label -> color mapping for the 5 score clusters.
_CLUSTER_COLORS = ('grey', 'gold', 'turquoise', 'plum', 'lawngreen')


def _year_data(user, year):
    """Collect the (score, count) records for (province, year, track).

    Returns (flag, testdata, xs, ys):
      flag     -- 1 if any data exists for the year, else 0
      testdata -- raw records with the key tuple removed (used for clustering)
      xs, ys   -- scores / student counts with negative-count rows dropped
                  pairwise.  This fixes the original delete-while-iterating
                  bug: iterating `y` while `del`-ing from it skipped adjacent
                  negatives, and `y.index(items)` could delete the wrong
                  position when counts repeat.
    """
    key = (user[0], year, user[1])
    testdata = []
    for item in get_dataset():
        if item[0] == key:
            for rec in item:
                testdata.append(rec)
            testdata.remove(key)
    if len(testdata) == 0:
        return 0, testdata, [], []
    pairs = [(rec[0], rec[1]) for rec in testdata if rec[1] >= 0]
    return 1, testdata, [p[0] for p in pairs], [p[1] for p in pairs]


def _colorlist(labels):
    """Map cluster labels onto the fixed palette; labels outside 0..4 are skipped (original behavior)."""
    return [_CLUSTER_COLORS[lab] for lab in labels if 0 <= lab <= 4]


def _plot_year(xs, ys, colors, score, title, path):
    """Save a color-coded bar chart of one year's score distribution."""
    plt.figure()
    plt.title(title)
    plt.xlabel('scores')
    plt.ylabel('numbers of students')
    plt.bar(xs, ys, color=colors, alpha=0.8)
    # NOTE(review): kept from the original — draws *all* counts at the single
    # x position `score` to highlight the user's score; confirm this is the
    # intended highlight rather than a single bar.
    plt.bar(score, ys, color='red', alpha=0.8)
    plt.savefig(path)


def _save_text_image(text, path, width):
    """Render `text` onto a white image of the given pixel width and save it."""
    font = ImageFont.truetype("font.ttf", 18)
    line_height = font.getsize(text)[1]
    im = Image.new("RGB", (width, line_height * 2), (255, 255, 255))
    ImageDraw.Draw(im).text((5, 5), text, font=font, fill="#000000")
    im.save(path)


def application1(aa, bb, cc):
    """Interactive pipeline: per-year score charts plus a college recommendation.

    Reads province id, subject-track id and score from stdin.  For each of
    2017-2019 it pulls that year's (score, count) records, clusters them
    into 5 color-coded groups and saves a bar chart as picN.jpg (or a
    "data missing" placeholder image when the year has no records).  It
    then matches the score against the newcen1 segment centres and renders
    the recommended colleges from datasetCOLLEGE into commandtest.jpg.

    aa/bb/cc are accepted for interface compatibility but unused; all
    input comes from input().  Returns None; side effects are prints and
    image files.
    """
    # province id, track (1 science / 2 arts / 3 mixed), score
    user = [int(input('输入省份id:')), int(input('文理id:')), int(input('分数:'))]
    print(user)
    myinput = user

    years = (2017, 2018, 2019)
    per_year = [_year_data(user, yr) for yr in years]

    # Cluster each available year and derive bar colors from the labels.
    colorlists = []
    for (flag, testdata, _xs, _ys), yr in zip(per_year, years):
        if flag == 1:
            belong = Clustering(testdata, 5)
            # NOTE(review): colors follow the *unfiltered* testdata while the
            # bars use the negative-filtered xs/ys — lengths can diverge when
            # negatives were dropped; confirm against Clustering's contract.
            colorlists.append(_colorlist(belong[1]))
        else:
            colorlists.append(None)
            print("%d年数据缺失" % yr)

    # Emit pic1.jpg / pic2.jpg / pic3.jpg (chart or "missing" placeholder).
    for idx, ((flag, _td, xs, ys), colors, yr) in enumerate(
            zip(per_year, colorlists, years), start=1):
        path = 'pic%d.jpg' % idx
        if flag == 1:
            _plot_year(xs, ys, colors, user[2], '%d scores distribution' % yr, path)
        else:
            _save_text_image("%d年数据缺失" % yr, path, 200)

    # ----- college recommendation (2020.9) -----
    # datasetCOLLEGE / newcen1 are module-level tables defined elsewhere.
    collegelist = [row for row in datasetCOLLEGE if row[0:2] == myinput[0:2]]
    for cen in newcen1:
        if cen[0] == myinput[0] and cen[1] == myinput[1]:
            # distance from the user's score to each of the 5 segment centres
            distances = [abs(myinput[2] - cen[i]) for i in range(2, 7)]
            # argmin (replaces the original minnum=1000 sentinel, which
            # silently picked segment 0 when every distance was >= 1000)
            minj = min(range(5), key=distances.__getitem__)
            target = cen[minj + 2]
            str1 = "你属于第" + str(minj + 1) + "分段,"
            # rank colleges by closeness of their score (row[3]) to the target
            # NOTE(review): appends a sort key per matching centre row; if
            # newcen1 can match more than once, rows accumulate stale keys —
            # confirm matches are unique.
            for row in collegelist:
                row.append(abs(row[3] - target))
            collegelist.sort(key=lambda row: row[4])
            strlist = [str1, "为你推荐的大学是:"]
            for i in range(1, 7, 2):
                strlist.append(str(collegelist[i][2]) + ' ')
            strsend = ''.join(strlist)
            print(strsend)
            _save_text_image(strsend, "commandtest.jpg", 900)
# Plot the accuracy curves collected during training.
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.show()

# Model
print('\nModel setup')
net = HD_CNN(args)  # hierarchical coarse/fine CNN
if cf.use_cuda:
    net.cuda()
    # the fine-category heads are separate modules; move each to the GPU
    for i in range(args.num_superclasses):
        net.fines[i].cuda()
    cudnn.benchmark = True  # let cuDNN auto-tune conv algorithms
function = Clustering(args)  # clustering helper


# Pre-Training
def pretrain_coarse(epoch):
    """Run one epoch of coarse-classifier pre-training (shared trunk + coarse head only).

    NOTE(review): this definition is truncated in this chunk — the forward
    pass, loss and optimizer.step() continue past the visible text.
    """
    net.share.train()
    net.coarse.train()
    # optimize only the shared trunk and the coarse head
    param = list(net.share.parameters())+list(net.coarse.parameters())
    optimizer, lr = get_optim(param, args, mode='preTrain', epoch=epoch)
    print('\n==> Epoch #%d, LR=%.4f' % (epoch, lr))
    for batch_idx, (inputs, targets) in enumerate(pretrainloader):
        if cf.use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()  # GPU setting
        optimizer.zero_grad()
        # legacy (pre-0.4 PyTorch) Variable wrapper
        inputs, targets = Variable(inputs), Variable(targets)