Ejemplo n.º 1
0
        ] * 10  #stores the sse metric for each number of clusters from 5 to 50
# Experiment driver: run k-means for numclusters = 5, 10, ..., 50 and store
# one SSE value per cluster count in `sses` (defined before this fragment).
num_users = 100  # number of profiles generated for each run
numsse = 0  # index of the next slot of `sses` to fill
numclusters = 5  # first cluster count to evaluate
max_iterations = 10  # forwarded to HDgetclusters as its second argument
start_time = datetime.datetime.now()
while numclusters <= 50:  # numclusters advances in steps of 5
    # A fresh set of profiles (the items being clustered) is generated
    # for every cluster count.
    users = [createProfile() for _ in range(num_users)]
    print(" inicializing kmeans...")
    cl = KMeansClustering(users, HDdistItems, HDequals)
    print(" executing...", numclusters)
    st = datetime.datetime.now()  # timestamp taken just before clustering
    print(st)
    solution = cl.HDgetclusters(numclusters, max_iterations)
    # Report the centroid of each of the first `numclusters` clusters.
    for i in range(numclusters):
        print(util.HDcentroid(solution[i]), ",")

    sses[numsse] = HDcomputeSSE(solution, numclusters)
    numsse += 1
    numclusters += 5
end_time = datetime.datetime.now()
print("start_time:", start_time)
print("end_time:", end_time)
print("sses:", sses)
Ejemplo n.º 2
0
# Experiment driver: run k-means for numclusters = 5, 10, ..., 50 and store
# one SSE value per cluster count in `sses`.
# NOTE(review): `ts`, `numclusters`, `num_users`, `numsse`, `sses` and
# `max_iteraciones` are not assigned in this fragment before they are used;
# confirm they are set earlier in the script (e.g. `ts = time.time()`).
# The print calls pass a single pre-formatted string so the output is the
# same whether this runs under Python 2 or Python 3.
start_time = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
while numclusters <= 50:  # numclusters advances in steps of 5
    # A fresh set of profiles (the items being clustered) is generated
    # for every cluster count.
    users = [createProfile() for _ in range(num_users)]
    print(" inicializing kmeans...")
    cl = KMeansClustering(users, HDdistItems, HDequals)
    print(" executing... %s" % (numclusters,))
    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
    print(st)  # timestamp taken just before clustering
    solution = cl.HDgetclusters(numclusters, max_iteraciones)
    # Report the centroid of each of the first `numclusters` clusters.
    for i in range(numclusters):
        print("%s ," % (util.HDcentroid(solution[i]),))

    sses[numsse] = HDcomputeSSE(solution, numclusters)
    numsse += 1
    numclusters += 5
ts = time.time()
end_time = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
print("start_time: %s" % (start_time,))
print("end_time: %s" % (end_time,))
print("sses: %s" % (sses,))