예제 #1
0
    def populate(self):
        """Assign every point that was not in the k-means sample to a cluster.

        The labels of the sampled points (``self.sampleIdx``) are first copied
        into ``self.idx`` at their positions in the full data set
        (``self.m_sample_order_idx``).  Each remaining point is then placed
        greedily: it goes to the nearest centroid among the clusters that still
        hold fewer than ``self.minVolume`` points, or, when no such cluster is
        left, to the nearest centroid overall.

        Results are written to ``self.idx``; nothing is returned.
        """
        n_clusters = self.n_cluster

        # Map the label of each sampled point back to its index in the full
        # data set.
        for i in range(len(self.sampleIdx)):
            self.idx[self.m_sample_order_idx[i]] = self.sampleIdx[i]

        # Indices of the points that were not used by the sampled k-means.
        unSampleIdx = set(range(self.dataSize))
        for sampleIdx in self.m_sample_order_idx:
            unSampleIdx.remove(sampleIdx)

        # How many more points each cluster needs to reach minVolume.
        clusterList = super(balanced_kmeans, self).getClusters()
        availableSpot = [
            max(self.minVolume - len(clusterList[i]), 0)
            for i in range(n_clusters)
        ]

        centroids = super(balanced_kmeans, self).getCentroids()

        def nearest(instance, candidates):
            """Return the candidate cluster with the closest centroid, or -1."""
            # float("inf") instead of a magic number: a finite sentinel would
            # silently reject every cluster once distances grow past it.
            bestIdx, bestDist = -1, float("inf")
            for i in candidates:
                # Evaluate the distance once per centroid.
                dist = Calculator.calDistance(instance, centroids[i])
                if dist < bestDist:  # strict: the first of equal minima wins
                    bestIdx, bestDist = i, dist
            return bestIdx

        allClusters = range(n_clusters)
        for sampleIdx in unSampleIdx:
            instance = self.m_order[sampleIdx]

            # Prefer clusters that are still below the minimum size.
            openClusters = [i for i in allClusters if availableSpot[i] > 0]
            clusterIdx = nearest(instance, openClusters)
            if clusterIdx > -1:
                availableSpot[clusterIdx] -= 1
            else:
                # No under-filled cluster could take the point: fall back to
                # the nearest centroid regardless of cluster size.
                clusterIdx = nearest(instance, allClusters)

            if clusterIdx > -1:
                self.idx[sampleIdx] = clusterIdx
            else:
                # Only reachable when there are no clusters or no distance
                # compares below infinity (e.g. NaN).
                print("Warning")
예제 #2
0
 def findNearestCentroid(self, instance, centroids):
     """Return the index of the centroid closest to ``instance``.

     Distances are measured with ``Calculator.calDistance``.  When several
     centroids are equally close, the one with the lowest index is returned.

     :param instance: the point to look up
     :param centroids: indexable collection of centroids
     :return: index into ``centroids`` of the nearest one
     :raises ValueError: if ``centroids`` is empty
     """
     # len() rather than truthiness: ``centroids`` may be an array type for
     # which ``not centroids`` is ambiguous.
     if len(centroids) == 0:
         raise ValueError("centroids must not be empty")

     # Seed the search with the first centroid instead of a magic "large"
     # number, so a result is always defined however big the distances are.
     index = 0
     minDist = Calculator.calDistance(instance, centroids[0])
     for i in range(1, len(centroids)):
         distance = Calculator.calDistance(instance, centroids[i])
         if minDist > distance:
             minDist = distance
             index = i
     return index
예제 #3
0
 def execute(self):
     """Run the whole pipeline and return the resulting objective value.

     Steps, in order: the parent class's ``execute()``, then ``populate()``
     and ``refine()`` of this class.  The objective is finally evaluated by
     ``Calculator.calObjective`` on the resulting assignment ``self.idx``.

     :return: the value returned by ``Calculator.calObjective``
     """
     super(balanced_kmeans, self).execute()
     self.populate()
     self.refine()
     return Calculator.calObjective(self.quantityTopic,
                                    self.quantityInvoice,
                                    self.w_location, self.d_location,
                                    self.c_location, self.idx)
예제 #4
0
    def refine(self):
        """Move points out of over-sized clusters towards nearer centroids.

        Two steps are repeated until the centroids stop moving:

        1. For every cluster holding more than ``self.minVolume`` points, each
           of its points whose nearest centroid belongs to another cluster is
           reassigned to that cluster (``self.idx`` is updated).  Moving out
           of a cluster stops once its size is down to ``self.minVolume``.
        2. The centroids are recomputed from the updated clusters with
           ``updateCentroids_Refine``.

        The loop ends when the norm of the centroid change is at most
        ``epsilon`` (0.1).  The objective value is printed after every pass.

        Reads ``self.m_order``, ``self.minVolume`` and ``self.n_cluster``;
        writes ``self.idx`` and ``self.centroids``.  Nothing is returned.
        """
        epsilon = 0.1
        n_clusters = self.n_cluster
        m_orders = self.m_order
        minVolume = self.minVolume
        centroids = super(balanced_kmeans, self).getCentroids()
        clusterList = self.getClusters()

        while True:
            # Running cluster sizes; kept up to date while points are moved.
            numCluster = [len(clusterList[i]) for i in range(n_clusters)]

            for clusterIdx in range(n_clusters):
                if numCluster[clusterIdx] <= minVolume:
                    continue  # only clusters above the minimum may give points away
                for j in range(numCluster[clusterIdx]):
                    # numCluster may already count points moved in during this
                    # pass, while clusterList is only refreshed after the pass,
                    # so the counter can exceed the stored member list.
                    if j >= len(clusterList[clusterIdx]):
                        break
                    instanceIdx = clusterList[clusterIdx][j]
                    bestIndex = self.findNearestCentroid(
                        m_orders[instanceIdx], centroids)
                    if clusterIdx != bestIndex:
                        self.idx[instanceIdx] = bestIndex
                        numCluster[bestIndex] += 1
                        numCluster[clusterIdx] -= 1
                        if numCluster[clusterIdx] <= minVolume:
                            break

            # presumably getClusters() rebuilds the member lists from self.idx
            # - confirm against its definition.
            clusterList = self.getClusters()
            newCentroids = self.updateCentroids_Refine(m_orders, clusterList)
            print(
                'Objective value after refining:',
                Calculator.calObjective(self.quantityTopic,
                                        self.quantityInvoice, self.w_location,
                                        self.d_location, self.c_location,
                                        self.idx))

            shift = np.linalg.norm(centroids - newCentroids)
            # Always adopt the newly computed centroids, so the ones stored
            # below describe the final clusters rather than the previous pass.
            centroids = newCentroids
            if shift <= epsilon:
                break
        self.centroids = centroids
예제 #5
0
 def assignCluster(self):
     """Assign every sampled instance to its nearest centroid.

     For each of the ``self.sampleSize`` instances in
     ``self.m_sample_orders`` the closest of the ``self.n_cluster``
     centroids in ``self.centroids`` is determined with
     ``Calculator.calDistance`` and its index is stored in
     ``self.sampleIdx``.  Ties go to the centroid with the lowest index.

     :return: distortionValue, the sum over all instances of the squared
         distance to the assigned centroid
     """
     distortion = np.zeros(self.sampleSize)
     for i in range(self.sampleSize):
         # Infinity rather than a magic number: with a finite sentinel an
         # instance farther than that from every centroid would keep its
         # stale label and contribute zero distortion.
         min_distance = float("inf")
         for k in range(self.n_cluster):
             d = Calculator.calDistance(self.m_sample_orders[i],
                                        self.centroids[k])
             if d < min_distance:
                 min_distance = d
                 distortion[i] = d ** 2
                 self.sampleIdx[i] = k
     distortionValue = sum(distortion)
     return distortionValue
예제 #6
0
 def getObj(self):
     """Return the total distance of the sampled instances to their centroids.

     Adds up ``Calculator.calDistance`` between each of the first
     ``self.sampleSize`` entries of ``self.m_sample_orders`` and the centroid
     selected for it by ``self.sampleIdx``.
     """
     return sum(
         Calculator.calDistance(self.m_sample_orders[pos],
                                self.centroids[self.sampleIdx[pos]])
         for pos in range(self.sampleSize))
예제 #7
0
 def total_obj(self):
     """Return the total distance of all instances to their assigned centroids.

     Covers the first ``self.dataSize`` entries of ``self.m_order``; the
     centroid of each entry is looked up through ``self.idx``.
     """

     def distance_to_own_centroid(pos):
         # Distance between one instance and the centroid it is assigned to.
         return Calculator.calDistance(self.m_order[pos],
                                       self.centroids[self.idx[pos]])

     return sum(map(distance_to_own_centroid, range(self.dataSize)))
예제 #8
0
# Driver script: sweep alpha for several minVolume settings and plot the
# objective value returned by balanced k-means against alpha.

# Build the data set object from fileName.
# NOTE(review): this rebinds the name `dataSet` from the callable to the object
# it returns, so the callable is no longer reachable under this name afterwards.
dataSet = dataSet(fileName)
m_order = dataSet.get_order()
topicList = dataSet.get_topicList()
d_location = dataSet.get_dLocation()
w_location = dataSet.get_wLocation()
c_location = dataSet.get_cLocation()
nearWarehouse = dataSet.get_sortedDistanceIndex()
skuKeeping = dataSet.get_skuKeeping()  # not used in the visible part of the script
invoiceList = dataSet.get_quantity()

# 101 alpha values: 0 * 0.01, 1 * 0.01, ..., 100 * 0.01.
_alpha = [i*0.01 for i in range(101)]

# One curve per minVolume value.
for minVolume in [500,600,700,800]:
    _obj = []  # objective value for each alpha, in the order of _alpha
    for alpha in _alpha:
        # Set alpha on Calculator before the model is built and executed.
        # presumably alpha weights the terms of Calculator's cost - TODO confirm
        Calculator.setAlpha(alpha)
        # A fresh model with 20 clusters is created for every (minVolume, alpha) pair.
        km = balanced_kmeans(m_order = m_order, quantityTopic = topicList, quantityInvoice = invoiceList, w_location = w_location, d_location = d_location, c_location = c_location, nearWarehouse = nearWarehouse, n_clusters = 20,minVolume = minVolume)
        objValue = km.execute()
        _obj.append(objValue)
        print('m =%d | α: %f Cost:%f' % (minVolume,alpha,objValue))
    # Plot cost against alpha for this minVolume.
    plt.plot(_alpha,_obj, label='m = %d' % minVolume)
# Axis setup, legend and display of the combined figure.
plt.xlim((0, 1))
plt.xlabel('α')
plt.ylabel('Cost')
plt.legend()
plt.show()

'''
clusterList = km.getClusters()
centroidList = km.getCentroids()
idx = km.getIdx()