Example 1
import numpy as np

import k_medoids  # companion module providing the k-medoids MILP solver


def cluster(inputs, number_clusters=12, norm=2, time_limit=300, mip_gap=0.0):
    """
    """
    # Determine time steps per day
    len_day = int(inputs.shape[1] / 365)

    # Manipulate inputs
    # Initialize arrays
    inputsTransformed = []
    inputsScaled = []
    inputsScaledTransformed = []

    # Fill and reshape
    # Scale each input to the range [0, 1] so that all inputs carry the
    # same weight in the clustering
    for i in range(inputs.shape[0]):
        vals = inputs[i, :]
        temp = (vals - np.min(vals)) / (np.max(vals) - np.min(vals))
        inputsScaled.append(temp)
        inputsScaledTransformed.append(temp.reshape((len_day, 365), order="F"))
        inputsTransformed.append(vals.reshape((len_day, 365), order="F"))

    # Put the scaled and reshaped inputs together
    L = np.concatenate(tuple(inputsScaledTransformed))

    # Compute distances
    d = _distances(L, norm)

    # Execute optimization model
    (y, z, obj) = k_medoids.k_medoids(d, number_clusters, time_limit, mip_gap)

    # Retain typical days
    typicalDays = []

    # nc contains how many days are assigned to each cluster
    nc = []
    for i in range(len(y)):
        temp = np.sum(z[i, :])
        if temp > 0:
            nc.append(temp)
            typicalDays.append([ins[:, i] for ins in inputsTransformed])

    typicalDays = np.array(typicalDays)
    nc = np.array(nc, dtype="int")
    nc_cumsum = np.cumsum(nc) * len_day

    # Construct (yearly) load curves
    # ub = upper bound, lb = lower bound
    clustered = np.zeros_like(inputs)
    for i in range(len(nc)):
        if i == 0:
            lb = 0
        else:
            lb = nc_cumsum[i - 1]
        ub = nc_cumsum[i]

        for j in range(len(inputsTransformed)):
            clustered[j, lb:ub] = np.tile(typicalDays[i][j], nc[i])

    # Scaling to preserve original demands
    sums_inputs = [np.sum(inputs[j, :]) for j in range(inputs.shape[0])]
    scaled = np.array(
        [nc[day] * typicalDays[day, :, :] for day in range(len(nc))])
    sums_scaled = [np.sum(scaled[:, j, :]) for j in range(inputs.shape[0])]
    scaling_factors = [
        sums_inputs[j] / sums_scaled[j] for j in range(inputs.shape[0])
    ]
    scaled_typ_days = [
        scaling_factors[j] * typicalDays[:, j, :]
        for j in range(inputs.shape[0])
    ]

    return (scaled_typ_days, nc, z)
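
A minimal usage sketch for the function above. The synthetic hourly profiles (365 days x 24 steps) and the chosen parameters are illustrative assumptions; the companion k_medoids module and its solver back end must be available.

import numpy as np

# Two synthetic year-long demand profiles at hourly resolution
# (365 * 24 = 8760 values each); placeholders for real data
rng = np.random.default_rng(0)
hours = np.arange(8760)
electricity = 1.0 + 0.3 * np.sin(2 * np.pi * hours / 24) + 0.05 * rng.random(8760)
heat = 2.0 + np.cos(2 * np.pi * hours / 8760) + 0.05 * rng.random(8760)
inputs = np.vstack((electricity, heat))

# Reduce the 365 days to 8 typical days (short time limit for the sketch)
scaled_typ_days, nc, z = cluster(inputs, number_clusters=8, time_limit=60)

print(nc)  # how many of the 365 days each typical day represents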
Example 2
import numpy as np

import k_medoids  # companion module providing the k-medoids MILP solver


def cluster(inputs, number_clusters=12, norm=2, time_limit=300, mip_gap=0.0):
    """
    Cluster a set of inputs by solving a k-medoids problem.

    Parameters
    ----------
    inputs : 2-dimensional array
        First dimension: Number of different input types.
        Second dimension: Values for each time step of interest.
    number_clusters : integer, optional
        Number of clusters to compute.
    norm : integer, optional
        Compute the distance according to this norm. 2 is the standard
        Euclidean norm.
    time_limit : integer, optional
        Time limit for the optimization in seconds.
    mip_gap : float, optional
        Optimality tolerance (0: proven global optimum).

    Returns
    -------
    scaled_typ_days : list of arrays
        Scaled typical demand days. The scaling is based on the annual demands.
    nc : array_like
        Weighting factors of each cluster.
    z : 2-dimensional array
        Mapping of each day to the clusters.
    """
    # Determine time steps per day
    len_day = int(inputs.shape[1] / 365)
    
    # Manipulate inputs
    # Initialize arrays
    inputsTransformed = []
    inputsScaled = []
    inputsScaledTransformed = []
    
    # Fill and reshape
    # Scale each input to the range [0, 1] so that all inputs carry the
    # same weight in the clustering
    for i in range(inputs.shape[0]):
        vals = inputs[i, :]
        temp = (vals - np.min(vals)) / (np.max(vals) - np.min(vals))
        inputsScaled.append(temp)
        inputsScaledTransformed.append(temp.reshape((len_day, 365), order="F"))
        inputsTransformed.append(vals.reshape((len_day, 365), order="F"))

    # Put the scaled and reshaped inputs together
    L = np.concatenate(tuple(inputsScaledTransformed))

    # Compute distances
    d = _distances(L, norm)

    # Execute optimization model
    (y, z, obj) = k_medoids.k_medoids(d, number_clusters, time_limit, mip_gap)
    
    # Determine cluster weights and retain typical days (cf. Section 2.3)
    typicalDays = []

    # nc contains how many days are assigned to each cluster
    nc = []
    for i in range(len(y)):
        temp = np.sum(z[i, :])
        if temp > 0:
            nc.append(temp)
            typicalDays.append([ins[:, i] for ins in inputsTransformed])

    typicalDays = np.array(typicalDays)
    nc = np.array(nc, dtype="int")
    nc_cumsum = np.cumsum(nc) * len_day

    # Construct (yearly) load curves
    # ub = upper bound, lb = lower bound
    clustered = np.zeros_like(inputs)
    for i in range(len(nc)):
        if i == 0:
            lb = 0
        else:
            lb = nc_cumsum[i - 1]
        ub = nc_cumsum[i]

        for j in range(len(inputsTransformed)):
            clustered[j, lb:ub] = np.tile(typicalDays[i][j], nc[i])

    # Scaling to preserve original demands
    sums_inputs = [np.sum(inputs[j, :]) for j in range(inputs.shape[0])]
    scaled = np.array([nc[day] * typicalDays[day, :, :]
                       for day in range(len(nc))])
    sums_scaled = [np.sum(scaled[:, j, :]) for j in range(inputs.shape[0])]
    scaling_factors = [sums_inputs[j] / sums_scaled[j]
                       for j in range(inputs.shape[0])]
    scaled_typ_days = [scaling_factors[j] * typicalDays[:, j, :]
                       for j in range(inputs.shape[0])]
    
    return (scaled_typ_days, nc, z)
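
Both examples call a module-private helper _distances(L, norm) that is not shown. A minimal sketch of what it plausibly computes, assuming k_medoids.k_medoids expects a symmetric matrix of pairwise p-norm distances between the day columns of L:

import numpy as np


def _distances(values, norm=2):
    # Sketch only: the original helper is not part of these examples.
    # Computes pairwise p-norm distances between the columns (days)
    n_days = values.shape[1]
    d = np.zeros((n_days, n_days))
    for i in range(n_days):
        for j in range(i + 1, n_days):
            diff = values[:, i] - values[:, j]
            d[i, j] = np.linalg.norm(diff, ord=norm)
            d[j, i] = d[i, j]
    return d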
Example 3
import sys

import k_medoids as km
import rw  # project-local read/write helpers (readffile / write2file)
'''
finput_user_similarity = "Data/user_similarity_matrix"
finput_cluster_number = 200
foutput_user_cluster_set = "Data/user_cluster_set"
'''

if __name__ == '__main__':
    # data path
    finput_user_similarity = sys.argv[1]
    finput_cluster_number = int(sys.argv[2])
    foutput_user_cluster_set = sys.argv[3]

    # read into user similarity matrix
    user_similarity_matrix = rw.readffile(finput_user_similarity)

    # k-medoids
    user_cluster_set = km.k_medoids(user_similarity_matrix.values,
                                    K=finput_cluster_number,
                                    max_iterations=20)
    print("\ndone!")

    rw.write2file(user_cluster_set, foutput_user_cluster_set)
    print("file saved done!")

    print("top 20% of user cluster:")
    length = []
    for lst in user_cluster_set:
        length.append(len(lst))
    length.sort(reverse=True)
    print(length[0:int(len(length) * 0.2)])
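
The rw helper module used above is project-specific and not shown. A plausible pickle-based sketch follows; the function names come from the example, but the implementation and the pandas DataFrame input (implied by .values) are assumptions:

# rw.py -- hypothetical sketch of the read/write helpers used above
import pickle


def readffile(path):
    # Load a previously pickled object, e.g. a pandas DataFrame
    # whose .values attribute yields the similarity matrix
    with open(path, "rb") as f:
        return pickle.load(f)


def write2file(obj, path):
    # Pickle an arbitrary object (here: the list of clusters) to disk
    with open(path, "wb") as f:
        pickle.dump(obj, f)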