コード例 #1
0
def update_user_routeDistanceMatrix(user_id,
                                    data_feature,
                                    step1=100000,
                                    step2=100000,
                                    method='lcs',
                                    radius1=1000):
    """Fill in the missing pairwise distances for a user and store the matrix.

    The stored document for ``(user_id, method)`` is looked up first; when
    none exists, a new one is inserted with an empty row per id in
    ``data_feature``.  Every ``(_id, key)`` pair of ids that is not yet in
    the matrix is then computed with ``fullMatchDistance`` and the whole
    document is written back.

    Args:
        user_id: Value stored/queried under the ``'user'`` field.
        data_feature: Mapping from an id to the feature object handed to
            ``fullMatchDistance``.
        step1, step2, radius1: Forwarded unchanged to ``fullMatchDistance``.
        method: Value stored/queried under the ``'method'`` field; also
            forwarded to ``fullMatchDistance``.

    Returns:
        None.  The result is persisted through the collection object.
    """
    # NOTE(review): ``insert``/``update`` are the legacy PyMongo collection
    # methods; kept as-is because the driver version is not visible here.
    collection_query = {'$and': [{'user': user_id}, {'method': method}]}
    ids = data_feature.keys()

    user_query = get_routeDistanceMatrix_db().find_one(collection_query)
    if user_query is None:
        user_disMat = {_id: {} for _id in ids}
        get_routeDistanceMatrix_db().insert({
            'user': user_id,
            'method': method,
            'disMat': user_disMat
        })
    else:
        user_disMat = user_query['disMat']

    for a, _id in enumerate(ids):
        print(a)  # progress counter, one line per processed id
        # A stored matrix may predate this id; create its row on demand
        # instead of failing with KeyError on the first assignment.
        row = user_disMat.setdefault(_id, {})
        for key in ids:
            if key not in row:
                row[key] = fullMatchDistance(data_feature[_id],
                                             data_feature[key], step1, step2,
                                             method, radius1)

    get_routeDistanceMatrix_db().update(
        collection_query, {
            'user': user_id,
            'method': method,
            'disMat': user_disMat
        })
コード例 #2
0
def update_user_routeDistanceMatrix(user_id,
                                    data_feature,
                                    step1=100000,
                                    step2=100000,
                                    method='lcs',
                                    radius1=1000):
    """Fill in the missing pairwise distances for a user and store the matrix.

    The current matrix is obtained from
    ``get_routeDistanceMatrix_db(user_id, method)``.  Every ``(_id, key)``
    pair of ids from ``data_feature`` that is not yet present is computed
    with ``fullMatchDistance``; the matrix is then handed to
    ``update_routeDistanceMatrix_db`` and that call's result is returned.

    Args:
        user_id: Passed to the matrix load/store helpers.
        data_feature: Mapping from an id to the feature object handed to
            ``fullMatchDistance``.
        step1, step2, radius1: Forwarded unchanged to ``fullMatchDistance``.
        method: Passed to the load/store helpers and to
            ``fullMatchDistance``.

    Returns:
        Whatever ``update_routeDistanceMatrix_db`` returns for the updated
        matrix.
    """
    ids = data_feature.keys()
    user_disMat = get_routeDistanceMatrix_db(user_id, method)

    for a, _id in enumerate(ids):
        if a % 100 == 0:
            # Parenthesised single-argument form prints the same text on
            # both Python 2 and Python 3.
            print("In update_user_routeDistanceMatrix, a = %d" % a)
        for key in ids:
            try:
                user_disMat[_id][key]
            except KeyError:
                # Either the row for _id or the entry for key is missing.
                dis = fullMatchDistance(data_feature[_id], data_feature[key],
                                        step1, step2, method, radius1)
                if _id not in user_disMat:
                    user_disMat[_id] = {}
                user_disMat[_id][key] = dis

    # Diagnostic output: container type returned by the load helper.
    print(type(user_disMat))
    user_disMat = update_routeDistanceMatrix_db(user_id, method, user_disMat)
    return user_disMat
コード例 #3
0
def kmedoids(data_feature, k, user_id, method='lcs'):
    '''
    kMedoids - PAM implementation
    See more : http://en.wikipedia.org/wiki/K-medoids
    The most common realisation of k-medoid clustering is the Partitioning
    Around Medoids (PAM) algorithm and is as follows:
    1. Initialize: randomly select k of the n data points as the medoids
    2. Associate each data point to the closest medoid.
    3. For each medoid m
        For each non-medoid data point o
            Swap m and o and compute the total cost of the configuration
    4. Select the configuration with the lowest cost.
    5. Repeat steps 2 to 4 until there is no change in the medoids.

    Args:
        data_feature: Mapping whose keys are the ids to cluster; passed
            through to ``totalCost``.
        k: Number of medoids to draw with ``random.sample``.
        user_id, method: Select the stored document whose ``'disMat'``
            field is used as the distance matrix.

    Returns:
        Tuple ``(cost, medoid_ids, clusters)`` for the best configuration
        found, where ``cost`` and ``clusters`` are as produced by
        ``totalCost``.  When no swap improves on the initial random choice,
        the initial configuration is returned.

    Raises:
        ValueError: from ``random.sample`` when ``k`` exceeds the number of
            ids.
        TypeError: when no document is stored for ``(user_id, method)``
            (``find_one`` yields ``None``).
    '''
    disMat_user = get_routeDistanceMatrix_db().find_one(
        {'$and': [{
            'user': user_id
        }, {
            'method': method
        }]})['disMat']

    medoids_idx = random.sample(list(data_feature.keys()), k)
    pre_cost, medoids = totalCost(data_feature, disMat_user, medoids_idx)

    # Seed the "best so far" with the initial configuration so that an
    # already-optimal start is returned instead of an empty result.
    current_cost = pre_cost
    best_choice = list(medoids_idx)
    best_res = dict(medoids)

    while True:
        for idx, m in enumerate(medoids_idx):
            # presumably medoids[m] lists the ids assigned to medoid m;
            # verify against totalCost
            for item in medoids[m]:
                # NOTE: both m and item are ids
                if item == m:
                    continue
                # Try item as the medoid in place of m.
                medoids_idx[idx] = item
                tmp_cost, tmp_medoids = totalCost(data_feature, disMat_user,
                                                  medoids_idx)
                if tmp_cost < current_cost:
                    best_choice = list(medoids_idx)  # Make a copy
                    best_res = dict(tmp_medoids)  # Make a copy
                    current_cost = tmp_cost
                # Undo the trial swap before testing the next candidate.
                medoids_idx[idx] = m

        if best_choice == medoids_idx:
            # No swap lowered the cost in this sweep: converged.
            break

        # A strictly cheaper configuration was found; continue from it.
        pre_cost = current_cost
        medoids = best_res
        medoids_idx = list(best_choice)

    return (current_cost, best_choice, best_res)
コード例 #4
0
def kmedoids(data_feature, k, user_id, method='lcs'):
    '''
    kMedoids - PAM implementation
    See more : http://en.wikipedia.org/wiki/K-medoids
    The most common realisation of k-medoid clustering is the Partitioning
    Around Medoids (PAM) algorithm and is as follows:
    1. Initialize: randomly select k of the n data points as the medoids
    2. Associate each data point to the closest medoid.
    3. For each medoid m
        For each non-medoid data point o
            Swap m and o and compute the total cost of the configuration
    4. Select the configuration with the lowest cost.
    5. Repeat steps 2 to 4 until there is no change in the medoids.

    Args:
        data_feature: Mapping whose keys are the ids to cluster; passed
            through to ``totalCost``.
        k: Number of medoids to draw with ``random.sample``.
        user_id, method: Passed to ``get_routeDistanceMatrix_db`` to obtain
            the distance matrix.

    Returns:
        ``(0, [], {})`` when ``k`` is not smaller than the number of ids.
        Otherwise a tuple ``(cost, medoid_ids, clusters)`` for the best
        configuration found, where ``cost`` and ``clusters`` are as produced
        by ``totalCost``.  When no swap improves on the initial random
        choice, the initial configuration is returned.
    '''
    if k >= len(data_feature):
        return (0, [], {})

    disMat_user = get_routeDistanceMatrix_db(user_id, method)
    medoids_idx = random.sample(list(data_feature.keys()), k)
    pre_cost, medoids = totalCost(data_feature, disMat_user, medoids_idx)

    # Seed the "best so far" with the initial configuration so that an
    # already-optimal start is returned instead of an empty result.
    current_cost = pre_cost
    best_choice = list(medoids_idx)
    best_res = dict(medoids)

    while True:
        for idx, m in enumerate(medoids_idx):
            # presumably medoids[m] lists the ids assigned to medoid m;
            # verify against totalCost
            for item in medoids[m]:
                # NOTE: both m and item are ids
                if item == m:
                    continue
                # Try item as the medoid in place of m.
                medoids_idx[idx] = item
                tmp_cost, tmp_medoids = totalCost(data_feature, disMat_user,
                                                  medoids_idx)
                if tmp_cost < current_cost:
                    best_choice = list(medoids_idx)  # Make a copy
                    best_res = dict(tmp_medoids)  # Make a copy
                    current_cost = tmp_cost
                # Undo the trial swap before testing the next candidate.
                medoids_idx[idx] = m

        if best_choice == medoids_idx:
            # No swap lowered the cost in this sweep: converged.
            break

        # A strictly cheaper configuration was found; continue from it.
        pre_cost = current_cost
        medoids = best_res
        medoids_idx = list(best_choice)

    return (current_cost, best_choice, best_res)