def update_user_routeDistanceMatrix(user_id, data_feature, step1=100000, step2=100000, method='lcs', radius1=1000):
    """Fill in the missing entries of a user's pairwise route-distance matrix.

    The matrix for the ``(user_id, method)`` pair is looked up in the
    collection returned by ``get_routeDistanceMatrix_db()``. When no document
    exists yet, one with an empty row per id is inserted first. Every ordered
    pair ``(_id, key)`` of ids in ``data_feature`` that is not yet present in
    the matrix is computed with ``fullMatchDistance`` and stored under
    ``disMat[_id][key]``; both ``[a][b]`` and ``[b][a]`` are computed
    separately, no symmetry is assumed.

    Args:
        user_id: Value stored/queried under the ``'user'`` key.
        data_feature: Mapping of id -> feature object; the values are passed
            to ``fullMatchDistance`` unchanged.
        step1, step2, radius1: Forwarded verbatim to ``fullMatchDistance``.
        method: Value stored/queried under the ``'method'`` key and also
            forwarded to ``fullMatchDistance``.

    Returns:
        None. The result is persisted in the collection.
    """
    ids = data_feature.keys()
    # NOTE(review): ``insert``/``update`` look like the legacy PyMongo
    # collection methods; they are kept as-is to match the driver in use.
    collection = get_routeDistanceMatrix_db()
    selector = {'$and': [{'user': user_id}, {'method': method}]}

    user_query = collection.find_one(selector)
    if user_query is None:
        user_disMat = {_id: {} for _id in ids}
        collection.insert({'user': user_id, 'method': method, 'disMat': user_disMat})
    else:
        user_disMat = user_query['disMat']

    changed = False
    for a, _id in enumerate(ids):
        print(a)  # progress indicator: index of the row being processed
        # A matrix loaded from storage may predate this id; create its row
        # instead of failing with a KeyError when assigning into it.
        if _id not in user_disMat:
            user_disMat[_id] = {}
        row = user_disMat[_id]
        for key in ids:
            if key in row:
                continue
            row[key] = fullMatchDistance(data_feature[_id], data_feature[key],
                                         step1, step2, method, radius1)
            changed = True

    # Write the whole matrix back once, and only when something was added.
    if changed:
        collection.update(selector, {'user': user_id, 'method': method, 'disMat': user_disMat})
def update_user_routeDistanceMatrix(user_id, data_feature, step1=100000, step2=100000, method='lcs', radius1=1000):
    """Fill in the missing entries of a user's pairwise route-distance matrix.

    The current matrix is obtained from
    ``get_routeDistanceMatrix_db(user_id, method)``. Every ordered pair
    ``(_id, key)`` of ids in ``data_feature`` that is not yet present is
    computed with ``fullMatchDistance`` and stored under
    ``user_disMat[_id][key]``; rows missing from the loaded matrix are
    created on demand. The matrix is then handed to
    ``update_routeDistanceMatrix_db(user_id, method, user_disMat)``.

    Args:
        user_id: Identifier passed to the load/store helpers.
        data_feature: Mapping of id -> feature object; the values are passed
            to ``fullMatchDistance`` unchanged.
        step1, step2, radius1: Forwarded verbatim to ``fullMatchDistance``.
        method: Passed to the load/store helpers and to
            ``fullMatchDistance``.

    Returns:
        Whatever ``update_routeDistanceMatrix_db`` returns for the updated
        matrix.
    """
    ids = data_feature.keys()
    user_disMat = get_routeDistanceMatrix_db(user_id, method)

    for a, _id in enumerate(ids):
        # Progress message every 100 rows; the parenthesised form prints the
        # same text on Python 2 and Python 3.
        if a % 100 == 0:
            print("In update_user_routeDistanceMatrix, a = %d" % a)
        if _id not in user_disMat:
            user_disMat[_id] = {}
        row = user_disMat[_id]
        for key in ids:
            # Only compute distances that are not already cached.
            if key not in row:
                row[key] = fullMatchDistance(data_feature[_id], data_feature[key],
                                             step1, step2, method, radius1)

    return update_routeDistanceMatrix_db(user_id, method, user_disMat)
def kmedoids(data_feature, k, user_id, method='lcs'):
    '''
    kMedoids - PAM implementation
    See more : http://en.wikipedia.org/wiki/K-medoids
    The most common realisation of k-medoid clustering is the Partitioning
    Around Medoids (PAM) algorithm and is as follows:[2]
    1. Initialize: randomly select k of the n data points as the medoids
    2. Associate each data point to the closest medoid. ("closest" here is
       defined using any valid distance metric, most commonly Euclidean
       distance, Manhattan distance or Minkowski distance)
    3. For each medoid m
         For each non-medoid data point o
           Swap m and o and compute the total cost of the configuration
    4. Select the configuration with the lowest cost.
    5. Repeat steps 2 to 4 until there is no change in the medoid.

    Arguments:
      data_feature -- mapping whose keys are the ids to cluster; it is passed
                      to totalCost() unchanged.
      k            -- number of medoids to draw with random.sample().
      user_id      -- value matched against the 'user' key of the stored
                      distance-matrix document.
      method       -- value matched against the 'method' key of that document.

    Returns a tuple (cost, medoid_ids, clusters) where `clusters` is a copy of
    the mapping returned by totalCost() for the best configuration (indexed by
    medoid id, each value iterable over item ids). If no swap improves on the
    initial random choice, that initial configuration is returned.
    '''
    disMat_user = get_routeDistanceMatrix_db().find_one(
        {'$and': [{'user': user_id}, {'method': method}]})['disMat']

    medoids_idx = random.sample(list(data_feature.keys()), k)
    current_cost, medoids = totalCost(data_feature, disMat_user, medoids_idx)

    # Seed the best-so-far state with the initial configuration so that an
    # already-optimal random start is returned instead of an empty result.
    best_choice = list(medoids_idx)
    best_res = dict(medoids)

    while True:
        for idx, m in enumerate(medoids_idx):
            for item in medoids[m]:
                # NOTE: both m and item are ids.
                if item == m:
                    continue
                # Temporarily swap medoid m for item and evaluate the cost.
                medoids_idx[idx] = item
                tmp_cost, tmp_medoids = totalCost(data_feature, disMat_user, medoids_idx)
                if tmp_cost < current_cost:
                    best_choice = list(medoids_idx)  # snapshot, not an alias
                    best_res = dict(tmp_medoids)
                    current_cost = tmp_cost
                # Undo the swap before trying the next candidate.
                medoids_idx[idx] = m

        if best_choice == medoids_idx:
            # No swap improved the cost: clustering is done.
            break

        # Adopt the best configuration found and search again from there.
        medoids = best_res
        medoids_idx = list(best_choice)

    return (current_cost, best_choice, best_res)
def kmedoids(data_feature, k, user_id, method='lcs'):
    '''
    kMedoids - PAM implementation
    See more : http://en.wikipedia.org/wiki/K-medoids
    The most common realisation of k-medoid clustering is the Partitioning
    Around Medoids (PAM) algorithm and is as follows:[2]
    1. Initialize: randomly select k of the n data points as the medoids
    2. Associate each data point to the closest medoid. ("closest" here is
       defined using any valid distance metric, most commonly Euclidean
       distance, Manhattan distance or Minkowski distance)
    3. For each medoid m
         For each non-medoid data point o
           Swap m and o and compute the total cost of the configuration
    4. Select the configuration with the lowest cost.
    5. Repeat steps 2 to 4 until there is no change in the medoid.

    Arguments:
      data_feature -- mapping whose keys are the ids to cluster; it is passed
                      to totalCost() unchanged.
      k            -- number of medoids to draw with random.sample().
      user_id      -- passed to get_routeDistanceMatrix_db() to load the
                      distance matrix.
      method       -- passed to get_routeDistanceMatrix_db() as well.

    Returns a tuple (cost, medoid_ids, clusters) where `clusters` is a copy of
    the mapping returned by totalCost() for the best configuration (indexed by
    medoid id, each value iterable over item ids). If no swap improves on the
    initial random choice, that initial configuration is returned.
    When k >= len(data_feature) nothing is clustered and (0, [], {}) is
    returned.
    '''
    if k >= len(data_feature):
        return (0, [], {})

    disMat_user = get_routeDistanceMatrix_db(user_id, method)

    medoids_idx = random.sample(list(data_feature.keys()), k)
    current_cost, medoids = totalCost(data_feature, disMat_user, medoids_idx)

    # Seed the best-so-far state with the initial configuration so that an
    # already-optimal random start is returned instead of an empty result.
    best_choice = list(medoids_idx)
    best_res = dict(medoids)

    while True:
        for idx, m in enumerate(medoids_idx):
            for item in medoids[m]:
                # NOTE: both m and item are ids.
                if item == m:
                    continue
                # Temporarily swap medoid m for item and evaluate the cost.
                medoids_idx[idx] = item
                tmp_cost, tmp_medoids = totalCost(data_feature, disMat_user, medoids_idx)
                if tmp_cost < current_cost:
                    best_choice = list(medoids_idx)  # snapshot, not an alias
                    best_res = dict(tmp_medoids)
                    current_cost = tmp_cost
                # Undo the swap before trying the next candidate.
                medoids_idx[idx] = m

        if best_choice == medoids_idx:
            # No swap improved the cost: clustering is done.
            break

        # Adopt the best configuration found and search again from there.
        medoids = best_res
        medoids_idx = list(best_choice)

    return (current_cost, best_choice, best_res)