def computeNearestNeighbor(user, username):
    """Rank every other entry of ``user`` by EDR distance to ``username``.

    Args:
        user: mapping from an instance name to its trajectory; each value is
            passed through ``np.array`` before being handed to ``tdist.edr``.
        username: key in ``user`` whose trajectory is the reference.

    Returns:
        A list of ``(instance, distance)`` tuples for every key different from
        ``username``, sorted by ascending distance. Empty when ``user`` holds
        no other key.

    Raises:
        KeyError: if ``username`` is missing from ``user`` while at least one
            other key is present.
    """
    others = [instance for instance in user if instance != username]
    if not others:
        return []

    # The reference trajectory does not change inside the loop, so convert it
    # once instead of once per neighbour.
    reference = np.array(user[username])

    # NOTE: an earlier variant rescaled each distance as
    # 1 / (log10(distance) + 1); it is disabled. (The original note spoke of
    # the Hausdorff distance, but the metric computed here is EDR.)
    distances = [(instance, tdist.edr(reference, np.array(user[instance])))
                 for instance in others]
    distances.sort(key=lambda pair: pair[1])
    return distances
Exemple #2
0
def compute_dists(trajs):
    """Evaluate eleven trajectory distance measures on every sample pair.

    ``trajs[i][j][0]`` and ``trajs[i][j][1]`` are the two trajectories compared
    for sample ``j`` of pattern ``i``. The number of samples per pattern is
    taken from ``trajs[0]``. ``tlen`` is not a parameter; it is read from the
    enclosing scope and scales the ``eps`` passed to LCSS and EDR.

    Returns:
        Array of shape ``(11, len(trajs), len(trajs[0]))``. The first axis is
        ordered: euclidean, dtw, sspd, lcss, edr, erp, frechet, hausdorff,
        hmm, aeowd, aebid.
    """
    n_patterns = len(trajs)
    n_samples = len(trajs[0])
    n_metrics = 11
    results = np.zeros((n_metrics, n_patterns, n_samples))

    for i in range(n_patterns):
        print('-', i)  # patterns
        for j in range(n_samples):
            print(i, '-', j)  # samples
            pair = trajs[i][j]
            first, second = pair[0], pair[1]
            # View onto the eleven metric slots of this (pattern, sample).
            cell = results[:, i, j]

            # Patterns with index above 2 are resampled before the
            # Euclidean comparison; the others are compared as given.
            if i <= 2:
                cell[0] = eucdist(first, second)
            else:
                cell[0] = eucdist(resample(first), resample(second))

            cell[1] = tdist.dtw(first, second)
            cell[2] = tdist.sspd(first, second)
            cell[3] = tdist.lcss(first, second, eps=0.6 / tlen)
            cell[4] = tdist.edr(first, second, eps=0.6 / tlen)
            cell[5] = tdist.erp(first, second, g=np.zeros(2, dtype=float))
            cell[6] = tdist.frechet(first, second)
            cell[7] = tdist.hausdorff(first, second)
            cell[8] = hmmdist(first, second, M=np.array([1, 1, 1, 1, 1, 1]))
            cell[9] = aeowd(first, second, loss_th=1e-4)
            cell[10] = aebid(first, second, loss_th=1e-4)

    return results
    # NOTE(review): these statements sit at the same indentation as, and
    # directly after, an unconditional `return`, so as written they never run.
    # They look like the tail of a different function whose header is not
    # visible here -- confirm before relying on them.
    # `c_num`, `train_num`, `train_labels`, `train_set`, `test_set`,
    # `test_num` and every `dm_tst_*` array are defined outside this fragment.
    #
    # For each class c, average the training trajectories labelled c + 1 and
    # measure that mean trajectory against every test trajectory.
    for c in range(c_num):
        print('--', c)
        # Indices of the training samples whose label equals c + 1.
        # NOTE(review): the `np.int` alias was deprecated in NumPy 1.20 and
        # removed in 1.24; this line fails on current NumPy.
        idx = np.arange(0, train_num, dtype=np.int)[train_labels == (c + 1)]
        sample_set = np.array([train_set[i] for i in idx])
        train_traj = np.mean(sample_set, axis=0)  # (tlen, 2)

        for j in range(test_num):
            # Progress output every 3000 test trajectories.
            if j % 3000 == 0:
                print(j)
            test_traj = test_set[j]

            dm_tst_eucl[c, j] = eucdist(train_traj, test_traj)
            dm_tst_hau[c, j] = tdist.hausdorff(train_traj, test_traj)
            dm_tst_dtw[c, j] = tdist.dtw(train_traj, test_traj)
            dm_tst_sspd[c, j] = tdist.sspd(train_traj, test_traj)
            dm_tst_lcss[c, j] = tdist.lcss(train_traj, test_traj, eps=0.05)
            dm_tst_edr[c, j] = tdist.edr(train_traj, test_traj, eps=0.05)
            dm_tst_erp[c, j] = tdist.erp(train_traj,
                                         test_traj,
                                         g=np.zeros(2, dtype=float))
            dm_tst_fre[c, j] = tdist.frechet(train_traj, test_traj)

    # Persist one compressed archive per metric; `np.savez_compressed`
    # appends the `.npz` extension to these names.
    np.savez_compressed('cross_c_dm_tst_eucl', dm_tst_eucl)
    np.savez_compressed('cross_c_dm_tst_dtw', dm_tst_dtw)
    np.savez_compressed('cross_c_dm_tst_sspd', dm_tst_sspd)
    np.savez_compressed('cross_c_dm_tst_lcss', dm_tst_lcss)
    np.savez_compressed('cross_c_dm_tst_edr', dm_tst_edr)
    np.savez_compressed('cross_c_dm_tst_erp', dm_tst_erp)
    np.savez_compressed('cross_c_dm_tst_fre', dm_tst_fre)
    np.savez_compressed('cross_c_dm_tst_hau', dm_tst_hau)
Exemple #4
0
def _refine_top_k(candidates, k, query_tra, trajectory_dict, rtree_key_to_id,
                  qgram_size):
    """Return the k candidates with the smallest scaled EDR to ``query_tra``.

    Args:
        candidates: sequence of ``(candidate key, q-gram count)`` entries in
            the order they should be examined.
        k: number of results wanted; non-positive yields an empty result.
        query_tra: the query trajectory (sequence of points).
        trajectory_dict: trajectory id -> trajectory.
        rtree_key_to_id: candidate key -> trajectory id.
        qgram_size: q-gram size used in the count-based pruning bound.

    Returns:
        ``(top_k, edr_count)`` where ``top_k`` is a list of
        ``(candidate key, distance)`` sorted by ``(distance, key)`` and
        ``edr_count`` is the number of EDR evaluations performed.
    """
    if k <= 0:
        return [], 0

    query_arr = np.array(query_tra)
    length_q = len(query_tra)

    def scaled_edr(tra):
        # EDR result multiplied by the longer trajectory length, as in the
        # original code (presumably turning a normalised score back into an
        # edit count -- confirm against the traj_dist documentation).
        return (tdist.edr(np.array(tra), query_arr, "spherical")
                * max(len(tra), length_q))

    result_map = {}
    edr_count = 0
    for candidate in candidates[:k]:
        candidate_id = candidate[0]
        result_map[candidate_id] = scaled_edr(
            trajectory_dict[rtree_key_to_id[candidate_id]])
        edr_count += 1

    # The pruning bound must be the k-th best distance found so far. The
    # first k candidates are ordered by count, not by distance, so take the
    # worst of them rather than the distance of whichever happens to be last.
    best_so_far = max(result_map.values())

    for candidate in candidates[k:]:
        candidate_id, count_value = candidate[0], candidate[1]
        tra_s = trajectory_dict[rtree_key_to_id[candidate_id]]
        # Count filter: once a candidate's count drops below the bound the
        # scan stops, exactly as the original loop did.
        if count_value < (max(length_q, len(tra_s))
                          - (best_so_far + 1) * qgram_size):
            break
        real_dist = scaled_edr(tra_s)
        edr_count += 1
        if real_dist < best_so_far:
            result_map[candidate_id] = real_dist
            # New bound: the k-th smallest distance currently known.
            best_so_far = sorted(result_map.values())[:k][-1]

    ranked = sorted(result_map.items(), key=lambda kv: (kv[1], kv[0]))
    return ranked[:k], edr_count


def searchResult(query, train, query_num, user_k, qgram_size):
    """Find the top-k most similar training trajectories for each query.

    Loads the candidate lists and id dictionaries produced for ``query`` /
    ``train`` / ``qgram_size``, refines each candidate list with EDR, writes
    one ``query_<id>.txt`` file per query under ``./data/result/...`` and a
    summary of EDR evaluations to ``./data/stats/.../edr_count_q_<n>.txt``.

    Args:
        query: name of the query data set (also used as the log file name).
        train: name of the training data set.
        query_num: number of query trajectories to load.
        user_k: requested number of results per query.
        qgram_size: q-gram size; selects the input files and drives pruning.
    """
    logger = logging.getLogger('predict')
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    # Attach each handler only once: repeated calls would otherwise stack
    # handlers on the shared logger and emit every message several times.
    log_file = os.path.abspath('./log/%s' % query)
    if not any(isinstance(h, logging.FileHandler) and h.baseFilename == log_file
               for h in logger.handlers):
        fh = logging.FileHandler(log_file)
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(formatter)
        logger.addHandler(fh)
    if not any(type(h) is logging.StreamHandler for h in logger.handlers):
        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)
        ch.setFormatter(formatter)
        logger.addHandler(ch)

    # loading the files:
    logger.info('---------------------------- Predict the top-k similar trajectories ----------------------------')
    qgram_tag = 'q_%d' % qgram_size
    query_path = './data/processed/%s.txt' % query
    train_path = './data/processed/%s.txt' % train
    candidate_traj_path = './data/interim/%s/%s/candidate_trajectory_%s.txt' % (query, train, qgram_tag)
    query_id_dict_path = './data/interim/%s/%s/query_id_dict_%s.txt' % (query, train, qgram_tag)
    rtree_id_dict_path = './data/interim/%s/rtree_id_dict_%s.txt' % (train, qgram_tag)
    result_path = './data/result/%s/%s/%s/' % (query, train, qgram_tag)
    stats_path = './data/stats/%s/%s/edr_count_%s.txt' % (query, train, qgram_tag)
    os.makedirs(result_path, exist_ok=True)
    # The stats file is written at the end; make sure its directory exists
    # up front so a long run cannot fail on the final write.
    os.makedirs(os.path.dirname(stats_path), exist_ok=True)

    # candidateList => [[queryID_1, [(traID1, count1), (traID2, count2)]], [...]]
    candidateList = read_pickle(candidate_traj_path)
    logger.info('Load candidate trajectory: %s' % candidate_traj_path)

    query_id_to_key = read_pickle(query_id_dict_path)
    logger.info('Load query id dictionary: %s' % query_id_dict_path)

    rtree_id_to_key = read_pickle(rtree_id_dict_path)
    logger.info('Load rtree id dictionary: %s' % rtree_id_dict_path)

    trajectory_dict = load_trajectory(train_path)
    logger.info('Load train trajectory: %s' % train_path)

    real_query_dict = load_trajectory(query_path, n=query_num)
    logger.info('Load %d query trajectory: %s' % (query_num, query_path))

    query_key_to_id = swap_k_v(query_id_to_key)  # key: encoded key; value: trajectory id in string
    rtree_key_to_id = swap_k_v(rtree_id_to_key)  # key: encoded key; value: trajectory id in string

    edr_count_result = []
    logger.info('Start finding top K')
    for index, entry in enumerate(candidateList):
        queryID, fullCandidates = entry[0], entry[1]  # candidates: list of [ID, count]
        print('%d, query id: %d' % (index, queryID))

        query_real_id = query_key_to_id[queryID]
        k = min(user_k, len(fullCandidates))
        finalResult, edr_count = _refine_top_k(
            fullCandidates, k, real_query_dict[query_real_id],
            trajectory_dict, rtree_key_to_id, qgram_size)

        with open(os.path.join(result_path, "query_%s.txt" % queryID), 'w') as f:
            f.write(query_real_id + '\n')
            f.write('\n'.join('{} {}'.format(item[0], item[1]) for item in finalResult))
        gc.collect()
        edr_count_result.append("query_%s.txt %d %d" % (queryID, edr_count, len(fullCandidates)))
    logger.info('Finished')

    with open(stats_path, 'w') as f_stats:
        f_stats.write('query edr_count candidate_count\n')
        f_stats.write('\n'.join(edr_count_result))
Exemple #5
0
def main(query, train, query_num, query_keys=(36, 41)):
    """Build exhaustive EDR rankings ("ground truth") for selected queries.

    For every loaded query whose key is selected, computes the scaled EDR to
    each training trajectory that has an entry in the train id dictionary and
    writes the result, sorted by ascending distance, to
    ``./data/truth/<query>/<train>/query_<key>.txt``.

    Args:
        query: name of the query data set (also used as the log file name).
        train: name of the training data set.
        query_num: number of query trajectories to load.
        query_keys: collection of query keys to process. Defaults to the keys
            that were previously hard-coded; pass ``None`` to process every
            query.
    """
    logger = logging.getLogger('build_truth')
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    # Attach each handler only once: repeated calls would otherwise stack
    # handlers on the shared logger and emit every message several times.
    log_file = os.path.abspath('./log/%s' % query)
    if not any(isinstance(h, logging.FileHandler) and h.baseFilename == log_file
               for h in logger.handlers):
        fh = logging.FileHandler(log_file)
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(formatter)
        logger.addHandler(fh)
    if not any(type(h) is logging.StreamHandler for h in logger.handlers):
        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)
        ch.setFormatter(formatter)
        logger.addHandler(ch)

    logger.info(
        '---------------------------- Search for ground truth ----------------------------'
    )

    query_path = './data/processed/%s.txt' % query
    train_path = './data/processed/%s.txt' % train
    query_id_dict_path = './data/interim/%s/%s/query_id_dict.txt' % (query,
                                                                     train)
    rtree_id_dict_path = './data/interim/%s/rtree_id_dict.txt' % train
    result_path = './data/truth/%s/%s' % (query, train)
    os.makedirs(result_path, exist_ok=True)

    # NOTE(review): the original comment called this "fakeID -> realID", yet
    # it is indexed below with the keys of `query_data` -- confirm direction.
    query_id_dict = read_pickle(query_id_dict_path)
    logger.info('Load query id dictionary: %s' % query_id_dict_path)

    train_id_dict = read_pickle(rtree_id_dict_path)
    logger.info('Load train id dictionary: %s' % rtree_id_dict_path)

    query_data = load_trajectory(query_path, n=query_num)
    logger.info('Load %d query trajectory: %s' % (query_num, query_path))

    train_data = load_trajectory(train_path)  # realID -> vectors
    logger.info('Load train trajectory: %s' % train_path)

    # Invert the pickled dictionary so it can be indexed by the keys of
    # `train_data`.
    train_id_dict = {v: k for k, v in train_id_dict.items()}

    for query_id, query_trajectory in query_data.items():
        query_key = query_id_dict[query_id]
        if query_keys is not None and query_key not in query_keys:
            continue
        print(query_key)

        query_arr = np.array(query_trajectory)
        query_len = len(query_trajectory)
        trajectory_result = []
        for train_id, train_trajectory in train_data.items():
            # Training trajectories without a dictionary entry are skipped,
            # as before. The lookup is checked on its own so that a KeyError
            # raised inside the distance computation is no longer swallowed.
            if train_id not in train_id_dict:
                continue
            distance = tdist.edr(
                np.array(train_trajectory), query_arr,
                "spherical") * max(len(train_trajectory), query_len)
            trajectory_result.append((train_id_dict[train_id], distance))

        # Stable sort: equal distances keep their `train_data` order.
        trajectory_result.sort(key=lambda pair: pair[1])

        with open(os.path.join(result_path, "query_%s.txt" % query_key),
                  'w') as f:
            f.write(query_id + '\n')
            f.write('\n'.join('{} {}'.format(item[0], item[1])
                              for item in trajectory_result))
        gc.collect()
    logger.info('Finished building ground truth')
Exemple #6
0
                   [-122.41252, 37.79232], [-122.41316, 37.7951],
                   [-122.41392, 37.7989], [-122.41435, 37.80129],
                   [-122.41434, 37.80129]])
# Sample trajectories: each is a 2-D array with one row per point and two
# values per row (presumably [longitude, latitude] -- confirm).
traj_B = np.array([[-122.39472, 37.77672], [-122.3946, 37.77679],
                   [-122.39314, 37.77846], [-122.39566, 37.78113],
                   [-122.39978, 37.78438], [-122.40301, 37.78708],
                   [-122.4048, 37.78666], [-122.40584, 37.78564],
                   [-122.40826, 37.78385], [-122.41061, 37.78321],
                   [-122.41252, 37.78299]])
traj_C = np.array([[-122.39542, 37.77665], [-122.3988, 37.77417],
                   [-122.41042, 37.76944], [-122.41459, 37.77016],
                   [-122.41462, 37.77013]])
# `traj_A` is defined before this point.
traj_list = [traj_A, traj_B, traj_C]

# NOTE(review): this import sits below the data definitions; by convention
# it belongs with the other imports at the top of the file.
import traj_dist.distance as tdist

# Simple distance: EDR of a single trajectory against itself, with the
# library's default arguments.
dist = tdist.edr(traj_A, traj_A)
print(dist)

# Pairwise distance (disabled): all pairs within `traj_list` using SSPD.

# pdist = tdist.pdist(traj_list,metric="sspd")
# print(pdist)

# Distance between two lists of trajectories (disabled): every trajectory
# of the first list against every trajectory of the second, using SSPD.
# cdist = tdist.cdist(traj_list, traj_list,metric="sspd")
# print(cdist)
        for j in range(i+1,len(ends)):
            dist[i,j] = np.linalg.norm(ends.iloc[i] - ends.iloc[j])
            dist[j,i] = dist[i,j]

    with open('../data/euclidean_matrix_'+data+'.pickle','wb') as f:
        pickle.dump(dist,f,protocol=pickle.HIGHEST_PROTOCOL)

elif metric == 'EDR':
    trajs = df['POLYLINE']
    threshold = 0.1 # in km

    for i in range(len(trajs)-1):
        n_i = df['LENGTH'].iloc[i]
        for j in range(i+1,len(trajs)):
            n_j = df['LENGTH'].iloc[j]
            dist[i,j] = edr(trajs.iloc[i],trajs.iloc[j],
                            eps = threshold) * max(n_i,n_j)
            dist[j,i] = dist[i,j]

elif metric == 'DTW':
    trajs = df['POLYLINE']

    DTW = np.zeros((len(trajs),len(trajs)),dtype=np.float32)
    for i in range(len(trajs)-1):
        n_i = df['LENGTH'].iloc[i]
        for j in range(i+1,len(trajs)):
            n_j = df['LENGTH'].iloc[j]
            dist[i,j] = dtw(trajs.iloc[i],trajs.iloc[j])
            dist[j,i] = dist[i,j]

else:
    print('metric should be "EDR","DTW" or "euclidean"')