コード例 #1
0
def class_by_time(svm_data, grid_2_id):
    """Fit a linear SVM that predicts a trip's destination id.

    Every item of ``svm_data`` is read as ``item[0]`` (the trip, a sequence
    of grid codes passed to ``gh._decode_c2i``) and ``item[1]`` (the time,
    an iterable whose elements are converted with ``int``).

    The feature vector of a trip is its integer time fields followed by the
    decoded lat/lon of the first cell and the decoded lat/lon of the cell
    located at a random 40-70 percent of the trip length.  The label is
    ``grid_2_id[trip[-1]]``.

    The fitted classifier is dumped to ``../../data/lin_clf.model`` with
    ``joblib`` and returned.
    """
    print('svm')
    samples = []
    targets = []
    for record in svm_data:
        route, stamp = record[0], record[1]

        # Time features.
        row = [int(field) for field in stamp]

        # Percentage of the trip that is treated as already travelled.
        percent = random.randint(40, 70)

        start_lat, start_lon, _, _ = gh._decode_c2i(route[0])
        cut = int(len(route) * float(percent) / 100.0)
        cut_lat, cut_lon, _, _ = gh._decode_c2i(route[cut])
        row.extend((start_lat, start_lon, cut_lat, cut_lon))

        print(row)
        samples.append(row)
        targets.append(grid_2_id[route[-1]])

    model = svm.LinearSVC()
    model.fit(samples, targets)

    joblib.dump(model, "../../data/lin_clf.model")
    print('finish svm')
    return model
コード例 #2
0
def compute_MT(A, M, grid_2_id, id_2_grid):
    """Build the matrix ``MT`` of distance-dependent transition scores.

    Ordered cell pairs ``(i, j)`` with ``i != j`` are grouped by the L1
    distance between their decoded grid coordinates.  The distinct distances
    are then visited in ascending order; at each one a running power of
    ``M`` is multiplied by ``M`` once more and then by
    ``A[int(distance * 0.2)]``, and the entries of that product at the pairs
    of the current group are copied into ``MT``.

    Args:
        A: indexable by a non-negative int; each ``A[k]`` is used as the
            right operand of a matrix product with a power of ``M``.
        M: square matrix; its first dimension sets the size of ``MT``.
        grid_2_id: unused, kept so existing callers keep working.
        id_2_grid: maps a matrix index to a grid code accepted by
            ``gh._decode_c2i``.

    Returns:
        A square ``numpy`` array as large as ``M``.  The diagonal is never
        written and stays 0.
    """
    print('computing MT')
    size = np.shape(M)[0]
    MT = np.zeros((size, size))

    # Decode every cell once instead of twice per ordered pair.
    coords = {}
    for cell in id_2_grid:
        lat, lon, _, _ = gh._decode_c2i(id_2_grid[cell])
        coords[cell] = (lat, lon)

    pairs_by_distance = defaultdict(list)
    for i in id_2_grid:
        lat1, lon1 = coords[i]
        for j in id_2_grid:
            if i == j:
                continue
            lat2, lon2 = coords[j]
            distance = abs(lat1 - lat2) + abs(lon1 - lon2)
            pairs_by_distance[distance].append((i, j))

    Mpow = gnp.garray(np.eye(size))
    M = gnp.garray(M)
    # Mpow is cumulative, so the distances have to be visited from smallest
    # to largest; plain dict iteration gives no such guarantee.
    # NOTE(review): Mpow is M**k for the k-th smallest distance, which equals
    # M**distance only if the distances are exactly 1, 2, 3, ... - confirm.
    for distance in sorted(pairs_by_distance):
        Mpow = Mpow.dot(M)
        Lde = int(distance * 0.2)
        Mtemp = Mpow.dot(A[Lde])
        for row, col in pairs_by_distance[distance]:
            MT[row, col] = Mtemp[row, col]

    return MT
コード例 #3
0
def _subsyn_rank(scores):
    """Return the keys of ``scores`` as ints, highest score first.

    Ties on the score are broken by the larger key first.
    """
    ordered = sorted(scores.items(), key=lambda kv: (kv[1], kv[0]),
                     reverse=True)
    return [int(key) for key, _ in ordered]


def _subsyn_report(hits, total_km, total):
    """Print hit counts, then hit rates and mean distance over ``total``."""
    print('%s %s %s' % tuple(hits))
    if total:
        # Rates and the mean are undefined without any test destination.
        print('%s %s %s' % tuple(h / total for h in hits))
        print(total_km * 1.0 / total)


def subsyn(test_data, trip_data, test_des, M, MT, des, test_time, id_2_grid):
    """Evaluate subsyn and an SVM-boosted variant of it, printing the scores.

    For the query ``test_data[idx]`` (a sequence of cell ids) every
    destination ``j`` of ``des`` with ``MT[first, j] > 0`` is scored as
    ``MT[last, j] / MT[first, j] * des[j]``.  The improved variant uses
    ``0.7 * score`` and adds ``0.3`` to the destination returned by
    ``svm_time.predict_by_svm``.  Destinations are ranked by score for each
    variant, and the function accumulates

    * whether ``test_des[idx]`` is among the top 1 / 3 / 5 destinations, and
    * the L1 distance, in decoded grid coordinates, between
      ``test_des[idx]`` and the top-ranked destination.

    Queries for which no destination can be scored are skipped; they still
    count in the denominators because rates and means are taken over
    ``len(test_des)``.

    Args:
        test_data: iterable of query trajectories (non-empty sequences of
            cell ids usable as indices into ``MT``).
        trip_data: unused, kept so existing callers keep working.
        test_des: true destination id per query; must be convertible with
            ``int``.
        M: unused, kept so existing callers keep working.
        MT: 2-D table indexed as ``MT[from_id, to_id]``.
        des: mapping from destination id to its weight.
        test_time: per-query iterable of time fields convertible with
            ``int``.
        id_2_grid: maps a cell id to a grid code accepted by
            ``gh._decode_c2i``.

    Returns:
        None.  Three lines are printed per variant (plain first): hit
        counts, hit rates, mean distance.
    """
    print('subsyn')

    top_ks = (1, 3, 5)
    hits = [0.0, 0.0, 0.0]
    total_km = 0
    ghits = [0.0, 0.0, 0.0]
    gtotal_km = 0
    for idx, datas in enumerate(test_data):
        start, current = datas[0], datas[-1]

        # P(T^p | d in n_j) for every destination reachable from the start.
        Ptpnj = {}
        for j in des:
            if MT[start, j] <= 0:
                continue
            try:
                Ptpnj[j] = MT[current, j] / MT[start, j]
            except Exception as exc:
                # Best effort: drop this candidate, but say why.
                print('subsyn: skipping destination %s: %s' % (j, exc))

        if not Ptpnj:
            # Nothing to rank: the query counts as a miss and adds no
            # distance instead of failing on an empty ranking.
            continue

        # Improved subsyn: ask the SVM for its own destination guess.
        tmp_time = [int(field) for field in test_time[idx]]
        lat1, lon1, _, _ = gh._decode_c2i(id_2_grid[start])
        lat2, lon2, _, _ = gh._decode_c2i(id_2_grid[current])
        predictdata = tmp_time + [lat1, lon1, lat2, lon2]
        label = svm_time.predict_by_svm(data=[predictdata])

        P = {}
        gP = {}
        for j in Ptpnj:
            P[j] = Ptpnj[j] * des[j]
            gP[j] = P[j] * 0.7
            if label[0] == j:
                gP[j] += 0.3

        Q = _subsyn_rank(P)
        gQ = _subsyn_rank(gP)

        # Use the same int-normalised id for the hit test and the lookup.
        target = int(test_des[idx])
        true_lat, true_lon, _, _ = gh._decode_c2i(id_2_grid[target])

        # Coverage (top-1 / top-3 / top-5) of both rankings.
        for ranked, score in ((Q, hits), (gQ, ghits)):
            for pos, k in enumerate(top_ks):
                if target in ranked[:k]:
                    score[pos] += 1

        # Manhattan distance error of the best guess of each ranking.
        lat2, lon2, _, _ = gh._decode_c2i(id_2_grid[Q[0]])
        total_km += abs(true_lat - lat2) + abs(true_lon - lon2)

        lat2, lon2, _, _ = gh._decode_c2i(id_2_grid[gQ[0]])
        gtotal_km += abs(true_lat - lat2) + abs(true_lon - lon2)

    total = len(test_des)
    _subsyn_report(hits, total_km, total)
    _subsyn_report(ghits, gtotal_km, total)
コード例 #4
0
def ZMDB(test_data, trip_data, test_des, id_2_grid):
    """Evaluate the ZMDB baseline and print its scores.

    For each query the value of ``testINtrip(query, trip)`` is added, for
    every training trip, to a total and to the counter of the trip's last
    cell.  Destinations are ranked by ``count / total``, and the function
    accumulates

    * whether ``test_des[idx]`` is among the top 1 / 3 / 5 destinations, and
    * the L1 distance, in decoded grid coordinates, between
      ``test_des[idx]`` and the top-ranked destination.

    Queries whose total is not positive are skipped; they still count in the
    denominators because rates and means are taken over ``len(test_des)``.
    An exception raised while processing the queries stops the loop, is
    reported on stdout, and the scores gathered so far are still printed.

    Args:
        test_data: iterable of query trajectories.
        trip_data: iterable of training trips; ``trip[-1]`` is the
            destination id.
        test_des: true destination id per query; must be convertible with
            ``int``.
        id_2_grid: maps a cell id to a grid code accepted by
            ``gh._decode_c2i``.

    Returns:
        None.  Prints hit counts, hit rates and mean distance.
    """
    print('ZMDB')
    hits = [0.0, 0.0, 0.0]
    total_km = 0
    try:
        for idx, data in enumerate(test_data):
            # des_num[j]: matches of this query among trips ending in j;
            # num: matches of this query among all trips.
            des_num = defaultdict(int)
            num = 0
            for trip in trip_data:
                tmp = testINtrip(data, trip)
                num += tmp
                des_num[trip[-1]] += tmp

            if num <= 0:
                continue

            # P(n^j | T^p) for every destination seen, best first.
            P = dict((dest, count * 1.0 / num)
                     for dest, count in des_num.items())
            ordered = sorted(P.items(), key=lambda kv: (kv[1], kv[0]),
                             reverse=True)
            Q = [int(k) for k, _ in ordered]

            # Use the same int-normalised id for the hit test and the lookup.
            target = int(test_des[idx])

            # Coverage (top-1 / top-3 / top-5).
            for pos, k in enumerate((1, 3, 5)):
                if target in Q[:k]:
                    hits[pos] += 1

            # Manhattan distance error of the best guess.
            lat1, lon1, _, _ = gh._decode_c2i(id_2_grid[target])
            lat2, lon2, _, _ = gh._decode_c2i(id_2_grid[Q[0]])
            total_km += abs(lat1 - lat2) + abs(lon1 - lon2)
    except Exception:
        # Report and fall through so the partial scores are still printed;
        # KeyboardInterrupt / SystemExit are deliberately not caught.
        s = sys.exc_info()
        print("Error '%s' happened on line %d" % (s[1], s[2].tb_lineno))

    print('%s %s %s' % tuple(hits))
    total = len(test_des)
    if total:
        # Rates and the mean are undefined without any test destination.
        print('%s %s %s' % tuple(h / total for h in hits))
        print(total_km * 1.0 / total)