Esempio n. 1
0
def draw_origin_paths():
    """Plot every trip's raw polyline and save the figure as a PNG.

    Each row returned by ``return_data_rows()`` contributes one line, drawn
    with the points' ``lat`` values on the x axis and ``long`` values on the
    y axis. The figure is written to
    ``Config.assets_path + 'origin-paths.png'`` at 500 dpi.
    """
    for data in return_data_rows():
        coordinates = [[point.lat, point.long] for point in data.polyline]
        # zip(*...) transposes the [lat, long] pairs into the two coordinate
        # sequences that plt.plot expects as positional arguments.
        plt.plot(*zip(*coordinates), linewidth=0.5)
    # The title is identical for every trip, so set it once after plotting
    # instead of on each iteration; this also titles the figure when there
    # are no rows at all.
    plt.title("Raw-paths")
    # and finally save it with high resolution
    plt.savefig(Config.assets_path + 'origin-paths.png', dpi=500)
Esempio n. 2
0
def do_apriori(min_support):
    """Run Apriori over the trips' location lists and print result and timing.

    Every row from ``return_data_rows()`` becomes one transaction: a list of
    ``"lat,long"`` strings, where each coordinate is the first 8 characters of
    its ``str()`` form. ``apriori`` is then called with
    ``min_support / 10`` and ``min_confidence=1``; the materialized item sets
    and the elapsed wall-clock seconds are printed.

    Args:
        min_support: Support value, divided by 10 before being handed to
            ``apriori``.
    """
    transactions = [
        [
            ",".join((str(spot.lat)[:8], str(spot.long)[:8]))
            for spot in trip.polyline
        ]
        for trip in return_data_rows()
    ]

    started = datetime.now()
    # list() forces the whole computation to happen inside the timed region.
    item_sets = list(
        apriori(transactions, min_support=min_support / 10, min_confidence=1)
    )
    elapsed = datetime.now() - started

    print(item_sets)
    print("apriori longs : ", elapsed.total_seconds(), "seconds")
Esempio n. 3
0
def do_eclat(min_support):
    """Run Eclat over the trips' location tuples and print result and timing.

    Every row from ``return_data_rows()`` becomes one transaction: a tuple of
    ``"lat,long"`` strings, where each coordinate is the first 8 characters of
    its ``str()`` form. ``eclat`` is called with ``supp=min_support * 10`` and
    its result is sorted in place, descending, by the second element of each
    entry. The timed region covers both the ``eclat`` call and the sort.

    Args:
        min_support: Support value, multiplied by 10 before being handed to
            ``eclat``.
    """
    transactions = [
        tuple(
            ",".join((str(spot.lat)[:8], str(spot.long)[:8]))
            for spot in trip.polyline
        )
        for trip in return_data_rows()
    ]

    started = datetime.now()
    rules = eclat(transactions, supp=min_support * 10)
    rules.sort(key=lambda rule: rule[1], reverse=True)
    finished = datetime.now()

    print(rules)
    elapsed = finished - started
    print("eclat longs : ", elapsed.total_seconds(), "seconds")
Esempio n. 4
0
def do_fp_growth(min_support):
    """Run FP-growth over the trips' location tuples and print result and timing.

    Every row from ``return_data_rows()`` becomes one transaction: a tuple of
    ``"lat,long"`` strings, where each coordinate is the first 8 characters of
    its ``str()`` form. Each frequent itemset is printed on its own line,
    followed by the elapsed wall-clock seconds.

    Args:
        min_support: Multiplied by the number of transactions to obtain the
            ``minimum_support`` passed to ``find_frequent_itemsets``
            (presumably a fraction of the transaction count -- confirm
            against the callers).
    """
    transactions = [
        tuple(
            str(location.lat)[:8] + "," + str(location.long)[:8]
            for location in row.polyline
        )
        for row in return_data_rows()
    ]
    absolute_support = min_support * len(transactions)

    start_time = datetime.now()
    # Materialize the result inside the timed region: if
    # find_frequent_itemsets is lazy (a generator), the mining would
    # otherwise run during the print loop below and the reported duration
    # would only cover creating the generator.
    itemsets = list(
        find_frequent_itemsets(transactions,
                               minimum_support=absolute_support))
    end_time = datetime.now()

    for item in itemsets:
        print(item)
    diff = end_time - start_time
    print("fp_growth longs : ", diff.total_seconds(), "seconds")
Esempio n. 5
0
def check_jump_data(max_distance=417):
    """Chart, per trip, how many consecutive-point jumps exceed a threshold.

    For every row from ``return_data_rows()`` the distance between each pair
    of neighbouring polyline points is computed with ``calculate_distance``
    and multiplied by 1000. Jumps strictly greater than ``max_distance`` are
    counted per ``trip_id``. The counts are drawn, in ascending order, as a
    bar chart that is saved to ``long_distance.png`` (500 dpi) and shown.

    Args:
        max_distance: Threshold compared against
            ``calculate_distance(...) * 1000`` (presumably metres, if the
            helper returns kilometres -- confirm). Defaults to 417, the value
            that was previously hard-coded.
    """
    long_trip_dict = {}
    for row in return_data_rows():
        polyline_locations = row.polyline
        for i in range(len(polyline_locations) - 1):
            distance = calculate_distance(polyline_locations[i],
                                          polyline_locations[i + 1]) * 1000
            if distance > max_distance:
                long_trip_dict[row.trip_id] = (
                    long_trip_dict.get(row.trip_id, 0) + 1)

    # Ascending by number of long jumps.
    sorted_long_trip_dict = sorted(long_trip_dict.items(),
                                   key=operator.itemgetter(1))
    from pylab import rcParams
    # Wide, short figure; note this changes the global rcParams.
    rcParams['figure.figsize'] = 20, 5
    performance = [count for _, count in sorted_long_trip_dict]
    y_pos = np.arange(len(performance))
    plt.bar(y_pos, performance, align='center', alpha=0.5, linewidth=0.5)
    # NOTE(review): ticks are labelled with bar positions, not trip ids, even
    # though the x axis is titled 'Trip ID' -- confirm whether the ids should
    # be passed as tick labels.
    plt.xticks(y_pos)
    plt.ylabel('Count of long distance')
    plt.xlabel('Trip ID')
    plt.title('long Distance between two locations')
    plt.savefig('long_distance.png', dpi=500)
    plt.show()
Esempio n. 6
0
def main():
    """Fit and plot a regression of segment distance against trip time of day.

    For every row from ``return_data_rows()`` the distance between each pair
    of neighbouring polyline points (``calculate_distance(...) * 1000``) is
    paired with the trip's time of day, encoded as the integer ``HHMM``
    derived from ``datetime.fromtimestamp(row.timestamp)`` (local time).
    The sums required by ``Regression.solve_equ`` are computed, predictions
    are made for the same x values, and the samples (red scatter) and the
    predictions (blue line) are shown.
    """
    distance_list = []
    hour_and_min = []
    for row in return_data_rows():
        polyline_locations = row.polyline
        segment_count = len(polyline_locations) - 1
        if segment_count < 1:
            # A polyline with fewer than two points has no segments and
            # therefore contributes no samples.
            continue
        # The timestamp belongs to the row, so convert it once per trip
        # rather than once per segment.
        trip_datetime = datetime.fromtimestamp(row.timestamp)
        # Encode the time as HHMM arithmetically. Concatenating str(hour) and
        # str(minute) drops the minute's leading zero, so 10:05 would become
        # 105 and 01:05 would become 15.
        time_of_day = trip_datetime.hour * 100 + trip_datetime.minute
        for i in range(segment_count):
            distance = calculate_distance(polyline_locations[i],
                                          polyline_locations[i + 1])
            distance_list.append(distance * 1000)
            hour_and_min.append(time_of_day)

    x = hour_and_min
    print(hour_and_min)
    y = distance_list

    # The Regression helpers are called on the class itself, not an instance.
    r = Regression

    sum_x = r.find_sum(x, 1)
    sum_y = r.find_sum(y, 1)
    sum_x2 = r.find_sum(x, 2)
    sum_xy = r.find_mul_sum(x, y)

    res = r.solve_equ(sum_x, sum_x2, sum_y, sum_xy)

    y_pred = r.predict(x, res)

    plt.scatter(x, y, color='red')
    plt.plot(x, y_pred, color='blue')
    plt.title('Distance per day time Regression')
    plt.xlabel('Time (from 00:00 to 24:00)')
    plt.ylabel('Distance')
    plt.show()