예제 #1
0
def filter_errorneous_distance(points, check_speed=60):
    '''Filter out points with unreasonably fast speeds where the next point is closer
       to the last kept point than the suspect point is.

       points: iterable of mappings with 'easting', 'northing' and 'timestamp' keys
       check_speed: speed threshold compared against (distance / seconds) * 3.6, i.e.
                    km/h provided eastings/northings are in metres (assumed, not checked)

       Yields the kept points in their original order. The first point is always kept;
       the last point is always kept as well because it has no following point to
       compare against. Empty input yields nothing.'''
    points, points_copy = itertools.tee(points)
    # push the copy one ahead for lookup; the default keeps an empty input from raising
    # StopIteration inside the generator (a RuntimeError under PEP 479)
    next(points_copy, None)

    last_p = None
    for p in points:
        # None once the look-ahead iterator is exhausted (i.e. `p` is the final point)
        next_p = next(points_copy, None)

        # do not test first point but save for testing next point
        if last_p is None:
            last_p = p
            yield p
            continue

        # find the distance and time passed since last point collected
        distance_from_last_point = tools.pythagoras((last_p['easting'], last_p['northing']),
                                                    (p['easting'], p['northing']))
        seconds_since_last_point = (p['timestamp'] - last_p['timestamp']).total_seconds()

        # toss the point if the speed is greater than `check_speed` and the distance between
        # the previous and next point is less than the distance from the last point to this one
        if next_p is not None and distance_from_last_point and seconds_since_last_point:
            kph_since_last_point = (distance_from_last_point / seconds_since_last_point) * 3.6
            distance_between_adjacent_points = tools.pythagoras((last_p['easting'], last_p['northing']),
                                                                (next_p['easting'], next_p['northing']))
            if (kph_since_last_point >= check_speed and
                    distance_between_adjacent_points < distance_from_last_point):
                continue

        last_p = p
        yield p
예제 #2
0
def metro_buffer(stations, point, distance):
    '''Look for the first station lying within `distance` of `point`.

       Iterates `stations` in order and measures each item against `point` with
       tools.pythagoras. Returns a `(found, station)` tuple: `(True, station)` for
       the first item whose distance is <= `distance`, otherwise `(False, None)`.'''
    in_range = (candidate for candidate in stations
                if tools.pythagoras(candidate, point) <= distance)
    # only the first hit matters; the generator stops measuring once it is found
    for candidate in in_range:
        return True, candidate
    return False, None
예제 #3
0
def distance_speed(trip_group):
    '''Annotate every point of a trip, in place, with 'distance', 'trip_distance'
       and 'avg_speed', and return the same `trip_group` sequence.

       'distance' is measured from the previous reference point, 'trip_distance' is
       the running total and 'avg_speed' is distance / 'break_period' (falling back
       to the previous point's speed when 'break_period' is not positive). Points
       noted as 'missing trip - less than 250m' never become the reference point
       for the following measurement.'''
    short_gap_note = 'missing trip - less than 250m'

    # special case: a lone point attached to a missing trip <250 m gets all zeros
    if len(trip_group) == 2:
        pair_notes = [member['note'] for member in trip_group]
        if short_gap_note in pair_notes and 'single point' in pair_notes:
            for member in trip_group:
                member['distance'] = 0
                member['trip_distance'] = 0
                member['avg_speed'] = 0
            return trip_group

    running_total = 0.
    anchor = None
    for position, member in enumerate(trip_group):
        here = (member['easting'], member['northing'])
        if position == 0:
            # the first point starts the trip, so nothing has been travelled yet
            member['distance'] = 0
            member['trip_distance'] = 0
            member['avg_speed'] = 0
            anchor = here
        elif anchor:
            step = tools.pythagoras(anchor, here)
            member['distance'] = step
            running_total += step
            member['trip_distance'] = running_total
            if member['break_period'] > 0:
                member['avg_speed'] = member['distance'] / member['break_period']
            else:
                # no usable elapsed time: carry the previous point's speed forward
                member['avg_speed'] = trip_group[position - 1]['avg_speed']
        if member['note'] != short_gap_note:
            anchor = here
    return trip_group
예제 #4
0
def find_metro_transfers(stations, segment_groups, buffer_m):
    '''Link consecutive segments that are separated by a metro transfer.

       Each pair of consecutive segment numbers is tested: the last (end) point of the
       first segment and the first (start) point of the second segment must each fall
       within `buffer_m` of a station (see metro_buffer), and the two stations must differ.

       stations: iterable of station coordinates accepted by metro_buffer
       segment_groups: mapping of segment number -> list of point dicts, iterated in
                       its own key order
       buffer_m: search radius around each station

       Returns a dict of trips keyed 1..n. A segment identified as the end of a
       transfer is appended to the trip before it (extending that trip's list in
       place), and every point of the combined trip is noted 'trip with metro transfer'.'''
    # create a list of tuples containing pairs of consecutive segment IDs in order to
    # test for missing underground trips between each
    potential_transfers, last_segment_num = [], None
    for segment_num in segment_groups:
        # compare against None explicitly so that a segment numbered 0 is still paired
        if last_segment_num is not None:
            potential_transfers.append((last_segment_num, segment_num))
        last_segment_num = segment_num

    # create a list of transfers found by intersecting the last and first segments
    # with each available metro station
    found_transfers = []
    for pt in potential_transfers:
        seg1_num, seg2_num = pt
        segment1, segment2 = segment_groups[seg1_num], segment_groups[seg2_num]
        segment1_end_p = (segment1[-1]['easting'], segment1[-1]['northing'])
        segment2_start_p = (segment2[0]['easting'], segment2[0]['northing'])

        intersect1, station1 = metro_buffer(stations, segment1_end_p, buffer_m)
        intersect2, station2 = metro_buffer(stations, segment2_start_p, buffer_m)

        # check for transfer and ensure it is not at same station
        if not (intersect1 and intersect2 and station1 != station2):
            continue

        # test that metro trip does not take longer than 80 minutes between stops
        # and that the user is travelling at least 0.1m/s on average
        interval = (segment2[0]['timestamp'] - segment1[-1]['timestamp']).total_seconds()
        # a zero interval has no defined speed (and would divide by zero); a negative
        # one can never satisfy the minimum speed, so neither counts as a transfer
        if not 0 < interval < 4800:
            continue
        distance = tools.pythagoras(segment1_end_p, segment2_start_p)
        if distance / interval > 0.1:
            segment_groups = labels.metro(segment_groups, pt)
            found_transfers.append(pt)

    # every segment that ends a found transfer is folded into the trip before it;
    # collecting all of them (not just the second segment of a chain of overlapping
    # transfers) keeps chains such as (1, 2), (2, 3) together as a single trip
    transfer_end_ids = {end_num for _, end_num in found_transfers}

    # link segments that have been identified as having a metro transfer
    counter = 0
    linked_trips = {}
    for num, segments in segment_groups.items():
        # append to previous segment if identified as a transfer
        if num in transfer_end_ids:
            linked_trips[counter].extend(segments)
            for segment in linked_trips[counter]:
                segment['note'] = 'trip with metro transfer'
        # otherwise create a new trip
        else:
            counter += 1
            linked_trips[counter] = segments
    return linked_trips
예제 #5
0
def summarize(rows):
    '''Condense each trip to information from its first and last GPS point and add
       attribute information.

       rows: iterable of point dicts carrying a 'trip' id; rows of the same trip are
             expected to be contiguous

       Returns `(trips, summaries)`: `trips` maps trip id -> the trip's points as
       returned by distance_speed (each point also receives a 'trip_code'), and
       `summaries` maps trip id -> one summary dict per trip.

       Raises AssertionError when labeling_hierarchy does not reduce a trip's notes
       to exactly one label.'''
    # group points into dictionaries by trip id
    trips, group, last_trip_id = {}, [], None
    for row in rows:
        trip_id = row['trip']
        # a change of id closes the running group
        if group and trip_id != last_trip_id:
            trips[last_trip_id] = distance_speed(group)
            group = []
        group.append(row)
        last_trip_id = trip_id
    # flush the final group, which no later id change would otherwise close
    if group:
        trips[last_trip_id] = distance_speed(group)

    summaries = {}
    for num, trip in trips.items():
        # named `trip_labels` so the project's `labels` module is not shadowed
        trip_labels = labeling_hierarchy(list({p['note'] for p in trip}))
        assert len(trip_labels) == 1, 'expected exactly one label per trip'
        c = trip_codes[trip_labels[0]]

        start_pt = trip[0]
        end_pt = trip[-1]

        merge_codes = set()
        for segment in trip:
            merge_codes.update(segment['merge_codes'])

        direct_distance = tools.pythagoras((start_pt['easting'], start_pt['northing']),
                                           (end_pt['easting'], end_pt['northing']))

        # override the label-derived code based on the cumulative trip distance
        if end_pt['trip_distance'] > 250 and c == 103:
            c = 1
        elif end_pt['trip_distance'] == 0:
            c = 201
        elif end_pt['trip_distance'] < 250:
            c = 202

        summaries[num] = {
            'olat': start_pt['latitude'],
            'olon': start_pt['longitude'],
            'dlat': end_pt['latitude'],
            'dlon': end_pt['longitude'],
            'trip_id': num,
            'trip_code': c,
            'start': start_pt['timestamp'],
            'end': end_pt['timestamp'],
            'direct_distance': direct_distance,
            'cumulative_distance': end_pt['trip_distance'],
            # sorted so the joined string is deterministic across runs
            'merge_codes': ', '.join(sorted(merge_codes)),
        }

        for p in trip:
            p['trip_code'] = c

    return trips, summaries
예제 #6
0
def infer_missing_trips(stations, linked_trips):
    '''Determine the missing distance and period between each pair of consecutive trips.

       stations: iterable of station coordinates accepted by metro_buffer
       linked_trips: mapping of trip number -> list of point dicts, iterated in its
                     own key order

       Returns a dict whose key correlates to linked_trips: the missing trip stored
       under a key describes the gap before the linked trip with the same key.

       Gaps classified as a 'cold start' are not returned; instead the synthetic point
       is inserted at the front of the following trip, mutating `linked_trips`.'''
    missing_trips = {}
    prior_trip = None
    for num, trip in linked_trips.items():
        if not prior_trip:
            prior_trip = trip
            continue

        prior_point = (prior_trip[-1]['easting'], prior_trip[-1]['northing'])
        first_point = (trip[0]['easting'], trip[0]['northing'])
        spatial_gap = tools.pythagoras(prior_point, first_point)
        prior_timestamp = prior_trip[-1]['timestamp']
        timestamp = trip[0]['timestamp']
        # total_seconds() rather than .seconds: the latter drops whole days, so a
        # gap of 25 hours would otherwise be reported as 1 hour
        period = float((timestamp - prior_timestamp).total_seconds())

        # synthetic point located at the end of the prior trip
        missing = {
            'id': prior_trip[-1]['id'],
            'latitude': prior_trip[-1]['latitude'],
            'longitude': prior_trip[-1]['longitude'],
            'easting': prior_trip[-1]['easting'],
            'northing': prior_trip[-1]['northing'],
            'timestamp': prior_timestamp,
            'next_time': timestamp,
            'distance': spatial_gap,
            'break_period': period,
            'note': '',
            'merge_codes': []
        }

        if spatial_gap < 250:
            missing['note'] = 'missing trip - less than 250m'
            missing['merge_codes'].append('missing trip - less than 250m')
            missing_trips[num] = missing
        else:
            # check for missing trips to/from a metro
            intersect1, station1 = metro_buffer(stations, prior_point, 300)
            intersect2, station2 = metro_buffer(stations, first_point, 300)
            if intersect1 and intersect2 and station1 != station2:
                missing['note'] = 'missing trip - metro'
                missing['merge_codes'].append('missing trip - metro')
                missing_trips[num] = missing

            # next, check if missing trip is below the cold start threshold
            elif spatial_gap <= 750:
                missing['note'] = 'cold start'
                missing['prev_time'] = prior_timestamp
                missing['timestamp'] = timestamp
                missing['merge_codes'].append('cold start')
                # becomes the first point of the current trip instead of a missing trip
                trip.insert(0, missing)
            # if no criterion matches, mark as a vanilla missing trip
            else:
                missing['note'] = 'missing trip'
                missing['merge_codes'].append('missing trip')
                missing_trips[num] = missing
        prior_trip = trip
    return missing_trips
예제 #7
0
def filter_single_points(linked_trips):
    '''Detect trips consisting of a single point and attach each one to whichever
       neighbouring trip (previous or next) is spatially closer.

       linked_trips: mapping of integer trip number -> list of point dicts; neighbours
                     are looked up as `num - 1` and `num + 1`

       Returns a new dict of trips in which the absorbed single-point trips are gone
       and the following keys are shifted down to close the gaps.

       NOTE(review): the intended limits of a 20 minute time period and 150 meter
       radius are only declared (`max_time`, `max_dist`); nothing in this function
       reads them, so no time or distance limit is actually enforced here.'''

    # work on a copy (via tools.quick_deepcopy) so points can be relabelled and moved
    test_trips = tools.quick_deepcopy(linked_trips)
    cleaned_trips = {}
    # number of single-point trips absorbed so far; used to renumber the output keys
    offset = 0
    max_time = 20   # unused in this function
    max_dist = 150  # unused in this function
    for idx, (num, trip) in enumerate(test_trips.items()):
        # check for single points that have been isolated from other segments and
        # calculate the distance to the previous trip's end and the next trip's start
        if (idx != 0) and (num + 1 in linked_trips) and (num - 1 in linked_trips) and (len(trip) == 1):
            # only reached when both a previous and a next trip exist, so the first
            # and last trips are never treated as single points
            point = trip[0]
            point['note'] = 'single point'
            point_loc = (point['easting'], point['northing'])
            point_dt = point['timestamp']  # unused below

            # neighbour geometry is read from the original `linked_trips`, not the copy
            last_trip_num = num - 1
            last_trip_end = linked_trips[last_trip_num][-1]
            last_trip_pt = (last_trip_end['easting'], last_trip_end['northing'])
            last_trip_dist = tools.pythagoras(last_trip_pt, point_loc)

            next_trip_num = num + 1
            next_trip_start = linked_trips[next_trip_num][0]
            next_trip_pt = (next_trip_start['easting'], next_trip_start['northing'])
            next_trip_dist = tools.pythagoras(point_loc, next_trip_pt)

            # ties go to the previous trip; the point takes over the timestamp of the
            # neighbouring point it is attached to
            if last_trip_dist <= next_trip_dist:
                point['timestamp'] = last_trip_end['timestamp']
                labels.single_point(point, cleaned_trips[num - offset - 1], 'append')
                cleaned_trips[num - offset - 1].append(point)
            else:
                point['timestamp'] = next_trip_start['timestamp']
                # the next trip has not been visited yet, so it is modified in the
                # working copy and picked up on a later iteration
                labels.single_point(point, test_trips[num + 1], 'insert')
                test_trips[num + 1].insert(0, point)
            offset += 1
        else:
            cleaned_trips[num - offset] = trip
    return cleaned_trips
def get_distance(pt1, pt2):
    '''Return the planar distance between two points given as dicts with
       'latitude' and 'longitude' keys.

       Each point is converted with utm.from_latlon and the resulting
       (easting, northing) pairs are passed to pythagoras.
       NOTE(review): the zone values returned by the conversion are discarded, so
       this presumably expects both points to fall in the same UTM zone - confirm.'''
    projected = []
    for pt in (pt1, pt2):
        easting, northing, _, _ = utm.from_latlon(pt['latitude'], pt['longitude'])
        projected.append((easting, northing))
    start, end = projected
    return pythagoras(start, end)