Example 1
def test_config_must_be_dag():
    config = ptg.config.default_config()

    assert config.has_edge('routes.txt', 'trips.txt')

    # Make a cycle
    config.add_edge('trips.txt', 'routes.txt')

    path = zip_file('amazon-2017-08-06')
    with pytest.raises(AssertionError, match='Config must be a DAG'):
        ptg.feed(path, config=config)
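Note: the config above behaves like a networkx directed graph, so the DAG property can also be checked directly. A minimal sketch; the networkx typing is an inference from the has_edge/add_edge methods used in the test:

import networkx as nx
import partridge as ptg

config = ptg.config.default_config()
# default_config() exposes graph methods like has_edge/add_edge (see above),
# consistent with a networkx directed graph.
assert nx.is_directed_acyclic_graph(config)  # must hold before ptg.feed() accepts it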
Example 2
def test_filtered_columns(path):
    service_ids_by_date = ptg.read_service_ids_by_date(path)
    service_ids = list(service_ids_by_date.values())[0]

    feed_full = ptg.feed(path)
    feed_view = ptg.feed(path, view={'trips.txt': {'service_id': service_ids}})
    feed_null = ptg.feed(path,
                         view={'trips.txt': {
                             'service_id': 'never-match'
                         }})

    assert set(feed_full.trips.columns) == set(feed_view.trips.columns)
    assert set(feed_full.trips.columns) == set(feed_null.trips.columns)
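A hedged follow-up under the same fixtures: the view filters rows, never columns, so the non-matching view should yield an empty trips table while the column set stays fixed.

# Sketch, assuming the same feed_full/feed_view/feed_null as above.
assert len(feed_null.trips) == 0                     # 'never-match' drops every row
assert len(feed_view.trips) <= len(feed_full.trips)  # a view can only narrow the rows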
Example 3
def test_add_shape_dist_traveled(zip_file, scenario_results, scenario_date):
    service_ids_by_date = ptg.read_service_ids_by_date(zip_file)
    service_ids = service_ids_by_date[scenario_date]

    feed = ptg.feed(zip_file,
                    view={
                        'trips.txt': {
                            'service_id': service_ids,
                        },
                    })

    stop_times_df = Trip.add_shape_dist_traveled(feed.stop_times, feed.stops)
    stop_times_df.sort_values(
        [Trip.TRIPS_COLUMN_TRIP_ID, Trip.STOPTIMES_COLUMN_STOP_SEQUENCE],
        inplace=True)

    for trip_id, expected_array in scenario_results.items():
        # Log the computed distances for this trip before asserting.
        print(stop_times_df[stop_times_df[
            Trip.TRIPS_COLUMN_TRIP_ID] == trip_id][
                Trip.STOPTIMES_COLUMN_SHAPE_DIST_TRAVELED].values.tolist())
        np.testing.assert_allclose(
            stop_times_df[stop_times_df[Trip.TRIPS_COLUMN_TRIP_ID] == trip_id][
                Trip.STOPTIMES_COLUMN_SHAPE_DIST_TRAVELED].values,
            expected_array,
            rtol=0,
            atol=0.00001)
Example 4
def get_representative_feed(file_loc: str, day_type: str = 'busiest'):
    # Extract service ids and then trip counts by those dates
    service_ids_by_date = ptg.read_service_ids_by_date(file_loc)
    trip_counts_by_date = ptg.read_trip_counts_by_date(file_loc)

    # Make sure we have some valid trip counts; otherwise, error out
    if not trip_counts_by_date:
        raise InvalidGTFS('No valid trip counts by date '
                          'were identified in GTFS.')

    # At this point, different methods can be implemented to help select how
    # to pick which date/schedule id to use
    if day_type == 'busiest':
        # Choose the service id that has the most trips associated with it
        (selected_date, trip_count) = max(trip_counts_by_date.items(),
                                          key=lambda p: p[1])
    else:
        raise NotImplementedError('Unsupported day type string supplied.')

    log('Selected date: {}'.format(selected_date))
    log('Number of trips on that date: {}'.format(trip_count))

    all_service_ids = '\n\t'.join(service_ids_by_date[selected_date])
    log('\nAll related service IDs: \n\t{}'.format(all_service_ids))

    sub = service_ids_by_date[selected_date]
    feed_query = {'trips.txt': {'service_id': sub}}
    return ptg.feed(file_loc, view=feed_query)
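A short usage sketch for the function above; the archive path is a placeholder, not a file from these examples.

# Hypothetical usage: select the busiest service day from a local GTFS archive.
feed = get_representative_feed('data/gtfs.zip')
print('{} trips on the selected date'.format(len(feed.trips)))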
Example 5
def test_extract_routes(path):
    fd = ptg.feed(path)

    agencies = fd.agency
    assert len(agencies) == 3

    routes = fd.routes
    assert len(routes) == 14

    route_ids = [routes.iloc[0].route_id]
    agency_ids = set(fd.routes[fd.routes.route_id.isin(route_ids)].agency_id)
    trip_ids = set(fd.trips[fd.trips.route_id.isin(route_ids)].trip_id)
    stop_ids = set(fd.stop_times[fd.stop_times.trip_id.isin(trip_ids)].stop_id)

    assert len(agency_ids)
    assert len(trip_ids)
    assert len(stop_ids)

    try:
        tmpdir = tempfile.mkdtemp()
        outfile = os.path.join(tmpdir, 'test.zip')

        result = ptg.extract_routes(path, outfile, route_ids)
        assert result == outfile

        new_fd = ptg.feed(outfile)
        assert list(new_fd.routes.route_id) == route_ids
        assert set(new_fd.agency.agency_id) == agency_ids
        assert set(new_fd.trips.trip_id) == trip_ids
        assert set(new_fd.stop_times.trip_id) == trip_ids
        assert set(new_fd.stops.stop_id) == stop_ids

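        # Collect every table that is non-empty in the original feed, then
        # check that the extracted feed preserves each table's column set.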
        nodes = []
        for node in fd.config.nodes():
            df = fd.get(node)
            if not df.empty:
                nodes.append(node)

        assert len(nodes)

        for node in nodes:
            original_df = fd.get(node)
            new_df = new_fd.get(node)
            assert set(original_df.columns) == set(new_df.columns)

    finally:
        shutil.rmtree(tmpdir)
Example 6
def test_read_file(path, dates, shapes):
    service_ids_by_date = ptg.read_service_ids_by_date(path)

    service_ids = {
        service_id
        for date in dates if date in service_ids_by_date
        for service_id in service_ids_by_date[date]
    }

    if service_ids:
        feed = ptg.feed(path, view={'trips.txt': {'service_id': service_ids}})
    else:
        feed = ptg.feed(path)

    for filename, shape in shapes.items():
        assert feed.get(filename).shape == shape, \
            '{}/{} dataframe shape was incorrect'.format(path, filename)
Example 7
def get_partridge_feed_by_date(zip_path, date):
    service_ids_by_date = ptg.read_service_ids_by_date(zip_path)
    service_ids = service_ids_by_date[date]

    feed = ptg.feed(zip_path, view={
        'trips.txt': {
            'service_id': service_ids,
        },
    })
    return feed
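Usage sketch; both arguments are placeholders (the date simply echoes the fixture name in Example 1).

import datetime

# Hypothetical call: build a feed restricted to one service day.
feed = get_partridge_feed_by_date('gtfs.zip', datetime.date(2017, 8, 6))
print(len(feed.trips))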
Example 8
def gtfs_feed(zip_file, network_date):
    from fasttrips.Assignment import Assignment
    from fasttrips.Util import Util

    Assignment.NETWORK_BUILD_DATE = network_date
    service_ids_by_date = ptg.read_service_ids_by_date(zip_file)
    service_ids = service_ids_by_date[network_date]
    feed = ptg.feed(zip_file,
                    config=Util.get_fast_trips_config(),
                    view={
                        'trips.txt': {
                            'service_id': service_ids
                        },
                    })
    yield feed
Example 9
def get_gtfs_feed(network, network_date):
    from fasttrips.Assignment import Assignment
    from fasttrips.Util import Util

    Assignment.NETWORK_BUILD_DATE = network_date

    service_ids_by_date = ptg.read_service_ids_by_date(network)
    service_ids = service_ids_by_date[network_date]
    feed = ptg.feed(network, config=Util.get_fast_trips_config(), view={
        'trips.txt': {
            'service_id': service_ids
        },
    })
    return feed
Example 10
    def read_input_files(self):
        """
        Reads in the input network and demand files and initializes the relevant data structures.
        """
        self.performance.record_step_start(0, 0, 0, "read_input_files")

        # Read the gtfs files first
        FastTripsLogger.info("Reading GTFS schedule")

        service_ids_by_date = ptg.read_service_ids_by_date(
            Assignment.INPUT_NETWORK_ARCHIVE)
        service_ids = service_ids_by_date[Assignment.NETWORK_BUILD_DATE]
        gtfs_feed = ptg.feed(Assignment.INPUT_NETWORK_ARCHIVE,
                             config=Util.get_fast_trips_config(),
                             view={
                                 'trips.txt': {
                                     'service_id': service_ids
                                 },
                             })
        # Read Stops (gtfs-required)
        self.stops = Stop(Assignment.INPUT_NETWORK_ARCHIVE,
                          Assignment.OUTPUT_DIR, gtfs_feed,
                          Assignment.NETWORK_BUILD_DATE)

        # Read routes, agencies, fares
        self.routes = Route(Assignment.INPUT_NETWORK_ARCHIVE,
                            Assignment.OUTPUT_DIR, gtfs_feed,
                            Assignment.NETWORK_BUILD_DATE, self.stops)

        # Read Transfers
        self.transfers = Transfer(Assignment.INPUT_NETWORK_ARCHIVE,
                                  Assignment.OUTPUT_DIR, gtfs_feed)

        # Read trips, vehicles, calendar and stoptimes
        self.trips = Trip(Assignment.INPUT_NETWORK_ARCHIVE,
                          Assignment.OUTPUT_DIR, gtfs_feed,
                          Assignment.NETWORK_BUILD_DATE, self.stops,
                          self.routes, Assignment.PREPEND_ROUTE_ID_TO_TRIP_ID)

        # read the TAZs into a TAZ instance
        self.tazs = TAZ(Assignment.OUTPUT_DIR, gtfs_feed,
                        Assignment.NETWORK_BUILD_DATE, self.stops,
                        self.transfers, self.routes)

        # Read the demand into passenger_id -> passenger instance
        self.passengers = Passenger(Assignment.INPUT_DEMAND_DIR,
                                    Assignment.OUTPUT_DIR,
                                    Assignment.NETWORK_BUILD_DATE, self.stops,
                                    self.routes,
                                    Assignment.CAPACITY_CONSTRAINT)
Example 11
def get_partridge_feed_by_date(zip_path, date):
    service_ids_by_date = ptg.read_service_ids_by_date(
        zip_path)  # , encoding='utf-8')
    service_ids = service_ids_by_date[date]

    feed = ptg.feed(
        zip_path,
        view={
            'trips.txt': {
                'service_id': service_ids,
            },
        },
        # encoding='utf-8' # CUSTOM VERSION, NOT YET PUSHED
    )
    return feed
Example 12
def gtfs_feed(network):
    from fasttrips.Assignment import Assignment
    from fasttrips.Util import Util

    network_date = network[2]
    Assignment.NETWORK_BUILD_DATE = network_date
    network_dir = os.path.join(HOME_DIR, network[0], "networks", network[1])

    service_ids_by_date = ptg.read_service_ids_by_date(network_dir)
    service_ids = service_ids_by_date[network_date]
    feed = ptg.feed(network_dir,
                    config=Util.get_fast_trips_config(),
                    view={
                        'trips.txt': {
                            'service_id': service_ids
                        },
                    })
    yield feed
Example 13
def prepare_partridge_feed(date: datetime.date,
                           gtfs_file_full_path: str,
                           filtered_feeds_directory=configuration.files.full_paths.filtered_feeds):

    if configuration.write_filtered_feed:
        filtered_gtfs_path = join(filtered_feeds_directory, basename(gtfs_file_full_path))

        logging.info(f'Filtering gtfs feed for {date} from {gtfs_file_full_path} into {filtered_gtfs_path}')
        write_filtered_feed_by_date(gtfs_file_full_path, date, filtered_gtfs_path)

        logging.info(f'Reading filtered feed for file from path {filtered_gtfs_path}')
        feed = ptg.feed(filtered_gtfs_path)
    else:
        logging.info(f'Creating daily partridge feed for {date} from {gtfs_file_full_path}')
        feed = get_partridge_feed_by_date(gtfs_file_full_path, date)

    logging.debug(f'Finished creating daily partridge feed for {date} from {gtfs_file_full_path}')
    return feed
    trn_stop_labels["TM2 Node"] = trn_stop_labels["TM2 Node"].astype(int)

    for operator in ["Caltrain", "San Francisco MUNI","Vallejo Baylink Ferry", "Blue and Gold", "Amtrak Capitol Cor. & Reg. Svc", "BART",
	                 "ACE", "Golden Gate Ferry", "Alameda Harbor Bay Ferry","Alameda/Oakland Ferry","Vallejo Baylink Ferry", "Santa Clara VTA", "Blue and Gold"]:
        Wrangler.WranglerLogger.info("Processing operator %s" % operator)

        # get the stop labels for this operator
        operator_stop_labels = trn_stop_labels.loc[trn_stop_labels["Operator"] == operator]
        Wrangler.WranglerLogger.debug("operator_stop_labels.head()\n%s" % operator_stop_labels.head())
        operator_stop_label_dict = operator_stop_labels.set_index(["TM2 Node"]).to_dict(orient="index")

        # read GTFS
        fullpath = os.path.join(GTFS_DIR, GTFS_NETWORKS[operator])
        service_ids_by_date = partridge.read_service_ids_by_date(fullpath)
        service_ids = service_ids_by_date[datetime.date(2015, 3, 11)]
        feed = partridge.feed(fullpath, view={'trips.txt': {'service_id': service_ids}})

        # let's see the stop_times with the stop names
        gtfs_stop_times = pandas.merge(left=feed.stop_times,
                                       right=feed.stops[["stop_id","stop_name"]]).sort_values(by=["trip_id","stop_sequence"])
        # and the route_id and direction_id
        gtfs_stop_times = pandas.merge(left=gtfs_stop_times,
                                       right=feed.trips[["trip_id","route_id","direction_id"]], how="left")
        # and route_long_name and route_type
        gtfs_stop_times = pandas.merge(left=gtfs_stop_times,
                                       right=feed.routes[["route_id","route_long_name","route_type"]], how="left")
        # => filter out buses since the travel time comes from traffic
        gtfs_stop_times = gtfs_stop_times.loc[gtfs_stop_times.route_type != 3,:]

        # join TM2 node number from both the gtfs cols in the mapping
        for gtfs_col in ["GTFS stop_id NB/inbound","GTFS stop_id SB/outbound"]:
Example 15
def get_partridge_feed_by_date(zip_path: str, date: datetime.date):
    return ptg.feed(zip_path, view=get_partridge_filter_for_date(zip_path, date))
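get_partridge_filter_for_date is not shown in this collection; a minimal hypothetical reconstruction, assuming it builds the same trips.txt/service_id view used throughout the other examples:

import datetime
import partridge as ptg

def get_partridge_filter_for_date(zip_path: str, date: datetime.date) -> dict:
    # Hypothetical helper: mirrors the view dict used in Examples 7-12.
    service_ids = ptg.read_service_ids_by_date(zip_path)[date]
    return {'trips.txt': {'service_id': service_ids}}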
Example 16
def test_missing_zip():
    with pytest.raises(AssertionError, match='File or path not found'):
        ptg.feed(fixture('missing.zip'))
Example 17
def after(data, datadir):
    global join, readZippedShapefile, fastOverlay, ptg, Point, gp, pd, FOOT_TO_METER, ACRE_TO_HECTARE

    print('reprojecting data')
    data = data.to_crs(epsg=26942)

    # M-1, M-1(S) and M-2 zones conditionally permit multifamily housing iff it is in the central city or within 1/4 mile
    # of a light rail stop
    print('loading central city')
    # this file was created by hand based on the description in the code
    centralCity = readZippedShapefile(join(datadir, 'sacramento_central_city.zip')).to_crs(epsg=26942)

    print('loading light rail stations from GTFS')
    feed = ptg.feed(join(datadir, 'sacramento_gtfs_20180213.zip'))

    lightRailRoutes = feed.routes.route_id[feed.routes.route_type == 0]
    lightRailTrips = feed.trips.trip_id[feed.trips.route_id.isin(lightRailRoutes)]
    feed.stop_times.set_index(['trip_id', 'stop_sequence'], inplace=True)
    lightRailStopIds = feed.stop_times.loc[lightRailTrips, 'stop_id'].unique()
    feed.stops.set_index('stop_id', inplace=True)
    lightRailStops = feed.stops.loc[lightRailStopIds].copy()

    print(f'found {len(lightRailStops)} light rail stops')

    # convert to geodataframe
    lightRailStops['geometry'] = lightRailStops.apply(lambda stop: Point(stop.stop_lon, stop.stop_lat), 1)
    # GTFS is defined to be WGS 84
    lightRailStops = gp.GeoDataFrame(lightRailStops, geometry='geometry', crs={'init': 'epsg:4326'})
    lightRailStops = lightRailStops.to_crs(epsg=26942)

    # save memory
    del feed

    print('buffering light rail stops')
    lightRailStops['geometry'] = lightRailStops.buffer(5280 / 4 * FOOT_TO_METER, resolution=32)

    # For M and RMX-SPD-R St zones, we cut these zones out of the whole file, overlay them with the affected area, and
    # then merge them back in.
    print('adding multifamily as conditional use to industrial zones in central city and near light rail')
    industrialZoneLocs = data.zone.apply(
        lambda zone: zone.startswith('M-1') or zone.startswith('M-1(S)') or zone.startswith('M-2'))
    industrialZones = data[industrialZoneLocs]
    affectedAreas = lightRailStops.loc[:,['geometry']].copy()
    # and add the central city
    affectedAreas = affectedAreas.append(centralCity)
    affectedAreas['affected'] = 42 # add a flag column so we know which resulting geometries overlapped
    # Do an overlay so that we split large industrial zones at the boundaries of the affected area
    splitIndustrialAreas = gp.overlay(industrialZones, affectedAreas, how='union')

    # and set the multiFamily flag
    splitIndustrialAreas['multiFamily'] = splitIndustrialAreas['affected'].apply(lambda x: 'conditional' if x == 42 else 'no')

    print('setting density limits in RMX-SPD-R Street Corridor')
    rmxSpdRstLocs = data.zone == 'RMX-SPD-R Street Corridor'
    rmxSpdRst = data[rmxSpdRstLocs]

    affectedAreas = lightRailStops.loc[:,['geometry']].copy()
    affectedAreas['affected'] = 42 # add a flag column so we know which resulting geometries overlapped
    splitRmxSpd = gp.overlay(rmxSpdRst, affectedAreas, how='union')
    splitRmxSpd['loMaxUnitsPerHectare'] = splitRmxSpd['hiMaxUnitsPerHectare'] =\
        splitRmxSpd.affected.apply(lambda x: 100 / ACRE_TO_HECTARE if x == 42 else 60 / ACRE_TO_HECTARE)

    # put it all back together into a single dataframe
    recombined = gp.GeoDataFrame(
        pd.concat([
            data[~(rmxSpdRstLocs | industrialZoneLocs)],
            splitRmxSpd,
            splitIndustrialAreas
        ]),
        geometry='geometry',
        crs={'init': 'epsg:26942'}
        )

    # drop the 'affected' column we were using as a flag
    del recombined['affected']

    return recombined
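For reference, the buffer radius used above converts a quarter mile to meters; assuming FOOT_TO_METER = 0.3048:

quarter_mile_m = 5280 / 4 * 0.3048  # 1320 ft, about 402.3 m: the light rail catchment radius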
Example 18
def after(data, datadir):
    global join, readZippedShapefile, fastOverlay, ptg, Point, gp, pd, FOOT_TO_METER, ACRE_TO_HECTARE, np

    print('reprojecting data')
    data = data.to_crs(epsg=26942)

    print('adding parking requirements \U0001f697')
    # Parking Districts required a CA Public Records Act request:
    # https://sacramentoca.mycusthelp.com/WEBAPP/_rs/(S(2rztm4xsj04qo445twgqihlj))/RequestArchiveDetails.aspx?rid=8033&view=1
    parkingDistricts = readZippedShapefile(join(datadir, 'sacramento_parking.zip')).to_crs(epsg=26942)\
        .rename(columns={'SECTION': 'parkingDist'})

    data = fastOverlay(data, parkingDistricts)
    data['parkingDist'] = data.parkingDist.astype('category')

    # http://www.qcode.us/codes/sacramento/view.php?topic=17-vi-17_608-17_608_030&frames=on

    # Most places have no max parking requirement, overwritten in the CBD below
    data['loMaxParkingPerUnit'] = np.inf
    data['hiMaxParkingPerUnit'] = np.inf

    data.loc[data.parkingDist == 'Central Business District', 'loMinParkingPerUnit'] = 0
    data.loc[data.parkingDist == 'Central Business District', 'hiMinParkingPerUnit'] = 0
    data.loc[(data.parkingDist == 'Central Business District') & (data.multiFamily == 'yes'), 'loMaxParkingPerUnit'] = 1
    data.loc[(data.parkingDist == 'Central Business District') & (data.multiFamily == 'yes'), 'hiMaxParkingPerUnit'] = 1

    data.loc[(data.parkingDist == 'Urban') & (data.multiFamily == 'yes'), 'loMinParkingPerUnit'] = 0.5
    data.loc[(data.parkingDist == 'Urban') & (data.multiFamily == 'yes'), 'hiMinParkingPerUnit'] = 0.5

    # != yes: no or conditional
    # NB not encoding exception for lots under 3200 sq feet
    data.loc[(data.parkingDist == 'Urban') & (data.multiFamily != 'yes'), 'loMinParkingPerUnit'] = 1
    data.loc[(data.parkingDist == 'Urban') & (data.multiFamily != 'yes'), 'hiMinParkingPerUnit'] = 1

    data.loc[data.parkingDist == 'Traditional', 'loMinParkingPerUnit'] = 1
    data.loc[data.parkingDist == 'Traditional', 'hiMinParkingPerUnit'] = 1

    data.loc[(data.parkingDist == 'Suburban') & (data.multiFamily == 'yes'), 'loMinParkingPerUnit'] = 1.5
    data.loc[(data.parkingDist == 'Suburban') & (data.multiFamily == 'yes'), 'hiMinParkingPerUnit'] = 1.5
    data.loc[(data.parkingDist == 'Suburban') & (data.multiFamily != 'yes'), 'loMinParkingPerUnit'] = 1
    data.loc[(data.parkingDist == 'Suburban') & (data.multiFamily != 'yes'), 'hiMinParkingPerUnit'] = 1

    # M-1, M-1(S) and M-2 zones conditionally permit multifamily housing iff it is in the central city or within 1/4 mile
    # of a light rail stop
    print('loading central city')
    # this file was created by hand based on the description in the code
    centralCity = readZippedShapefile(join(datadir, 'sacramento_central_city.zip')).to_crs(epsg=26942)

    print('loading light rail stations from GTFS')
    feed = ptg.feed(join(datadir, 'sacramento_gtfs_20180213.zip'))

    lightRailRoutes = feed.routes.route_id[feed.routes.route_type == 0]
    lightRailTrips = feed.trips.trip_id[feed.trips.route_id.isin(lightRailRoutes)]
    feed.stop_times.set_index(['trip_id', 'stop_sequence'], inplace=True)
    lightRailStopIds = feed.stop_times.loc[lightRailTrips, 'stop_id'].unique()
    feed.stops.set_index('stop_id', inplace=True)
    lightRailStops = feed.stops.loc[lightRailStopIds].copy()

    print(f'found {len(lightRailStops)} light rail stops')

    # convert to geodataframe
    lightRailStops['geometry'] = lightRailStops.apply(lambda stop: Point(stop.stop_lon, stop.stop_lat), 1)
    # GTFS is defined to be WGS 84
    lightRailStops = gp.GeoDataFrame(lightRailStops, geometry='geometry', crs={'init': 'epsg:4326'})
    lightRailStops = lightRailStops.to_crs(epsg=26942)

    # save memory
    del feed

    print('buffering light rail stops')
    lightRailStops['geometry'] = lightRailStops.buffer(5280 / 4 * FOOT_TO_METER, resolution=32)

    # For M and RMX-SPD-R St zones, we cut these zones out of the whole file, overlay them with the affected area, and
    # then merge them back in.
    print('adding multifamily as conditional use to industrial zones near light rail')
    industrialZoneLocs = data.zone.apply(
        lambda zone: zone.startswith('M-1') or zone.startswith('M-1(S)') or zone.startswith('M-2'))
    industrialZones = data[industrialZoneLocs]
    affectedAreas = lightRailStops.loc[:,['geometry']].copy()
    affectedAreas['lightRail'] = True # add a flag column so we know which resulting geometries overlapped
    # and add the central city
    # Do an overlay so that we split large industrial zones at the boundaries of the affected area
    splitIndustrialAreas = gp.overlay(industrialZones, affectedAreas, how='identity')
    centralCity['centralCity'] = True
    splitIndustrialAreas = gp.overlay(splitIndustrialAreas, centralCity, how='identity')

    splitIndustrialAreas['lightRail'] = splitIndustrialAreas.lightRail.fillna(False)
    splitIndustrialAreas['centralCity'] = splitIndustrialAreas.centralCity.fillna(False)

    # and set the multiFamily flag. Conditional at light rail, yes in central city
    # http://qcode.us/codes/sacramento/view.php?topic=17-ii-17_220-i-17_220_110&frames=on
    splitIndustrialAreas['multiFamily'] = splitIndustrialAreas\
        .apply(lambda x: 'yes' if x.centralCity else 'conditional' if x.lightRail else 'no', 'columns')

    print('setting density limits in RMX-SPD-R Street Corridor')
    rmxSpdRstLocs = data.zone == 'RMX-SPD-R Street Corridor'
    rmxSpdRst = data[rmxSpdRstLocs]

    affectedAreas = lightRailStops.loc[:,['geometry']].copy()
    affectedAreas['affected'] = 42 # add a flag column so we know which resulting geometries overlapped
    splitRmxSpd = gp.overlay(rmxSpdRst, affectedAreas, how='identity')
    splitRmxSpd['loMaxUnitsPerHectare'] = splitRmxSpd['hiMaxUnitsPerHectare'] =\
        splitRmxSpd.affected.apply(lambda x: 100 / ACRE_TO_HECTARE if x == 42 else 60 / ACRE_TO_HECTARE)

    # put it all back together into a single dataframe
    recombined = gp.GeoDataFrame(
        pd.concat([
            data[~(rmxSpdRstLocs | industrialZoneLocs)],
            splitRmxSpd,
            splitIndustrialAreas
        ]),
        geometry='geometry',
        crs={'init': 'epsg:26942'}
        )

    # drop the 'affected' column we were using as a flag
    del recombined['affected']

    return recombined
Example 19
        Wrangler.WranglerLogger.info("Processing operator %s" % operator)

        # get the stop labels for this operator
        operator_stop_labels = trn_stop_labels.loc[trn_stop_labels["Operator"]
                                                   == operator]
        Wrangler.WranglerLogger.debug("operator_stop_labels.head()\n%s" %
                                      operator_stop_labels.head())
        operator_stop_label_dict = operator_stop_labels.set_index(
            ["TM2 Node"]).to_dict(orient="index")

        # read GTFS
        fullpath = os.path.join(GTFS_DIR, GTFS_NETWORKS[operator])
        service_ids_by_date = partridge.read_service_ids_by_date(fullpath)
        service_ids = service_ids_by_date[datetime.date(2015, 3, 11)]
        feed = partridge.feed(fullpath,
                              view={'trips.txt': {
                                  'service_id': service_ids
                              }})

        # let's see the stop_times with the stop names
        gtfs_stop_times = pandas.merge(
            left=feed.stop_times,
            right=feed.stops[["stop_id", "stop_name"]]
        ).sort_values(by=["trip_id", "stop_sequence"])
        # and the route_id and direction_id
        gtfs_stop_times = pandas.merge(
            left=gtfs_stop_times,
            right=feed.trips[["trip_id", "route_id", "direction_id"]],
            how="left")
        # and route_long_name and route_type
        gtfs_stop_times = pandas.merge(
            left=gtfs_stop_times,