#!/usr/bin/env python
# coding: utf-8
import gtfstk as gt

path = 'gtfs.zip'
feed = gt.read_gtfs(path, dist_units='km')

# Split the feed into one sub-feed per agency and write each to its own zip.
for agency in feed.agency.agency_id:
    route_ids_for_agency = list(
        feed.routes[feed.routes['agency_id'] == agency]['route_id'])
    little_feed = feed.restrict_to_routes(route_ids=route_ids_for_agency)
    gt.write_gtfs(little_feed, 'output/{}.zip'.format(agency))
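A quick sanity check of the split, as a sketch; 'AGENCY1' is a placeholder for one of the agency ids written above:

little = gt.read_gtfs('output/AGENCY1.zip', dist_units='km')  # placeholder agency id
assert set(little.routes.agency_id) <= {'AGENCY1'}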
import gtfstk as gt
import pandas as pd
from dateutil.parser import parse

# `hp` (helper functions such as validate_feed, feed_obj_to_dict, text2sec)
# and the `Feed` class come from the surrounding package.


def read_gtfs(
        path,
        dt,  # date to validate the feed on, e.g. "Thursday" or "20181201"
        dist_units=None):
    """
    Create a Feed instance from the given path and given distance units.

    The path should be a directory containing GTFS text files or a zip file
    that unzips as a collection of GTFS text files (and not as a directory
    containing GTFS text files).

    The distance units given must lie in :const:`constants.dist_units`

    Notes
    -----
    - Ignore non-GTFS files
    - Automatically strip whitespace from the column names in GTFS files
    - This is based on the gtfstk library
    """
    gt_feed = gt.read_gtfs(path, dist_units)

    # Validate the feed for a specific day (either a date or a day of the week) ====
    if not gt.valid_date(dt):
        dt = gt_feed.get_first_week()[parse(dt).weekday()]
    gt_feed = hp.validate_feed(gt_feed, dt)
    feed_dict = hp.feed_obj_to_dict(gt_feed)
    feed_dict['valid_date'] = dt

    # Calculate PT segments ====
    PT_links_df = feed_dict['stop_times'].copy()

    # Make sure each trip's stops are sorted by stop_sequence
    PT_links_df.sort_values(by=['trip_id', 'stop_sequence'], inplace=True)

    # Convert the stop_times into PT links
    PT_links_df.rename(columns={
        'arrival_time': 'o_time',
        'stop_id': 'o_stop',
        'stop_sequence': 'o_sequence'
    }, inplace=True)
    PT_links_df[['d_time', 'd_stop', 'd_sequence']] = PT_links_df[
        ['o_time', 'o_stop', 'o_sequence']].shift(-1)

    # Drop the last stop of each trip, where the shift wraps to the next trip
    PT_links_df = PT_links_df[
        PT_links_df['o_sequence'] < PT_links_df['d_sequence']].copy()

    # Convert the times into seconds for easier time calculations
    PT_links_df['o_time_sec'] = PT_links_df['o_time'].apply(hp.text2sec)
    PT_links_df['d_time_sec'] = PT_links_df['d_time'].apply(hp.text2sec)
    PT_links_df['duration'] = (
        PT_links_df['d_time_sec'] - PT_links_df['o_time_sec'])

    # Add route_id using the trips table
    PT_links_df = PT_links_df.merge(feed_dict['trips'])

    # Add the route type in text format to the link dataset
    PT_links_df = PT_links_df.merge(feed_dict['routes'])
    route_type = {
        '0': 'Tram, Streetcar, Light rail',
        '1': 'Subway, Metro',
        '2': 'Rail',
        '3': 'Bus',
        '4': 'Ferry',
        '5': 'Cable car',
        '6': 'Gondola, Suspended cable car',
        '7': 'Funicular'
    }
    PT_links_df['route_type'] = PT_links_df['route_type'].astype(str)
    PT_links_df['route_type'].replace(route_type, inplace=True)

    # Add the stop sequence to PT_links_df
    def stop_seq_for_trips(stop_times_df):
        """
        Create a dataframe of the stop sequence for each trip.

        The output format is: the first field is the trip_id; the second
        field is the stop_ids, separated by semicolons, in order of their
        sequence.
        """

        def get_first_trip(group):
            stop_seq = ";".join(group['stop_id'].tolist()) + ";"
            trip_id = group['trip_id'].iat[0]
            trip_dict = {'stop_seq': stop_seq, 'trip_id': trip_id}
            return pd.DataFrame(trip_dict, index=[0])

        stop_seq_df = stop_times_df.groupby('trip_id').apply(
            get_first_trip).reset_index(drop=True)
        return stop_seq_df

    stop_seq_df = stop_seq_for_trips(feed_dict['stop_times'])
    PT_links_df = PT_links_df.merge(stop_seq_df)

    def remaining_stops(row):
        # Keep only the part of the trip's stop sequence after the origin stop
        sid = row['o_stop'] + ";"
        seq = row['stop_seq']
        return seq.split(sid, 1)[-1]

    PT_links_df['stop_seq'] = PT_links_df.apply(remaining_stops, axis=1)

    # Add stop lat and lon
    PT_links_df = PT_links_df.merge(
        feed_dict['stops'][['stop_id', 'stop_lat', 'stop_lon']],
        left_on='o_stop',
        right_on='stop_id',
        how='left').drop('stop_id', axis=1)
    PT_links_df.rename(columns={
        'stop_lat': 'o_stop_lat',
        'stop_lon': 'o_stop_lon'
    }, inplace=True)
    PT_links_df = PT_links_df.merge(
        feed_dict['stops'][['stop_id', 'stop_lat', 'stop_lon']],
        left_on='d_stop',
        right_on='stop_id',
        how='left').drop('stop_id', axis=1)
    PT_links_df.rename(columns={
        'stop_lat': 'd_stop_lat',
        'stop_lon': 'd_stop_lon'
    }, inplace=True)

    feed_dict['feed_segments'] = PT_links_df

    # Drop private gtfstk attributes before building the Feed
    for key in ['_trips_i', '_calendar_i', '_calendar_dates_g']:
        if key in feed_dict:
            del feed_dict[key]

    return Feed(**feed_dict)
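A minimal usage sketch. The `text2sec` body below is an assumption (the real helper lives in `hp`), converting GTFS 'HH:MM:SS' strings into seconds; the call assumes the package's `Feed` class exposes the `feed_segments` table built above:

def text2sec(t):
    # Assumed implementation of hp.text2sec: 'HH:MM:SS' -> seconds.
    # GTFS times past midnight may have hours >= 24, so avoid datetime parsing.
    h, m, s = (int(x) for x in t.split(':'))
    return 3600 * h + 60 * m + s


feed = read_gtfs('gtfs.zip', 'Thursday', dist_units='km')
print(feed.feed_segments[['o_stop', 'd_stop', 'duration']].head())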
import importlib
from pathlib import Path

import numpy as np
import pandas as pd
import pytest
import gtfstk

slow = pytest.mark.skipif(
    not pytest.config.getoption("--runslow"),
    reason="need --runslow option to run")

# Check if GeoPandas is installed
loader = importlib.find_loader('geopandas')
HAS_GEOPANDAS = loader is not None

# Check if Folium is installed
loader = importlib.find_loader('folium')
HAS_FOLIUM = loader is not None

# Load/create test feeds
DATA_DIR = Path('data')
sample = gtfstk.read_gtfs(DATA_DIR / 'sample_gtfs.zip', dist_units='km')
cairns = gtfstk.read_gtfs(DATA_DIR / 'cairns_gtfs.zip', dist_units='km')
cairns_shapeless = cairns.copy()
cairns_shapeless.shapes = None
t = cairns_shapeless.trips
t['shape_id'] = np.nan
cairns_shapeless.trips = t
week = cairns.get_first_week()
cairns_dates = [week[0], week[1]]
cairns_trip_stats = pd.read_csv(
    DATA_DIR / 'cairns_trip_stats.csv', dtype=gtfstk.DTYPE)
import tempfile

import requests
import gtfstk


def url2gtfs(url):
    r = requests.get(url)
    with tempfile.NamedTemporaryFile(delete=False) as f:
        # Use the public `content` attribute rather than `r._content`
        f.write(r.content)
        return gtfstk.read_gtfs(f.name, dist_units='mi')
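A usage sketch; the URL is a placeholder, not a real feed endpoint:

feed = url2gtfs('https://example.com/gtfs.zip')  # placeholder URL
print(feed.agency)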
import gtfstk as gt

from .context import clean_auckland_gtfs, DATA_DIR
from clean_auckland_gtfs import *

feed = gt.read_gtfs(DATA_DIR / 'raw_auckland_gtfs_20161122.zip',
                    dist_units='km')


def test_drop_school_routes():
    n = feed.routes.shape[0]
    feed1 = drop_school_routes(feed)

    # Should drop some routes
    k = feed1.routes.shape[0]
    assert k < n


def test_clean():
    n = feed.routes.shape[0]
    feed1 = clean(feed)

    # Should drop some routes
    k = feed1.routes.shape[0]
    assert k < n

    # Route short names should be unique
    j = feed1.routes.route_short_name.nunique()
    assert j == k
reason="need --runslow option to run", ) # Check if GeoPandas is installed loader = importlib.find_loader("geopandas") if loader is None: HAS_GEOPANDAS = False else: HAS_GEOPANDAS = True # Check if Folium is installed loader = importlib.find_loader("folium") if loader is None: HAS_FOLIUM = False else: HAS_FOLIUM = True # Load/create test feeds DATA_DIR = Path("data") sample = gtfstk.read_gtfs(DATA_DIR / "sample_gtfs.zip", dist_units="km") cairns = gtfstk.read_gtfs(DATA_DIR / "cairns_gtfs.zip", dist_units="km") cairns_shapeless = cairns.copy() cairns_shapeless.shapes = None t = cairns_shapeless.trips t["shape_id"] = np.nan cairns_shapeless.trips = t week = cairns.get_first_week() cairns_dates = [week[0], week[1]] cairns_trip_stats = pd.read_csv(DATA_DIR / "cairns_trip_stats.csv", dtype=gtfstk.DTYPE)
import json
import os
import shutil
import tempfile

import gtfstk

# `route_type_to_mode` and `location_type_to_stop_type` are helper
# functions defined elsewhere in this module.


def export_gtfs_as_geo(input_gtfs_file, output_file_name):
    working_directory = tempfile.TemporaryDirectory()
    feed = gtfstk.read_gtfs(input_gtfs_file, dist_units='km')
    feed_w_shapes = gtfstk.miscellany.create_shapes(feed)

    # Keep only a relevant subset
    feed_w_shapes_selected = feed_w_shapes.trips[[
        'route_id', 'shape_id', 'trip_id'
    ]]
    feed_w_shapes_dedup = feed_w_shapes_selected.drop_duplicates(
        subset=['route_id', 'shape_id'])
    trip_stats = feed_w_shapes.compute_trip_stats()
    trips_full = feed_w_shapes_dedup.merge(trip_stats,
                                           left_on='trip_id',
                                           right_on='trip_id',
                                           suffixes=('', '_'))
    trips_full_selected = trips_full[[
        'route_id', 'shape_id', 'trip_id', 'start_stop_id', 'end_stop_id',
        'num_stops', 'is_loop'
    ]]

    # Turn ids into human-readable info
    trips_full_s1 = trips_full_selected.merge(feed.stops,
                                              left_on='start_stop_id',
                                              right_on='stop_id',
                                              suffixes=('', '_'))
    trips_full_s1 = trips_full_s1[[
        'route_id', 'shape_id', 'trip_id', 'stop_name', 'end_stop_id',
        'num_stops', 'is_loop'
    ]]
    trips_full_s1.rename(columns={"stop_name": "origin_stop_name"},
                         inplace=True)
    trips_full_s2 = trips_full_s1.merge(feed.stops,
                                        left_on='end_stop_id',
                                        right_on='stop_id',
                                        suffixes=('', '_'))
    trips_full_s2 = trips_full_s2[[
        'route_id', 'shape_id', 'trip_id', 'origin_stop_name', 'stop_name',
        'num_stops', 'is_loop'
    ]]
    trips_full_s2.rename(columns={"stop_name": "destination_stop_name"},
                         inplace=True)
    trips_full_w_routes = trips_full_s2.merge(feed.routes, on='route_id')
    trips_full_w_agency = trips_full_w_routes.merge(feed.agency,
                                                    on='agency_id')
    trips_full_w_agency['route_mode'] = trips_full_w_agency[
        'route_type'].apply(lambda x: route_type_to_mode(x))
    trips_full_w_agency['trip_name'] = trips_full_w_agency[
        'route_id'] + "_" + trips_full_w_agency['trip_id']
    trips_full_w_agency['file_name'] = trips_full_w_agency['trip_name'].apply(
        lambda x: x.replace(' ', 'u').replace(':', 'u').replace('/', 'u'))

    # Write outputs
    for id_, elem in trips_full_w_agency.iterrows():
        with open(
                os.path.join(working_directory.name,
                             "{}.geojson".format(elem["file_name"])),
                'w') as fp:
            as_geojson = feed_w_shapes.trip_to_geojson(elem["trip_id"],
                                                       include_stops=True)
            as_geojson['features'][0]['properties'] = json.loads(
                elem.to_json())

            # Put the stops in the right order
            stop_id_s_in_order = list(
                feed_w_shapes.stop_times[
                    feed_w_shapes.stop_times["trip_id"] ==
                    elem["trip_id"]].sort_values(
                        by=['stop_sequence'])['stop_id'])
            new_FeatureCollection = []
            new_FeatureCollection.append(as_geojson['features'][0])
            for stop_id in stop_id_s_in_order:
                feature = [
                    feat for feat in as_geojson['features']
                    if feat['properties'].get('stop_id') == stop_id
                ]
                new_FeatureCollection.append(feature[0])
            as_geojson['features'] = new_FeatureCollection
            json.dump(as_geojson, fp)

    trips_full_w_agency = trips_full_w_agency[[
        'file_name', 'origin_stop_name', 'destination_stop_name', 'num_stops',
        'is_loop', 'route_short_name', 'route_long_name', 'route_mode',
        'route_color', 'agency_name', 'agency_url'
    ]]
    trips_full_w_agency.to_csv(
        os.path.join(working_directory.name, "trips.csv"))
    feed_w_shapes.stops.rename(columns={"stop_lat": "latitude"}, inplace=True)
    feed_w_shapes.stops.rename(columns={"stop_lon": "longitude"},
                               inplace=True)
    if "location_type" in feed_w_shapes.stops.columns:
        feed_w_shapes.stops.fillna({'location_type': 0}, inplace=True)
        feed_w_shapes.stops['stop_type'] = feed_w_shapes.stops[
            'location_type'].apply(lambda x: location_type_to_stop_type(x))
    else:
        feed_w_shapes.stops['stop_type'] = "stops"
    stop_types = feed_w_shapes.stops['stop_type'].unique()
    for stop_type_name in stop_types:
        stops = feed_w_shapes.stops[feed_w_shapes.stops['stop_type'] ==
                                    stop_type_name]
        stops.to_csv(os.path.join(working_directory.name,
                                  "{}.csv".format(stop_type_name)),
                     float_format='%.6f')
    shutil.make_archive(
        output_file_name.split('.')[0], 'zip', working_directory.name)
    working_directory.cleanup()
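The function assumes `route_type_to_mode` and `location_type_to_stop_type` are defined elsewhere in the module; the bodies below are plausible sketches based on the GTFS `route_type` and `location_type` codes, not the author's implementations, followed by an example call with placeholder file names:

def route_type_to_mode(route_type):
    # Hypothetical mapping from GTFS route_type codes to mode names.
    modes = {
        0: 'tram', 1: 'subway', 2: 'rail', 3: 'bus',
        4: 'ferry', 5: 'cable_car', 6: 'gondola', 7: 'funicular',
    }
    return modes.get(int(route_type), 'other')


def location_type_to_stop_type(location_type):
    # Hypothetical mapping from GTFS location_type codes to output file names.
    types = {0: 'stops', 1: 'stations', 2: 'entrances'}
    return types.get(int(location_type), 'stops')


export_gtfs_as_geo('gtfs.zip', 'gtfs_geo.zip')  # placeholder file names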