Example 1
def find_dbfnames():
    dbfnames = []
    for sdir in settings.DB_DIRS:
        # If it is a regular file, use it directly.
        if os.path.isfile(sdir):
            dbfnames.append(sdir)
            continue
        # Directories: all sub-directory sqlite files.
        for cur_dir, subdirs, files in os.walk(sdir):
            for ending in settings.DB_ENDINGS:
                dbfnames.extend(glob(os.path.join(cur_dir, ending)))
    dbfnames = list(set(dbfnames))  # remove any duplicates
    # Remove common prefix
    if len(dbfnames) == 1:
        commonprefix = ""
    else:
        commonprefix = os.path.commonprefix(dbfnames)
        dbfnames = [fname[len(commonprefix):] for fname in dbfnames]
    # exclude tests:
    dbfnames = sorted([e for e in dbfnames if "proc/test/" not in e])
    valid_dbfnames = []
    timezone_dict = {}
    for dbf in dbfnames:
        try:
            timezone_dict[dbf] = gtfs.GTFS(commonprefix +
                                           dbf).get_timezone_string()
            valid_dbfnames.append(dbf)
        except OperationalError as e:
            print("database " + dbf + " is not available due to: \n" +
                  e.message)

    data = {'dbfnames': valid_dbfnames, 'timezones': timezone_dict}
    dbfname_cache = json.dumps(data)
    return dbfnames, commonprefix, dbfname_cache
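The dbfname_cache returned above is a plain JSON string; a minimal sketch of how a caller might consume it (the calling code below is an assumption for illustration, not part of the original module):

import json  # already used by find_dbfnames above

dbfnames, commonprefix, dbfname_cache = find_dbfnames()
cache = json.loads(dbfname_cache)  # {'dbfnames': [...], 'timezones': {...}}
for name in cache['dbfnames']:
    # Each valid database name maps to the timezone string reported by gtfspy.
    print(name, "->", cache['timezones'][name])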
Example 2
def view_line_data():
    #print request.args
    dbfname = get_dbfname(request.args.get('dbfname', None))
    shapes = request.args.get('use_shapes', None)
    if shapes == "1":
        shapes = True
    else:
        shapes = False
    G = gtfs.GTFS(dbfname)  # handles connections to database etc.
    data = G.get_all_route_shapes(use_shapes=shapes)

    routes = []
    for raw_route in data:
        agency = raw_route["agency"]
        lats = [float(lat) for lat in raw_route['lats']]
        lons = [float(lon) for lon in raw_route['lons']]
        route_type = int(raw_route['type'])
        name = str(raw_route['name'])
        agency_name = str(raw_route['agency_name'])
        route = {
            "agency": agency,
            "lats": lats,
            "lons": lons,
            "route_type": route_type,
            "name": name,
            "agency_name": agency_name
        }
        routes.append(route)
    return json.dumps(routes)
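These view functions read their parameters from Flask's request.args, so they only work inside a request context; a minimal sketch of wiring one of them into a Flask app (the app object, URL path, and port below are illustrative assumptions, not taken from this listing):

from flask import Flask, request

app = Flask(__name__)

# Hypothetical route; e.g. GET /route_data?dbfname=some.sqlite&use_shapes=1
app.add_url_rule('/route_data', 'view_line_data', view_line_data)

if __name__ == '__main__':
    app.run(port=5000)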
Example 3
def view_spreading_explorer():
    dbfname = get_dbfname(request.args.get('dbfname', None))
    shapes = request.args.get('use_shapes', None)
    tstart = request.args.get('tstart', None)
    tend = request.args.get('tend', None)
    lat = request.args.get('lat', None)
    lon = request.args.get('lon', None)
    if not dbfname:
        return json.dumps({})
    if tstart:
        tstart = int(tstart)
    if tend:
        tend = int(tend)
    if lat:
        lat = float(lat)
    if lon:
        lon = float(lon)
    if shapes == "1":
        shapes = True
    else:
        shapes = False
    # handles connections to database etc.
    G = gtfs.GTFS(dbfname)
    data = G.get_spreading_trips(tstart,
                                 lat,
                                 lon,
                                 tend - tstart,
                                 use_shapes=shapes)
    return json.dumps(data)
Example 4
def get_gtfs_stats():
    dbfname = get_dbfname(request.args.get('dbfname', None))
    if not dbfname:
        return json.dumps({})
    G = gtfs.GTFS(dbfname)
    data = stats.get_stats(G)
    return json.dumps(data)
Example 5
def view_stop_data():
    #print request.args
    tstart = int(request.args.get('tstart', None))
    tend = int(request.args.get('tend', None))
    dbfname = get_dbfname(request.args.get('dbfname', None))
    G = gtfs.GTFS(dbfname)  # handles connections to database etc.
    stopdata = G.get_stop_count_data(tstart, tend)
    return stopdata.to_json(orient="records")
Example 6
def get_start_and_end_time_ut():
    dbfname = get_dbfname(request.args.get('dbfname', ""))
    if dbfname == "null":
        dbfname = ""
    G = gtfs.GTFS(dbfname)
    start, end = G.get_approximate_schedule_time_span_in_ut()
    data = {"start_time_ut": start, "end_time_ut": end}
    return json.dumps(data)
Example 7
def load_or_import_example_gtfs(verbose=False):
    imported_database_path = "test_db_kuopio.sqlite"
    # Re-import only if the imported database does not already exist.
    if not os.path.exists(imported_database_path):
        print("Importing gtfs zip file")
        import_gtfs.import_gtfs(
            ["data/gtfs_kuopio_finland.zip"],  # input: list of GTFS zip files (or directories)
            imported_database_path,  # output: where to create the new sqlite3 database
            print_progress=verbose,  # whether to print progress when importing data
            location_name="Kuopio")

        # Note: this is an optional step, which is not necessary for many use cases.
        print("Computing walking paths using OSM")
        G = gtfs.GTFS(imported_database_path)
        G.meta['download_date'] = "2017-03-15"

        osm_path = "data/kuopio_extract_mapzen_2017_03_15.osm.pbf"

        # when using with the Kuopio test data set,
        # this should raise a warning due to no nearby OSM nodes for one of the stops.
        osm_transfers.add_walk_distances_to_db_python(imported_database_path,
                                                      osm_path)

        print("Note: for large cities there is also a faster, Java-based option for computing footpaths.")
        dir_path = os.path.dirname(os.path.realpath(__file__))
        java_path = os.path.join(dir_path, "../java_routing/")
        print("Please see the contents of " + java_path + " for more details.")

    # Now you can access the imported database using a GTFS-object as an interface:
    G = gtfs.GTFS(imported_database_path)

    if verbose:
        print("Location name:" + G.get_location_name())  # should print Kuopio
        print("Time span of the data in unixtime: " +
              str(G.get_approximate_schedule_time_span_in_ut()))
        # prints the time span in unix time
    return G
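A minimal usage sketch of the helper above, assuming the data files it references are present:

# Build (or just open) the example database and query it through the GTFS object.
G = load_or_import_example_gtfs(verbose=True)
print(G.get_location_name())                         # "Kuopio"
print(G.get_approximate_schedule_time_span_in_ut())  # (start_ut, end_ut) in unix time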
Example 8
def get_trip_counts_per_day():
    dbfname = get_dbfname(request.args.get('dbfname', None))
    if not dbfname:
        return json.dumps({})
    g = gtfs.GTFS(dbfname)
    data = g.get_trip_counts_per_day()
    data_dict = {
        "trip_counts": [int(c) for c in data["trip_counts"].values],
        "dates": [str(date) for date in data["date_str"].values]
    }
    return json.dumps(data_dict)
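The same data can be obtained without the Flask layer by calling gtfspy directly; a short sketch mirroring the view function above (the database path is a placeholder):

g = gtfs.GTFS("some_city.sqlite")   # placeholder path
data = g.get_trip_counts_per_day()  # tabular result with 'date_str' and 'trip_counts' columns
for date, count in zip(data["date_str"].values, data["trip_counts"].values):
    print(date, int(count))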
Example 9
def view_segment_data():
    #print request.args
    tstart = int(request.args.get('tstart', None))
    tend = int(request.args.get('tend', None))
    dbfname = get_dbfname(request.args.get('dbfname', None))
    shapes = request.args.get('use_shapes', None)
    if shapes == "1":
        shapes = True
    else:
        shapes = False
    G = gtfs.GTFS(dbfname)  # handles connections to database etc.
    data = G.get_segment_count_data(tstart, tend, use_shapes=shapes)
    return json.dumps(data)
Example 10
    def _update_metadata(self):
        # Update metadata
        G_orig = self.gtfs
        if self.update_metadata:
            print("Updating metadata")
            logging.info("Updating metadata")
            G_copy = gtfs.GTFS(self.copy_db_conn)
            G_copy.meta['copied_from'] = self.this_db_path
            G_copy.meta['copy_time_ut'] = time.time()
            G_copy.meta['copy_time'] = time.ctime()

            # Copy some keys directly.
            try:
                for key in [
                        'original_gtfs',
                        'download_date',
                        'location_name',
                        'timezone',
                ]:
                    G_copy.meta[key] = G_orig.meta[key]
            # This part is for gtfs objects with multiple sources
            except:
                for k, v in G_copy.meta.items():
                    if 'feed_' in k:
                        G_copy.meta[k] = G_orig.meta[k]
                for key in [
                        'location_name',
                        'timezone',
                ]:
                    G_copy.meta[key] = G_orig.meta[key]
            # Update *all* original metadata under orig_ namespace.
            G_copy.meta.update(
                ('orig_' + k, v) for k, v in G_orig.meta.items())

            stats.update_stats(G_copy)

            # print "Vacuuming..."
            self.copy_db_conn.execute('VACUUM;')
            # print "Analyzing..."
            self.copy_db_conn.execute('ANALYZE;')
            self.copy_db_conn.commit()
        return
Example 11
def get_scheduled_trips_within_interval():
    tstart = request.args.get('tstart', None)
    tend = request.args.get('tend', None)
    dbfname = get_dbfname(request.args.get('dbfname', None))
    shapes = request.args.get('use_shapes', None)

    if shapes == "1":
        shapes = True
    else:
        shapes = False
    if tstart:
        tstart = int(tstart)
    if tend:
        tend = int(tend)

    G = gtfs.GTFS(dbfname)  # handles connections to database etc.

    trips = G.get_trip_trajectories_within_timespan(start=tstart,
                                                    end=tend,
                                                    use_shapes=False)
    return json.dumps(trips)
Example 12
def import_gtfs(gtfs_sources,
                output,
                preserve_connection=False,
                print_progress=True,
                location_name=None,
                **kwargs):
    """Import a GTFS database

    gtfs_sources: str, dict, list
        Paths to the gtfs zip file or to the directory containing the GTFS data.
        Alternatively, a dict can be provided that maps GTFS filenames
        (like 'stops.txt' and 'agencies.txt') to their string representations.

    output: str or sqlite3.Connection
        path to the new database to be created, or an existing
        sqlite3 connection
    preserve_connection: bool, optional
        Whether to leave the connection open at the end (if False, it is closed).
    print_progress: bool, optional
        Whether to print progress output
    location_name: str, optional
        set the location of this database
    """
    if isinstance(output, sqlite3.Connection):
        conn = output
    else:
        # if os.path.isfile(output):
        #  raise RuntimeError('File already exists')
        conn = sqlite3.connect(output)
    if not isinstance(gtfs_sources, list):
        gtfs_sources = [gtfs_sources]
    cur = conn.cursor()
    time_import_start = time.time()

    # These are a bit unsafe, but make importing much faster,
    # especially on scratch.
    cur.execute('PRAGMA page_size = 4096;')
    cur.execute('PRAGMA mmap_size = 1073741824;')
    cur.execute('PRAGMA cache_size = -2000000;')
    cur.execute('PRAGMA temp_store=2;')
    # Changes of isolation level are python3.6 workarounds -
    # eventually will probably be fixed and this can be removed.
    conn.isolation_level = None  # change to autocommit mode (former default)
    cur.execute('PRAGMA journal_mode = OFF;')
    #cur.execute('PRAGMA journal_mode = WAL;')
    cur.execute('PRAGMA synchronous = OFF;')
    conn.isolation_level = ''  # change back to python default.
    # end python3.6 workaround

    # Do the actual importing.
    loaders = [
        L(gtfssource=gtfs_sources, print_progress=print_progress, **kwargs)
        for L in Loaders
    ]

    for loader in loaders:
        loader.assert_exists_if_required()

    # Do initial import.  This consists of making tables, raw insert
    # of the CSVs, and then indexing.

    for loader in loaders:
        loader.import_(conn)

    # Do any operations that require all tables present.
    for Loader in loaders:
        Loader.post_import_round2(conn)

    # Make any views
    for Loader in loaders:
        Loader.make_views(conn)

    # Run any postprocessors.
    for F in postprocessors:
        F(conn)

    # Set up some basic metadata.
    from gtfspy import gtfs as mod_gtfs
    G = mod_gtfs.GTFS(output)
    G.meta['gen_time_ut'] = time.time()
    G.meta['gen_time'] = time.ctime()
    G.meta['import_seconds'] = time.time() - time_import_start
    G.meta['download_date'] = ''
    G.meta['location_name'] = ''
    G.meta['n_gtfs_sources'] = len(gtfs_sources)

    # Extract things from GTFS
    download_date_strs = []
    for i, source in enumerate(gtfs_sources):
        if len(gtfs_sources) == 1:
            prefix = ""
        else:
            prefix = "feed_" + str(i) + "_"
        if isinstance(source, string_types):
            G.meta[prefix +
                   'original_gtfs'] = decode_six(source) if source else None
            # Extract GTFS date.  Last date pattern in filename.
            filename_date_list = re.findall(r'\d{4}-\d{2}-\d{2}', source)
            if filename_date_list:
                date_str = filename_date_list[-1]
                G.meta[prefix + 'download_date'] = date_str
                download_date_strs.append(date_str)
            if location_name:
                G.meta['location_name'] = location_name
            else:
                location_name_list = re.findall(r'/([^/]+)/\d{4}-\d{2}-\d{2}',
                                                source)
                if location_name_list:
                    G.meta[prefix + 'location_name'] = location_name_list[-1]
                else:
                    try:
                        G.meta[prefix +
                               'location_name'] = source.split("/")[-4]
                    except:
                        G.meta[prefix + 'location_name'] = source

    if G.meta['download_date'] == "":
        unique_download_dates = list(set(download_date_strs))
        if len(unique_download_dates) == 1:
            G.meta['download_date'] = unique_download_dates[0]

    G.meta['timezone'] = cur.execute(
        'SELECT timezone FROM agencies LIMIT 1').fetchone()[0]
    stats.update_stats(G)
    del G

    if print_progress:
        print("Vacuuming...")
    # Next 3 lines are python 3.6 work-arounds again.
    conn.isolation_level = None  # former default of autocommit mode
    cur.execute('VACUUM;')
    conn.isolation_level = ''  # back to python default
    # end python3.6 workaround
    if print_progress:
        print("Analyzing...")
    cur.execute('ANALYZE')
    if not (preserve_connection is True):
        conn.close()
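A minimal usage sketch of import_gtfs itself (the source zip, output path, and location name are placeholders); Example 7 above shows the same call pattern against the Kuopio test feed:

# Placeholder source and output paths; any GTFS zip file or extracted directory works.
import_gtfs(["data/some_city_gtfs.zip"],
            "some_city.sqlite",
            print_progress=True,
            location_name="Some City")

# Passing an existing sqlite3.Connection as `output` together with
# preserve_connection=True leaves the connection open for further queries.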