def find_dbfnames():
    dbfnames = []
    for sdir in settings.DB_DIRS:
        # If this entry is a regular file, use it directly.
        if os.path.isfile(sdir):
            dbfnames.append(sdir)
            continue
        # Directories: collect all matching database files from subdirectories.
        for cur_dir, subdirs, files in os.walk(sdir):
            for ending in settings.DB_ENDINGS:
                dbfnames.extend(glob(os.path.join(cur_dir, ending)))
    dbfnames = list(set(dbfnames))  # remove any duplicates

    # Remove the common prefix.
    if len(dbfnames) == 1:
        commonprefix = ""
    else:
        commonprefix = os.path.commonprefix(dbfnames)
    dbfnames = [fname[len(commonprefix):] for fname in dbfnames]

    # Exclude test databases.
    dbfnames = sorted([e for e in dbfnames if "proc/test/" not in e])

    valid_dbfnames = []
    timezone_dict = {}
    for dbf in dbfnames:
        try:
            timezone_dict[dbf] = gtfs.GTFS(commonprefix + dbf).get_timezone_string()
            valid_dbfnames.append(dbf)
        except OperationalError as e:
            print("database " + dbf + " is not available due to: \n" + str(e))

    data = {'dbfnames': valid_dbfnames, 'timezones': timezone_dict}
    dbfname_cache = json.dumps(data)
    return dbfnames, commonprefix, dbfname_cache
def view_line_data():
    # print(request.args)
    dbfname = get_dbfname(request.args.get('dbfname', None))
    shapes = request.args.get('use_shapes', None)
    if shapes == "1":
        shapes = True
    else:
        shapes = False
    G = gtfs.GTFS(dbfname)  # handles connections to the database etc.
    data = G.get_all_route_shapes(use_shapes=shapes)
    routes = []
    for raw_route in data:
        agency = raw_route["agency"]
        lats = [float(lat) for lat in raw_route['lats']]
        lons = [float(lon) for lon in raw_route['lons']]
        route_type = int(raw_route['type'])
        name = str(raw_route['name'])
        agency_name = str(raw_route['agency_name'])
        route = {
            "agency": agency,
            "lats": lats,
            "lons": lons,
            "route_type": route_type,
            "name": name,
            "agency_name": agency_name
        }
        routes.append(route)
    return json.dumps(routes)
def view_spreading_explorer():
    dbfname = get_dbfname(request.args.get('dbfname', None))
    shapes = request.args.get('use_shapes', None)
    tstart = request.args.get('tstart', None)
    tend = request.args.get('tend', None)
    lat = request.args.get('lat', None)
    lon = request.args.get('lon', None)
    if not dbfname:
        return json.dumps({})
    if tstart:
        tstart = int(tstart)
    if tend:
        tend = int(tend)
    if lat:
        lat = float(lat)
    if lon:
        lon = float(lon)
    if shapes == "1":
        shapes = True
    else:
        shapes = False
    G = gtfs.GTFS(dbfname)  # handles connections to the database etc.
    data = G.get_spreading_trips(tstart, lat, lon, tend - tstart, use_shapes=shapes)
    return json.dumps(data)
def get_gtfs_stats():
    dbfname = get_dbfname(request.args.get('dbfname', None))
    if not dbfname:
        return json.dumps({})
    G = gtfs.GTFS(dbfname)
    data = stats.get_stats(G)
    return json.dumps(data)
def view_stop_data():
    # print(request.args)
    tstart = int(request.args.get('tstart', None))
    tend = int(request.args.get('tend', None))
    dbfname = get_dbfname(request.args.get('dbfname', None))
    G = gtfs.GTFS(dbfname)  # handles connections to the database etc.
    stopdata = G.get_stop_count_data(tstart, tend)
    return stopdata.to_json(orient="records")
def get_start_and_end_time_ut():
    dbfname = get_dbfname(request.args.get('dbfname', ""))
    if dbfname == "null":
        dbfname = ""
    G = gtfs.GTFS(dbfname)
    start, end = G.get_approximate_schedule_time_span_in_ut()
    data = {"start_time_ut": start, "end_time_ut": end}
    return json.dumps(data)
def load_or_import_example_gtfs(verbose=False):
    imported_database_path = "test_db_kuopio.sqlite"
    if not os.path.exists(imported_database_path):
        # Re-import only if the imported database does not already exist.
        print("Importing gtfs zip file")
        import_gtfs.import_gtfs(["data/gtfs_kuopio_finland.zip"],  # input: list of GTFS zip files (or directories)
                                imported_database_path,            # output: where to create the new sqlite3 database
                                print_progress=verbose,            # whether to print progress when importing data
                                location_name="Kuopio")

        # Note: this is an optional step, which is not necessary for many things.
        print("Computing walking paths using OSM")
        G = gtfs.GTFS(imported_database_path)
        G.meta['download_date'] = "2017-03-15"

        osm_path = "data/kuopio_extract_mapzen_2017_03_15.osm.pbf"

        # When used with the Kuopio test data set, this should raise a warning
        # due to no nearby OSM nodes for one of the stops.
        osm_transfers.add_walk_distances_to_db_python(imported_database_path, osm_path)

        print("Note: for large cities we also have a faster option for computing footpaths that uses Java.")
        dir_path = os.path.dirname(os.path.realpath(__file__))
        java_path = os.path.join(dir_path, "../java_routing/")
        print("Please see the contents of " + java_path + " for more details.")

    # Now you can access the imported database using a GTFS object as an interface:
    G = gtfs.GTFS(imported_database_path)

    if verbose:
        print("Location name: " + G.get_location_name())  # should print "Kuopio"
        print("Time span of the data in unixtime: " +
              str(G.get_approximate_schedule_time_span_in_ut()))
    return G
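# A minimal usage sketch for load_or_import_example_gtfs (assumes this file is run
# as a script with the Kuopio example data available under ./data/, as above):
if __name__ == "__main__":
    G = load_or_import_example_gtfs(verbose=True)
    print(G.get_location_name())                          # should print "Kuopio"
    print(G.get_approximate_schedule_time_span_in_ut())   # (start_ut, end_ut) tuple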
def get_trip_counts_per_day():
    dbfname = get_dbfname(request.args.get('dbfname', None))
    if not dbfname:
        return json.dumps({})
    g = gtfs.GTFS(dbfname)
    data = g.get_trip_counts_per_day()
    data_dict = {
        "trip_counts": [int(c) for c in data["trip_counts"].values],
        "dates": [str(date) for date in data["date_str"].values]
    }
    return json.dumps(data_dict)
def view_segment_data():
    # print(request.args)
    tstart = int(request.args.get('tstart', None))
    tend = int(request.args.get('tend', None))
    dbfname = get_dbfname(request.args.get('dbfname', None))
    shapes = request.args.get('use_shapes', None)
    if shapes == "1":
        shapes = True
    else:
        shapes = False
    G = gtfs.GTFS(dbfname)  # handles connections to the database etc.
    data = G.get_segment_count_data(tstart, tend, use_shapes=shapes)
    return json.dumps(data)
def _update_metadata(self):
    # Update metadata.
    G_orig = self.gtfs
    if self.update_metadata:
        print("Updating metadata")
        logging.info("Updating metadata")
        G_copy = gtfs.GTFS(self.copy_db_conn)
        G_copy.meta['copied_from'] = self.this_db_path
        G_copy.meta['copy_time_ut'] = time.time()
        G_copy.meta['copy_time'] = time.ctime()

        # Copy some keys directly.
        try:
            for key in ['original_gtfs',
                        'download_date',
                        'location_name',
                        'timezone', ]:
                G_copy.meta[key] = G_orig.meta[key]
        except KeyError:
            # This branch handles GTFS objects with multiple sources.
            for k, v in G_copy.meta.items():
                if 'feed_' in k:
                    G_copy.meta[k] = G_orig.meta[k]
            for key in ['location_name',
                        'timezone', ]:
                G_copy.meta[key] = G_orig.meta[key]
        # Update *all* original metadata under the orig_ namespace.
        G_copy.meta.update(('orig_' + k, v) for k, v in G_orig.meta.items())

        stats.update_stats(G_copy)

    # print("Vacuuming...")
    self.copy_db_conn.execute('VACUUM;')
    # print("Analyzing...")
    self.copy_db_conn.execute('ANALYZE;')
    self.copy_db_conn.commit()
    return
def get_scheduled_trips_within_interval():
    tstart = request.args.get('tstart', None)
    tend = request.args.get('tend', None)
    dbfname = get_dbfname(request.args.get('dbfname', None))
    shapes = request.args.get('use_shapes', None)
    if shapes == "1":
        shapes = True
    else:
        shapes = False
    if tstart:
        tstart = int(tstart)
    if tend:
        tend = int(tend)
    G = gtfs.GTFS(dbfname)  # handles connections to the database etc.
    # Note: the 'use_shapes' argument is parsed above, but use_shapes is
    # currently hard-coded to False in this call.
    trips = G.get_trip_trajectories_within_timespan(start=tstart,
                                                    end=tend,
                                                    use_shapes=False)
    return json.dumps(trips)
def import_gtfs(gtfs_sources, output, preserve_connection=False,
                print_progress=True, location_name=None, **kwargs):
    """Import a GTFS database.

    gtfs_sources: str, dict, or list
        Paths to the GTFS zip file or to the directory containing the GTFS data.
        Alternatively, a dict can be provided that maps GTFS filenames
        (like 'stops.txt' and 'agencies.txt') to their string representations.
    output: str or sqlite3.Connection
        Path to the new database to be created, or an existing sqlite3 connection.
    preserve_connection: bool, optional
        Whether to keep the connection open after the import (if False, it is closed).
    print_progress: bool, optional
        Whether to print progress output.
    location_name: str, optional
        Set the location name of this database.
    """
    if isinstance(output, sqlite3.Connection):
        conn = output
    else:
        # if os.path.isfile(output):
        #     raise RuntimeError('File already exists')
        conn = sqlite3.connect(output)
    if not isinstance(gtfs_sources, list):
        gtfs_sources = [gtfs_sources]

    cur = conn.cursor()
    time_import_start = time.time()

    # These pragmas are a bit unsafe, but make importing much faster,
    # especially on scratch.
    cur.execute('PRAGMA page_size = 4096;')
    cur.execute('PRAGMA mmap_size = 1073741824;')
    cur.execute('PRAGMA cache_size = -2000000;')
    cur.execute('PRAGMA temp_store=2;')
    # Changes of isolation level are Python 3.6 workarounds -
    # eventually this will probably be fixed and these can be removed.
    conn.isolation_level = None  # change to autocommit mode (former default)
    cur.execute('PRAGMA journal_mode = OFF;')
    # cur.execute('PRAGMA journal_mode = WAL;')
    cur.execute('PRAGMA synchronous = OFF;')
    conn.isolation_level = ''  # change back to Python default
    # end Python 3.6 workaround

    # Do the actual importing.
    loaders = [L(gtfssource=gtfs_sources, print_progress=print_progress, **kwargs)
               for L in Loaders]

    for loader in loaders:
        loader.assert_exists_if_required()

    # Do the initial import. This consists of making tables, raw inserts
    # of the CSVs, and then indexing.
    for loader in loaders:
        loader.import_(conn)

    # Do any operations that require all tables to be present.
    for loader in loaders:
        loader.post_import_round2(conn)

    # Make any views.
    for loader in loaders:
        loader.make_views(conn)

    # Run any postprocessors.
    for F in postprocessors:
        F(conn)

    # Set up some basic metadata.
    from gtfspy import gtfs as mod_gtfs
    G = mod_gtfs.GTFS(output)
    G.meta['gen_time_ut'] = time.time()
    G.meta['gen_time'] = time.ctime()
    G.meta['import_seconds'] = time.time() - time_import_start
    G.meta['download_date'] = ''
    G.meta['location_name'] = ''
    G.meta['n_gtfs_sources'] = len(gtfs_sources)

    # Extract per-source metadata from the GTFS sources.
    download_date_strs = []
    for i, source in enumerate(gtfs_sources):
        if len(gtfs_sources) == 1:
            prefix = ""
        else:
            prefix = "feed_" + str(i) + "_"
        if isinstance(source, string_types):
            G.meta[prefix + 'original_gtfs'] = decode_six(source) if source else None
            # Extract the GTFS date: the last date pattern in the filename.
            filename_date_list = re.findall(r'\d{4}-\d{2}-\d{2}', source)
            if filename_date_list:
                date_str = filename_date_list[-1]
                G.meta[prefix + 'download_date'] = date_str
                download_date_strs.append(date_str)
            if location_name:
                G.meta['location_name'] = location_name
            else:
                location_name_list = re.findall(r'/([^/]+)/\d{4}-\d{2}-\d{2}', source)
                if location_name_list:
                    G.meta[prefix + 'location_name'] = location_name_list[-1]
                else:
                    try:
                        G.meta[prefix + 'location_name'] = source.split("/")[-4]
                    except IndexError:
                        G.meta[prefix + 'location_name'] = source

    if G.meta['download_date'] == "":
        unique_download_dates = list(set(download_date_strs))
        if len(unique_download_dates) == 1:
            G.meta['download_date'] = unique_download_dates[0]

    G.meta['timezone'] = cur.execute('SELECT timezone FROM agencies LIMIT 1').fetchone()[0]
    stats.update_stats(G)
    del G

    if print_progress:
        print("Vacuuming...")
    # The next three lines are Python 3.6 workarounds again.
    conn.isolation_level = None  # former default of autocommit mode
    cur.execute('VACUUM;')
    conn.isolation_level = ''  # back to Python default
    # end Python 3.6 workaround
    if print_progress:
        print("Analyzing...")
    cur.execute('ANALYZE')
    if not (preserve_connection is True):
        conn.close()
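# A minimal usage sketch for import_gtfs, mirroring the call made in
# load_or_import_example_gtfs above; the zip path, output path, and location
# name below are illustrative placeholders, not files shipped with gtfspy:
if __name__ == "__main__":
    import_gtfs(["data/some_gtfs_feed.zip"],   # input: list of GTFS zip files (or directories)
                "some_city.sqlite",            # output: sqlite3 database to be created
                print_progress=True,
                location_name="SomeCity")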