def gtfs_download(self, url, dt, zone):
    """Do downloading of one file."""
    print("Downloading", self.slug, url, zone, dt)
    # Use only standard library functions to avoid dependencies.
    # furl = urllib.urlopen(url)
    opener = FancyURLopener()
    # We have to set up an authentication method on the opener if
    # we will need to authenticate.  This does HTTP BASIC only so
    # far.
    if 'authentication' in self.data:
        auth_name = self.data['authentication']
        auth = auth_data['sites'][auth_name]
        # A callback method which performs the authentication.
        # Return (user, pass) tuple.
        opener.prompt_user_passwd = \
            lambda host, realm: (auth['username'], auth['password'])
        # URL parameters auth method
        if 'url_suffix' in auth:
            url = url + auth['url_suffix']
    if "{API_KEY}" in url:
        try:
            auth_name = self.data['authentication']
        except KeyError:
            auth_name = self.name
        auth = auth_data['sites'][auth_name]
        url = url.format(API_KEY=auth['API_KEY'])
    # Make GTFS path.
    gtfs_path = self.path_gtfszip(dt, zone)
    util.makedirs(os.path.dirname(gtfs_path))
    # Open the URL.
    print("**** Connecting to %s" % url)
    # Open GTFS and relay data from web to file.
    with util.create_file(gtfs_path) as tmp_gtfs_path:
        opener.retrieve(url, tmp_gtfs_path)
    self.test_corrupted_zip(gtfs_path)
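# A hedged sketch of the auth_data structure this method reads.  The site
# name and credential values below are purely illustrative; only the keys
# ('sites', 'username', 'password', 'url_suffix', 'API_KEY') are implied by
# the accesses above, and 'url_suffix' / 'API_KEY' are each optional
# depending on how the feed authenticates.
auth_data = {
    'sites': {
        'example-feed': {                    # hypothetical site name
            'username': 'user',              # used for HTTP BASIC auth
            'password': 'secret',
            'url_suffix': '?apikey=abc123',  # appended verbatim to the URL
            'API_KEY': 'abc123',             # substituted for "{API_KEY}"
        },
    },
}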
def setUp(self):
    """This method is run once before _each_ test method is executed"""
    self.gtfs_source_dir = self.__class__.gtfs_source_dir
    self.gtfs = self.__class__.G
    self.extract_output_dir = os.path.join(
        self.gtfs_source_dir, "../", "test_gtfspy_extracts_8211231/")
    if not os.path.exists(self.extract_output_dir):
        makedirs(self.extract_output_dir)
def write_temporal_network(gtfs, output_filename, start_time_ut=None, end_time_ut=None):
    """
    Parameters
    ----------
    gtfs : gtfspy.GTFS
    output_filename : str
        path of the csv file where the temporal network extract is stored
    start_time_ut : int | None
        start time of the extract in unixtime (seconds after epoch)
    end_time_ut : int | None
        end time of the extract in unixtime (seconds after epoch)
    """
    util.makedirs(os.path.dirname(os.path.abspath(output_filename)))
    pandas_data_frame = temporal_network(gtfs,
                                         start_time_ut=start_time_ut,
                                         end_time_ut=end_time_ut)
    pandas_data_frame.to_csv(output_filename, encoding='utf-8', index=False)
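# A minimal usage sketch, assuming "helsinki.sqlite" is an existing
# gtfspy-imported database (the filename, output path, and time window
# are illustrative).
from gtfspy.gtfs import GTFS

g = GTFS("helsinki.sqlite")
day_start_ut = g.get_suitable_date_for_daily_extract(ut=True)
write_temporal_network(g, "extracts/network_temporal_day.csv",
                       start_time_ut=day_start_ut + 7 * 3600,
                       end_time_ut=day_start_ut + 10 * 3600)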
def write_temporal_networks_by_route_type(gtfs, extract_output_dir):
    """
    Write temporal networks by route type to disk.

    Parameters
    ----------
    gtfs : gtfspy.GTFS
    extract_output_dir : str
    """
    util.makedirs(extract_output_dir)
    for route_type in route_types.TRANSIT_ROUTE_TYPES:
        pandas_data_frame = temporal_network(gtfs,
                                             start_time_ut=None,
                                             end_time_ut=None,
                                             route_type=route_type)
        tag = route_types.ROUTE_TYPE_TO_LOWERCASE_TAG[route_type]
        out_file_name = os.path.join(extract_output_dir, tag + ".tnet")
        pandas_data_frame.to_csv(out_file_name, encoding='utf-8', index=False)
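# The same database can also be split per mode, reusing the GTFS object g
# from the sketch above; this writes one ".tnet" file per transit route
# type, e.g. "extracts/by_mode/bus.tnet" (directory name is illustrative).
write_temporal_networks_by_route_type(g, "extracts/by_mode/")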
def distance_vs_rows_histogram(a2aa, img_dir=None):
    ignore_stops = stops_to_exclude(return_sqlite_list=True)
    measure = "mean"
    fig = plt.figure(figsize=(7, 7))
    ax = fig.add_subplot(111)
    n_value = 180
    for threshold, sign in zip([-1 * n_value, n_value], ["<=", ">="]):
        df = a2aa.get_rows_with_abs_change_greater_than_n(ignore_stops, measure,
                                                          threshold, sign, unit="s")
        # Do not shadow the loop variable with the histogram return values;
        # the output filename below needs the threshold, not the bin counts.
        counts, bins, patches = ax.hist(np.array(df["before_" + measure]),
                                        normed=True, facecolor='green', alpha=0.75)
        plt.ylim(0, 0.2)
        plt.xlabel("travel time")
        plt.ylabel("number of stop_pairs")
        if not img_dir:
            img_dir = makedirs(
                "/home/clepe/production/results/helsinki/figs/all_to_all/heatmaps")
        plt.savefig(os.path.join(img_dir,
                                 "distance_vs_volume_of_change_" + str(threshold) + ".png"),
                    format="png", dpi=300)
def get_zone_to_all(a2aa, measure_mode, measure="mean", rerun=True):
    """
    Returns rows for each combination of zone type.

    :param a2aa:
    :param measure_mode:
    :param measure:
    :param rerun:
    :return:
    """
    all_dfs, all_stops = analysis_zones()
    dfs = {}
    pickle_path = os.path.join(
        makedirs("/home/clepe/production/results/helsinki/figs/all_to_all/heatmaps"),
        measure_mode + "_z2a_dataframe.pickle")
    if rerun:
        for ba in ["before", "after"]:
            for (i_name, i) in all_dfs:
                dfs[(i_name, ba)] = a2aa.get_rows_based_on_stop_list(
                    i["stop_I"], all_stops["stop_I"], measure, measure_mode, unit="s")
        with open(pickle_path, 'wb') as f:
            pickle.dump(dfs, f, -1)
    else:
        with open(pickle_path, 'rb') as f:
            dfs = pickle.load(f)
    return dfs, all_dfs
def get_combinations(a2aa, measure="mean", mode="temporal_distance", rerun=True, unit="s"):
    """
    Returns rows for each combination of zone type.

    :param a2aa:
    :param measure:
    :param mode:
    :param rerun:
    :param unit:
    :return:
    """
    all_dfs, _ = analysis_zones()
    # Materialize the product: itertools.product yields a one-shot iterator,
    # and this function both loops over the combinations and returns them.
    combinations = list(itertools.product(all_dfs, all_dfs))
    dfs = {}
    pickle_path = os.path.join(
        makedirs("/home/clepe/production/results/helsinki/figs/all_to_all/heatmaps"),
        "dataframe.pickle")
    if rerun:
        for ((i_name, i), (j_name, j)) in combinations:
            dfs[(i_name, j_name)] = a2aa.get_rows_based_on_stop_list(
                i["stop_I"], j["stop_I"], measure, mode, unit=unit)
        with open(pickle_path, 'wb') as f:
            pickle.dump(dfs, f, -1)
    else:
        with open(pickle_path, 'rb') as f:
            dfs = pickle.load(f)
    return combinations, dfs, all_dfs
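# Why the list() above matters: itertools.product returns a one-shot
# iterator, so consuming it in the rerun loop would hand the caller an
# already-exhausted "combinations".  A minimal demonstration:
import itertools

pairs = itertools.product("ab", "ab")
first_pass = list(pairs)   # 4 tuples
second_pass = list(pairs)  # [] - nothing left on the second pass
assert len(first_pass) == 4 and second_pass == []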
def write_static_networks(gtfs, output_dir, fmt=None):
    """
    Parameters
    ----------
    gtfs : gtfspy.GTFS
    output_dir : (str, unicode)
        a path where to write
    fmt : None, optional
        defaulting to "edg" and writing results as ".edg" files
        If "csv", csv files are produced instead
    """
    if fmt is None:
        fmt = "edg"
    single_layer_networks = stop_to_stop_networks_by_type(gtfs)
    util.makedirs(output_dir)
    for route_type, net in single_layer_networks.items():
        tag = route_types.ROUTE_TYPE_TO_LOWERCASE_TAG[route_type]
        file_name = os.path.join(output_dir, "network_" + tag + "." + fmt)
        if len(net.edges()) > 0:
            _write_stop_to_stop_network_edges(net, file_name, fmt=fmt)
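# Usage sketch, reusing the GTFS object g from the export sketches above
# (the output directory is hypothetical): writes one edge file per route
# type, e.g. "network_bus.csv", skipping networks with no edges.
write_static_networks(g, "extracts/static/", fmt="csv")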
def __init__(self):
    self.G = GTFS(GTFS_DATA_BASEDIR)
    self.day_start_ut = self.G.get_suitable_date_for_daily_extract(ut=True) + 3600
    self.start_time = self.day_start_ut + 8 * 3600
    self.end_time = self.day_start_ut + 11 * 3600
    self.profiles = {}
    self.journey_analyzer = None
    # self.analysis_start_time
    # self.analysis_end_time
    makedirs(RESULTS_DIRECTORY)
    print("Retrieving transit events")
    self.connections = []
    for e in self.G.generate_routable_transit_events(start_time_ut=self.start_time,
                                                     end_time_ut=self.end_time):
        self.connections.append(Connection(int(e.from_stop_I),
                                           int(e.to_stop_I),
                                           int(e.dep_time_ut),
                                           int(e.arr_time_ut),
                                           int(e.trip_I)))
    print("Retrieving walking network")
    self.net = self.G.get_walk_transfer_stop_to_stop_network()
def _pickle_results(self, profiles, pickle_subdir, target):
    pickle_path = makedirs(os.path.join(self.pickle_dir, str(pickle_subdir)))
    pickle_path = os.path.join(pickle_path, str(target) + ".pickle")
    profiles = dict((key, value.get_final_optimal_labels())
                    for (key, value) in profiles.items())
    # An earlier variant recomputed the Pareto front before pickling:
    # for key, values in profiles.items():
    #     values.sort(key=lambda x: x.departure_time, reverse=True)
    #     profiles[key] = compute_pareto_front(values)
    with open(pickle_path, 'wb') as f:
        pickle.dump(profiles, f, -1)
    profiles = None
    gc.collect()
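# Reading one of the pickled profile files back (the paths here are
# illustrative).  Protocol -1 above means "highest available protocol",
# so the reading Python must be at least as new as the writing one.
import pickle

with open("pickles/3600/1234.pickle", 'rb') as f:
    profiles = pickle.load(f)  # {key: final optimal labels}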
def single_stop_change_histogram(target, measure, direction="to", indicator="diff_mean",
                                 a2aa=None, img_dir=None, ax=None, return_ax=False,
                                 cdf=False, color='blue', label=''):
    if not a2aa:
        a2aa = AllToAllDifferenceAnalyzer(GTFS_PATH, A2AA_DB_OLD_PATH,
                                          A2AA_DB_LM_PATH, A2AA_OUTPUT_DB_PATH)
    if not ax:
        fig = plt.figure(figsize=(7, 7))
        ax = fig.add_subplot(111, title="")
    if measure == "n_boardings":
        yedges = np.arange(-2.0, 2.0, 0.1)
        unit = "s"
    else:
        yedges = range(-25, 25, 1)
        unit = "m"
    if indicator == "diff_mean_relative":
        yedges = np.arange(-0.7, 0.7, 0.05)
        unit = "s"
    df = a2aa.get_data_for_target(target, measure, direction=direction,
                                  unit=unit, ignore_stops=True)
    if cdf:
        values, base = np.histogram(np.array(df[indicator]), bins=yedges)
        # evaluate the cumulative
        cumulative = np.cumsum(values)
        # plot the cumulative function
        ax.plot(base[:-1], cumulative, c=color, label=label)
        plt.ylim(0, max(cumulative))
    else:
        n, bins, patches = ax.hist(np.array(df[indicator]), bins=yedges,
                                   normed=True, facecolor='green', alpha=0.75)
        plt.ylim(0, 0.2)
    # ax.plot([0, 90], [0, 90], c="r")
    if return_ax:
        return ax
    plt.xlabel("")
    plt.ylabel("")
    if not img_dir:
        img_dir = makedirs(
            "/home/clepe/production/results/helsinki/figs/all_to_all/heatmaps")
    plt.savefig(os.path.join(img_dir,
                             "diff_" + str(target) + "-" + measure + "-" + indicator + ".pdf"),
                format="pdf", dpi=300)
def __init__(self, city_publish_tuple, feeds=None, download_date=None):
    # print(city_publish_tuple, feeds, download_date)
    # Feed parameters
    self.feeds = feeds
    self.city_id = city_publish_tuple.id
    self.lat = float(city_publish_tuple.lat)
    self.lon = float(city_publish_tuple.lon)
    self.publishable = city_publish_tuple.publishable
    self.extract_start_date = city_publish_tuple.extract_start_date
    self.buffer_distance = float(city_publish_tuple.buffer)
    self.name = city_publish_tuple.name
    if not download_date:
        if city_publish_tuple.download_date:
            self.download_date = city_publish_tuple.download_date
        else:
            raise Exception('No download date specified!')
    else:
        self.download_date = download_date

    # Create output directory:
    assert isinstance(self.city_id, str)
    assert isinstance(self.download_date, str)
    self.output_directory = util.makedirs(os.path.join(TO_PUBLISH_ROOT_OUTPUT_DIR,
                                                       self.city_id,
                                                       self.download_date))  # create
    if any(x in self.feeds for x in COUNTRY_FEED_LIST):
        country_feed = [feed for feed in self.feeds if feed in COUNTRY_FEED_LIST]
        country_feed = country_feed[0]
        output_sub_dir_country = util.makedirs(os.path.join(COUNTRY_FEEDS_DIR,
                                                            country_feed,
                                                            self.download_date))
        self.raw_db_path = os.path.join(output_sub_dir_country,
                                        ExtractPipeline.TEMP_FILE_PREFIX + SQLITE_ENDING)
    else:
        self.raw_db_path = os.path.join(self.output_directory,
                                        ExtractPipeline.TEMP_FILE_PREFIX + SQLITE_ENDING)
    self.main_db_path = os.path.join(self.output_directory,
                                     ExtractPipeline.TEMP_FILE_PREFIX + "_main" + SQLITE_ENDING)
    self.week_db_path = os.path.join(self.output_directory, "week" + SQLITE_ENDING)
    self.day_db_path = os.path.join(self.output_directory,
                                    ExtractPipeline.TEMP_FILE_PREFIX + "_day" + SQLITE_ENDING)
    self.week_gtfs_path = os.path.join(self.output_directory, "week." + GTFS_ZIPFILE_BASENAME)
    self.temporal_network_fname = os.path.join(self.output_directory, "network_temporal_day.csv")
    self.temporal_network_week_fname = os.path.join(self.output_directory, "network_temporal_week.csv")
    self.network_node_info_fname = os.path.join(self.output_directory, "network_nodes.csv")
    self.network_combined_fname = os.path.join(self.output_directory, "network_combined.csv")
    self.stops_geojson_fname = os.path.join(self.output_directory, "stops.geojson")
    self.sections_geojson_fname = os.path.join(self.output_directory, "sections.geojson")
    self.routes_geojson_fname = os.path.join(self.output_directory, "routes.geojson")
    self.stats_fname = os.path.join(self.output_directory, "stats.csv")
    self.notes_fname = os.path.join(self.output_directory, "notes.txt")
    self.log_fname = os.path.join(TO_PUBLISH_ROOT_OUTPUT_DIR,
                                  self.city_id + "_" + self.download_date + ".txt")
    self.coordinate_corrections = pandas.read_csv("coordinate_corrections.csv", sep=",")

    # GTFS warning containers:
    self.tv_warnings = None  # timetable validation warnings
    self.iv_warnings = None  # import validation warnings
    self.raw_import_warnings_summary_fname = os.path.join(
        self.output_directory,
        ExtractPipeline.TEMP_FILE_PREFIX + "raw_db_import_warnings_summary.log")
    self.raw_import_warnings_details_fname = os.path.join(
        self.output_directory,
        ExtractPipeline.TEMP_FILE_PREFIX + "raw_db_import_warnings_details.log")
    self.main_db_timetable_warnings_summary_fname = os.path.join(
        self.output_directory,
        ExtractPipeline.TEMP_FILE_PREFIX + "main_db_timetable_warnings_summary.log")
    self.main_db_timetable_warnings_details_fname = os.path.join(
        self.output_directory,
        ExtractPipeline.TEMP_FILE_PREFIX + "main_db_timetable_warnings_details.log")
    self.week_db_timetable_warnings_summary_fname = os.path.join(
        self.output_directory, "week_db_timetable_warnings_summary.log")
    self.week_db_timetable_warnings_details_fname = os.path.join(
        self.output_directory,
        ExtractPipeline.TEMP_FILE_PREFIX + "week_db_timetable_warnings_details.log")
    self.weekly_extract_dates_plot_fname = os.path.join(
        self.output_directory,
        ExtractPipeline.TEMP_FILE_PREFIX + "extract_start_date_plot.pdf")
    self.thumbnail_path = os.path.join(self.output_directory, "thumbnail.jpg")
    self.zip_file_name = os.path.join(self.output_directory, self.city_id + ".zip")
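# A hedged construction sketch: any object exposing the attributes read
# above works as city_publish_tuple; the namedtuple and all values here are
# purely illustrative.  Note that __init__ also expects a
# "coordinate_corrections.csv" file in the working directory.
from collections import namedtuple

CityRow = namedtuple("CityRow", ["id", "lat", "lon", "publishable",
                                 "extract_start_date", "buffer", "name",
                                 "download_date"])
row = CityRow(id="helsinki", lat="60.17", lon="24.94", publishable=1,
              extract_start_date="2016-09-05", buffer="40",
              name="Helsinki", download_date="2016-09-12")
pipeline = ExtractPipeline(row, feeds=["helsinki"])  # not a country feed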
import os

from gtfspy import util

COUNTRY_FEED_LIST = ['denmark', 'sweden', 'finland-matka', 'belgium',
                     'switzerland', 'israel', 'netherlands', 'norway']

__THIS_DIR = os.path.dirname(os.path.realpath(__file__))
RAW_DATA_DIR = os.path.join(__THIS_DIR, "../../scratch/rawgtfs/")
assert os.path.exists(RAW_DATA_DIR)
ALL_RAW_GTFS_DIRS = []
TO_PUBLISH_ROOT_OUTPUT_DIR = os.path.join(__THIS_DIR, "../../scratch/to_publish/")
# TO_PUBLISH_ROOT_OUTPUT_DIR = os.path.join(__THIS_DIR, "copies_from_hammer/")
COUNTRY_FEEDS_DIR = os.path.join(__THIS_DIR, "../../scratch/country_feeds_for_publish")
SQLITE_ENDING = ".sqlite"
# Note: TO_PUBLISH_ROOT_OUTPUT_DIR is already absolute, so os.path.join
# discards the leading __THIS_DIR here.
THUMBNAIL_DIR = os.path.join(__THIS_DIR, TO_PUBLISH_ROOT_OUTPUT_DIR, "thumbnails/")
util.makedirs(THUMBNAIL_DIR)
GTFS_ZIPFILE_BASENAME = "gtfs.zip"
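# The assignments throughout this code base (e.g. "img_dir = makedirs(...)"
# and "self.output_directory = util.makedirs(...)") rely on util.makedirs
# returning the directory path it idempotently created, unlike os.makedirs
# which returns None.  A small sketch of that contract, assuming gtfspy's
# util.makedirs behaves as those call sites imply (path is illustrative):
from gtfspy import util

thumbs = util.makedirs("/tmp/demo/thumbnails/")
assert thumbs == "/tmp/demo/thumbnails/"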