Example #1
 def gtfs_download(self, url, dt, zone):
     """Do downloading of one file."""
     print("Downloading", self.slug, url, zone, dt)
     # Use only standard library functions to avoid dependencies.
     opener = FancyURLopener()
     # If authentication is required, attach a handler to the opener.
     # Only HTTP Basic authentication is supported so far.
     if 'authentication' in self.data:
         auth_name = self.data['authentication']
         auth = auth_data['sites'][auth_name]
         # Override the interactive password prompt with a callback that
         # returns the stored (user, password) tuple.
         opener.prompt_user_passwd = \
             lambda host, realm: (auth['username'], auth['password'])
         # Authentication via a URL suffix (e.g. an API token parameter).
         if 'url_suffix' in auth:
             url = url + auth['url_suffix']
     if "{API_KEY}" in url:
         try:
             auth_name = self.data['authentication']
         except KeyError:
             auth_name = self.name
         auth = auth_data['sites'][auth_name]
         url = url.format(API_KEY=auth['API_KEY'])
     # Make GTFS path.
     gtfs_path = self.path_gtfszip(dt, zone)
     util.makedirs(os.path.dirname(gtfs_path))
     # Open the URL.
     print("**** Connecting to %s" % url)
     # Open GTFS and relay data from web to file.
     with util.create_file(gtfs_path) as tmp_gtfs_path:
         opener.retrieve(url, tmp_gtfs_path)
     self.test_corrupted_zip(gtfs_path)
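
A note on the example above: FancyURLopener has been deprecated since Python 3.3 and was removed in Python 3.12. A minimal sketch of the same Basic-auth download using urllib.request (the function and parameter names below are hypothetical, not part of the original pipeline):

import os
import shutil
import urllib.request


def download_gtfs(url, gtfs_path, username=None, password=None):
    """Stream a GTFS zip from url to gtfs_path, with optional HTTP Basic auth."""
    if username is not None:
        # Register the credentials for this URL, whatever the realm.
        password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
        password_mgr.add_password(None, url, username, password)
        opener = urllib.request.build_opener(
            urllib.request.HTTPBasicAuthHandler(password_mgr))
    else:
        opener = urllib.request.build_opener()
    os.makedirs(os.path.dirname(gtfs_path), exist_ok=True)
    # Relay the response to disk in chunks instead of buffering it in memory.
    with opener.open(url) as response, open(gtfs_path, "wb") as f:
        shutil.copyfileobj(response, f)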
Example #2
 def setUp(self):
     """This method is run once before _each_ test method is executed"""
     self.gtfs_source_dir = self.__class__.gtfs_source_dir
     self.gtfs = self.__class__.G
     self.extract_output_dir = os.path.join(
         self.gtfs_source_dir, "../", "test_gtfspy_extracts_8211231/")
     if not os.path.exists(self.extract_output_dir):
         makedirs(self.extract_output_dir)
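
Several snippets in this collection call makedirs(path) and then use its return value as a path (see _pickle_results and distance_vs_rows_histogram below), so the helper is assumed to create the directory if needed and return it. A minimal sketch of that contract:

import os


def makedirs(path):
    """Create path (including parents) if it does not exist, then return it."""
    os.makedirs(path, exist_ok=True)
    return path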
Example #3
def write_temporal_network(gtfs, output_filename, start_time_ut=None, end_time_ut=None):
    """
    Parameters
    ----------
    gtfs : gtfspy.GTFS
    output_filename : str
        path of the file to which the temporal network is written as csv
    start_time_ut: int | None
        start time of the extract in unixtime (seconds after epoch)
    end_time_ut: int | None
        end time of the extract in unixtime (seconds after epoch)
    """
    util.makedirs(os.path.dirname(os.path.abspath(output_filename)))
    pandas_data_frame = temporal_network(gtfs, start_time_ut=start_time_ut, end_time_ut=end_time_ut)
    pandas_data_frame.to_csv(output_filename, encoding='utf-8', index=False)
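
A usage sketch for the extract above, assuming a GTFS feed has already been imported into an sqlite database (the paths below are hypothetical):

from gtfspy.gtfs import GTFS

g = GTFS("data/helsinki.sqlite")  # hypothetical database path
# Write the full temporal network; pass unixtime bounds to window the extract.
write_temporal_network(g, "extracts/temporal_network.csv",
                       start_time_ut=None, end_time_ut=None)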
Example #4
def write_temporal_networks_by_route_type(gtfs, extract_output_dir):
    """
    Write temporal networks by route type to disk.

    Parameters
    ----------
    gtfs: gtfspy.GTFS
    extract_output_dir: str
        directory to which one ".tnet" file per transit route type is written
    """
    util.makedirs(extract_output_dir)
    for route_type in route_types.TRANSIT_ROUTE_TYPES:
        pandas_data_frame = temporal_network(gtfs, start_time_ut=None, end_time_ut=None, route_type=route_type)
        tag = route_types.ROUTE_TYPE_TO_LOWERCASE_TAG[route_type]
        out_file_name = os.path.join(extract_output_dir, tag + ".tnet")
        pandas_data_frame.to_csv(out_file_name, encoding='utf-8', index=False)
Example #5
def distance_vs_rows_histogram(a2aa, img_dir=None):
    ignore_stops = stops_to_exclude(return_sqlite_list=True)
    measure = "mean"
    fig = plt.figure(figsize=(7, 7))
    ax = fig.add_subplot(111)
    n_value = 180
    for n, sign in zip([-1 * n_value, n_value], ["<=", ">="]):
        df = a2aa.get_rows_with_abs_change_greater_than_n(ignore_stops,
                                                          measure,
                                                          n,
                                                          sign,
                                                          unit="s")
        # Unpack into "counts" so the loop variable n is not shadowed;
        # "density" replaces the long-removed matplotlib "normed" argument.
        counts, bins, patches = ax.hist(np.array(df["before_" + measure]),
                                        density=True,
                                        facecolor='green',
                                        alpha=0.75)
        plt.ylim(0, 0.2)

    plt.xlabel("travel time")
    plt.ylabel("number of stop_pairs")
    if not img_dir:
        img_dir = makedirs(
            "/home/clepe/production/results/helsinki/figs/all_to_all/heatmaps")
    plt.savefig(os.path.join(img_dir, "distance_vs_volume_of_change_" +
                             str(n_value) + ".png"),
                format="png",
                dpi=300)
Example #6
def get_zone_to_all(a2aa, measure_mode, measure="mean", rerun=True):
    """
    Returns rows for each combination of zone type
    :param a2aa:
    :param measure:
    :param rerun:
    :return:
    """
    all_dfs, all_stops = analysis_zones()

    dfs = {}
    pickle_path = os.path.join(
        makedirs(
            "/home/clepe/production/results/helsinki/figs/all_to_all/heatmaps"
        ), measure_mode + "_z2a_dataframe.pickle")

    if rerun:
        for ba in ["before", "after"]:
            for (i_name, i) in all_dfs:
                dfs[(i_name, ba)] = a2aa.get_rows_based_on_stop_list(
                    i["stop_I"],
                    all_stops["stop_I"],
                    measure,
                    measure_mode,
                    unit="s")

        with open(pickle_path, 'wb') as f:
            pickle.dump(dfs, f, -1)
    else:
        with open(pickle_path, 'rb') as f:
            dfs = pickle.load(f)
    return dfs, all_dfs
Example #7
def get_combinations(a2aa,
                     measure="mean",
                     mode="temporal_distance",
                     rerun=True,
                     unit="s"):
    """
    Returns rows for each combination of zone type
    :param a2aa:
    :param measure:
    :param rerun:
    :param mode:
    :param unit:
    :return:
    """
    all_dfs, _ = analysis_zones()
    # Materialize the product: the loop below would otherwise exhaust the
    # iterator before it is returned to the caller.
    combinations = list(itertools.product(all_dfs, all_dfs))
    dfs = {}
    pickle_path = os.path.join(
        makedirs(
            "/home/clepe/production/results/helsinki/figs/all_to_all/heatmaps"
        ), "dataframe.pickle")

    if rerun:
        for ((i_name, i), (j_name, j)) in combinations:
            dfs[(i_name,
                 j_name)] = a2aa.get_rows_based_on_stop_list(i["stop_I"],
                                                             j["stop_I"],
                                                             measure,
                                                             mode,
                                                             unit=unit)

        with open(pickle_path, 'wb') as f:
            pickle.dump(dfs, f, -1)
    else:
        with open(pickle_path, 'rb') as f:
            dfs = pickle.load(f)
    return combinations, dfs, all_dfs
Example #8
def write_static_networks(gtfs, output_dir, fmt=None):
    """
    Parameters
    ----------
    gtfs: gtfspy.GTFS
    output_dir: str
        directory to which the network files are written
    fmt: str, optional
        defaults to "edg", writing results as ".edg" files;
        if "csv", csv files are produced instead
    """
    if fmt is None:
        fmt = "edg"
    single_layer_networks = stop_to_stop_networks_by_type(gtfs)
    util.makedirs(output_dir)
    for route_type, net in single_layer_networks.items():
        tag = route_types.ROUTE_TYPE_TO_LOWERCASE_TAG[route_type]
        file_name = os.path.join(output_dir, "network_" + tag + "." + fmt)
        if len(net.edges()) > 0:
            _write_stop_to_stop_network_edges(net, file_name, fmt=fmt)
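
A usage sketch (paths hypothetical, as in the Example #3 sketch):

from gtfspy.gtfs import GTFS

g = GTFS("data/helsinki.sqlite")  # hypothetical database path
# One file per transit mode with at least one edge, named "network_<tag>.edg"
# (or ".csv" when fmt="csv"); tags come from ROUTE_TYPE_TO_LOWERCASE_TAG.
write_static_networks(g, "extracts/static/")
write_static_networks(g, "extracts/static/", fmt="csv")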
Example #9
 def __init__(self):
     self.G = GTFS(GTFS_DATA_BASEDIR)
     self.day_start_ut = self.G.get_suitable_date_for_daily_extract(
         ut=True) + 3600
     self.start_time = self.day_start_ut + 8 * 3600
     self.end_time = self.day_start_ut + 11 * 3600
     self.profiles = {}
     self.journey_analyzer = None
     # self.analysis_start_time
     # self.analysis_end_time
     makedirs(RESULTS_DIRECTORY)
     print("Retrieving transit events")
     self.connections = []
     for e in self.G.generate_routable_transit_events(
             start_time_ut=self.start_time, end_time_ut=self.end_time):
         self.connections.append(
             Connection(int(e.from_stop_I), int(e.to_stop_I),
                        int(e.dep_time_ut), int(e.arr_time_ut),
                        int(e.trip_I)))
     print("Retrieving walking network")
     self.net = self.G.get_walk_transfer_stop_to_stop_network()
Example #10
 def _pickle_results(self, profiles, pickle_subdir, target):
     pickle_path = makedirs(os.path.join(self.pickle_dir, str(pickle_subdir)))
     pickle_path = os.path.join(pickle_path, str(target) + ".pickle")
     profiles = dict((key, value.get_final_optimal_labels()) for (key, value) in profiles.items())
     """for key, values in profiles.items():
         values.sort(key=lambda x: x.departure_time, reverse=True)
         new_values = compute_pareto_front(values)
         profiles[key] = new_values
         """
     with open(pickle_path, 'wb') as f:
         pickle.dump(profiles, f, -1)
     # Drop the reference before collecting, to free memory between targets.
     profiles = None
     gc.collect()
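
The inverse of _pickle_results, a sketch for reading one target's optimal labels back (the helper name is hypothetical):

import os
import pickle


def load_pickled_profiles(pickle_dir, pickle_subdir, target):
    """Load the {key: final optimal labels} dict written by _pickle_results."""
    pickle_path = os.path.join(pickle_dir, str(pickle_subdir),
                               str(target) + ".pickle")
    with open(pickle_path, "rb") as f:
        return pickle.load(f)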
Example #11
def single_stop_change_histogram(target,
                                 measure,
                                 direction="to",
                                 indicator="diff_mean",
                                 a2aa=None,
                                 img_dir=None,
                                 ax=None,
                                 return_ax=False,
                                 cdf=False,
                                 color='blue',
                                 label=''):
    if not a2aa:
        a2aa = AllToAllDifferenceAnalyzer(GTFS_PATH, A2AA_DB_OLD_PATH,
                                          A2AA_DB_LM_PATH, A2AA_OUTPUT_DB_PATH)
    if not ax:
        fig = plt.figure(figsize=(7, 7))
        ax = fig.add_subplot(111, title="")

    if measure == "n_boardings":
        yedges = np.arange(-2.0, 2.0, 0.1)
        unit = "s"
    else:
        yedges = range(-25, 25, 1)
        unit = "m"
    if indicator == "diff_mean_relative":
        yedges = np.arange(-0.7, 0.7, 0.05)
        unit = "s"

    df = a2aa.get_data_for_target(target,
                                  measure,
                                  direction=direction,
                                  unit=unit,
                                  ignore_stops=True)
    if cdf:
        values, base = np.histogram(np.array(df[indicator]), bins=yedges)
        # evaluate the cumulative
        cumulative = np.cumsum(values)
        # plot the cumulative function
        ax.plot(base[:-1], cumulative, c=color, label=label)
        plt.ylim(0, max(cumulative))

    else:
        # "density" replaces the long-removed matplotlib "normed" argument.
        counts, bins, patches = ax.hist(np.array(df[indicator]),
                                        bins=yedges,
                                        density=True,
                                        facecolor='green',
                                        alpha=0.75)
        plt.ylim(0, 0.2)

    # ax.plot([0, 90], [0, 90], c="r")
    if return_ax:
        return ax
    plt.xlabel("")
    plt.ylabel("")
    if not img_dir:
        img_dir = makedirs(
            "/home/clepe/production/results/helsinki/figs/all_to_all/heatmaps")
    plt.savefig(os.path.join(
        img_dir,
        "diff_" + str(target) + "-" + measure + "-" + indicator + ".pdf"),
                format="pdf",
                dpi=300)
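
The manual np.histogram + np.cumsum + plot sequence in the cdf branch above can be collapsed into a single matplotlib call, a sketch with synthetic data:

import matplotlib.pyplot as plt
import numpy as np


def plot_cdf(ax, values, bins, color="blue", label=""):
    # cumulative=True accumulates the counts left to right; histtype="step"
    # draws a line instead of filled bars, matching the ax.plot() look.
    ax.hist(values, bins=bins, cumulative=True, histtype="step",
            color=color, label=label)


fig, ax = plt.subplots()
plot_cdf(ax, np.random.normal(size=1000), bins=np.arange(-3, 3, 0.1))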
Example #12
    def __init__(self, city_publish_tuple, feeds=None,
                 download_date=None):
        # print(city_publish_tuple, feeds, download_date)

        # Feed parameters
        self.feeds = feeds
        self.city_id = city_publish_tuple.id

        self.lat = float(city_publish_tuple.lat)
        self.lon = float(city_publish_tuple.lon)
        self.publishable = city_publish_tuple.publishable
        self.extract_start_date = city_publish_tuple.extract_start_date

        self.buffer_distance = float(city_publish_tuple.buffer)
        self.name = city_publish_tuple.name

        if not download_date:
            if city_publish_tuple.download_date:
                self.download_date = city_publish_tuple.download_date
            else:
                raise Exception('No download date specified!')
        else:
            self.download_date = download_date

        # Create output directory:
        assert isinstance(self.city_id, str)
        assert isinstance(self.download_date, str)
        self.output_directory = util.makedirs(os.path.join(TO_PUBLISH_ROOT_OUTPUT_DIR, self.city_id, self.download_date))

        # Country-wide feeds share one raw database; city feeds get their own:
        if any(x in self.feeds for x in COUNTRY_FEED_LIST):
            country_feed = [feed for feed in self.feeds if feed in COUNTRY_FEED_LIST]
            country_feed = country_feed[0]
            output_sub_dir_country = util.makedirs(os.path.join(COUNTRY_FEEDS_DIR, country_feed, self.download_date))
            self.raw_db_path = os.path.join(output_sub_dir_country, ExtractPipeline.TEMP_FILE_PREFIX + SQLITE_ENDING)
        else:
            self.raw_db_path = os.path.join(self.output_directory, ExtractPipeline.TEMP_FILE_PREFIX + SQLITE_ENDING)

        self.main_db_path = os.path.join(self.output_directory, ExtractPipeline.TEMP_FILE_PREFIX + "_main" + SQLITE_ENDING)
        self.week_db_path = os.path.join(self.output_directory, "week" + SQLITE_ENDING)
        self.day_db_path = os.path.join(self.output_directory, ExtractPipeline.TEMP_FILE_PREFIX + "_day" + SQLITE_ENDING)

        self.week_gtfs_path = os.path.join(self.output_directory, "week." + GTFS_ZIPFILE_BASENAME)

        self.temporal_network_fname = os.path.join(self.output_directory, "network_temporal_day.csv")
        self.temporal_network_week_fname = os.path.join(self.output_directory, "network_temporal_week.csv")
        self.network_node_info_fname = os.path.join(self.output_directory, "network_nodes.csv")
        self.network_combined_fname = os.path.join(self.output_directory, "network_combined.csv")

        self.stops_geojson_fname = os.path.join(self.output_directory, "stops.geojson")
        self.sections_geojson_fname = os.path.join(self.output_directory, "sections.geojson")
        self.routes_geojson_fname = os.path.join(self.output_directory, "routes.geojson")

        self.stats_fname = os.path.join(self.output_directory, "stats.csv")

        self.notes_fname = os.path.join(self.output_directory, "notes.txt")

        self.log_fname = os.path.join(TO_PUBLISH_ROOT_OUTPUT_DIR, self.city_id + "_" + self.download_date + ".txt")

        self.coordinate_corrections = pandas.read_csv("coordinate_corrections.csv", sep=",")

        # GTFS Warning containers:
        self.tv_warnings = None  # timetable validation warnings
        self.iv_warnings = None  # import validation warnings

        self.raw_import_warnings_summary_fname = os.path.join(self.output_directory, ExtractPipeline.TEMP_FILE_PREFIX + "raw_db_import_warnings_summary.log")
        self.raw_import_warnings_details_fname = os.path.join(self.output_directory, ExtractPipeline.TEMP_FILE_PREFIX + "raw_db_import_warnings_details.log")

        self.main_db_timetable_warnings_summary_fname = os.path.join(self.output_directory, ExtractPipeline.TEMP_FILE_PREFIX + "main_db_timetable_warnings_summary.log")
        self.main_db_timetable_warnings_details_fname = os.path.join(self.output_directory, ExtractPipeline.TEMP_FILE_PREFIX + "main_db_timetable_warnings_details.log")

        self.week_db_timetable_warnings_summary_fname = os.path.join(self.output_directory, "week_db_timetable_warnings_summary.log")
        self.week_db_timetable_warnings_details_fname = os.path.join(self.output_directory, ExtractPipeline.TEMP_FILE_PREFIX + "week_db_timetable_warnings_details.log")

        self.weekly_extract_dates_plot_fname = os.path.join(self.output_directory,
                                                            ExtractPipeline.TEMP_FILE_PREFIX + "extract_start_date_plot.pdf")
        self.thumbnail_path = os.path.join(self.output_directory, "thumbnail.jpg")
        self.zip_file_name = os.path.join(self.output_directory, self.city_id + ".zip")
Example #13
import os
from gtfspy import util


COUNTRY_FEED_LIST = ['denmark', 'sweden', 'finland-matka', 'belgium', 'switzerland', 'israel', 'netherlands', 'norway']

__THIS_DIR = os.path.dirname(os.path.realpath(__file__))

RAW_DATA_DIR = os.path.join(__THIS_DIR, "../../scratch/rawgtfs/")
assert os.path.exists(RAW_DATA_DIR)
ALL_RAW_GTFS_DIRS = []
TO_PUBLISH_ROOT_OUTPUT_DIR = os.path.join(__THIS_DIR, "../../scratch/to_publish/")
# TO_PUBLISH_ROOT_OUTPUT_DIR = os.path.join(__THIS_DIR, "copies_from_hammer/")
COUNTRY_FEEDS_DIR = os.path.join(__THIS_DIR, "../../scratch/country_feeds_for_publish")
SQLITE_ENDING = ".sqlite"

THUMBNAIL_DIR = os.path.join(TO_PUBLISH_ROOT_OUTPUT_DIR, "thumbnails/")
util.makedirs(THUMBNAIL_DIR)

GTFS_ZIPFILE_BASENAME = "gtfs.zip"