Example 1
def collect_continents_subregion_tables(confirmation_required=True):
    """
    :param confirmation_required: [bool] whether to ask for a confirmation before starting to collect the information
    """
    if confirmed("To collect information about subregions of each continent? ",
                 confirmation_required=confirmation_required):
        try:
            home_link = 'https://download.geofabrik.de/'
            source = requests.get(home_link)
            soup = bs4.BeautifulSoup(source.text, 'lxml')
            subregion_cells = soup.find_all('td', {'class': 'subregion'})
            source.close()
            continent_names = [td.a.text for td in subregion_cells]
            continent_links = [
                urllib.parse.urljoin(home_link, td.a['href'])
                for td in subregion_cells
            ]
            subregion_tables = dict(
                zip(continent_names,
                    [get_subregion_table(url) for url in continent_links]))
            save_pickle(subregion_tables,
                        cd_dat("GeoFabrik-continents-subregion-tables.pickle"))
        except Exception as e:
            print(
                "Failed to collect the required information ... {}.".format(e))
    else:
        print(
            "The information collection process was not activated. The existing local copy will be loaded instead."
        )
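
For reference, here is the scraping step in isolation: a minimal, self-contained sketch that fetches only the Geofabrik homepage and uses none of the package-internal helpers (confirmed, save_pickle, cd_dat).

import urllib.parse

import bs4
import requests

home_link = 'https://download.geofabrik.de/'
with requests.get(home_link) as source:
    cells = bs4.BeautifulSoup(source.text, 'lxml').find_all('td', {'class': 'subregion'})

# Map each continent name to its page URL, exactly as the function does
continent_names = [td.a.text for td in cells]
continent_links = [urllib.parse.urljoin(home_link, td.a['href']) for td in cells]
print(dict(zip(continent_names, continent_links)))
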
Example 2
def collect_bbbike_subregion_catalogue(confirmation_required=True):
    """
    :param confirmation_required: [bool] whether to ask for a confirmation before starting to collect the catalogue
    """
    if confirmed("To collect BBBike subregion catalogue? ", confirmation_required=confirmation_required):
        try:
            home_url = 'http://download.bbbike.org/osm/bbbike/'
            bbbike_subregion_catalogue = pd.read_html(home_url, header=0, parse_dates=['Last Modified'])[0].drop(0)
            bbbike_subregion_catalogue.Name = bbbike_subregion_catalogue.Name.map(lambda x: x.strip('/'))

            save_pickle(bbbike_subregion_catalogue, cd_dat("BBBike-subregion-catalogue.pickle"))

            bbbike_subregion_names = bbbike_subregion_catalogue.Name.tolist()
            save_pickle(bbbike_subregion_names, cd_dat("BBBike-subregion-name-list.pickle"))

        except Exception as e:
            print("Failed to get the required information ... {}.".format(e))
    else:
        print("The information collection process was not activated. The existing local copy will be loaded instead.")
Example 3
def collect_bbbike_download_catalogue(confirmation_required=True):
    """
    :param confirmation_required: [bool] whether to ask for a confirmation before starting to collect the download dictionary
    """
    if confirmed("To collect BBBike download dictionary? ", confirmation_required=confirmation_required):
        try:
            bbbike_subregion_names = fetch_bbbike_subregion_catalogue("BBBike-subregion-name-list", update=True)
            download_catalogue = [
                fetch_bbbike_subregion_download_catalogue(subregion_name, update=True, confirmation_required=False)
                for subregion_name in bbbike_subregion_names]

            subregion_name, subregion_download_catalogue = bbbike_subregion_names[0], download_catalogue[0]

            # Available file formats
            file_fmt = [re.sub('{}|CHECKSUM'.format(subregion_name), '', f)
                        for f in subregion_download_catalogue.Filename]
            save_pickle(file_fmt[:-2], cd_dat("BBBike-osm-file-formats.pickle"))

            # Available data types
            data_typ = subregion_download_catalogue.DataType.tolist()
            save_pickle(data_typ[:-2], cd_dat("BBBike-osm-data-types.pickle"))

            downloads_dictionary = dict(zip(bbbike_subregion_names, download_catalogue))
            save_pickle(downloads_dictionary, cd_dat("BBBike-download-catalogue.pickle"))
        except Exception as e:
            print("Failed to collect BBBike download dictionary. {}".format(e))
    else:
        print("The information collection process was not activated. The existing local copy will be loaded instead.")
Example 4
def read_osm_pbf(subregion_name,
                 data_dir=None,
                 parsed=True,
                 file_size_limit=50,
                 fmt_other_tags=True,
                 fmt_single_geom=True,
                 fmt_multi_geom=True,
                 update=False,
                 download_confirmation_required=True,
                 pickle_it=False,
                 rm_osm_pbf=True):
    """
    :param subregion_name: [str] e.g. 'london'
    :param data_dir: [str or None] customised path of a .osm.pbf file
    :param parsed: [bool]
    :param file_size_limit: [numbers.Number] file size limit (in MB), e.g. 100; default 50
    :param fmt_other_tags: [bool]
    :param fmt_single_geom: [bool]
    :param fmt_multi_geom: [bool]
    :param update: [bool]
    :param download_confirmation_required: [bool]
    :param pickle_it: [bool]
    :param rm_osm_pbf: [bool]
    :return: [dict] or None

    If data_dir is None, the default file path for the named subregion will be used.
    """
    assert isinstance(file_size_limit, (int, float)) or file_size_limit is None

    osm_pbf_filename, path_to_osm_pbf = get_default_path_to_osm_file(
        subregion_name, ".osm.pbf", mkdir=False)
    if data_dir:
        osm_pbf_dir = regulate_input_data_dir(data_dir)
        path_to_osm_pbf = os.path.join(osm_pbf_dir, osm_pbf_filename)
    # Otherwise, the default path_to_osm_pbf is used as-is

    subregion_filename = os.path.basename(path_to_osm_pbf)

    path_to_pickle = path_to_osm_pbf.replace(
        ".osm.pbf", ".pickle" if parsed else "-raw.pickle")
    if os.path.isfile(path_to_pickle) and not update:
        osm_pbf_data = load_pickle(path_to_pickle)
    else:
        # If the target file is not available, try downloading it first.
        download_subregion_osm_file(
            subregion_name,
            osm_file_format=".osm.pbf",
            download_dir=data_dir,
            update=update,
            download_confirmation_required=download_confirmation_required,
            verbose=False)

        file_size_in_mb = round(
            os.path.getsize(path_to_osm_pbf) / (1024**2), 1)

        if file_size_limit and file_size_in_mb > file_size_limit:
            chunks_no = math.ceil(
                file_size_in_mb / file_size_limit
            )  # Parsing the '.osm.pbf' file in a chunk-wise way
        else:
            chunks_no = None

        print("\nParsing \"{}\" ... ".format(subregion_filename), end="")
        try:
            osm_pbf_data = parse_osm_pbf(path_to_osm_pbf, chunks_no, parsed,
                                         fmt_other_tags, fmt_single_geom,
                                         fmt_multi_geom)
            print("Successfully.\n")
        except Exception as e:
            print("Failed. {}\n".format(e))
            osm_pbf_data = None

        if pickle_it:
            save_pickle(osm_pbf_data, path_to_pickle)
        if rm_osm_pbf:
            remove_subregion_osm_file(path_to_osm_pbf)

    return osm_pbf_data
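
The chunking decision in isolation (the file size here is made up): the .osm.pbf file is parsed in chunks only when it exceeds file_size_limit, and the number of chunks is the ceiling of the ratio.

import math

file_size_limit = 50     # MB
file_size_in_mb = 123.4  # made-up size for illustration

if file_size_limit and file_size_in_mb > file_size_limit:
    chunks_no = math.ceil(file_size_in_mb / file_size_limit)
else:
    chunks_no = None
print(chunks_no)  # 3
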
Example 5
def read_shp_zip(subregion_name,
                 layer,
                 feature=None,
                 data_dir=None,
                 update=False,
                 download_confirmation_required=True,
                 pickle_it=False,
                 rm_extracts=False,
                 rm_shp_zip=False):
    """
    :param subregion_name: [str] e.g. 'england', 'oxfordshire', or 'europe'; case-insensitive
    :param layer: [str] e.g. 'railways'
    :param feature: [str] e.g. 'rail'; if None, all available features included; default None
    :param data_dir: [str or None]
    :param update: [bool] whether to update the relevant file/information; default False
    :param download_confirmation_required: [bool]
    :param pickle_it: [bool] default False
    :param rm_extracts: [bool] whether to delete extracted files from the .shp.zip file; default False
    :param rm_shp_zip: [bool] whether to delete the downloaded .shp.zip file; default False
    :return: [GeoDataFrame]
    """

    shp_zip_filename, path_to_shp_zip = get_default_path_to_osm_file(
        subregion_name, ".shp.zip", mkdir=False)
    extract_dir = os.path.splitext(path_to_shp_zip)[0]
    if data_dir:
        shp_zip_dir = regulate_input_data_dir(data_dir)
        path_to_shp_zip = os.path.join(shp_zip_dir, shp_zip_filename)
        extract_dir = os.path.join(shp_zip_dir, os.path.basename(extract_dir))

    # Make a local path for saving a pickle file for .shp data
    sub_name = "-".join(
        x for x in
        [shp_zip_filename.replace("-latest-free.shp.zip", ""), layer, feature]
        if x)
    path_to_shp_pickle = os.path.join(extract_dir, sub_name + ".shp.pickle")

    if os.path.isfile(path_to_shp_pickle) and not update:
        shp_data = load_pickle(path_to_shp_pickle)
    else:
        # Download the requested OSM file if it has not already been extracted
        if not os.path.exists(extract_dir):
            download_subregion_osm_file(
                shp_zip_filename,
                osm_file_format=".shp.zip",
                download_dir=data_dir,
                update=update,
                download_confirmation_required=download_confirmation_required,
                verbose=False)
        if os.path.isfile(path_to_shp_zip):
            extract_shp_zip(path_to_shp_zip, extract_dir, layer=layer)

        path_to_shp = glob.glob(
            os.path.join(extract_dir, "*{}*.shp".format(layer)))
        if len(path_to_shp) == 0:
            shp_data = None
        elif len(path_to_shp) == 1:
            shp_data = gpd.read_file(
                path_to_shp[0])  # gpd.GeoDataFrame(read_shp_file(path_to_shp))
            if feature:
                path_to_shp_feat = path_to_shp[0].replace(
                    layer, layer + "_" + feature)
                shp_data = gpd.GeoDataFrame(
                    shp_data[shp_data.fclass == feature])
                shp_data.crs = {
                    'no_defs': True,
                    'ellps': 'WGS84',
                    'datum': 'WGS84',
                    'proj': 'longlat'
                }
                shp_data.to_file(path_to_shp_feat, driver='ESRI Shapefile')
        else:  # len(path_to_shp) > 1:
            if not feature:
                path_to_orig_shp = [
                    p for p in path_to_shp
                    if layer + '_a' in p or layer + '_free' in p
                ]
                if len(path_to_orig_shp) == 1:  # "_a*.shp" is not available
                    shp_data = gpd.read_file(path_to_orig_shp[0])
                else:
                    shp_data = [gpd.read_file(p) for p in path_to_shp]
                    shp_data = pd.concat(shp_data, axis=0, ignore_index=True)
            else:  # feature is not None
                path_to_shp_feat = [
                    p for p in path_to_shp if layer + "_" + feature not in p
                ]
                if len(path_to_shp_feat) == 1:  # "_a*.shp" does not exist
                    shp_data = gpd.read_file(path_to_shp_feat[0])
                    shp_data = shp_data[shp_data.fclass == feature]
                else:  # both "_a*" and "_free*" .shp files are available for the feature
                    shp_data = [
                        dat[dat.fclass == feature]
                        for dat in (gpd.read_file(p) for p in path_to_shp_feat)
                    ]
                    shp_data = pd.concat(shp_data, axis=0, ignore_index=True)
                shp_data.crs = {
                    'no_defs': True,
                    'ellps': 'WGS84',
                    'datum': 'WGS84',
                    'proj': 'longlat'
                }
                shp_data.to_file(path_to_shp_feat[0].replace(
                    layer, layer + "_" + feature),
                                 driver='ESRI Shapefile')

        if pickle_it:
            save_pickle(shp_data, path_to_shp_pickle)

        if os.path.exists(extract_dir) and rm_extracts:
            # import shutil; shutil.rmtree(extract_dir)
            for f in glob.glob(os.path.join(extract_dir, "gis_osm*")):
                os.remove(f)

        if os.path.isfile(path_to_shp_zip) and rm_shp_zip:
            remove_subregion_osm_file(path_to_shp_zip)

    return shp_data
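
A toy sketch of the feature-filtering step, using an in-memory GeoDataFrame instead of a real extracted shapefile; note that the proj4-style dict assigned above is equivalent to EPSG:4326 (WGS 84).

import geopandas as gpd
from shapely.geometry import Point

shp_data = gpd.GeoDataFrame(
    {'fclass': ['rail', 'subway'], 'geometry': [Point(0, 0), Point(1, 1)]})
# Keep only rows whose fclass matches the requested feature, then set the CRS
rail_only = shp_data[shp_data.fclass == 'rail'].set_crs('EPSG:4326')
print(rail_only)
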
Example 6
def collect_region_subregion_tier(confirmation_required=True):
    """
    :param confirmation_required: [bool] whether to confirm before starting to collect region-subregion tier
    """

    # Find all the regions and their subregions
    def compile_region_subregion_tier(sub_reg_tbls):
        """
        :param sub_reg_tbls: [dict] {name: pandas.DataFrame} obtained from fetch_continents_subregion_tables()
        :return: ([dict], [list]) a dictionary of region-subregion, and a list of (sub)regions without subregions
        """
        having_subregions = copy.deepcopy(sub_reg_tbls)
        region_subregion_tiers = copy.deepcopy(sub_reg_tbls)

        non_subregions_list = []
        for k, v in sub_reg_tbls.items():
            if v is not None and isinstance(v, pd.DataFrame):
                region_subregion_tiers = update_nested_dict(
                    region_subregion_tiers, {k: set(v.Subregion)})
            else:
                non_subregions_list.append(k)

        for x in non_subregions_list:
            having_subregions.pop(x)

        having_subregions_temp = copy.deepcopy(having_subregions)

        while having_subregions_temp:

            for region_name, subregion_table in having_subregions.items():
                subregion_names, subregion_links = subregion_table.Subregion, subregion_table.SubregionURL
                sub_subregion_tables = dict(
                    zip(subregion_names, [
                        get_subregion_table(link) for link in subregion_links
                    ]))

                subregion_index, without_subregion_ = compile_region_subregion_tier(
                    sub_subregion_tables)
                non_subregions_list += without_subregion_

                region_subregion_tiers.update({region_name: subregion_index})

                having_subregions_temp.pop(region_name)

        # 'Russian Federation' appears on both the Asia and Europe pages, hence duplicates in non_subregions_list
        non_subregions_list = list(
            more_itertools.unique_everseen(non_subregions_list))
        return region_subregion_tiers, non_subregions_list

    if confirmed(
            "To compile a region-subregion tier? (Note that it may take a few minutes.) ",
            confirmation_required=confirmation_required):
        try:
            subregion_tables = fetch_continents_subregion_tables(update=True)
            region_subregion_tier, non_subregions = compile_region_subregion_tier(
                subregion_tables)
            save_pickle(region_subregion_tier,
                        cd_dat("GeoFabrik-region-subregion-tier.pickle"))
            save_json(region_subregion_tier,
                      cd_dat("GeoFabrik-region-subregion-tier.json"))
            save_pickle(non_subregions,
                        cd_dat("GeoFabrik-non-subregion-list.pickle"))
        except Exception as e:
            print("Failed to get the required information ... {}.".format(e))
    else:
        print(
            "The information collection process was not activated. The existing local copy will be loaded instead."
        )
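
The order-preserving deduplication at the end, on its own ('Russian Federation' is listed under both Asia and Europe, hence the duplicates):

import more_itertools

names = ['Cyprus', 'Russian Federation', 'Turkey', 'Russian Federation']
print(list(more_itertools.unique_everseen(names)))
# ['Cyprus', 'Russian Federation', 'Turkey']
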
Example 7
def get_incident_location_furlongs(route_name=None, weather_category=None,
                                   shift_yards_same_elr=220, shift_yards_diff_elr=220,
                                   update=False, verbose=False):
    """
    Get data of furlongs for incident locations.

    :param route_name: name of a Route; if ``None`` (default), all available Routes
    :type route_name: str or None
    :param weather_category: weather category; if ``None`` (default), all available weather categories
    :type weather_category: str or None
    :param shift_yards_same_elr: yards by which the start/end mileage is shifted for adjustment,
        given that ``StartELR == EndELR``, defaults to ``220``
    :type shift_yards_same_elr: int or float
    :param shift_yards_diff_elr: yards by which the start/end mileage is shifted for adjustment,
        given that ``StartELR != EndELR``, defaults to ``220``
    :type shift_yards_diff_elr: int or float
    :param update: whether to check on update and proceed to update the package data,
        defaults to ``False``
    :type update: bool
    :param verbose: whether to print relevant information in console as the function runs,
        defaults to ``False``
    :type verbose: bool or int
    :return: data of furlongs for incident locations
    :rtype: pandas.DataFrame or None

    **Test**::

        >>> from coordinator.furlong import get_incident_location_furlongs

        >>> il_furlongs = get_incident_location_furlongs(update=True, verbose=True)

        >>> il_furlongs.tail()

        >>> il_furlongs = get_incident_location_furlongs(route_name='Anglia', update=True, verbose=True)

        >>> il_furlongs.tail()

    """

    filename = "incident-location-furlongs"
    pickle_filename = make_filename(filename, route_name, weather_category, shift_yards_same_elr,
                                    shift_yards_diff_elr)
    path_to_pickle = cdd_geodata(pickle_filename)

    if os.path.isfile(path_to_pickle) and not update:
        incident_location_furlongs = load_pickle(path_to_pickle)

    else:
        try:
            use_col_names = ['Section_Length_Adj', 'Critical_FurlongIDs']

            adjusted_mileages_same_start_end_elrs = get_adjusted_mileages_same_start_end_elrs(
                route_name, weather_category, shift_yards_same_elr, verbose=verbose)
            ilf_same = adjusted_mileages_same_start_end_elrs[use_col_names]

            adjusted_mileages_diff_start_end_elrs = get_adjusted_mileages_diff_start_end_elrs(
                route_name, weather_category, shift_yards_diff_elr, verbose=verbose)
            ilf_diff = adjusted_mileages_diff_start_end_elrs[use_col_names]

            furlongs_dat = pd.concat([ilf_same, ilf_diff])

            incident_locations = metex.view_metex_schedule8_incident_locations(
                route_name, weather_category, verbose=verbose)

            # Merge the above data sets
            incident_location_furlongs = incident_locations.join(furlongs_dat, how='right')
            incident_location_furlongs.drop(['StartMileage_num', 'EndMileage_num'], axis=1, inplace=True)
            incident_location_furlongs.index = range(len(incident_location_furlongs))

            save_pickle(incident_location_furlongs, path_to_pickle, verbose=verbose)

        except Exception as e:
            print("Failed to get \"{}\". {}.".format(os.path.splitext(pickle_filename)[0], e))
            incident_location_furlongs = None

    return incident_location_furlongs
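
This function follows the pickle-caching idiom shared by several of these examples; a generic sketch of that idiom (load_or_build and its arguments are illustrative stand-ins, not helpers from the package):

import os
import pickle

def load_or_build(path_to_pickle, build, update=False):
    """Load a cached result unless an update is requested; otherwise rebuild and cache it."""
    if os.path.isfile(path_to_pickle) and not update:
        with open(path_to_pickle, 'rb') as f:
            return pickle.load(f)
    data = build()
    with open(path_to_pickle, 'wb') as f:
        pickle.dump(data, f)
    return data
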
Example 8
def collect_subregion_info_catalogue(confirmation_required=True):
    """
    :param confirmation_required: [bool] whether to ask for a confirmation before starting to collect information
    """
    if confirmed(
            "To collect all available subregion links? (Note that it may take a few minutes.) ",
            confirmation_required=confirmation_required):

        home_url = 'http://download.geofabrik.de/'

        try:
            source = requests.get(home_url)
            soup = bs4.BeautifulSoup(source.text, 'lxml')
            source.close()
            avail_subregions = [
                td.a.text for td in soup.find_all('td', {'class': 'subregion'})
            ]
            avail_subregion_urls = [
                urllib.parse.urljoin(home_url, td.a['href'])
                for td in soup.find_all('td', {'class': 'subregion'})
            ]
            avail_subregion_url_tables = [
                get_subregion_table(sub_url)
                for sub_url in avail_subregion_urls
            ]
            avail_subregion_url_tables = [
                tbl for tbl in avail_subregion_url_tables if tbl is not None
            ]

            subregion_url_tables = list(avail_subregion_url_tables)

            while subregion_url_tables:

                subregion_url_tables_ = []

                for subregion_url_table in subregion_url_tables:
                    subregions = list(subregion_url_table.Subregion)
                    subregion_urls = list(subregion_url_table.SubregionURL)
                    subregion_url_tables_0 = [
                        get_subregion_table(subregion_url)
                        for subregion_url in subregion_urls
                    ]
                    subregion_url_tables_0 = [
                        tbl for tbl in subregion_url_tables_0
                        if tbl is not None
                    ]
                    subregion_url_tables_ += subregion_url_tables_0

                    # (Note that 'Russian Federation' data is available in both 'Asia' and 'Europe')
                    avail_subregions += subregions
                    avail_subregion_urls += subregion_urls
                    # Extend with this iteration's new tables only, so nothing is added twice
                    avail_subregion_url_tables += subregion_url_tables_0

                subregion_url_tables = list(subregion_url_tables_)

            # Save a list of available subregions locally
            save_pickle(avail_subregions,
                        cd_dat("GeoFabrik-subregion-name-list.pickle"))

            # Subregion index - {Subregion: URL}
            subregion_url_index = dict(
                zip(avail_subregions, avail_subregion_urls))
            # Save subregion_index to local disk
            save_pickle(
                subregion_url_index,
                cd_dat("GeoFabrik-subregion-name-url-dictionary.pickle"))
            save_json(subregion_url_index,
                      cd_dat("GeoFabrik-subregion-name-url-dictionary.json"))

            # All available URLs for downloading
            home_subregion_url_table = get_subregion_table(home_url)
            avail_subregion_url_tables.append(home_subregion_url_table)
            subregion_downloads_index = pd.DataFrame(
                pd.concat(avail_subregion_url_tables, ignore_index=True))
            subregion_downloads_index.drop_duplicates(inplace=True)
            subregion_downloads_index_json = subregion_downloads_index.set_index(
                'Subregion').to_json()

            # Save subregion_index_downloads to local disk
            save_pickle(
                subregion_downloads_index,
                cd_dat("GeoFabrik-subregion-downloads-catalogue.pickle"))
            save_json(subregion_downloads_index_json,
                      cd_dat("GeoFabrik-subregion-downloads-catalogue.json"))

        except Exception as e:
            print("Failed to get the required information ... {}.".format(e))

    else:
        print("The information collection process was not activated.")
Example 9
def get_furlongs_data(route_name=None, weather_category=None,
                      shift_yards_same_elr=220, shift_yards_diff_elr=220,
                      update=False, verbose=False):
    """
    Get furlongs data.

    :param route_name: name of a Route; if ``None`` (default), all Routes
    :type route_name: str or None
    :param weather_category: weather category, defaults to ``None``
    :type weather_category: str or None
    :param shift_yards_same_elr: yards by which the start/end mileage is shifted for adjustment,
        given that StartELR == EndELR, defaults to ``220``
    :type shift_yards_same_elr: int or float
    :param shift_yards_diff_elr: yards by which the start/end mileage is shifted for adjustment,
        given that StartELR != EndELR, defaults to ``220``
    :type shift_yards_diff_elr: int or float
    :param update: whether to check on update and proceed to update the package data,
        defaults to ``False``
    :type update: bool
    :param verbose: whether to print relevant information in console as the function runs,
        defaults to ``False``
    :type verbose: bool or int
    :return: data of furlongs for incident locations
    :rtype: pandas.DataFrame or None

    **Test**::

        from models.prototype.furlong import get_furlongs_data

        weather_category     = None
        shift_yards_same_elr = 220
        shift_yards_diff_elr = 220
        update               = True
        verbose              = True

        route_name = None
        furlongs_data = get_furlongs_data(route_name, weather_category, shift_yards_same_elr,
                                          shift_yards_diff_elr, update, verbose)
        print(furlongs_data)

        route_name = 'Anglia'
        furlongs_data = get_furlongs_data(route_name, weather_category, shift_yards_same_elr,
                                          shift_yards_diff_elr, update, verbose)
        print(furlongs_data)
    """

    filename = "furlongs"
    pickle_filename = make_filename(
        filename, route_name, weather_category, shift_yards_same_elr, shift_yards_diff_elr)
    path_to_pickle = cdd_geodata(pickle_filename)

    if os.path.isfile(path_to_pickle) and not update:
        furlongs_data = load_pickle(path_to_pickle)

    else:
        try:
            # Data of incident furlongs: both start and end identified by the same ELR
            furlongs_data_same_elr = get_furlongs_same_start_end_elrs(
                route_name=route_name, weather_category=weather_category,
                shift_yards_same_elr=shift_yards_same_elr, verbose=verbose)

            # Data of incident furlongs: start and end are identified by different ELRs
            furlongs_data_diff_elr = get_furlongs_diff_start_end_elrs(
                route_name=route_name, weather_category=weather_category,
                shift_yards_diff_elr=shift_yards_diff_elr, verbose=verbose)

            # Merge the above two data sets
            furlongs_data = pd.concat([furlongs_data_same_elr, furlongs_data_diff_elr])
            furlongs_data.drop_duplicates(['AssetNumber', 'StructuredPlantNumber'], inplace=True)
            furlongs_data.sort_index(inplace=True)

            save_pickle(furlongs_data, path_to_pickle, verbose=verbose)

        except Exception as e:
            print("Failed to get \"{}\". {}.".format(os.path.splitext(pickle_filename)[0], e))
            furlongs_data = None

    return furlongs_data
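
DataFrame.append was removed in pandas 2.0, which is why pd.concat is used above; a toy illustration of the merge-and-deduplicate step:

import pandas as pd

same = pd.DataFrame({'AssetNumber': [1, 2], 'StructuredPlantNumber': ['A', 'B']})
diff = pd.DataFrame({'AssetNumber': [2, 3], 'StructuredPlantNumber': ['B', 'C']})
merged = pd.concat([same, diff]).drop_duplicates(
    ['AssetNumber', 'StructuredPlantNumber'])
print(merged)  # rows 1/A, 2/B, 3/C
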
Example 10
def get_furlongs_diff_start_end_elrs(route_name=None, weather_category=None, shift_yards_diff_elr=220,
                                     update=False, verbose=False):
    """
    Get furlongs data for incident locations each identified by different start and end ELRs,
    i.e. StartELR != EndELR.

    :param route_name: name of a Route; if ``None`` (default), all Routes
    :type route_name: str or None
    :param weather_category: weather category; if ``None`` (default), all weather categories
    :type weather_category: str or None
    :param shift_yards_diff_elr: yards by which the start/end mileage is shifted for adjustment,
        given that StartELR != EndELR, defaults to ``220``
    :type shift_yards_diff_elr: int or float
    :param update: whether to check on update and proceed to update the package data,
        defaults to ``False``
    :type update: bool
    :param verbose: whether to print relevant information in console as the function runs,
        defaults to ``False``
    :type verbose: bool or int
    :return: furlongs data of incident locations each identified by different start and end ELRs
    :rtype: pandas.DataFrame or None

    **Test**::

        from models.prototype.furlong import get_furlongs_diff_start_end_elrs

        route_name           = None
        weather_category     = None
        shift_yards_diff_elr = 220
        update               = True
        verbose              = True

        furlongs_diff_start_end_elr = get_furlongs_diff_start_end_elrs(
            route_name, weather_category, shift_yards_diff_elr, update, verbose)
        print(furlongs_diff_start_end_elr)
    """

    filename = "furlongs-diff-start-end-ELRs"
    pickle_filename = make_filename(filename, route_name, weather_category, shift_yards_diff_elr)
    path_to_pickle = cdd_geodata(pickle_filename)

    if os.path.isfile(path_to_pickle) and not update:
        furlongs_diff_start_end_elr = load_pickle(path_to_pickle)
        return furlongs_diff_start_end_elr

    else:
        adj_mileages = get_adjusted_mileages_diff_start_end_elrs(route_name, weather_category,
                                                                 shift_yards_diff_elr,
                                                                 verbose=verbose)

        try:
            # Get furlong information
            nr_furlong_data = vegetation.view_nr_vegetation_furlong_data(verbose=verbose)
            # Form a list containing all the furlong IDs
            furlong_ids = list(set(itertools.chain(*adj_mileages.Critical_FurlongIDs)))

            # Select critical (i.e. incident) furlongs
            furlongs_diff_start_end_elr = nr_furlong_data.loc[furlong_ids]

            # Save 'incident_furlongs_diff_start_end_elr'
            save_pickle(furlongs_diff_start_end_elr, path_to_pickle, verbose=verbose)

            return furlongs_diff_start_end_elr

        except Exception as e:
            print("Failed to get \"{}\". {}.".format(os.path.splitext(pickle_filename)[0], e))
Example 11
def get_adjusted_mileages_diff_start_end_elrs(route_name, weather_category, shift_yards_diff_elr,
                                              update=False, verbose=False):
    """
    Get adjusted mileages for each incident location where StartELR != EndELR.

    :param route_name: name of a Route; if ``None``, all Routes
    :type route_name: str or None
    :param weather_category: weather category; if ``None``, all weather categories
    :type weather_category: str or None
    :param shift_yards_diff_elr: yards by which the start/end mileage is shifted for adjustment,
        given that StartELR != EndELR
    :type shift_yards_diff_elr: int or float
    :param update: whether to check on update and proceed to update the package data,
        defaults to ``False``
    :type update: bool
    :param verbose: whether to print relevant information in console as the function runs,
        defaults to ``False``
    :type verbose: bool or int
    :return: adjusted mileages for each incident location where StartELR != EndELR
    :rtype: pandas.DataFrame or None

    **Test**::

        from models.prototype.furlong import get_adjusted_mileages_diff_start_end_elrs

        route_name           = None
        weather_category     = None
        shift_yards_diff_elr = 220
        update               = True
        verbose              = True

        adj_mileages = get_adjusted_mileages_diff_start_end_elrs(route_name, weather_category,
                                                                 shift_yards_diff_elr, update, verbose)
        print(adj_mileages)
    """

    filename = "adjusted-mileages-diff-start-end-ELRs"
    pickle_filename = make_filename(filename, route_name, weather_category, shift_yards_diff_elr)
    path_to_pickle = cdd_geodata(pickle_filename)

    if os.path.isfile(path_to_pickle) and not update:
        return load_pickle(path_to_pickle)

    else:
        try:
            # Get data for which the 'StartELR' and 'EndELR' are DIFFERENT
            incident_locations_diff_start_end_elr = metex.view_metex_schedule8_incident_locations(
                route_name, weather_category, start_and_end_elr='diff', verbose=verbose)
            # Get connecting points for different (ELRs, mileages)
            connecting_nodes = get_connecting_nodes(incident_locations_diff_start_end_elr,
                                                    route_name, update=False, verbose=False)

            # Find End Mileage and Start Mileage of StartELR and EndELR, respectively
            locations_conn = incident_locations_diff_start_end_elr.join(
                connecting_nodes.set_index(['StanoxSection'], append=True),
                on=list(connecting_nodes.index.names) + ['StanoxSection'], rsuffix='_conn').dropna()
            locations_conn.drop(columns=[x for x in locations_conn.columns if '_conn' in x],
                                inplace=True)
            # Remove the data records where connecting nodes are unknown
            locations_conn = locations_conn[~((locations_conn.StartELR_EndMileage == '') |
                                              (locations_conn.EndELR_StartMileage == ''))]
            # Convert str mileages to num
            num_conn_colnames = ['StartELR_EndMileage_num', 'EndELR_StartMileage_num',
                                 'ConnELR_StartMileage_num', 'ConnELR_EndMileage_num']
            str_conn_colnames = ['StartELR_EndMileage', 'EndELR_StartMileage',
                                 'ConnELR_StartMileage', 'ConnELR_EndMileage']
            locations_conn[num_conn_colnames] = locations_conn[str_conn_colnames].applymap(
                nr_mileage_str_to_num)

            # Get furlong information
            nr_furlong_data = vegetation.view_nr_vegetation_furlong_data(verbose=verbose)

            adjusted_conn_elr_mileages = locations_conn.apply(
                lambda x: adjust_incident_mileages(
                    nr_furlong_data, x.ConnELR, x.ConnELR_StartMileage_num, x.ConnELR_EndMileage_num, 0)
                if x.ConnELR != '' else tuple(['', '', np.nan, np.nan, 0.0, []]),
                axis=1)
            adjusted_conn_mileages = pd.DataFrame(adjusted_conn_elr_mileages.tolist(),
                                                  index=locations_conn.index,
                                                  columns=['Conn_StartMileage_Adj',
                                                           'ConnELR_EndMileage_Adj',
                                                           'Conn_StartMileage_num_Adj',
                                                           'ConnELR_EndMileage_num_Adj',
                                                           'ConnELR_Length_Adj',  # yards
                                                           'ConnELR_Critical_FurlongIDs'])

            # Processing Start locations
            adjusted_start_elr_mileages = locations_conn.apply(
                lambda x: adjust_incident_mileages(
                    nr_furlong_data, x.StartELR, x.StartMileage_num, x.StartELR_EndMileage_num,
                    shift_yards_diff_elr),
                axis=1)

            # Create a dataframe adjusted mileage data of the Start ELRs
            adjusted_start_mileages = pd.DataFrame(adjusted_start_elr_mileages.tolist(),
                                                   index=locations_conn.index,
                                                   columns=['StartMileage_Adj',
                                                            'StartELR_EndMileage_Adj',
                                                            'StartMileage_num_Adj',
                                                            'StartELR_EndMileage_num_Adj',
                                                            'StartELR_Length_Adj',  # yards
                                                            'StartELR_Critical_FurlongIDs'])

            # Processing End locations
            adjusted_end_elr_mileages = locations_conn.apply(
                lambda x: adjust_incident_mileages(nr_furlong_data, x.EndELR, x.EndELR_StartMileage_num,
                                                   x.EndMileage_num, shift_yards_diff_elr),
                axis=1)

            # Create a dataframe of adjusted mileage data of the EndELRs
            adjusted_end_mileages = pd.DataFrame(adjusted_end_elr_mileages.tolist(),
                                                 index=locations_conn.index,
                                                 columns=['EndELR_StartMileage_Adj', 'EndMileage_Adj',
                                                          'EndELR_StartMileage_num_Adj',
                                                          'EndMileage_num_Adj',
                                                          'EndELR_Length_Adj',  # yards
                                                          'EndELR_Critical_FurlongIDs'])

            # Combine 'adjusted_start_mileages' and 'adjusted_end_mileages'
            adj_mileages = adjusted_start_mileages.join(adjusted_conn_mileages).join(
                adjusted_end_mileages)

            adj_mileages.dropna(subset=['StartMileage_num_Adj', 'EndMileage_num_Adj'], inplace=True)

            adj_mileages['Section_Length_Adj'] = list(zip(
                adj_mileages.StartELR_Length_Adj, adj_mileages.ConnELR_Length_Adj,
                adj_mileages.EndELR_Length_Adj))

            adj_mileages['Critical_FurlongIDs'] = \
                adj_mileages.StartELR_Critical_FurlongIDs + \
                adj_mileages.EndELR_Critical_FurlongIDs + \
                adj_mileages.ConnELR_Critical_FurlongIDs
            adj_mileages.Critical_FurlongIDs = adj_mileages.Critical_FurlongIDs.map(
                lambda x: list(set(x)))

            save_pickle(adj_mileages, path_to_pickle, verbose=verbose)

            return adj_mileages

        except Exception as e:
            print("Failed to get \"{}\". {}.".format(os.path.splitext(pickle_filename)[0], e))
Example 12
def get_adjusted_mileages_same_start_end_elrs(route_name, weather_category, shift_yards_same_elr,
                                              update=False, verbose=False):
    """
    Get adjusted mileages for each incident location where StartELR == EndELR.

    :param route_name: name of a Route; if ``None``, all Routes
    :type route_name: str or None
    :param weather_category: weather category; if ``None``, all weather categories
    :type weather_category: str or None
    :param shift_yards_same_elr: yards by which the start/end mileage is shifted for adjustment,
        given that StartELR == EndELR
    :type shift_yards_same_elr: int or float
    :param update: whether to check on update and proceed to update the package data,
        defaults to ``False``
    :type update: bool
    :param verbose: whether to print relevant information in console as the function runs,
        defaults to ``False``
    :type verbose: bool or int
    :return: adjusted mileages for each incident location where StartELR == EndELR
    :rtype: pandas.DataFrame or None

    **Test**::

        from models.prototype.furlong import get_adjusted_mileages_same_start_end_elrs

        route_name           = None
        weather_category     = None
        shift_yards_same_elr = 220
        update               = True
        verbose              = True

        adj_mileages = get_adjusted_mileages_same_start_end_elrs(route_name, weather_category,
                                                                 shift_yards_same_elr, update, verbose)
        print(adj_mileages)
    """

    filename = "adjusted-mileages-same-start-end-ELRs"
    pickle_filename = make_filename(filename, route_name, weather_category, shift_yards_same_elr)
    path_to_pickle = cdd_geodata(pickle_filename)

    if os.path.isfile(path_to_pickle) and not update:
        adj_mileages = load_pickle(path_to_pickle)
        return adj_mileages

    else:
        try:
            # Get data of incident locations where the 'StartELR' and 'EndELR' are THE SAME
            incident_locations = metex.view_metex_schedule8_incident_locations(
                route_name, weather_category, start_and_end_elr='same', verbose=verbose)

            # Get furlong information as reference
            ref_furlongs = vegetation.view_nr_vegetation_furlong_data(verbose=verbose)

            # Calculate adjusted furlong locations for each incident (for vegetation conditions)
            # noinspection PyTypeChecker
            adjusted_mileages = incident_locations.apply(
                lambda x: adjust_incident_mileages(
                    ref_furlongs, x.StartELR, x.StartMileage_num, x.EndMileage_num,
                    shift_yards_same_elr),
                axis=1)

            # Get adjusted mileage data
            adj_mileages = pd.DataFrame(list(adjusted_mileages), index=incident_locations.index,
                                        columns=['StartMileage_Adj', 'EndMileage_Adj',
                                                 'StartMileage_num_Adj', 'EndMileage_num_Adj',
                                                 'Section_Length_Adj',  # yards
                                                 'Critical_FurlongIDs'])

            save_pickle(adj_mileages, path_to_pickle, verbose=verbose)

            return adj_mileages

        except Exception as e:
            print("Failed to get \"{}\". {}.".format(os.path.splitext(pickle_filename)[0], e))
Example 13
def get_connecting_nodes(diff_start_end_elr_dat, route_name=None, update=False, verbose=False):
    """
    Get data of connecting points for different ELRs.

    :param diff_start_end_elr_dat: data frame where StartELR != EndELR
    :type diff_start_end_elr_dat: pandas.DataFrame
    :param route_name: name of a Route; if ``None`` (default), all Routes
    :type route_name: str or None
    :param update: whether to check on update and proceed to update the package data,
        defaults to ``False``
    :type update: bool
    :param verbose: whether to print relevant information in console as the function runs,
        defaults to ``False``
    :type verbose: bool or int
    :return: data of connecting points for different ELRs
    :rtype: pandas.DataFrame or None

    **Test**::

        from mssqlserver.metex import view_metex_schedule8_incident_locations
        from models.prototype.furlong import get_connecting_nodes

        update = False
        verbose = True

        route_name = None
        diff_start_end_elr_dat = view_metex_schedule8_incident_locations(
            route_name=route_name, start_and_end_elr='diff', verbose=verbose)
        connecting_nodes = get_connecting_nodes(diff_start_end_elr_dat, route_name, update, verbose)
        print(connecting_nodes)

        route_name = 'Anglia'
        diff_start_end_elr_dat = view_metex_schedule8_incident_locations(
            route_name=route_name, start_and_end_elr='diff', verbose=verbose)
        connecting_nodes = get_connecting_nodes(diff_start_end_elr_dat, route_name, update, verbose)
        print(connecting_nodes)
    """

    filename = "connections-between-different-ELRs"
    pickle_filename = make_filename(filename, route_name)
    path_to_pickle = cdd_geodata(pickle_filename)

    if os.path.isfile(path_to_pickle) and not update:
        return load_pickle(path_to_pickle, verbose=verbose)

    else:
        try:
            pickle_filename_temp = make_filename(filename)
            path_to_pickle_temp = cdd_geodata(pickle_filename_temp)

            if os.path.isfile(path_to_pickle_temp) and not update:
                connecting_nodes_all = load_pickle(path_to_pickle_temp)
                connecting_nodes = get_subset(connecting_nodes_all, route_name)

            else:
                diff_elr_mileages = diff_start_end_elr_dat.drop_duplicates()

                em = ELRMileages()
                print("Searching for connecting ELRs ... ", end="") if verbose else ""
                mileage_file_dir = cdd_railway_codes("line data\\elrs-and-mileages\\mileages")

                # noinspection PyTypeChecker
                conn_mileages = diff_elr_mileages.apply(
                    lambda x: em.get_conn_mileages(x.StartELR, x.EndELR, update,
                                                   pickle_mileage_file=True,
                                                   data_dir=mileage_file_dir), axis=1)

                print("\nFinished.") if verbose else ""

                conn_mileages_data = pd.DataFrame(conn_mileages.to_list(), index=diff_elr_mileages.index,
                                                  columns=['StartELR_EndMileage', 'ConnELR',
                                                           'ConnELR_StartMileage',
                                                           'ConnELR_EndMileage', 'EndELR_StartMileage'])

                connecting_nodes = diff_elr_mileages.join(conn_mileages_data)
                connecting_nodes.set_index(['StartELR', 'StartMileage', 'EndELR', 'EndMileage'],
                                           inplace=True)

            save_pickle(connecting_nodes, path_to_pickle, verbose=verbose)

            return connecting_nodes

        except Exception as e:
            print("Failed to get \"{}\". {}.".format(os.path.splitext(pickle_filename)[0], e))