Example 1
    def download_previous_vintage(self) -> None:
        """Downloads the previous NRN vintage and extracts the English GeoPackage as <source>_old.gpkg."""

        logger.info("Retrieving previous NRN vintage.")

        # Determine download requirement.
        if self.nrn_old_path["gpkg"].exists():
            logger.warning(
                f"Previous NRN vintage already exists: \"{self.nrn_old_path['gpkg']}\". Skipping step."
            )

        else:

            # Download previous NRN vintage.
            logger.info("Downloading previous NRN vintage.")
            download_url = None

            try:

                # Get download url.
                download_url = helpers.load_yaml(
                    filepath.parents[1] /
                    "downloads.yaml")["previous_nrn_vintage"][self.source]

                # Get raw content stream from download url.
                download = helpers.get_url(download_url,
                                           stream=True,
                                           timeout=30,
                                           verify=True)

                # Copy download content to file.
                with open(self.nrn_old_path["zip"], "wb") as f:
                    shutil.copyfileobj(download.raw, f)

            except (requests.exceptions.RequestException, shutil.Error) as e:
                logger.exception(
                    f"Unable to download previous NRN vintage: \"{download_url}\"."
                )
                logger.exception(e)
                sys.exit(1)

            # Extract zipped data.
            logger.info("Extracting zipped data for previous NRN vintage.")

            with zipfile.ZipFile(self.nrn_old_path["zip"], "r") as zip_f:

                # Identify the English GeoPackage (nrn*.gpkg) within the archive.
                gpkg_download = [
                    f for f in zip_f.namelist()
                    if f.lower().startswith("nrn") and Path(f).suffix == ".gpkg"
                ][0]

                # Copy the GeoPackage out of the archive.
                with zip_f.open(gpkg_download) as zsrc, \
                        open(self.nrn_old_path["gpkg"], "wb") as zdest:
                    shutil.copyfileobj(zsrc, zdest)

            # Remove temporary files.
            logger.info("Removing temporary files for previous NRN vintage.")

            if self.nrn_old_path["zip"].exists():
                self.nrn_old_path["zip"].unlink()
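
A minimal sketch of the lookup performed above, assuming a hypothetical downloads.yaml that maps each source abbreviation to an archive URL under a previous_nrn_vintage key (the key and URL below are illustrative only):

    # downloads.yaml (hypothetical layout):
    #   previous_nrn_vintage:
    #     on: https://example.com/nrn_rrn_on_gpkg.zip
    download_url = helpers.load_yaml("downloads.yaml")["previous_nrn_vintage"]["on"]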
Example 2
    def configure_release_version(self) -> None:
        """Configures the major and minor release versions for the current NRN vintage."""

        logger.info("Configuring NRN release version.")

        # Extract the version number and release year for current source from the release notes.
        release_year = None
        release_notes_path = filepath.parent / "distribution_docs/release_notes.yaml"
        release_notes = helpers.load_yaml(release_notes_path)

        try:

            # Extract previous release version and date.
            version, release_date = itemgetter("edition", "release_date")(release_notes[self.source])

            # Standardize raw variables.
            self.major_version, self.minor_version = map(int, str(version).split("."))
            release_year = int(str(release_date)[:4])

            # Configure new release version.
            if release_year == datetime.now().year:
                self.minor_version += 1
            else:
                self.major_version += 1
                self.minor_version = 0

        except (IndexError, ValueError) as e:
            logger.exception(f"Unable to extract version number and/or release date from \"{release_notes_path}\".")
            logger.exception(e)
            sys.exit(1)

        logger.info(f"Configured NRN release version: {self.major_version}.{self.minor_version}")
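
The bump rule above is easy to misread in isolation; a standalone sketch of the same logic with invented inputs (the function name and values are illustrative, not part of the pipeline):

    from datetime import datetime

    def bump_version(edition: str, release_date: str, today: datetime) -> str:
        # Same-year release: bump the minor version; otherwise bump major and reset minor.
        major, minor = map(int, edition.split("."))
        if int(str(release_date)[:4]) == today.year:
            minor += 1
        else:
            major, minor = major + 1, 0
        return f"{major}.{minor}"

    # bump_version("10.2", "2021-06", datetime(2021, 12, 1)) -> "10.3"
    # bump_version("10.2", "2021-06", datetime(2022, 3, 1))  -> "11.0"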
Example 3
    def __init__(self, article):
        data = helpers.load_yaml(article)
        self.title = helpers.read_key(data[0], 'title')
        self.date = helpers.read_key(data[0], 'date')
        self.is_publish = helpers.read_key(data[0], 'publish')

        self.name = os.path.splitext(article)[0].lower()

        markdown = helpers.read_key(data[1], 'markdown')
        self.text = m(markdown,
                      output_format='html5',
                      extensions=['markdown.extensions.sane_lists'])
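
The constructor assumes helpers.load_yaml returns two YAML documents per article: a metadata block (data[0]) and the markdown body (data[1]). A hypothetical structure consistent with the keys read above:

    data = [
        {"title": "Hello world", "date": "2020-01-01", "publish": True},
        {"markdown": "First post."},
    ]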
Example 4
    def compile_source_attributes(self) -> None:
        """Compiles the yaml files in the sources' directory into a dictionary."""

        logger.info("Compiling source attribute yamls.")
        self.source_attributes = dict()

        # Iterate source yamls.
        for f in filter(Path.is_file,
                        Path(self.source_attribute_path).glob("*.yaml")):

            # Load yaml and store contents.
            self.source_attributes[f.stem] = helpers.load_yaml(f)
Example 5
    def compile_source_attributes(self):
        """Compiles the yaml files in the sources' directory into a dictionary."""

        logger.info("Identifying source attribute files.")
        files = [os.path.join(self.source_attribute_path, f) for f in os.listdir(self.source_attribute_path) if
                 f.endswith(".yaml")]

        logger.info("Compiling source attribute yamls.")
        self.source_attributes = dict()

        for f in files:
            # Load yaml and store contents.
            self.source_attributes[os.path.splitext(os.path.basename(f))[0]] = helpers.load_yaml(f)
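Example 6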
 def update_config(self):
     config = helpers.load_yaml(self._yaml_path)
     radius = float(config["cell_detector"]["radius"])
     tolerance = float(config["cell_detector"]["tolerance"])
     pixels_per_micrometer = float(
         config["cell_detector"]["pixels_per_micrometer"])
     image_height = config["images"]["height"]
     image_width = config["images"]["width"]
     image_depth = config["images"]["depth"]
     self._cell_shape = (image_height, image_width, image_depth)
     self._min_rad = int((radius - tolerance) * pixels_per_micrometer)
     self._max_rad = int((radius + tolerance) * pixels_per_micrometer)
     if config["cell_detector"]["minimum_distance"]:
         self._min_dist = int(config["cell_detector"]["minimum_distance"])
     else:
         self._min_dist = self._min_rad
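
With invented numbers, the radius bounds computed above work out as follows (values are illustrative, not taken from any real config):

    # Illustrative values only: 4 um radius, 1 um tolerance, 10 px per um.
    radius, tolerance, pixels_per_micrometer = 4.0, 1.0, 10.0
    min_rad = int((radius - tolerance) * pixels_per_micrometer)  # 30
    max_rad = int((radius + tolerance) * pixels_per_micrometer)  # 50

Example 7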
 def cells_to_array(self, cells):
     """Converts detected cells into a float32 image batch.

     :param cells: iterable of detected cell objects exposing get_image().
     :return: numpy array of shape (len(cells), image height, width, depth).
     """
     image = helpers.load_yaml(self._yaml_path)["images"]
     image_shape = (image["height"], image["width"], image["depth"])
     x = np.empty(
         (len(cells), image_shape[0], image_shape[1], image_shape[2]),
         dtype=np.float32)
     for row, cell in zip(x, cells):
         cell_image = cell.get_image(dy=image_shape[0], dx=image_shape[1])
         height, width, depth = cell_image.shape
         if cell_image.shape != (80, 80, 3):
             print("cell wrong size!!")
         row[0:height, 0:width, 0:depth] = cell_image
     return x
Example 8
 def run(self):
     index_files = glob(self._index_location + "/*.yml")
     for index_file in index_files:
         if "index_template" not in index_file:
             data = load_yaml(index_file)
             if "Projects" not in data:
                 raise Exception("Invalid index file")
             for entry in data["Projects"]:
                 title, section, container_name, target_file_url = self._get_wiki_info(entry)
                 if section not in self._projects_data:
                     self._projects_data[str(section)] = {
                         "title": title,
                         "items": []
                     }
                 self._projects_data[str(section)]["items"].append({
                     "name": container_name,
                     "url": target_file_url
                 })
     self._generate_wiki_content()
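
The loop above expects each index file to hold a top-level Projects list; a hypothetical value of data reduced to that structure (the per-entry fields are assumptions, since they are consumed by _get_wiki_info):

    data = {
        "Projects": [
            {"name": "example-project", "section": "tools"},  # hypothetical entry fields
        ]
    }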
Example 9
def build():
    config_file = CONFIG_FILE
    helpers.check_file(config_file)
    config_yaml = helpers.load_yaml(config_file)[0]

    selected_theme = helpers.read_key(config_yaml, 'theme')
    blog_name = helpers.read_key(config_yaml, 'name')
    description = helpers.read_key(config_yaml, 'description')
    language = helpers.read_key(config_yaml, 'language')

    builder = Builder(theme=selected_theme,
                      name=blog_name,
                      description=description,
                      lang=language)
    helpers.chdir_to_articles()

    for article in os.listdir('.'):
        if os.path.isfile(article) and not article.startswith('.'):
            builder.build_article(article)
    builder.build_overview()
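
build() reads the first YAML document of CONFIG_FILE and pulls four keys via read_key; a hypothetical config_yaml consistent with those reads (values are placeholders):

    config_yaml = {
        "theme": "default",
        "name": "My blog",
        "description": "Notes and articles",
        "language": "en",
    }

Example 10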
    def diagnose(self):
        sample_paths = helpers.load_yaml(
            self._yaml_path)["diagnose"]["samples"]
        samples = self.load_samples(sample_paths)
        for sample in samples:
            print("Diagnosing sample " + sample.get_id() + "...")
            for image in sample.get_images():
                if image.get_id() == "01":
                    self._cell_detector.run(image)
                    x = self.cells_to_array(image.get_cells())
                    predictions = self._neural_network.predict(x)
                    for prediction, cell in zip(predictions, image.get_cells()):
                        cell.set_prediction(np.argmax(prediction))
                        cell.set_confidence(np.max(prediction))

                    directory = "../results/cell_detector4/" + sample.get_id()
                    if not os.path.isdir(directory):
                        os.makedirs(directory)
                    cv2.imwrite(directory + "/" + image.get_name() + ".jpg",
                                image.draw_cells())
Example 11
 def run(self):
     index_files = glob(self._index_location + "/*.yml")
     for index_file in index_files:
         if "index_template" not in index_file:
             data = load_yaml(index_file)
             if "Projects" not in data:
                 raise Exception("Invalid index file")
             for entry in data["Projects"]:
                 title, section, container_name, target_file_url = self._get_wiki_info(
                     entry)
                 if section not in self._projects_data:
                     self._projects_data[str(section)] = {
                         "title": title,
                         "items": []
                     }
                 self._projects_data[str(section)]["items"].append({
                     "name":
                     container_name,
                     "url":
                     target_file_url
                 })
     self._generate_wiki_content()
Example 12
    def compile_target_attributes(self):
        """Compiles the target (distribution format) yaml file into a dictionary."""

        logger.info("Compiling target attribute yaml.")
        self.target_attributes = dict()

        # Load yaml.
        target_attributes_yaml = helpers.load_yaml(os.path.abspath("../distribution_format.yaml"))

        # Store yaml contents for all contained table names.
        logger.info("Compiling attributes for target tables.")

        for table in target_attributes_yaml:
            self.target_attributes[table] = {"spatial": target_attributes_yaml[table]["spatial"], "fields": dict()}

            for field, vals in target_attributes_yaml[table]["fields"].items():
                # Compile field attributes.
                try:
                    self.target_attributes[table]["fields"][field] = str(vals[0])
                except (AttributeError, KeyError, ValueError):
                    logger.exception("Invalid schema definition for table: {}, field: {}.".format(table, field))
                    sys.exit(1)
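
Both this example and Example 15 take vals[0] from each field definition, which suggests distribution_format.yaml stores a (dtype, length) pair per field; a hypothetical table entry matching that shape (names and values are illustrative):

    target_attributes_yaml = {
        "roadseg": {
            "spatial": True,
            "fields": {"nid": ["str", 32], "roadclass": ["str", 100]},
        }
    }

Example 13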
def fit_or_load_models(settings, data_name, seed, data, fit):
    """ either fits and saves or loads all models """
    device = data.tensors[0].device

    # get model class
    model = model_class_mapper[data_name]

    # load training hyperparameters
    training_config = load_yaml('configs/training/{}.yaml'.format(data_name))

    # not considering until success attack settings
    _ = settings.pop('lots-until-success')
    _ = settings.pop('pgd-until-success')

    # fit or load models
    models, losses = dict(), dict()

    for attack_name, attack_config in settings.items():
        if fit:
            print('fitting {} model using {}'.format(data_name, attack_name))
            models[attack_name], losses[attack_name] = fit_and_save(
                name=attack_name,
                model_class=model,
                seed=seed,
                training_config=dict(training_config),
                adversarial_config=dict(attack_config),
                training_data=data,
                data_name=data_name)
        else:
            models[attack_name] = model(seed).to(device)
            model_path = 'results/{}/models/{}_classifier.pt'.format(
                data_name, attack_name)
            _ = models[attack_name].load_state_dict(torch.load(model_path))
            losses_path = 'results/{}/losses/{}_losses.pt'.format(
                data_name, attack_name)
            losses[attack_name] = torch.load(losses_path)
        _ = models[attack_name].eval()

    return models, losses
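Example 14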
 def update_config(self):
     config = helpers.load_yaml(self._yaml_path)
     self._kernel_size = [(value, value) for value in config["neural_network"]["kernel_size"]]
     self._epochs = config["neural_network"]["epochs"]
     self._train_batches = config["train"]["batches"]
     self._evaluate_batches = config["evaluate"]["batches"]
     self._batch_size = config["neural_network"]["batch_size"]
     self._drop_prob = config["neural_network"]["drop_prob"]
     self._conv_depth = config["neural_network"]["conv_depth"]
     self._hidden_size = config["neural_network"]["hidden_size"]
     self._learn_rate = config["neural_network"]["learn_rate"]
     self._activation = config["neural_network"]["activation"]
     self._augment = config["neural_network"]["augment"]
     self._threshold = config["evaluate"]["threshold"]
     self._channel_shift = config["neural_network"]["channel_shift"]
     self._init = config["neural_network"]["init"]
     self._pool_size = [(value, value) for value in config["neural_network"]["pool_size"]]
     self._train_data = "../" + config["train"]["data"]
     self._evaluate_data = config["evaluate"]["data"]
     self._classes = config["classes"]
     self._image_shape = (config["images"]["height"],
                          config["images"]["width"],
                          config["images"]["depth"])
     self.reset_history()
Example 15
    def compile_target_attributes(self) -> None:
        """Compiles the yaml file for the target (Geo)DataFrames (distribution format) into a dictionary."""

        logger.info("Compiling target attribute yaml.")
        table = field = None

        # Load yaml.
        self.target_attributes = helpers.load_yaml(filepath.parents[1] /
                                                   "distribution_format.yaml")

        # Remove field length from dtype attribute.
        logger.info("Configuring target attributes.")
        try:

            for table in self.target_attributes:
                for field, vals in self.target_attributes[table][
                        "fields"].items():
                    self.target_attributes[table]["fields"][field] = vals[0]

        except (AttributeError, KeyError, ValueError):
            logger.exception(
                f"Invalid schema definition for table: {table}, field: {field}."
            )
            sys.exit(1)
Example 16
def show_statistics():
    articles = 0
    drafts = 0
    word_count_total = 0

    helpers.chdir_to_articles()

    for article in os.listdir('.'):
        if os.path.isfile(article) and not article.startswith('.'):
            article_yaml = helpers.load_yaml(article)
            is_publish = helpers.read_key(article_yaml[0], 'publish')
            markdown = helpers.read_key(article_yaml[1], 'markdown')

            if not is_publish:
                drafts = drafts + 1
            articles = articles + 1

            word_count = len(markdown.split())
            word_count_total += word_count

    print('{} article(s): {} to publish, {} draft(s)'.format(
        str(articles), str(articles - drafts), str(drafts)))
    print('{} word(s) total, {} word(s) average'.format(
        str(word_count_total), str(round(word_count_total / articles))))
Example 17
import os
from buildbot.plugins import util, steps, schedulers

import helpers
import gitpoller

config = helpers.load_yaml('services_config.yaml')

# Increment this if you wipe the DB to prevent reusing build version numbers.
DATABASE_VERSION = 1

SOURCE_GIT_URL = 'https://github.com'
POLL_INTERVAL_SECONDS = 600

# Deploy any branches that have any associated realm
BRANCH_TO_REALM_MAPPING = config['branch_to_realm_mapping']
DEPLOY_BRANCHES = list(BRANCH_TO_REALM_MAPPING)

# The Docker Hub registry doesn't need a hostname
REGISTRY = 'klaital'

# List of workers that can concurrently build
WORKERNAMES = [
    "klaital-standardservice-worker",
]

SERVICES = config['services']

for ms in SERVICES:
    SERVICES[ms]['poll_branches'] = DEPLOY_BRANCHES
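
The snippet reads two top-level keys from services_config.yaml; a hypothetical minimal config matching those reads (service and branch names are invented):

    config = {
        "branch_to_realm_mapping": {"main": "production"},
        "services": {"example-service": {}},  # per-service settings; poll_branches is set above
    }

Example 18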
                              f'NREL-1p7-103-step{istep-1}.yaml')
fname_modeling_options = os.path.join(run_dir,
                                      f'modeling_options.{istep}.yaml')
fname_analysis_options = os.path.join(run_dir,
                                      f'analysis_options.{istep}.yaml')

if MPI:
    rank = MPI.COMM_WORLD.Get_rank()
else:
    rank = 0

if rank == 0:
    print('STEP', istep)

    ## Update analysis options
    aopt = load_yaml(os.path.join(run_dir, 'analysis_options.start.yaml'))
    aopt['general']['folder_output'] = f'outputs.{istep}'
    aopt['general']['fname_output'] = f'NREL-1p7-103-step{istep}'

    # - blade-mass opt constrained by tip deflection
    aopt['driver']['optimization']['flag'] = True
    aopt['design_variables']['blade']['structure']['spar_cap_ss'][
        'flag'] = True
    aopt['design_variables']['blade']['structure']['spar_cap_ps'][
        'flag'] = True
    aopt['constraints']['blade']['tip_deflection']['flag'] = True
    aopt['merit_figure'] = 'blade_mass'
    save_yaml(fname_analysis_options, aopt)

    ## Update modeling options
    mopt = load_yaml(
Example 19
                              f'NREL-2p5-116-step{istep-1}.yaml')
fname_modeling_options = os.path.join(run_dir,
                                      f'modeling_options.{istep}.yaml')
fname_analysis_options = os.path.join(run_dir,
                                      f'analysis_options.{istep}.yaml')

if MPI:
    rank = MPI.COMM_WORLD.Get_rank()
else:
    rank = 0

if rank == 0:
    print('STEP', istep)

    ## Update analysis options
    aopt = load_yaml(os.path.join(run_dir, 'analysis_options.start.yaml'))
    aopt['general']['folder_output'] = f'outputs.{istep}'
    aopt['general']['fname_output'] = f'NREL-2p5-116-step{istep}'

    # - constrained structural opt for tower mass
    aopt['driver']['optimization']['flag'] = True
    #aopt['driver']['optimization']['tol'] = 1e-6
    #aopt['driver']['optimization']['max_iter'] = 50
    aopt['design_variables']['tower']['layer_thickness']['flag'] = True
    aopt['design_variables']['tower']['outer_diameter']['flag'] = True
    aopt['design_variables']['tower']['outer_diameter']['upper_bound'] = 4.0
    aopt['constraints']['tower']['stress']['flag'] = True
    aopt['constraints']['tower']['global_buckling']['flag'] = True
    aopt['constraints']['tower']['shell_buckling']['flag'] = True
    aopt['merit_figure'] = 'tower_mass'
    save_yaml(fname_analysis_options, aopt)
Example 20
import datetime
from flask import Flask, jsonify, redirect, render_template, url_for
from flask_moment import Moment
from helpers import (build_schedule, elapsed_percent, ensure_logs_dir,
                     human_readable_time, load_yaml, schedule_expired,
                     setup_logging)
from vlc_client.vlc_client import VLCClient

app = Flask(__name__)
moment = Moment(app)
config = load_yaml('config.yaml')

ensure_logs_dir(config['LOGGING']['path'])
log_file = setup_logging(config['LOGGING']['path'])
app.logger.addHandler(log_file)

vlc = VLCClient(config['VLC'])
SCHEDULE = {"exp": datetime.datetime.now(), "playlist": {}, "current": {}}


@app.route('/')
def index():
    # TODO 6.26.2020: Throw a 404 if vlc is not running instead of breaking app

    # rebuild the schedule if the cache has expired
    if schedule_expired(SCHEDULE):
        try:
            current = vlc.get_status()
            playlist = vlc.get_playlist()
            SCHEDULE.clear()
            SCHEDULE.update(
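Example 21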
run_dir = './'
fname_wt_input = os.path.join(run_dir, 'NREL-2p5-116.start.yaml')
fname_modeling_options = os.path.join(run_dir, 'modeling_options_wisdem.yaml')
fname_analysis_options = os.path.join(run_dir,
                                      f'analysis_options.{istep}.yaml')

if MPI:
    rank = MPI.COMM_WORLD.Get_rank()
else:
    rank = 0

if rank == 0:
    print('STEP', istep)

    ## Update analysis options
    aopt = load_yaml(os.path.join(run_dir, 'analysis_options.start.yaml'))
    aopt['general']['folder_output'] = f'outputs.{istep}'
    aopt['general']['fname_output'] = f'NREL-2p5-116-step{istep}'
    save_yaml(fname_analysis_options, aopt)

tt = time.time()

# step 1: manually updated turbine rating, rotor size -- NO OPT
# - rated_power: 2500000.0 W
# - rotor_diameter: 116 m
# - hub_height: 80 m
wt_opt, modeling_options, opt_options = run_wisdem(fname_wt_input,
                                                   fname_modeling_options,
                                                   fname_analysis_options)

if rank == 0:
Example 22
def main(gpkg_in, gpkg_out, layer_in, layer_out):

    # command line system arguments
    gpkg_in = (sys.argv[1])
    gpkg_out = (sys.argv[2])
    layer_in = (sys.argv[3])
    layer_out = (sys.argv[4])

    # database name which will be used for stage 2
    nrn_db = "nrn"

    # load sql yaml file
    sql_load = helpers.load_yaml("sql/sql.yaml")

    # default postgres connection needed to create the nrn database
    conn = connect(
        dbname="postgres",
        user="******",
        host="localhost",
        password="******"
    )

    # postgres database url for geoprocessing
    nrn_url = URL(
        drivername='postgresql+psycopg2', host='localhost',
        database=nrn_db, username='******',
        port='5432', password='******'
    )

    # engine to connect to nrn database
    engine = create_engine(nrn_url)

    # get the isolation level for autocommit
    autocommit = extensions.ISOLATION_LEVEL_AUTOCOMMIT

    # set the isolation level for the connection's cursors
    # will raise ActiveSqlTransaction exception otherwise
    conn.set_isolation_level(autocommit)

    # connect to default connection
    cursor = conn.cursor()

    # drop the nrn database if it exists, then create it if not
    try:
        logger.info("Dropping PostgreSQL database.")
        cursor.execute(sql.SQL("DROP DATABASE IF EXISTS {};").format(sql.Identifier(nrn_db)))
    except Exception:
        logger.exception("Could not drop database.")

    try:
        logger.info("Creating PostgreSQL database.")
        cursor.execute(sql.SQL("CREATE DATABASE {};").format(sql.Identifier(nrn_db)))
    except Exception:
        logger.exception("Failed to create PostgreSQL database.")

    logger.info("Closing default PostgreSQL connection.")
    cursor.close()
    conn.close()

    # connection parameters for newly created database
    nrn_conn = connect(
        dbname=nrn_db,
        user="******",
        host="localhost",
        password="******"
    )

    nrn_conn.set_isolation_level(autocommit)

    # connect to nrn database
    nrn_cursor = nrn_conn.cursor()
    try:
        logger.info("Creating spatially enabled PostgreSQL database.")
        nrn_cursor.execute(sql.SQL("CREATE EXTENSION IF NOT EXISTS postgis;"))
    except Exception:
        logger.exception("Cannot create PostGIS extension.")

    try:
        logger.info("Creating grid function.")
        nrn_cursor.execute(sql.SQL(sql_load["hex_grid"]["function"]))
    except Exception:
        logger.exception("Cannot create PostGIS function.")

    logger.info("Closing NRN PostgreSQL connection.")
    nrn_cursor.close()
    nrn_conn.close()

    # incoming NRN gpkg
    logger.info("Reading incoming GeoPackage.")
    gdf = gpd.read_file(gpkg_in, layer=layer_in)

    # reproject to epsg:3348
    logger.info("Reprojecting to EPSG:3348.")
    gdf = gdf.to_crs({'init': 'epsg:3348'})

    # calculate years since revision using current year (startTime.year) and "REVDATE"
    logger.info("Calculating years since last revision using current year")
    gdf["SINREV"] = startTime.year - gdf["REVDATE"].str[:4].astype("int64")

    # create representative point for each line segment
    logger.info("Generating representative point for each line segment.")
    gdf["geometry"] = gdf.geometry.representative_point()

    logger.info("Extracting total bounds.")
    minx, miny, maxx, maxy = gdf.geometry.total_bounds

    logger.info("Importing GeoDataFrame into PostGIS.")
    gdf.postgis.to_postgis(con=engine, table_name="reprept", geometry='POINT', if_exists='replace')

    logger.info("Generating hex grid based on total bounds.")
    hex_grid_query = sql_load["gen_hex_grid"]["query"].format(minx, miny, maxx, maxy)

    logger.info("Generating hex grid.")
    grid = gpd.GeoDataFrame.from_postgis(hex_grid_query, engine, geom_col="geom")

    logger.info("Aggregating mean years since last revision over hex grid.")
    aggregate = sql_load["aggregate"]["query"]

    logger.info("Extracting aggregations from PostGIS.")
    gdf = gpd.GeoDataFrame.from_postgis(aggregate, engine, geom_col="geom")

    # overwrite the incoming geopackage
    logger.info("Writing final GeoPackage layer.")
    gdf.to_file(gpkg_out, layer=layer_out, driver="GPKG")
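
The script pulls raw SQL out of sql/sql.yaml by section and key; a hypothetical skeleton of that file as load_yaml would return it (SQL bodies elided):

    sql_load = {
        "hex_grid": {"function": "CREATE OR REPLACE FUNCTION ..."},
        "gen_hex_grid": {"query": "SELECT ... {} {} {} {} ..."},  # formatted with the total bounds
        "aggregate": {"query": "SELECT ..."},
    }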
Example 23
    def export_data(self) -> None:
        """Exports and packages all data."""

        logger.info("Exporting output data.")

        # Configure export progress bar.
        file_count = 0
        for lang, dfs in self.dframes.items():
            for frmt in self.formats:
                count = len(set(dfs).intersection(set(self.distribution_formats[lang][frmt]["conform"])))
                file_count += (len(self.kml_groups[lang]) * count) if frmt == "kml" else count
        export_progress = trange(file_count, desc="Exporting data", bar_format=self.bar_format)

        # Iterate export formats and languages.
        for lang, dfs in self.dframes.items():
            for frmt in self.formats:

                # Retrieve export specifications.
                export_specs = self.distribution_formats[lang][frmt]

                # Filter required dataframes.
                dframes = {name: df.copy(deep=True) for name, df in dfs.items() if name in export_specs["conform"]}

                # Configure export directory.
                export_dir, export_file = itemgetter("dir", "file")(export_specs["data"])
                export_dir = self.output_path / self.format_path(export_dir) / self.format_path(export_file)

                # Configure mapped layer names.
                nln_map = {table: self.format_path(export_specs["conform"][table]["name"]) for table in dframes}

                # Configure export kwargs.
                kwargs = {
                    "driver": {"gml": "GML", "gpkg": "GPKG", "kml": "KML", "shp": "ESRI Shapefile"}[frmt],
                    "type_schemas": helpers.load_yaml(filepath.parents[1] / "distribution_format.yaml"),
                    "export_schemas": export_specs,
                    "nln_map": nln_map,
                    "keep_uuid": False,
                    "outer_pbar": export_progress,
                    "epsg": 4617,
                    "geom_type": {table: df.geom_type.iloc[0] for table, df in dframes.items() if "geometry" in
                                  df.columns}
                }

                # Configure KML.
                if frmt == "kml":

                    # Configure export names.
                    self.kml_groups[lang]["name"] = self.kml_groups[lang]["name"].map(
                        lambda name: str(export_dir).replace("<name>", name))

                    # Iterate export datasets.
                    for table, df in dframes.items():

                        # Map dataframe queries (more efficient than iteratively querying).
                        self.kml_groups[lang]["df"] = self.kml_groups[lang]["query"].map(
                            lambda query: df.query(query).copy(deep=True))

                        # Iterate KML groups.
                        for kml_group in self.kml_groups[lang].itertuples(index=False):

                            # Export data.
                            kml_name, kml_df = attrgetter("name", "df")(kml_group)
                            helpers.export({table: kml_df}, kml_name, **kwargs)

                # Configure non-KML.
                else:
                    # Export data.
                    helpers.export(dframes, export_dir, **kwargs)

        # Close progress bar.
        export_progress.close()
Example 24
    def update_distribution_docs(self) -> None:
        """
        Writes updated documentation to data/processed for:
            - completion rates
            - release notes
        """

        def write_documents(data: dict, filename: str) -> None:
            """
            Updates a document template with a dictionary and exports:
                1) an rst file representing the updated template.
                2) a yaml file containing the updated dictionary.

            :param dict data: dictionary of values used to populate the document template.
            :param str filename: basename of a document in ../distribution_docs to be updated.
            """

            # Configure source and destination paths.
            src = filepath.parent / f"distribution_docs/{filename}.rst"
            dst = self.output_path / filename

            try:

                # Load document as jinja template.
                with open(src, "r") as doc:
                    template = jinja2.Template(doc.read())

                # Update template.
                updated_doc = template.render(data)

            except (jinja2.TemplateError, jinja2.TemplateAssertionError, jinja2.UndefinedError) as e:
                logger.exception(f"Unable to render updated Jinja2.Template for: {src}.")
                logger.exception(e)
                sys.exit(1)

            # Export updated document.
            try:

                # Write rst.
                with open(dst.with_suffix(".rst"), "w") as doc:
                    doc.write(updated_doc)

                # Write yaml.
                with open(dst.with_suffix(".yaml"), "w") as doc:
                    yaml.dump(data, doc)

            except (ValueError, yaml.YAMLError) as e:
                logger.exception(f"Unable to write document: {dst}.")
                logger.exception(e)
                sys.exit(1)

        # Update release notes.
        logger.info(f"Updating documentation: release notes.")

        # Compile previous data.
        data = helpers.load_yaml(filepath.parent / "distribution_docs/release_notes.yaml")

        # Update release notes - edition, release date, validity date.
        data[self.source]["edition"] = f"{self.major_version}.{self.minor_version}"
        data[self.source]["release_date"] = datetime.now().strftime("%Y-%m")
        data[self.source]["validity_date"] = datetime.now().strftime("%Y-%m")

        # Update release notes - number of kilometers.
        # Note: EPSG:3348 used to get geometry lengths in meters.
        kms = int(round(self.dframes["en"]["roadseg"].to_crs("EPSG:3348").length.sum() / 1000, 0))
        data[self.source]["number_of_kilometers"] = f"{kms:,d}"

        # Write updated documents.
        write_documents(data, "release_notes")

        # Update completion rates.
        logger.info(f"Updating documentation: completion rates.")

        # Compile previous data.
        data = helpers.load_yaml(filepath.parent / "distribution_docs/completion_rates.yaml")

        # Update completion rates.

        # Iterate dataframe and column names.
        for table, df in self.dframes["en"].items():
            for col in data[table]:

                # Configure column completion rate.
                # Note: Values between 0 and 1 are rounded to 1, values between 99 and 100 are rounded to 99.
                completion_rate = (len(df.loc[~df[col].isin({"Unknown", -1})]) / len(df)) * 100
                if 0 < completion_rate < 1:
                    completion_rate = 1
                if 99 < completion_rate < 100:
                    completion_rate = 99

                # Update column value for source.
                data[table][col][self.source] = int(completion_rate)

                # Update column average.
                vals = itemgetter(*set(data[table][col]) - {"avg"})(data[table][col])
                data[table][col]["avg"] = int(round(sum(map(int, vals)) / len(vals), 0))

        # Write updated documents.
        write_documents(data, "completion_rates")
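
A standalone sketch of the clamping rule noted above (values between 0 and 1 report as 1, values between 99 and 100 report as 99), with invented inputs:

    def clamp_rate(completion_rate: float) -> int:
        # Rates in (0, 1) report as 1; rates in (99, 100) report as 99.
        if 0 < completion_rate < 1:
            completion_rate = 1
        if 99 < completion_rate < 100:
            completion_rate = 99
        return int(completion_rate)

    # clamp_rate(0.4) -> 1, clamp_rate(99.7) -> 99, clamp_rate(100.0) -> 100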
Example 25
def main(osm_in, nrn_in, out, out_layer):

    # command line system arguments
    osm_in = (sys.argv[1])
    nrn_in = (sys.argv[2])
    out = (sys.argv[3])
    out_layer = (sys.argv[4])

    # database name which will be used for stage 2
    nrn_db = "nrn"

    # load sql yaml file
    sql_load = helpers.load_yaml("sql/sql.yaml")

    # default postgres connection needed to create the nrn database
    conn = connect(
        dbname="postgres",
        user="******",
        host="localhost",
        password="******"
    )

    # postgres database url for geoprocessing
    nrn_url = URL(
        drivername='postgresql+psycopg2', host='localhost',
        database=nrn_db, username='******',
        port='5432', password='******'
    )

    # engine to connect to nrn database
    engine = create_engine(nrn_url)

    # get the isolation level for autocommit
    autocommit = extensions.ISOLATION_LEVEL_AUTOCOMMIT

    # set the isolation level for the connection's cursors
    # will raise ActiveSqlTransaction exception otherwise
    conn.set_isolation_level(autocommit)

    # connect to default connection
    cursor = conn.cursor()

    # drop the nrn database if it exists, then create it if not
    try:
        logger.info("Dropping PostgreSQL database.")
        cursor.execute(sql.SQL("DROP DATABASE IF EXISTS {};").format(sql.Identifier(nrn_db)))
    except Exception:
        logger.exception("Could not drop database.")

    try:
        logger.info("Creating PostgreSQL database.")
        cursor.execute(sql.SQL("CREATE DATABASE {};").format(sql.Identifier(nrn_db)))
    except Exception:
        logger.exception("Failed to create PostgreSQL database.")

    logger.info("Closing default PostgreSQL connection.")
    cursor.close()
    conn.close()

    # connection parameters for newly created database
    nrn_conn = connect(
        dbname=nrn_db,
        user="******",
        host="localhost",
        password="******"
    )

    nrn_conn.set_isolation_level(autocommit)

    # connect to nrn database
    nrn_cursor = nrn_conn.cursor()
    try:
        logger.info("Creating spatially enabled PostgreSQL database.")
        nrn_cursor.execute(sql.SQL("CREATE EXTENSION IF NOT EXISTS postgis;"))
    except Exception:
        logger.exception("Cannot create PostGIS extension.")

    try:
        logger.info("Creating grid function.")
        nrn_cursor.execute(sql.SQL(sql_load["hex_grid"]["function"]))
    except Exception:
        logger.exception("Cannot create PostGIS function.")

    logger.info("Closing NRN PostgreSQL connection.")
    nrn_cursor.close()
    nrn_conn.close()

    # Download provincial boundaries from STC website.
    logger.info("Downloading provincial boundaries.")
    pr_url = "http://www12.statcan.gc.ca/census-recensement/2011/geo/bound-limit/files-fichiers/2016/lpr_000b16a_e.zip"
    urllib.request.urlretrieve(pr_url, '../data/interim/pr.zip')
    with zipfile.ZipFile("../data/interim/pr.zip", "r") as zip_ref:
        zip_ref.extractall("../data/interim/pr")

    logger.info("Reading incoming provincial boundaries.")
    pr = gpd.read_file("../data/interim/pr/lpr_000b16a_e.shp")

    # Assign geodataframe total bounds to min and max XY.
    minx, miny, maxx, maxy = pr.geometry.total_bounds

    # Assign list to variable extent.
    extent = [minx, maxx, miny, maxy]

    # Convert extent to string and separate by comma.
    extent = ','.join(map(str, extent))

    # Creates a hexagon grid using the provincial boundary extent and QGIS Processing.
    logger.info("Generating grid.")
    qgis_processing.gen_grid(extent)

    # Incoming OSM data
    logger.info("Reading incoming OSM data.")
    osm = gpd.read_file(osm_in)

    # Incoming NRN GPKG
    logger.info("Reading incoming GPKG.")
    gdf = gpd.read_file(nrn_in)

    logger.info("Reading incoming grid.")
    grid = gpd.read_file("../data/interim/output.gpkg", driver="GPKG")
    grid.crs = {"init": "epsg:3348"}

    logger.info("Importing GeoDataFrame into PostGIS.")
    gdf.postgis.to_postgis(con=engine, table_name="nrn", geometry='LineString', if_exists='replace')

    logger.info("Importing OSM data into PostGIS.")
    osm.postgis.to_postgis(con=engine, table_name="osm", geometry='LineString', if_exists='replace')

    logger.info("Importing grid data into PostGIS.")
    grid.postgis.to_postgis(con=engine, table_name="hex_grid", geometry='Polygon', if_exists='replace')

    logger.info("Comparing NRN and OSM road network length.")
    length = sql_load["length"]["query"]

    logger.info("Extracting final from PostGIS.")
    gdf = gpd.GeoDataFrame.from_postgis(length, engine, geom_col="geom")
    gdf.crs = {"init": "epsg:3348"}

    logger.info("Writing output GeoPackage.")
    gdf.to_file(out, layer=out_layer, driver="GPKG")
Example 26
    def compile_domains(self):
        """Compiles field domains for the target dataframes."""

        logger.info("Compiling field domains.")
        self.domains = dict()

        for suffix in ("en", "fr"):

            # Load yaml.
            logger.info("Loading \"{}\" field domains yaml.".format(suffix))
            domains_yaml = helpers.load_yaml(os.path.abspath("../field_domains_{}.yaml".format(suffix)))

            # Compile domain values.
            logger.info("Compiling \"{}\" domain values.".format(suffix))

            for table in domains_yaml:
                # Register table.
                if table not in self.domains.keys():
                    self.domains[table] = dict()

                for field, vals in domains_yaml[table].items():
                    # Register field.
                    if field not in self.domains[table].keys():
                        self.domains[table][field] = {"values": list(), "all": None}

                    try:

                        # Configure reference domain.
                        while isinstance(vals, str):
                            if vals.find(";") > 0:
                                table_ref, field_ref = vals.split(";")
                            else:
                                table_ref, field_ref = table, vals
                            vals = domains_yaml[table_ref][field_ref]

                        # Compile domain values.
                        if vals is None:
                            self.domains[table][field]["values"] = self.domains[table][field]["all"] = None
                            continue

                        elif isinstance(vals, dict):
                            self.domains[table][field]["values"].extend(vals.values())
                            if self.domains[table][field]["all"] is None:
                                self.domains[table][field]["all"] = vals
                            else:
                                self.domains[table][field]["all"] = \
                                    {k: [v, vals[k]] for k, v in self.domains[table][field]["all"].items()}

                        elif isinstance(vals, list):
                            self.domains[table][field]["values"].extend(vals)
                            if self.domains[table][field]["all"] is None:
                                self.domains[table][field]["all"] = vals
                            else:
                                self.domains[table][field]["all"] = list(zip(self.domains[table][field]["all"], vals))

                        else:
                            logger.exception("Invalid schema definition for table: {}, field: {}.".format(table, field))
                            sys.exit(1)

                    except (AttributeError, KeyError, ValueError):
                        logger.exception("Invalid schema definition for table: {}, field: {}.".format(table, field))
                        sys.exit(1)

        logger.info("Identifying field domain functions.")
        self.domains_funcs = list()

        # Identify functions from field_map_functions.
        for func in [f for f in getmembers(field_map_functions) if isfunction(f[1])]:
            if "domain" in func[1].__code__.co_varnames:
                self.domains_funcs.append(func[0])
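
The while-loop above resolves string domain values as references, either to another field in the same table or, using a "table;field" value, to a field in another table; a hypothetical structure (as returned by load_yaml) showing both forms:

    domains_yaml = {
        "tableA": {
            "fieldX": {1: "Yes", 2: "No"},   # literal domain
            "fieldY": "fieldX",              # reference to a field in the same table
            "fieldZ": "tableA;fieldX",       # "table;field" reference to another table
        }
    }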
Example 27
    def __init__(self, source: str, remove: bool = False) -> None:
        """
        Initializes an NRN stage.

        :param str source: abbreviation for the source province / territory.
        :param bool remove: removes pre-existing files within the data/processed directory for the specified source,
            excluding change logs, default False.
        """

        self.stage = 5
        self.source = source.lower()
        self.remove = remove
        self.major_version = None
        self.minor_version = None

        # Configure and validate input data path.
        self.data_path = filepath.parents[2] / f"data/interim/{self.source}.gpkg"
        if not self.data_path.exists():
            logger.exception(f"Input data not found: {self.data_path}.")
            sys.exit(1)

        # Configure output path.
        self.output_path = filepath.parents[2] / f"data/processed/{self.source}"

        # Conditionally clear output namespace.
        namespace = list(filter(lambda f: f.stem != f"{self.source}_change_logs", self.output_path.glob("*")))

        if len(namespace):
            logger.warning("Output namespace already occupied.")

            if self.remove:
                logger.warning("Parameter remove=True: Removing conflicting files.")

                for f in namespace:
                    logger.info(f"Removing conflicting file: \"{f}\".")

                    if f.is_file():
                        f.unlink()
                    else:
                        helpers.rm_tree(f)

            else:
                logger.exception("Parameter remove=False: Unable to proceed while output namespace is occupied. Set "
                                 "remove=True (-r) or manually clear the output namespace.")
                sys.exit(1)

        # Configure field defaults and domains.
        self.defaults = {lang: helpers.compile_default_values(lang=lang) for lang in ("en", "fr")}
        self.domains = helpers.compile_domains(mapped_lang="fr")

        # Configure export formats.
        distribution_formats_path = filepath.parent / "distribution_formats"
        self.formats = [f.stem for f in (distribution_formats_path / "en").glob("*")]
        self.distribution_formats = {
            "en": {frmt: helpers.load_yaml(distribution_formats_path / f"en/{frmt}.yaml") for frmt in self.formats},
            "fr": {frmt: helpers.load_yaml(distribution_formats_path / f"fr/{frmt}.yaml") for frmt in self.formats}
        }

        # Define custom progress bar format.
        # Note: the only change from default is moving the percentage to the right end of the progress bar.
        self.bar_format = "{desc}: |{bar}| {percentage:3.0f}% {r_bar}"

        # Load data.
        self.dframes = helpers.load_gpkg(self.data_path)