def download_previous_vintage(self) -> None:
    """Downloads the previous NRN vintage and extracts the English GeoPackage as <source>_old.gpkg."""

    logger.info("Retrieving previous NRN vintage.")

    # Determine download requirement.
    if self.nrn_old_path["gpkg"].exists():
        logger.warning(f"Previous NRN vintage already exists: \"{self.nrn_old_path['gpkg']}\". Skipping step.")

    else:
        # Download previous NRN vintage.
        logger.info("Downloading previous NRN vintage.")
        download_url = None

        try:
            # Get download url.
            download_url = helpers.load_yaml(
                filepath.parents[1] / "downloads.yaml")["previous_nrn_vintage"][self.source]

            # Get raw content stream from download url.
            download = helpers.get_url(download_url, stream=True, timeout=30, verify=True)

            # Copy download content to file.
            with open(self.nrn_old_path["zip"], "wb") as f:
                shutil.copyfileobj(download.raw, f)

        except (requests.exceptions.RequestException, shutil.Error) as e:
            logger.exception(f"Unable to download previous NRN vintage: \"{download_url}\".")
            logger.exception(e)
            sys.exit(1)

        # Extract zipped data.
        logger.info("Extracting zipped data for previous NRN vintage.")
        gpkg_download = [f for f in zipfile.ZipFile(self.nrn_old_path["zip"], "r").namelist()
                         if f.lower().startswith("nrn") and Path(f).suffix == ".gpkg"][0]
        with zipfile.ZipFile(self.nrn_old_path["zip"], "r") as zip_f:
            with zip_f.open(gpkg_download) as zsrc, open(self.nrn_old_path["gpkg"], "wb") as zdest:
                shutil.copyfileobj(zsrc, zdest)

        # Remove temporary files.
        logger.info("Removing temporary files for previous NRN vintage.")
        if self.nrn_old_path["zip"].exists():
            self.nrn_old_path["zip"].unlink()
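# Every snippet in this collection relies on a helpers.load_yaml utility. The helper
# itself is not shown in these excerpts; the function below is a minimal, hypothetical
# sketch assuming it simply wraps yaml.safe_load (the real module may add caching,
# error handling, or path resolution).
from pathlib import Path
from typing import Any, Union

import yaml


def load_yaml(path: Union[str, Path]) -> Any:
    """Loads and returns the parsed contents of a YAML file."""
    with open(path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)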
def configure_release_version(self) -> None:
    """Configures the major and minor release versions for the current NRN vintage."""

    logger.info("Configuring NRN release version.")

    # Extract the version number and release year for the current source from the release notes.
    release_year = None
    release_notes_path = filepath.parent / "distribution_docs/release_notes.yaml"
    release_notes = helpers.load_yaml(release_notes_path)

    try:
        # Extract previous release version and date.
        version, release_date = itemgetter("edition", "release_date")(release_notes[self.source])

        # Standardize raw variables.
        self.major_version, self.minor_version = map(int, str(version).split("."))
        release_year = int(str(release_date)[:4])

        # Configure new release version.
        if release_year == datetime.now().year:
            self.minor_version += 1
        else:
            self.major_version += 1
            self.minor_version = 0

    except (IndexError, ValueError) as e:
        logger.exception(f"Unable to extract version number and / or release date from \"{release_notes}\".")
        logger.exception(e)
        sys.exit(1)

    logger.info(f"Configured NRN release version: {self.major_version}.{self.minor_version}")
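# Illustration of the version-bump rule above with hypothetical values: a previous
# edition released in the current year bumps the minor version, while one released
# in an earlier year bumps the major version and resets the minor version.
from datetime import datetime


def bump_version(version: str, release_date: str) -> str:
    """Applies the same major/minor rule as configure_release_version."""
    major, minor = map(int, version.split("."))
    if int(release_date[:4]) == datetime.now().year:
        minor += 1
    else:
        major, minor = major + 1, 0
    return f"{major}.{minor}"


# e.g. bump_version("10.2", "2025-01") -> "10.3" if run in 2025, "11.0" otherwise.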
def __init__(self, article):
    data = helpers.load_yaml(article)
    self.title = helpers.read_key(data[0], 'title')
    self.date = helpers.read_key(data[0], 'date')
    self.is_publish = helpers.read_key(data[0], 'publish')
    self.name = os.path.splitext(article)[0].lower()
    markdown = helpers.read_key(data[1], 'markdown')
    self.text = m(markdown, output_format='html5',
                  extensions=['markdown.extensions.sane_lists'])
def compile_source_attributes(self) -> None:
    """Compiles the yaml files in the sources' directory into a dictionary."""

    logger.info("Compiling source attribute yamls.")
    self.source_attributes = dict()

    # Iterate source yamls.
    for f in filter(Path.is_file, Path(self.source_attribute_path).glob("*.yaml")):

        # Load yaml and store contents.
        self.source_attributes[f.stem] = helpers.load_yaml(f)
def compile_source_attributes(self):
    """Compiles the yaml files in the sources' directory into a dictionary."""

    logger.info("Identifying source attribute files.")
    files = [os.path.join(self.source_attribute_path, f)
             for f in os.listdir(self.source_attribute_path) if f.endswith(".yaml")]

    logger.info("Compiling source attribute yamls.")
    self.source_attributes = dict()

    for f in files:
        # Load yaml and store contents.
        self.source_attributes[os.path.splitext(os.path.basename(f))[0]] = helpers.load_yaml(f)
def update_config(self):
    config = helpers.load_yaml(self._yaml_path)
    radius = float(config["cell_detector"]["radius"])
    tolerance = float(config["cell_detector"]["tolerance"])
    pixels_per_micrometer = float(config["cell_detector"]["pixels_per_micrometer"])
    image_height = config["images"]["height"]
    image_width = config["images"]["width"]
    image_depth = config["images"]["depth"]
    self._cell_shape = (image_height, image_width, image_depth)
    self._min_rad = int((radius - tolerance) * pixels_per_micrometer)
    self._max_rad = int((radius + tolerance) * pixels_per_micrometer)
    if config["cell_detector"]["minimum_distance"]:
        self._min_dist = int(config["cell_detector"]["minimum_distance"])
    else:
        self._min_dist = self._min_rad
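# The detector's update_config above implies a config layout like the following
# (shown as the dict that load_yaml would return; the key names come from the code,
# the values are hypothetical):
example_detector_config = {
    "cell_detector": {
        "radius": 4.0,                  # expected cell radius in micrometers
        "tolerance": 1.0,               # +/- tolerance on the radius
        "pixels_per_micrometer": 10.0,  # image scale factor
        "minimum_distance": 0,          # falsy -> fall back to the minimum radius
    },
    "images": {"height": 80, "width": 80, "depth": 3},
}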
def cells_to_array(self, cells):
    """
    :param cells: list of detected cell objects to convert into a single image array.
    :return: numpy array of shape (len(cells), height, width, depth).
    """
    image = helpers.load_yaml(self._yaml_path)["images"]
    image_shape = (image["height"], image["width"], image["depth"])
    x = np.empty((len(cells), image_shape[0], image_shape[1], image_shape[2]), dtype=np.float32)
    for row, cell in zip(x, cells):
        cell_image = cell.get_image(dy=image_shape[0], dx=image_shape[1])
        height, width, depth = cell_image.shape
        if cell_image.shape != image_shape:
            print("cell wrong size!!")
        row[0:height, 0:width, 0:depth] = cell_image
    return x
def run(self):
    index_files = glob(self._index_location + "/*.yml")
    for index_file in index_files:
        if "index_template" not in index_file:
            data = load_yaml(index_file)
            if "Projects" not in data:
                raise Exception("Invalid index file")
            for entry in data["Projects"]:
                title, section, container_name, target_file_url = self._get_wiki_info(entry)
                if section not in self._projects_data:
                    self._projects_data[str(section)] = {
                        "title": title,
                        "items": []
                    }
                self._projects_data[str(section)]["items"].append({
                    "name": container_name,
                    "url": target_file_url
                })
    self._generate_wiki_content()
def build():
    config_file = CONFIG_FILE
    helpers.check_file(config_file)
    config_yaml = helpers.load_yaml(config_file)[0]
    selected_theme = helpers.read_key(config_yaml, 'theme')
    blog_name = helpers.read_key(config_yaml, 'name')
    description = helpers.read_key(config_yaml, 'description')
    language = helpers.read_key(config_yaml, 'language')
    builder = Builder(theme=selected_theme, name=blog_name,
                      description=description, lang=language)
    helpers.chdir_to_articles()
    for article in os.listdir('.'):
        if os.path.isfile(article) and not article.startswith('.'):
            builder.build_article(article)
    builder.build_overview()
def diagnose(self):
    sample_paths = helpers.load_yaml(self._yaml_path)["diagnose"]["samples"]
    samples = self.load_samples(sample_paths)
    for sample in samples:
        print("Diagnosing sample " + sample.get_id() + "...")
        for image in sample.get_images():
            if image.get_id() == "01":
                self._cell_detector.run(image)
                x = self.cells_to_array(image.get_cells())
                predictions = self._neural_network.predict(x)
                for prediction, cell in zip(predictions, image.get_cells()):
                    cell.set_prediction(np.argmax(prediction))
                    cell.set_confidence(np.max(prediction))
                directory = "../results/cell_detector4/" + sample.get_id()
                if not os.path.isdir(directory):
                    os.makedirs(directory)
                cv2.imwrite(directory + "/" + image.get_name() + ".jpg", image.draw_cells())
def compile_target_attributes(self):
    """Compiles the target (distribution format) yaml file into a dictionary."""

    logger.info("Compiling target attribute yaml.")
    self.target_attributes = dict()

    # Load yaml.
    target_attributes_yaml = helpers.load_yaml(os.path.abspath("../distribution_format.yaml"))

    # Store yaml contents for all contained table names.
    logger.info("Compiling attributes for target tables.")

    for table in target_attributes_yaml:
        self.target_attributes[table] = {"spatial": target_attributes_yaml[table]["spatial"], "fields": dict()}

        for field, vals in target_attributes_yaml[table]["fields"].items():

            # Compile field attributes.
            try:
                self.target_attributes[table]["fields"][field] = str(vals[0])
            except (AttributeError, KeyError, ValueError):
                logger.exception("Invalid schema definition for table: {}, field: {}.".format(table, field))
                sys.exit(1)
def fit_or_load_models(settings, data_name, seed, data, fit):
    """ either fits and saves or loads all models """
    device = data.tensors[0].device

    # get model class
    model = model_class_mapper[data_name]

    # load training hyperparameters
    training_config = load_yaml('configs/training/{}.yaml'.format(data_name))

    # not considering until-success attack settings
    _ = settings.pop('lots-until-success')
    _ = settings.pop('pgd-until-success')

    # fit or load models
    models, losses = dict(), dict()
    for attack_name, attack_config in settings.items():
        if fit:
            print('fitting {} model using {}'.format(data_name, attack_name))
            models[attack_name], losses[attack_name] = fit_and_save(
                name=attack_name,
                model_class=model,
                seed=seed,
                training_config=dict(training_config),
                adversarial_config=dict(attack_config),
                training_data=data,
                data_name=data_name)
        else:
            models[attack_name] = model(seed).to(device)
            model_path = 'results/{}/models/{}_classifier.pt'.format(data_name, attack_name)
            _ = models[attack_name].load_state_dict(torch.load(model_path))
            losses_path = 'results/{}/losses/{}_losses.pt'.format(data_name, attack_name)
            losses[attack_name] = torch.load(losses_path)
        _ = models[attack_name].eval()
    return models, losses
def update_config(self):
    config = helpers.load_yaml(self._yaml_path)
    self._kernel_size = [(value, value) for value in config["neural_network"]["kernel_size"]]
    self._epochs = config["neural_network"]["epochs"]
    self._train_batches = config["train"]["batches"]
    self._evaluate_batches = config["evaluate"]["batches"]
    self._batch_size = config["neural_network"]["batch_size"]
    self._drop_prob = config["neural_network"]["drop_prob"]
    self._conv_depth = config["neural_network"]["conv_depth"]
    self._hidden_size = config["neural_network"]["hidden_size"]
    self._learn_rate = config["neural_network"]["learn_rate"]
    self._activation = config["neural_network"]["activation"]
    self._augment = config["neural_network"]["augment"]
    self._threshold = config["evaluate"]["threshold"]
    self._channel_shift = config["neural_network"]["channel_shift"]
    self._init = config["neural_network"]["init"]
    self._pool_size = [(value, value) for value in config["neural_network"]["pool_size"]]
    self._train_data = "../" + config["train"]["data"]
    self._evaluate_data = config["evaluate"]["data"]
    self._classes = config["classes"]
    self._image_shape = (config["images"]["height"],
                         config["images"]["width"],
                         config["images"]["depth"])
    self.reset_history()
def compile_target_attributes(self) -> None:
    """Compiles the yaml file for the target (Geo)DataFrames (distribution format) into a dictionary."""

    logger.info("Compiling target attribute yaml.")
    table = field = None

    # Load yaml.
    self.target_attributes = helpers.load_yaml(filepath.parents[1] / "distribution_format.yaml")

    # Remove field length from dtype attribute.
    logger.info("Configuring target attributes.")
    try:
        for table in self.target_attributes:
            for field, vals in self.target_attributes[table]["fields"].items():
                self.target_attributes[table]["fields"][field] = vals[0]

    except (AttributeError, KeyError, ValueError):
        logger.exception(f"Invalid schema definition for table: {table}, field: {field}.")
        sys.exit(1)
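# The two compile_target_attributes variants above both assume distribution_format.yaml
# maps each table to a "spatial" flag and a "fields" mapping of field name -> [dtype, length],
# keeping only the dtype (vals[0]). A hypothetical excerpt, shown as the parsed dict:
example_target_attributes = {
    "roadseg": {
        "spatial": True,
        "fields": {
            "nid": ["str", 32],      # vals[0] -> dtype, vals[1] -> field length
            "revdate": ["str", 8],
        },
    },
}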
def show_statistics():
    articles = 0
    drafts = 0
    word_count_total = 0
    helpers.chdir_to_articles()
    for article in os.listdir('.'):
        if os.path.isfile(article) and not article.startswith('.'):
            article_yaml = helpers.load_yaml(article)
            is_publish = helpers.read_key(article_yaml[0], 'publish')
            markdown = helpers.read_key(article_yaml[1], 'markdown')
            if not is_publish:
                drafts = drafts + 1
            articles = articles + 1
            word_count = len(markdown.split())
            word_count_total += word_count
    print('{} article(s): {} to publish, {} draft(s)'.format(
        str(articles), str(articles - drafts), str(drafts)))
    print('{} word(s) total, {} word(s) average'.format(
        str(word_count_total), str(round(word_count_total / articles))))
import os

from buildbot.plugins import util, steps, schedulers

import helpers
import gitpoller

config = helpers.load_yaml('services_config.yaml')

# Increment this if you wipe the DB to prevent reusing build version numbers.
DATABASE_VERSION = 1

SOURCE_GIT_URL = 'https://github.com'
POLL_INTERVAL_SECONDS = 600

# Deploy any branches that have an associated realm.
BRANCH_TO_REALM_MAPPING = config['branch_to_realm_mapping']
DEPLOY_BRANCHES = list(BRANCH_TO_REALM_MAPPING)

# The Docker Hub registry doesn't need a hostname.
REGISTRY = 'klaital'

# List of workers that can build concurrently.
WORKERNAMES = [
    "klaital-standardservice-worker",
]

SERVICES = config['services']
for ms in SERVICES:
    SERVICES[ms]['poll_branches'] = DEPLOY_BRANCHES
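# The buildbot master config above implies a services_config.yaml with at least the two
# top-level keys it reads. A hypothetical excerpt, shown as the parsed dict (branch,
# realm, and service names are made up for illustration):
example_services_config = {
    "branch_to_realm_mapping": {
        "main": "production",
        "staging": "staging",
    },
    "services": {
        "example-service": {},   # per-service settings; 'poll_branches' is filled in at load time
    },
}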
                              f'NREL-1p7-103-step{istep-1}.yaml')
fname_modeling_options = os.path.join(run_dir, f'modeling_options.{istep}.yaml')
fname_analysis_options = os.path.join(run_dir, f'analysis_options.{istep}.yaml')

if MPI:
    rank = MPI.COMM_WORLD.Get_rank()
else:
    rank = 0

if rank == 0:
    print('STEP', istep)

    ## Update analysis options
    aopt = load_yaml(os.path.join(run_dir, 'analysis_options.start.yaml'))
    aopt['general']['folder_output'] = f'outputs.{istep}'
    aopt['general']['fname_output'] = f'NREL-1p7-103-step{istep}'
    # - blade-mass opt constrained by tip deflection
    aopt['driver']['optimization']['flag'] = True
    aopt['design_variables']['blade']['structure']['spar_cap_ss']['flag'] = True
    aopt['design_variables']['blade']['structure']['spar_cap_ps']['flag'] = True
    aopt['constraints']['blade']['tip_deflection']['flag'] = True
    aopt['merit_figure'] = 'blade_mass'
    save_yaml(fname_analysis_options, aopt)

    ## Update modeling options
    mopt = load_yaml(
                              f'NREL-2p5-116-step{istep-1}.yaml')
fname_modeling_options = os.path.join(run_dir, f'modeling_options.{istep}.yaml')
fname_analysis_options = os.path.join(run_dir, f'analysis_options.{istep}.yaml')

if MPI:
    rank = MPI.COMM_WORLD.Get_rank()
else:
    rank = 0

if rank == 0:
    print('STEP', istep)

    ## Update analysis options
    aopt = load_yaml(os.path.join(run_dir, 'analysis_options.start.yaml'))
    aopt['general']['folder_output'] = f'outputs.{istep}'
    aopt['general']['fname_output'] = f'NREL-2p5-116-step{istep}'
    # - constrained structural opt for tower mass
    aopt['driver']['optimization']['flag'] = True
    #aopt['driver']['optimization']['tol'] = 1e-6
    #aopt['driver']['optimization']['max_iter'] = 50
    aopt['design_variables']['tower']['layer_thickness']['flag'] = True
    aopt['design_variables']['tower']['outer_diameter']['flag'] = True
    aopt['design_variables']['tower']['outer_diameter']['upper_bound'] = 4.0
    aopt['constraints']['tower']['stress']['flag'] = True
    aopt['constraints']['tower']['global_buckling']['flag'] = True
    aopt['constraints']['tower']['shell_buckling']['flag'] = True
    aopt['merit_figure'] = 'tower_mass'
    save_yaml(fname_analysis_options, aopt)
import datetime

from flask import Flask, jsonify, redirect, render_template, url_for
from flask_moment import Moment

from helpers import (build_schedule, elapsed_percent, ensure_logs_dir,
                     human_readable_time, load_yaml, schedule_expired,
                     setup_logging)
from vlc_client.vlc_client import VLCClient

app = Flask(__name__)
moment = Moment(app)

config = load_yaml('config.yaml')

ensure_logs_dir(config['LOGGING']['path'])
log_file = setup_logging(config['LOGGING']['path'])
app.logger.addHandler(log_file)

vlc = VLCClient(config['VLC'])

SCHEDULE = {"exp": datetime.datetime.now(), "playlist": {}, "current": {}}


@app.route('/')
def index():
    # TODO 6.26.2020: Throw a 404 if vlc is not running instead of breaking app
    # rebuild the schedule if the cache has expired
    if schedule_expired(SCHEDULE):
        try:
            current = vlc.get_status()
            playlist = vlc.get_playlist()
            SCHEDULE.clear()
            SCHEDULE.update(
run_dir = './'
fname_wt_input = os.path.join(run_dir, 'NREL-2p5-116.start.yaml')
fname_modeling_options = os.path.join(run_dir, 'modeling_options_wisdem.yaml')
fname_analysis_options = os.path.join(run_dir, f'analysis_options.{istep}.yaml')

if MPI:
    rank = MPI.COMM_WORLD.Get_rank()
else:
    rank = 0

if rank == 0:
    print('STEP', istep)

    ## Update analysis options
    aopt = load_yaml(os.path.join(run_dir, 'analysis_options.start.yaml'))
    aopt['general']['folder_output'] = f'outputs.{istep}'
    aopt['general']['fname_output'] = f'NREL-2p5-116-step{istep}'
    save_yaml(fname_analysis_options, aopt)

tt = time.time()

# step 1: manually updated turbine rating, rotor size -- NO OPT
# - rated_power: 2500000.0 W
# - rotor_diameter: 116 m
# - hub_height: 80 m
wt_opt, modeling_options, opt_options = run_wisdem(fname_wt_input,
                                                   fname_modeling_options,
                                                   fname_analysis_options)

if rank == 0:
def main(gpkg_in, gpkg_out, layer_in, layer_out):

    # command line system arguments
    gpkg_in = (sys.argv[1])
    gpkg_out = (sys.argv[2])
    layer_in = (sys.argv[3])
    layer_out = (sys.argv[4])

    # database name which will be used for stage 2
    nrn_db = "nrn"

    # load sql yaml file
    sql_load = helpers.load_yaml("sql/sql.yaml")

    # default postgres connection needed to create the nrn database
    conn = connect(
        dbname="postgres",
        user="******",
        host="localhost",
        password="******"
    )

    # postgres database url for geoprocessing
    nrn_url = URL(
        drivername='postgresql+psycopg2',
        host='localhost',
        database=nrn_db,
        username='******',
        port='5432',
        password='******'
    )

    # engine to connect to nrn database
    engine = create_engine(nrn_url)

    # get the isolation level for autocommit
    autocommit = extensions.ISOLATION_LEVEL_AUTOCOMMIT

    # set the isolation level for the connection's cursors
    # will raise ActiveSqlTransaction exception otherwise
    conn.set_isolation_level(autocommit)

    # connect to default connection
    cursor = conn.cursor()

    # drop the nrn database if it exists, then create it if not
    try:
        logger.info("Dropping PostgreSQL database.")
        cursor.execute(sql.SQL("DROP DATABASE IF EXISTS {};").format(sql.Identifier(nrn_db)))
    except Exception:
        logger.exception("Could not drop database.")

    try:
        logger.info("Creating PostgreSQL database.")
        cursor.execute(sql.SQL("CREATE DATABASE {};").format(sql.Identifier(nrn_db)))
    except Exception:
        logger.exception("Failed to create PostgreSQL database.")

    logger.info("Closing default PostgreSQL connection.")
    cursor.close()
    conn.close()

    # connection parameters for newly created database
    nrn_conn = connect(
        dbname=nrn_db,
        user="******",
        host="localhost",
        password="******"
    )

    nrn_conn.set_isolation_level(autocommit)

    # connect to nrn database
    nrn_cursor = nrn_conn.cursor()
    try:
        logger.info("Creating spatially enabled PostgreSQL database.")
        nrn_cursor.execute(sql.SQL("CREATE EXTENSION IF NOT EXISTS postgis;"))
    except Exception:
        logger.exception("Cannot create PostGIS extension.")

    try:
        logger.info("Creating grid function.")
        nrn_cursor.execute(sql.SQL(sql_load["hex_grid"]["function"]))
    except Exception:
        logger.exception("Cannot create PostGIS function.")

    logger.info("Closing NRN PostgreSQL connection.")
    nrn_cursor.close()
    nrn_conn.close()

    # incoming NRN gpkg
    logger.info("Reading incoming GeoPackage.")
    gdf = gpd.read_file(gpkg_in, layer=layer_in)

    # reproject to epsg:3348
    logger.info("Reprojecting to EPSG:3348.")
    gdf = gdf.to_crs({'init': 'epsg:3348'})

    # calculate years since revision using current year (startTime.year) and "REVDATE"
    logger.info("Calculating years since last revision using current year.")
    gdf["SINREV"] = startTime.year - gdf["REVDATE"].str[:4].astype("int64")

    # create representative point for each line segment
    logger.info("Generating representative point for each line segment.")
    gdf["geometry"] = gdf.geometry.representative_point()

    logger.info("Extracting total bounds.")
    minx, miny, maxx, maxy = gdf.geometry.total_bounds

    logger.info("Importing GeoDataFrame into PostGIS.")
    gdf.postgis.to_postgis(con=engine, table_name="reprept", geometry='POINT', if_exists='replace')

    logger.info("Generating hex grid based on total bounds.")
    hex_grid_query = sql_load["gen_hex_grid"]["query"].format(minx, miny, maxx, maxy)

    logger.info("Generating hex grid.")
    grid = gpd.GeoDataFrame.from_postgis(hex_grid_query, engine, geom_col="geom")

    logger.info("Aggregating mean years since last revision over hex grid.")
    aggregate = sql_load["aggregate"]["query"]

    logger.info("Extracting aggregations from PostGIS.")
    gdf = gpd.GeoDataFrame.from_postgis(aggregate, engine, geom_col="geom")

    # overwrite the incoming geopackage
    logger.info("Writing final GeoPackage layer.")
    gdf.to_file(gpkg_out, layer=layer_out, driver="GPKG")
def export_data(self) -> None:
    """Exports and packages all data."""

    logger.info("Exporting output data.")

    # Configure export progress bar.
    file_count = 0
    for lang, dfs in self.dframes.items():
        for frmt in self.formats:
            count = len(set(dfs).intersection(set(self.distribution_formats[lang][frmt]["conform"])))
            file_count += (len(self.kml_groups[lang]) * count) if frmt == "kml" else count
    export_progress = trange(file_count, desc="Exporting data", bar_format=self.bar_format)

    # Iterate export formats and languages.
    for lang, dfs in self.dframes.items():
        for frmt in self.formats:

            # Retrieve export specifications.
            export_specs = self.distribution_formats[lang][frmt]

            # Filter required dataframes.
            dframes = {name: df.copy(deep=True) for name, df in dfs.items() if name in export_specs["conform"]}

            # Configure export directory.
            export_dir, export_file = itemgetter("dir", "file")(export_specs["data"])
            export_dir = self.output_path / self.format_path(export_dir) / self.format_path(export_file)

            # Configure mapped layer names.
            nln_map = {table: self.format_path(export_specs["conform"][table]["name"]) for table in dframes}

            # Configure export kwargs.
            kwargs = {
                "driver": {"gml": "GML", "gpkg": "GPKG", "kml": "KML", "shp": "ESRI Shapefile"}[frmt],
                "type_schemas": helpers.load_yaml(filepath.parents[1] / "distribution_format.yaml"),
                "export_schemas": export_specs,
                "nln_map": nln_map,
                "keep_uuid": False,
                "outer_pbar": export_progress,
                "epsg": 4617,
                "geom_type": {table: df.geom_type.iloc[0] for table, df in dframes.items()
                              if "geometry" in df.columns}
            }

            # Configure KML.
            if frmt == "kml":

                # Configure export names.
                self.kml_groups[lang]["name"] = self.kml_groups[lang]["name"].map(
                    lambda name: str(export_dir).replace("<name>", name))

                # Iterate export datasets.
                for table, df in dframes.items():

                    # Map dataframe queries (more efficient than iteratively querying).
                    self.kml_groups[lang]["df"] = self.kml_groups[lang]["query"].map(
                        lambda query: df.query(query).copy(deep=True))

                    # Iterate KML groups.
                    for kml_group in self.kml_groups[lang].itertuples(index=False):

                        # Export data.
                        kml_name, kml_df = attrgetter("name", "df")(kml_group)
                        helpers.export({table: kml_df}, kml_name, **kwargs)

            # Configure non-KML.
            else:

                # Export data.
                helpers.export(dframes, export_dir, **kwargs)

    # Close progress bar.
    export_progress.close()
def update_distribution_docs(self) -> None:
    """
    Writes updated documentation to data/processed for:
    - completion rates
    - release notes
    """

    def write_documents(data: dict, filename: str) -> None:
        """
        Updates a document template with a dictionary and exports:
        1) an rst file representing the updated template.
        2) a yaml file containing the updated dictionary.

        :param dict data: dictionary of values used to populate the document template.
        :param str filename: basename of a document in ../distribution_docs to be updated.
        """

        # Configure source and destination paths.
        src = filepath.parent / f"distribution_docs/{filename}.rst"
        dst = self.output_path / filename

        try:
            # Load document as jinja template.
            with open(src, "r") as doc:
                template = jinja2.Template(doc.read())

            # Update template.
            updated_doc = template.render(data)

        except (jinja2.TemplateError, jinja2.TemplateAssertionError, jinja2.UndefinedError) as e:
            logger.exception(f"Unable to render updated Jinja2.Template for: {src}.")
            logger.exception(e)
            sys.exit(1)

        # Export updated document.
        try:
            # Write rst.
            with open(dst.with_suffix(".rst"), "w") as doc:
                doc.write(updated_doc)

            # Write yaml.
            with open(dst.with_suffix(".yaml"), "w") as doc:
                yaml.dump(data, doc)

        except (ValueError, yaml.YAMLError) as e:
            logger.exception(f"Unable to write document: {dst}.")
            logger.exception(e)
            sys.exit(1)

    # Update release notes.
    logger.info("Updating documentation: release notes.")

    # Compile previous data.
    data = helpers.load_yaml(filepath.parent / "distribution_docs/release_notes.yaml")

    # Update release notes - edition, release date, validity date.
    data[self.source]["edition"] = f"{self.major_version}.{self.minor_version}"
    data[self.source]["release_date"] = datetime.now().strftime("%Y-%m")
    data[self.source]["validity_date"] = datetime.now().strftime("%Y-%m")

    # Update release notes - number of kilometers.
    # Note: EPSG:3348 used to get geometry lengths in meters.
    kms = int(round(self.dframes["en"]["roadseg"].to_crs("EPSG:3348").length.sum() / 1000, 0))
    data[self.source]["number_of_kilometers"] = f"{kms:,d}"

    # Write updated documents.
    write_documents(data, "release_notes")

    # Update completion rates.
    logger.info("Updating documentation: completion rates.")

    # Compile previous data.
    data = helpers.load_yaml(filepath.parent / "distribution_docs/completion_rates.yaml")

    # Update completion rates.
    # Iterate dataframe and column names.
    for table, df in self.dframes["en"].items():
        for col in data[table]:

            # Configure column completion rate.
            # Note: Values between 0 and 1 are rounded to 1, values between 99 and 100 are rounded to 99.
            completion_rate = (len(df.loc[~df[col].isin({"Unknown", -1})]) / len(df)) * 100
            if 0 < completion_rate < 1:
                completion_rate = 1
            if 99 < completion_rate < 100:
                completion_rate = 99

            # Update column value for source.
            data[table][col][self.source] = int(completion_rate)

            # Update column average.
            vals = itemgetter(*set(data[table][col]) - {"avg"})(data[table][col])
            data[table][col]["avg"] = int(round(sum(map(int, vals)) / len(vals), 0))

    # Write updated documents.
    write_documents(data, "completion_rates")
def main(osm_in, nrn_in, out, out_layer):

    # command line system arguments
    osm_in = (sys.argv[1])
    nrn_in = (sys.argv[2])
    out = (sys.argv[3])
    out_layer = (sys.argv[4])

    # database name which will be used for stage 2
    nrn_db = "nrn"

    # load sql yaml file
    sql_load = helpers.load_yaml("sql/sql.yaml")

    # default postgres connection needed to create the nrn database
    conn = connect(
        dbname="postgres",
        user="******",
        host="localhost",
        password="******"
    )

    # postgres database url for geoprocessing
    nrn_url = URL(
        drivername='postgresql+psycopg2',
        host='localhost',
        database=nrn_db,
        username='******',
        port='5432',
        password='******'
    )

    # engine to connect to nrn database
    engine = create_engine(nrn_url)

    # get the isolation level for autocommit
    autocommit = extensions.ISOLATION_LEVEL_AUTOCOMMIT

    # set the isolation level for the connection's cursors
    # will raise ActiveSqlTransaction exception otherwise
    conn.set_isolation_level(autocommit)

    # connect to default connection
    cursor = conn.cursor()

    # drop the nrn database if it exists, then create it if not
    try:
        logger.info("Dropping PostgreSQL database.")
        cursor.execute(sql.SQL("DROP DATABASE IF EXISTS {};").format(sql.Identifier(nrn_db)))
    except Exception:
        logger.exception("Could not drop database.")

    try:
        logger.info("Creating PostgreSQL database.")
        cursor.execute(sql.SQL("CREATE DATABASE {};").format(sql.Identifier(nrn_db)))
    except Exception:
        logger.exception("Failed to create PostgreSQL database.")

    logger.info("Closing default PostgreSQL connection.")
    cursor.close()
    conn.close()

    # connection parameters for newly created database
    nrn_conn = connect(
        dbname=nrn_db,
        user="******",
        host="localhost",
        password="******"
    )

    nrn_conn.set_isolation_level(autocommit)

    # connect to nrn database
    nrn_cursor = nrn_conn.cursor()
    try:
        logger.info("Creating spatially enabled PostgreSQL database.")
        nrn_cursor.execute(sql.SQL("CREATE EXTENSION IF NOT EXISTS postgis;"))
    except Exception:
        logger.exception("Cannot create PostGIS extension.")

    try:
        logger.info("Creating grid function.")
        nrn_cursor.execute(sql.SQL(sql_load["hex_grid"]["function"]))
    except Exception:
        logger.exception("Cannot create PostGIS function.")

    logger.info("Closing NRN PostgreSQL connection.")
    nrn_cursor.close()
    nrn_conn.close()

    # Download provincial boundaries from the STC website.
    logger.info("Downloading provincial boundaries.")
    pr_url = "http://www12.statcan.gc.ca/census-recensement/2011/geo/bound-limit/files-fichiers/2016/lpr_000b16a_e.zip"
    urllib.request.urlretrieve(pr_url, '../data/interim/pr.zip')
    with zipfile.ZipFile("../data/interim/pr.zip", "r") as zip_ref:
        zip_ref.extractall("../data/interim/pr")

    logger.info("Reading incoming provincial boundaries.")
    pr = gpd.read_file("../data/interim/pr/lpr_000b16a_e.shp")

    # Assign geodataframe total bounds to min and max XY.
    minx, miny, maxx, maxy = pr.geometry.total_bounds

    # Assign list to variable extent.
    extent = [minx, maxx, miny, maxy]

    # Convert extent to string and separate by comma.
    extent = ','.join(map(str, extent))

    # Create a hexagon grid using the provincial boundary extent and QGIS Processing.
    logger.info("Generating grid.")
    qgis_processing.gen_grid(extent)

    # Incoming OSM data
    logger.info("Reading incoming OSM data.")
    osm = gpd.read_file(osm_in)

    # Incoming NRN GPKG
    logger.info("Reading incoming GPKG.")
    gdf = gpd.read_file(nrn_in)

    logger.info("Reading incoming grid.")
    grid = gpd.read_file("../data/interim/output.gpkg", driver="GPKG")
    grid.crs = {"init": "epsg:3348"}

    logger.info("Importing GeoDataFrame into PostGIS.")
    gdf.postgis.to_postgis(con=engine, table_name="nrn", geometry='LineString', if_exists='replace')

    logger.info("Importing OSM data into PostGIS.")
    osm.postgis.to_postgis(con=engine, table_name="osm", geometry='LineString', if_exists='replace')

    logger.info("Importing grid data into PostGIS.")
    grid.postgis.to_postgis(con=engine, table_name="hex_grid", geometry='Polygon', if_exists='replace')

    logger.info("Comparing NRN and OSM road network length.")
    length = sql_load["length"]["query"]

    logger.info("Extracting final from PostGIS.")
    gdf = gpd.GeoDataFrame.from_postgis(length, engine, geom_col="geom")
    gdf.crs = {"init": "epsg:3348"}

    logger.info("Writing output GeoPackage.")
    gdf.to_file(out, layer=out_layer, driver="GPKG")
def compile_domains(self):
    """Compiles field domains for the target dataframes."""

    logging.info("Compiling field domains.")
    self.domains = dict()

    for suffix in ("en", "fr"):

        # Load yaml.
        logger.info("Loading \"{}\" field domains yaml.".format(suffix))
        domains_yaml = helpers.load_yaml(os.path.abspath("../field_domains_{}.yaml".format(suffix)))

        # Compile domain values.
        logger.info("Compiling \"{}\" domain values.".format(suffix))

        for table in domains_yaml:
            # Register table.
            if table not in self.domains.keys():
                self.domains[table] = dict()

            for field, vals in domains_yaml[table].items():
                # Register field.
                if field not in self.domains[table].keys():
                    self.domains[table][field] = {"values": list(), "all": None}

                try:
                    # Configure reference domain.
                    while isinstance(vals, str):
                        if vals.find(";") > 0:
                            table_ref, field_ref = vals.split(";")
                        else:
                            table_ref, field_ref = table, vals
                        vals = domains_yaml[table_ref][field_ref]

                    # Compile domain values.
                    if vals is None:
                        self.domains[table][field]["values"] = self.domains[table][field]["all"] = None
                        continue

                    elif isinstance(vals, dict):
                        self.domains[table][field]["values"].extend(vals.values())
                        if self.domains[table][field]["all"] is None:
                            self.domains[table][field]["all"] = vals
                        else:
                            self.domains[table][field]["all"] = \
                                {k: [v, vals[k]] for k, v in self.domains[table][field]["all"].items()}

                    elif isinstance(vals, list):
                        self.domains[table][field]["values"].extend(vals)
                        if self.domains[table][field]["all"] is None:
                            self.domains[table][field]["all"] = vals
                        else:
                            self.domains[table][field]["all"] = list(zip(self.domains[table][field]["all"], vals))

                    else:
                        logger.exception("Invalid schema definition for table: {}, field: {}.".format(table, field))
                        sys.exit(1)

                except (AttributeError, KeyError, ValueError):
                    logger.exception("Invalid schema definition for table: {}, field: {}.".format(table, field))
                    sys.exit(1)

    logging.info("Identifying field domain functions.")
    self.domains_funcs = list()

    # Identify functions from field_map_functions.
    for func in [f for f in getmembers(field_map_functions) if isfunction(f[1])]:
        if "domain" in func[1].__code__.co_varnames:
            self.domains_funcs.append(func[0])
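# compile_domains above resolves string domain values as references: "table;field" points
# at another table's field, while a bare string points at another field in the same table.
# A hypothetical field_domains excerpt, shown as the parsed dict, that the while-loop would
# follow until it reaches a dict or list of literal values (all names and labels invented
# for illustration):
example_domains_yaml = {
    "roadseg": {
        "pavstatus": {1: "Paved", 2: "Unpaved"},   # literal domain (code -> label)
        "pavsurf": "pavstatus",                    # same-table reference -> roadseg["pavstatus"]
    },
    "ferryseg": {
        "pavstatus": "roadseg;pavstatus",          # cross-table reference -> roadseg's domain
    },
}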
def __init__(self, source: str, remove: bool = False) -> None:
    """
    Initializes an NRN stage.

    :param str source: abbreviation for the source province / territory.
    :param bool remove: removes pre-existing files within the data/processed directory for the specified source,
        excluding change logs, default False.
    """

    self.stage = 5
    self.source = source.lower()
    self.remove = remove
    self.major_version = None
    self.minor_version = None

    # Configure and validate input data path.
    self.data_path = filepath.parents[2] / f"data/interim/{self.source}.gpkg"
    if not self.data_path.exists():
        logger.exception(f"Input data not found: {self.data_path}.")
        sys.exit(1)

    # Configure output path.
    self.output_path = filepath.parents[2] / f"data/processed/{self.source}"

    # Conditionally clear output namespace.
    namespace = list(filter(lambda f: f.stem != f"{self.source}_change_logs", self.output_path.glob("*")))

    if len(namespace):
        logger.warning("Output namespace already occupied.")

        if self.remove:
            logger.warning("Parameter remove=True: Removing conflicting files.")

            for f in namespace:
                logger.info(f"Removing conflicting file: \"{f}\".")

                if f.is_file():
                    f.unlink()
                else:
                    helpers.rm_tree(f)

        else:
            logger.exception("Parameter remove=False: Unable to proceed while output namespace is occupied. Set "
                             "remove=True (-r) or manually clear the output namespace.")
            sys.exit(1)

    # Configure field defaults and domains.
    self.defaults = {lang: helpers.compile_default_values(lang=lang) for lang in ("en", "fr")}
    self.domains = helpers.compile_domains(mapped_lang="fr")

    # Configure export formats.
    distribution_formats_path = filepath.parent / "distribution_formats"
    self.formats = [f.stem for f in (distribution_formats_path / "en").glob("*")]
    self.distribution_formats = {
        "en": {frmt: helpers.load_yaml(distribution_formats_path / f"en/{frmt}.yaml") for frmt in self.formats},
        "fr": {frmt: helpers.load_yaml(distribution_formats_path / f"fr/{frmt}.yaml") for frmt in self.formats}
    }

    # Define custom progress bar format.
    # Note: the only change from default is moving the percentage to the right end of the progress bar.
    self.bar_format = "{desc}: |{bar}| {percentage:3.0f}% {r_bar}"

    # Load data.
    self.dframes = helpers.load_gpkg(self.data_path)