def render_template(self, dest_url, data):
    g.current_page = self.id
    title = self.title
    logger.info(
        f"Rendering Page '{dest_url}' (from template '{self.template_file}')"
    )
    page_content_html = self._render_page_content(data)
    page_top_html = self._render_page_top(title)
    page_datasets_used_html = self._render_page_datasets_used()
    page_bottom_html = self._render_page_bottom()

    dest_path = f"{c.PUBLIC_DIR}/{dest_url}"
    dest_dir = PurePath(dest_path).parent
    Path(dest_dir).mkdir(parents=True, exist_ok=True)
    with open(dest_path, "w") as fh:
        page_size = fh.write(page_top_html + page_content_html +
                             page_datasets_used_html + page_bottom_html)

    # Register the rendered page in the site-wide page index, skipping the
    # built-in ODP pages themselves.
    if g.current_page not in [
            'odp_about', 'odp_dataset', 'odp_datasets', 'odp_pages'
    ]:
        g.odp_pages.append({
            "title": Template(title).render(data),
            "path": dest_url,
            "datasets": g.datasets_in_page[g.current_page],
            "size": page_size
        })
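# render_template() above and PagesRendering() below rely on a shared-state
# module 'g'. A minimal sketch of the attributes they assume, reconstructed
# from usage in this section (not the project's actual module):

class g:
    current_page = None    # id of the page currently being rendered
    odp_pages = []         # page descriptors, later dumped to odp_pages.json
    datasets_in_page = {}  # page id -> list of datasets used on that page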
def PagesRendering():
    logger.info("Pages Rendering...")

    # Render the regular pages first; this also populates g.odp_pages.
    for p in Pages():
        p.render()

    # Dump the dataset catalogue to JSON for the datasets index page.
    with open(c.CONFIG_DATASETS_FILE) as fh:
        datasets_in_yaml = yaml.load(fh, Loader=yaml.FullLoader)['datasets']
    data = []
    for ds_id in datasets_in_yaml:
        row = datasets_in_yaml[ds_id]
        row['id'] = ds_id
        data.append(row)
    json_file = f"{c.PUBLIC_DATA_DIR}/odp_datasets.json"
    with open(json_file, "w", encoding='utf-8') as json_fh:
        json_fh.write(json.dumps({"data": data}))

    # Dump the page index collected during rendering.
    json_file = f"{c.PUBLIC_DATA_DIR}/odp_pages.json"
    with open(json_file, "w", encoding='utf-8') as json_fh:
        json_fh.write(json.dumps({"data": g.odp_pages}))

    # Finally render the built-in ODP pages defined in the config file.
    with open(c.CONFIG_ODP_PAGES_FILE) as fh:
        odp_pages_in_yaml = yaml.load(fh, Loader=yaml.FullLoader)['pages']
    for p_id in odp_pages_in_yaml:
        p = Page(p_id, odp_pages_in_yaml[p_id])
        p.render()
def csv_to_json(self, path):
    Path(c.PUBLIC_DATA_DIR).mkdir(parents=True, exist_ok=True)
    json_file = f"{c.PUBLIC_DATA_DIR}/{self.data_file}"
    delimiter = self.csv_delimiter
    encoding = self.encoding if hasattr(self, 'encoding') else 'utf-8'
    logger.info(f"Generating '{json_file}' from csv file '{path}'...")
    with open(path, mode="r", encoding=encoding) as csvfile:
        reader = csv.DictReader(csvfile, delimiter=delimiter)
        with open(json_file, "w", encoding='utf-8') as jsonfile:
            data_rows = []
            fields_ids = self.fields_ids()
            for r_row in reader:
                # Map each source column ('src_id') to its public field id.
                row = {f['id']: r_row[f['src_id']] for f in fields_ids}
                # Computed fields: run the snippet in the field's 'code' key
                # and capture whatever it prints as the field value.
                for f in self.data_fields:
                    if 'code' in f:
                        with stdoutIO() as s:
                            exec(f['code'])
                        row[f['id']] = s.getvalue()
                data_rows.append(row)
            json.dump({"data": data_rows}, jsonfile, ensure_ascii=False)
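# stdoutIO() is used above but not defined in this section. A minimal sketch
# of a stdout-capturing context manager that would satisfy that usage (an
# assumption about the project's actual helper, not its confirmed code):
import sys
from contextlib import contextmanager
from io import StringIO

@contextmanager
def stdoutIO(stdout=None):
    # Swap sys.stdout for a StringIO so anything the exec()'d snippet
    # prints can be read back afterwards with .getvalue().
    old = sys.stdout
    if stdout is None:
        stdout = StringIO()
    sys.stdout = stdout
    try:
        yield stdout
    finally:
        sys.stdout = old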
def data_download(self, force=False):
    if not hasattr(self, 'download_url'):
        logger.warning(f"DataSet '{self.id}' has no 'download_url'")
        return
    url = self.download_url
    if hasattr(self, 'downloaded_file'):
        path = save_url_to_directory(url, c.PUBLIC_DATA_DIR,
                                     self.downloaded_file, force=force)
    else:
        path = save_url_to_directory(url, c.PUBLIC_DATA_DIR, force=force)

    # Zip archives: extract the configured member and work on that instead.
    file_ext = os.path.splitext(path)[1]
    if file_ext == '.zip':
        logger.info(
            f"Extracting '{self.file_from_zip}' from zip file '{path}'")
        ZipFile(path, 'r').extract(self.file_from_zip, path=c.DATA_DIR)
        path = f"{c.DATA_DIR}/{self.file_from_zip}"
        file_ext = os.path.splitext(path)[1]

    if file_ext == '.csv':
        self.csv_to_json(path)
def save_url_to_directory(url: str, directory: str, downloaded_file=None,
                          force=False) -> str:
    if downloaded_file:
        path = f"{directory}/{downloaded_file}"
    else:
        # Derive the local filename from the last path segment of the URL.
        a = urlparse(url)
        filename = PurePath(a.path).name
        path = f"{directory}/{filename}"
    if not Path(path).is_file() or force:
        logger.info(f"Downloading '{url}' to '{path}'...")
        response = get(url)
        Path(directory).mkdir(parents=True, exist_ok=True)
        with open(path, 'wb') as f:
            f.write(response.content)
    else:
        logger.info(f"No download of '{url}' because '{path}' already exists")
    return path
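# Example usage (hypothetical URL): a second call is a no-op while the file
# exists on disk; force=True re-downloads it regardless.
#
#   path = save_url_to_directory("https://example.org/data/stats.csv",
#                                c.DATA_DIR)
#   path = save_url_to_directory("https://example.org/data/stats.csv",
#                                c.DATA_DIR, force=True)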
def DataSetsBuilding():
    logger.info("DataSets Building...")
    for ds in DataSets():
        ds.data_download()
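# A plausible entry point wiring the two build steps together: datasets are
# downloaded and converted first so the pages can reference their JSON files.
# This is an assumption about how the steps are invoked, not the project's
# confirmed main.
if __name__ == "__main__":
    DataSetsBuilding()
    PagesRendering()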