def read_meta(inid, git=True, src_dir='', git_data_dir=None):
    """Load the metadata for one indicator, including translated copies.

    The main metadata file is parsed with ``yamlmd.read_yamlmd``; element 0
    of the result is turned into the metadata dict and element 1 is joined
    into a single string stored under ``'page_content'``.

    Args:
        inid: str. Indicator id, used to locate ``<inid>.md``.
        git: bool. When true, the values returned by
            ``sdg.git.get_git_updates`` are merged into the metadata.
        src_dir: str. Base path handed to ``input_path``.
        git_data_dir: forwarded unchanged to ``sdg.git.get_git_updates``.

    Returns:
        dict of metadata. For every sub-folder of the meta folder that
        contains ``<inid>.md``, the dict also holds a nested dict keyed by the
        sub-folder name (treated as a language code) with its own
        ``'page_content'`` entry.
    """
    base_path = input_path(inid, ftype='meta', src_dir=src_dir)
    parsed = yamlmd.read_yamlmd(base_path)
    meta = dict(parsed[0])

    if git:
        meta.update(
            sdg.git.get_git_updates(inid, src_dir=src_dir,
                                    git_data_dir=git_data_dir)
        )

    # Set after the git merge so the page body always wins on a key clash.
    meta['page_content'] = ''.join(parsed[1])

    # Each immediate sub-folder of the meta folder may hold a translated
    # version of this indicator's metadata.
    meta_folder = input_path(None, ftype='meta', src_dir=src_dir)
    _, subfolders, _ = next(os.walk(meta_folder))
    for language in subfolders:
        translated_path = os.path.join(meta_folder, language, inid + '.md')
        if not os.path.isfile(translated_path):
            continue
        translated = yamlmd.read_yamlmd(translated_path)
        localized = dict(translated[0])
        localized['page_content'] = ''.join(translated[1])
        meta[language] = localized

    return meta
def get_git_update(inid, ftype, src_dir='', git_data_dir=None):
    """Return details of the latest git commit that touched an indicator file.

    The repository is discovered by searching upwards from the file's own
    directory, because the file may live inside a git submodule rather than
    in the repository of the current working directory.

    Args:
        inid: str. Indicator id, passed to ``input_path``.
        ftype: str. File type, passed to ``input_path``.
        src_dir: str. Base path, passed to ``input_path``.
        git_data_dir: forwarded unchanged to ``input_path``.

    Returns:
        dict with the keys ``'date'`` (commit date as a string), ``'sha'``
        (commit hash), ``'file'`` (path used for the history lookup),
        ``'id'`` (the indicator id) and ``'commit_url'`` (GitHub URL of the
        commit).

    Raises:
        StopIteration: ``next`` is called without a default, so this
            propagates when no commit is found for the file.
    """
    f = input_path(inid, ftype=ftype, src_dir=src_dir,
                   git_data_dir=git_data_dir)
    f_dir = os.path.dirname(f)

    repo = git.Repo(f_dir, search_parent_directories=True)

    # Need to translate relative to the repo root (this may be a submodule)
    repo_dir = os.path.relpath(repo.working_dir, os.getcwd())
    f = os.path.relpath(f, repo_dir)

    commit = next(repo.iter_commits(paths=f, max_count=1))
    git_date = str(commit.committed_datetime.date())
    git_sha = commit.hexsha

    # Turn the remote URL into a commit URL. Handles both the https form
    # (github.com/owner/repo) and the ssh form (github.com:owner/repo).
    remote = repo.remote().url
    remote_bare = re.sub(r'^.*github\.com[:/]', '', remote)
    # Strip only a trailing ".git"; removing every occurrence would corrupt
    # names such as "owner/owner.github.io".
    remote_bare = re.sub(r'\.git/?$', '', remote_bare)
    commit_url = 'https://github.com/' + remote_bare + '/commit/' + git_sha

    return {
        'date': git_date,
        'sha': git_sha,
        'file': f,
        'id': inid,
        'commit_url': commit_url,
    }
def compare_reload_data(inid, src_dir, site_dir):
    """Check that the JSON written for an indicator matches its source csv.

    The original csv is loaded, the combined JSON output is reloaded, and the
    ``'data'`` entry of the JSON is compared against the csv with
    ``isclose_df``.

    Args:
        inid: str. Indicator id.
        src_dir: str. Base path used to locate the source csv.
        site_dir: str. Base path used to locate the written JSON.

    Returns:
        True when both tables are empty; otherwise the result of
        ``isclose_df``. A message is printed when that result is falsy.
    """
    csv_path = input_path(inid, ftype='data', src_dir=src_dir)
    jsn_path = output_path(inid, ftype='comb', format='json',
                           site_dir=site_dir)

    # Use a context manager so the file handle is closed deterministically.
    with open(jsn_path) as stream:
        jsn = json.load(stream)

    df_csv = pd.read_csv(csv_path, encoding='utf-8')
    # JSON nulls come back as None; convert them to NaN before comparing.
    df_jsn = pd.DataFrame(jsn['data']).replace({None: np.nan})

    # Account for empty data
    if df_jsn.shape[0] == df_csv.shape[0] == 0:
        return True

    # Align the JSON columns to the csv column order before comparing.
    df_jsn = df_jsn[df_csv.columns.values]

    status = isclose_df(df_csv, df_jsn)
    if not status:
        print("reload error in " + inid)

    return status
def check_all_csv(src_dir=''):
    """Run csv checks on all indicator csvs in the data directory

    Args:
        src_dir: str. Base path for the project. Csv files are found relative
            to this

    Returns:
        The combined result of ``check_csv`` over every file. Any file whose
        check raises an exception counts as a failure; the path and the error
        are printed and checking continues with the next file.

    Raises:
        FileNotFoundError: if ``get_ids`` returns no indicator ids.
    """
    status = True
    ids = get_ids(src_dir=src_dir)
    if len(ids) == 0:
        raise FileNotFoundError("No indicator IDs found")

    # These are data csv files, not metadata files.
    print("Checking " + str(len(ids)) + " csv files...")

    for inid in ids:
        # Named csv_path so the local does not shadow the stdlib csv module.
        csv_path = input_path(inid, ftype='data', src_dir=src_dir,
                              must_work=True)
        try:
            # "&" rather than "and": every file is checked even after an
            # earlier failure.
            status = status & check_csv(csv_path)
        except Exception as e:
            # Best effort: report the failing file and keep going.
            status = False
            print(csv_path, e)

    return status
def read_meta(inid, git=True, src_dir=''):
    """Load the metadata for one indicator.

    The metadata file is parsed with ``yamlmd.read_yamlmd``; element 0 of the
    result is turned into the metadata dict and element 1 is joined into a
    single string stored under ``'page_content'``.

    Args:
        inid: str. Indicator id.
        git: bool. When true, the values returned by
            ``sdg.git.get_git_updates`` are merged into the metadata.
        src_dir: str. Base path handed to ``input_path``.

    Returns:
        dict of metadata including the ``'page_content'`` entry.
    """
    meta_path = input_path(inid, ftype='meta', src_dir=src_dir)
    parsed = yamlmd.read_yamlmd(meta_path)
    meta = dict(parsed[0])

    if git:
        meta.update(sdg.git.get_git_updates(inid, src_dir=src_dir))

    # Set after the git merge so the page body always wins on a key clash.
    meta['page_content'] = ''.join(parsed[1])
    return meta
def check_all_meta(src_dir=''):
    """Validate the metadata file of every indicator.

    Args:
        src_dir: str. Base path for the project. Metadata files are found
            relative to this

    Returns:
        The combined result of ``check_meta`` over all metadata files.

    Raises:
        FileNotFoundError: if ``get_ids`` returns no indicator ids.
    """
    ids = get_ids(src_dir=src_dir)
    if len(ids) == 0:
        raise FileNotFoundError("No indicator IDs found")

    print("Checking " + str(len(ids)) + " metadata files...")

    status = True
    for inid in ids:
        meta_path = input_path(inid, ftype='meta', src_dir=src_dir,
                               must_work=True)
        with open(meta_path, encoding="UTF-8") as handle:
            # Only the first YAML document in the file is checked.
            first_doc = next(yaml.safe_load_all(handle))
        status &= check_meta(first_doc, fname=meta_path)

    return status
def test_in_path():
    """The meta input path for an indicator is ``meta/<inid>.md``."""
    expected = os.path.join('meta', '1-2-1.md')
    actual = input_path(inid="1-2-1", ftype='meta', src_dir='')
    assert actual == expected
def get_inid_data(inid, src_dir=''):
    """Read the data csv of one indicator into a DataFrame.

    Args:
        inid: str. Indicator id.
        src_dir: str. Base path handed to ``input_path``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for the indicator's csv.
    """
    csv_path = input_path(inid, ftype='data', src_dir=src_dir, must_work=True)
    return pd.read_csv(csv_path)