예제 #1
0
def read_meta(inid, git=True, src_dir='', git_data_dir=None):
    """Read an indicator's metadata file together with any translations.

    Args:
        inid: str. Indicator ID. Used to locate the main metadata file
            through ``input_path`` and to build the ``<inid>.md`` file
            name looked up inside each language subfolder.
        git: bool. When true, the mapping returned by
            ``sdg.git.get_git_updates`` is merged into the metadata.
        src_dir: str. Base path handed to ``input_path``.
        git_data_dir: Passed through unchanged to
            ``sdg.git.get_git_updates``.

    Returns:
        dict. The fields from the first element returned by
        ``yamlmd.read_yamlmd``, optionally overlaid with the git update
        fields, plus a ``page_content`` key holding the joined second
        element. For every subfolder of the meta folder that contains
        ``<inid>.md``, a nested dict of the same shape is stored under
        the subfolder's name.
    """
    # Read and write paths may be different
    fr = input_path(inid, ftype='meta', src_dir=src_dir)

    meta_md = yamlmd.read_yamlmd(fr)
    meta = dict(meta_md[0])
    if git:
        # Git fields win over same-named keys read from the file.
        meta.update(sdg.git.get_git_updates(inid,
                                            src_dir=src_dir,
                                            git_data_dir=git_data_dir))

    meta['page_content'] = ''.join(meta_md[1])

    # Now look for all subfolders of the meta folder, which may contain
    # multilingual metadata, and add them as well.
    meta_folder = input_path(None, ftype='meta', src_dir=src_dir)
    # The first os.walk() entry lists the direct subfolders. os.walk()
    # yields nothing for a folder it cannot read, so supply a default
    # rather than letting a bare StopIteration escape.
    languages = next(os.walk(meta_folder), (meta_folder, [], []))[1]
    for language in languages:
        i18n_fr = os.path.join(meta_folder, language, inid + '.md')
        if not os.path.isfile(i18n_fr):
            # No translation of this indicator in this subfolder.
            continue
        i18n_meta_md = yamlmd.read_yamlmd(i18n_fr)
        i18n_meta = dict(i18n_meta_md[0])
        i18n_meta['page_content'] = ''.join(i18n_meta_md[1])
        # NOTE(review): a subfolder whose name equals an existing
        # metadata key replaces that key here - confirm this is intended.
        meta[language] = i18n_meta

    return meta
예제 #2
0
def get_git_update(inid, ftype, src_dir='', git_data_dir=None):
    """Return details of the latest git commit that touched a file.

    The repository is discovered from the file's own directory (searching
    parent directories), so a file living in a submodule is resolved
    against that submodule's repository.

    Args:
        inid: str. Indicator ID, forwarded to ``input_path`` and echoed
            back in the result.
        ftype: File type forwarded to ``input_path``.
        src_dir: str. Base path forwarded to ``input_path``.
        git_data_dir: Forwarded to ``input_path``.

    Returns:
        dict with keys ``date`` (commit date as a string), ``sha``,
        ``file`` (path relative to the repository directory), ``id`` and
        ``commit_url`` (a github.com commit link).

    Raises:
        StopIteration: if the repository has no commit for the file.
    """
    f = input_path(inid,
                   ftype=ftype,
                   src_dir=src_dir,
                   git_data_dir=git_data_dir)
    f_dir = os.path.dirname(f)

    repo = git.Repo(f_dir, search_parent_directories=True)
    # Need to translate relative to the repo root (this may be a submodule)
    repo_dir = os.path.relpath(repo.working_dir, os.getcwd())
    f = os.path.relpath(f, repo_dir)

    commit = next(repo.iter_commits(paths=f, max_count=1))
    git_date = str(commit.committed_datetime.date())
    git_sha = commit.hexsha

    # Turn the remote URL into a commit URL. Both the SSH form
    # (github.com:owner/repo) and the HTTPS form (github.com/owner/repo)
    # are reduced to "owner/repo".
    remote = repo.remote().url
    remote_bare = re.sub(r'^.*github\.com[:/]', '', remote)
    # Strip only a trailing ".git"; removing every occurrence would
    # corrupt names such as "owner.github.io".
    remote_bare = re.sub(r'\.git$', '', remote_bare)
    commit_url = 'https://github.com/' + remote_bare + '/commit/' + git_sha

    return {
        'date': git_date,
        'sha': git_sha,
        'file': f,
        'id': inid,
        'commit_url': commit_url
    }
def compare_reload_data(inid, src_dir, site_dir):
    """Check that the JSON written for an indicator matches its source csv.

    The csv is read from the input location and the ``comb`` JSON from
    the output location; the ``data`` entry of the JSON is turned back
    into a DataFrame and compared with the csv using ``isclose_df``.

    Args:
        inid: str. Indicator ID.
        src_dir: str. Base path forwarded to ``input_path``.
        site_dir: str. Base path forwarded to ``output_path``.

    Returns:
        True when both frames are empty, otherwise the result of
        ``isclose_df``. A message is printed when the comparison fails.
    """
    csv_path = input_path(inid, ftype='data', src_dir=src_dir)
    jsn_path = output_path(inid,
                           ftype='comb',
                           format='json',
                           site_dir=site_dir)

    # Close the file deterministically instead of leaking the handle.
    with open(jsn_path) as stream:
        jsn = json.load(stream)

    df_csv = pd.read_csv(csv_path, encoding='utf-8')
    # JSON nulls come back as None; use NaN so they compare like csv gaps.
    df_jsn = pd.DataFrame(jsn['data']).replace({None: np.nan})

    # Account for empty data
    if df_jsn.shape[0] == df_csv.shape[0] == 0:
        return True

    # Select and order the JSON columns to line up with the csv columns.
    df_jsn = df_jsn[df_csv.columns.values]

    status = isclose_df(df_csv, df_jsn)
    if not status:
        print("reload error in " + inid)

    return status
예제 #4
0
def check_all_csv(src_dir=''):
    """Run csv checks on all indicator csvs in the data directory

    Args:
        src_dir: str. Base path for the project. Csv
            files are found relative to this

    Returns:
        The ``&``-combined result of ``check_csv`` over every indicator;
        False as soon as any check fails or raises.

    Raises:
        FileNotFoundError: if ``get_ids`` returns no indicator IDs.
    """

    status = True

    ids = get_ids(src_dir=src_dir)

    if len(ids) == 0:
        raise FileNotFoundError("No indicator IDs found")

    # These are data (csv) files, not metadata files.
    print("Checking " + str(len(ids)) + " csv files...")

    for inid in ids:
        # Path resolution stays outside the try: with must_work=True a
        # failure here propagates to the caller.
        csv_path = input_path(inid, ftype='data', src_dir=src_dir,
                              must_work=True)
        try:
            status = status & check_csv(csv_path)
        except Exception as e:
            # Best effort: report the failing file and keep checking the
            # remaining indicators.
            status = False
            print(csv_path, e)
    return status
def read_meta(inid, git=True, src_dir=''):
    """Read an indicator's metadata file into a dict.

    Args:
        inid: str. Indicator ID, used to locate the metadata file
            through ``input_path``.
        git: bool. When true, the mapping returned by
            ``sdg.git.get_git_updates`` is merged into the metadata.
        src_dir: str. Base path handed to ``input_path``.

    Returns:
        dict. The fields from the first element returned by
        ``yamlmd.read_yamlmd``, optionally overlaid with the git update
        fields, plus a ``page_content`` key holding the joined second
        element.
    """
    # Read and write paths may be different
    fr = input_path(inid, ftype='meta', src_dir=src_dir)

    meta_md = yamlmd.read_yamlmd(fr)
    meta = dict(meta_md[0])
    if git:
        # Git fields win over same-named keys read from the file.
        meta.update(sdg.git.get_git_updates(inid, src_dir=src_dir))

    meta['page_content'] = ''.join(meta_md[1])

    return meta
예제 #6
0
def check_all_meta(src_dir=''):
    """Validate the metadata file of every indicator.

    Args:
        src_dir: str. Project base path; metadata files are located
            relative to it.

    Returns:
        The ``&``-combined result of ``check_meta`` over all files,
        starting from True.

    Raises:
        FileNotFoundError: if ``get_ids`` returns no indicator IDs.
    """
    ids = get_ids(src_dir=src_dir)
    n_ids = len(ids)

    if n_ids == 0:
        raise FileNotFoundError("No indicator IDs found")

    print("Checking " + str(n_ids) + " metadata files...")

    all_ok = True
    for inid in ids:
        meta_path = input_path(inid,
                               ftype='meta',
                               src_dir=src_dir,
                               must_work=True)
        with open(meta_path, encoding="UTF-8") as handle:
            # Only the first YAML document in the file is checked.
            first_doc = next(yaml.safe_load_all(handle))
        all_ok = all_ok & check_meta(first_doc, fname=meta_path)

    return all_ok
예제 #7
0
def test_in_path():
    """The metadata path for an indicator is ``meta/<inid>.md`` when
    ``src_dir`` is empty."""
    expected = os.path.join('meta', '1-2-1.md')
    actual = input_path(inid="1-2-1", ftype='meta', src_dir='')
    assert actual == expected
예제 #8
0
def get_inid_data(inid, src_dir=''):
    """Load an indicator's data csv into a DataFrame.

    Args:
        inid: str. Indicator ID, used to locate the csv via ``input_path``.
        src_dir: str. Base path forwarded to ``input_path``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for the resolved path.
    """
    return pd.read_csv(
        input_path(inid, ftype='data', src_dir=src_dir, must_work=True)
    )