예제 #1
0
def build_meta(inid):
    """Pre-process the metadata file of a single indicator.

    Reads the indicator's metadata, copies every key/value reported by
    ``sdg.git.get_git_updates`` into element 0 of the parsed metadata and
    writes the result to the output location.

    Args:
        inid: indicator id, handed to the path and git helpers.

    Return:
        bool. Always True; any failure surfaces as an exception.
    """
    # The source and the destination are not necessarily the same file
    src = indicator_path(inid, ftype='meta', mode='r')
    dst = indicator_path(inid, ftype='meta', mode='w')

    meta = yamlmd.read_yamlmd(src)

    # Overlay the git information on top of whatever the file already held
    for key, value in sdg.git.get_git_updates(inid).items():
        meta[0][key] = value
    yamlmd.write_yamlmd(meta, dst)

    return True
예제 #2
0
def get_git_update(inid, ftype):
    """Look up the most recent commit that touched an indicator file.

    The repository is discovered from the file's own directory (searching
    parent directories), because the file might live in a submodule rather
    than in the repository of the current working directory.

    Args:
        inid: indicator id, passed to ``indicator_path``.
        ftype: file type, passed to ``indicator_path``.

    Return:
        dict with the keys 'date' (string form of the commit date), 'sha',
        'file' (the path made relative to the repository working directory),
        'id' and 'commit_url'.

    Raises:
        StopIteration: if the commit iterator yields nothing for the file.
    """
    f = indicator_path(inid, ftype=ftype, mode='r')
    f_dir = os.path.dirname(f)

    repo = git.Repo(f_dir, search_parent_directories=True)
    # Need to translate relative to the repo root (this may be a submodule)
    repo_dir = os.path.relpath(repo.working_dir, os.getcwd())
    f = os.path.relpath(f, repo_dir)

    commit = next(repo.iter_commits(paths=f, max_count=1))
    git_date = str(commit.committed_datetime.date())
    git_sha = commit.hexsha

    # Turn the remote URL into a commit URL: drop everything up to and
    # including 'github.com:' or 'github.com/' to leave the 'owner/repo' slug
    remote_bare = re.sub(r'^.*github\.com[:/]', '', repo.remote().url)
    remote_bare = remote_bare.rstrip('/')
    # Strip only a trailing '.git'. Removing every occurrence would corrupt
    # slugs such as 'user/user.github.io', which contain '.git' internally.
    if remote_bare.endswith('.git'):
        remote_bare = remote_bare[:-len('.git')]
    commit_url = 'https://github.com/' + remote_bare + '/commit/' + git_sha

    return {
        'date': git_date,
        'sha': git_sha,
        'file': f,
        'id': inid,
        'commit_url': commit_url
    }
예제 #3
0
def main():
    """Build the csv of every indicator, then copy the goal-level csvs.

    Return:
        bool. The ``build_csv`` statuses of all indicators combined with
        ``&``, so one failing indicator makes the whole run report False.
    """
    # Create the place to put the files
    os.makedirs("data", exist_ok=True)

    indicator_ids = sdg.path.get_ids()
    print("Building csvs for " + str(len(indicator_ids)) + " indicators...")

    ok = True
    for indicator_id in indicator_ids:
        # Keep going after a failure so every indicator gets attempted
        ok &= build_csv(indicator_id)

    print("Copying goals info...")
    in_dir = indicator_path(ftype='data', mode='r')
    out_dir = indicator_path(ftype='data', mode='w')
    pattern = os.path.join(in_dir, 'sdg*.csv')
    for csv_file in glob.glob(pattern):
        shutil.copy(csv_file, out_dir)

    return ok
예제 #4
0
def build_csv(inid):
    """Copy the raw data csv of one indicator to its website output path.

    Args:
        inid: indicator id, handed to ``indicator_path``.

    Returns:
        bool: True when the copy went through, False when it raised (the id
        and the error are printed in that case).
    """
    src = indicator_path(inid, ftype='data', mode='r')
    dst = indicator_path(inid, ftype='data', mode='w')

    try:
        shutil.copy(src, dst)
    except Exception as err:
        # Report and carry on; the caller aggregates the statuses
        print(inid, err)
        return False
    else:
        return True
예제 #5
0
def compare_reload(inid, which='edges'):
    """Compare the written csv against the same table reloaded from the JSON.

    Args:
        inid: indicator id, handed to ``indicator_path``.
        which: 'edges' or 'data'; selects both the csv file type and the
            key looked up in the JSON document.

    Return:
        True when both tables have no rows; otherwise whatever
        ``isclose_df`` reports for the two frames. A falsy result is also
        announced on stdout.
    """
    csv_path = indicator_path(inid, ftype=which, mode='w')
    json_path = indicator_path(inid, 'json', mode='w')

    # Close the handle deterministically and read with the same encoding
    # that the uncompressed JSON writer uses (utf-8), not the platform default
    with open(json_path, encoding='utf-8') as infile:
        jsn = json.load(infile)

    df_csv = pd.read_csv(csv_path, encoding='utf-8')
    # JSON null comes back as None; turn it into NaN before comparing
    df_jsn = pd.DataFrame(jsn[which]).replace({None: np.nan})

    # Account for empty data
    if df_jsn.shape[0] == df_csv.shape[0] == 0:
        return True

    # Put the JSON columns in the csv's column order before comparing
    df_jsn = df_jsn[df_csv.columns.values]

    status = isclose_df(df_csv, df_jsn)
    if not status:
        print("reload " + which + " error in " + inid)

    return status
예제 #6
0
def write_json(inid, orient='list', gz=False):
    """Bundle the main data and the edge data of an indicator into one JSON
    file, laid out either rowwise (orient='records') or colwise
    (orient='list').

    Args:
        inid -- str: The indicator id, e.g. '1-1-1'
        orient -- str: either 'records' for rowwise, or 'list' for colwise
        gz -- bool: if True then compress the output with gzip (the file
            name gets a '.gz' suffix)

    Return:
        status. bool. False when anything raised while gathering,
        serialising or writing (the id and the error are printed).
    """
    try:
        # NOTE(review): get_main_data / get_edge_data return False when
        # their csv cannot be read; that value is serialised as-is here.
        bundle = {
            'data': get_main_data(inid, orient=orient),
            'edges': get_edge_data(inid, orient=orient)
        }
        # The serialiser escapes forward slashes; undo that
        text = pd.io.json.dumps(bundle).replace("\\/", "/")

        if gz:
            payload = text.encode('utf-8')
            target = indicator_path(inid, 'json', mode='w') + '.gz'
            with gzip.open(target, 'w') as outfile:
                outfile.write(payload)
        else:
            target = indicator_path(inid, 'json', mode='w')
            with open(target, 'w', encoding='utf-8') as outfile:
                outfile.write(text)
    except Exception as err:
        print(inid, err)
        return False

    return True
예제 #7
0
def main():
    """Pre-process the metadata file of every indicator for the site build.

    Return:
        bool. False as soon as any ``build_meta`` call raised or reported a
        falsy status; the remaining indicators are still processed.
    """
    indicator_ids = sdg.path.get_ids()

    print("Building " + str(len(indicator_ids)) + " metadata files...")

    # Make sure they have somewhere to go
    os.makedirs(indicator_path(ftype='meta', mode='w'), exist_ok=True)

    ok = True
    for indicator_id in indicator_ids:
        try:
            ok &= build_meta(indicator_id)
        except Exception as err:
            # One broken indicator must not stop the others
            ok = False
            print(indicator_id, err)
    return ok
예제 #8
0
def get_main_data(inid, orient='records'):
    """Load the main data csv of an indicator as a json ready object

    Args:
        inid --- str. indicator id. e.g. '1-1-1'
        orient --- either 'records' for rowwise, or 'list' for colwise

    Return:
        False when the csv could not be read (the id and the error are
        printed), an empty list when the csv has no rows, otherwise the
        result of ``df_nan_to_none`` for the requested orient.
    """
    try:
        frame = pd.read_csv(indicator_path(inid, 'data', mode='w'),
                            encoding='utf-8')
    except Exception as err:
        print(inid, err)
        return False

    # A csv without rows becomes an empty list whatever the orient
    if len(frame) == 0:
        return []

    return df_nan_to_none(frame, orient=orient)
예제 #9
0
def get_edge_data(inid, orient):
    """Load the edge file that accompanies a main data csv as a json ready
    object

    Args:
        inid --- str. indicator id. e.g. '1-1-1'
        orient --- either 'records' for rowwise, or 'list' for colwise

    Return:
        False when the edge csv could not be read (the id and the error are
        printed), an empty list when it has no rows, otherwise the result
        of ``df_nan_to_none`` for the requested orient.
    """
    try:
        edge_path = indicator_path(inid, 'edges', mode='w')
        table = pd.read_csv(edge_path, encoding='utf-8')
    except Exception as err:
        print(inid, err)
        return False

    has_rows = table.shape[0] >= 1
    return df_nan_to_none(table, orient=orient) if has_rows else []