Ejemplo n.º 1
0
def create_datapackage(path, update):
    """Create the datapackage.json of the dataset at ``path``, or refresh it.

    When ``update`` is falsy, a descriptor is obtained from
    ``get_datapackage(path)`` and written to ``<path>/datapackage.json``;
    if that file is already present, a message is echoed and the function
    returns without writing anything. When ``update`` is truthy, the call is
    delegated to ``get_datapackage(path, update_existing=True)`` and nothing
    is written by this function itself.
    """
    from ddf_utils.index import get_datapackage
    import json

    if update:
        # The returned descriptor is not used here.
        get_datapackage(path, update_existing=True)
    else:
        target = os.path.join(path, 'datapackage.json')
        if os.path.exists(target):
            click.echo('datapackage.json already exists. skipping')
            return
        descriptor = get_datapackage(path)
        with open(target, 'w', encoding='utf8') as out:
            json.dump(descriptor, out, indent=4, ensure_ascii=False)

    click.echo('Done.')
Ejemplo n.º 2
0
def run_recipe(recipe, outdir, update, dry_run, show_tree):
    """Build a recipe, then either show its tree or run it.

    ``recipe`` is handed to ``chef.build_recipe``. With ``show_tree`` set,
    the DAG built from the recipe is displayed through ``tree_view()`` and
    the function returns without running anything. Otherwise the recipe is
    run; unless ``dry_run`` is set, the result is saved to ``outdir`` with
    ``chef.dish_to_csv`` and a ``datapackage.json`` is written next to it.

    ``update`` is accepted but currently has no effect.
    """
    import ddf_utils.chef as chef
    from ddf_utils.index import get_datapackage
    import json

    click.echo('building recipe...')
    recipe = chef.build_recipe(recipe)

    if show_tree:
        chef.cook.build_dag(recipe).tree_view()
        return

    dish = chef.run_recipe(recipe)

    if not dry_run:
        click.echo('saving result to disk...')
        chef.dish_to_csv(dish, outdir)

        click.echo('creating datapackage file...')
        descriptor = get_datapackage(outdir)
        package_file = os.path.join(outdir, 'datapackage.json')
        with open(package_file, 'w', encoding='utf8') as out:
            json.dump(descriptor, out, indent=4, ensure_ascii=False)

    click.echo("Done.")
Ejemplo n.º 3
0
    # concepts: four fixed rows followed by one row per distinct 'Element'
    concs = data['Element'].unique()
    cdf = pd.DataFrame([], columns=['concept', 'name', 'concept_type'])
    cdf['name'] = ['Name', 'Country', 'Item', 'Year', *concs]
    cdf['concept'] = cdf['name'].map(to_concept_id)
    # Every row starts as a measure; the four fixed rows are overridden below.
    # Item assignment is used so the column is always what gets set.
    cdf['concept_type'] = 'measure'

    cdf.loc[0, 'concept_type'] = 'string'
    cdf.loc[1, 'concept_type'] = 'entity_domain'
    cdf.loc[2, 'concept_type'] = 'entity_domain'
    cdf.loc[3, 'concept_type'] = 'time'

    cdf.to_csv(os.path.join(out_path, 'ddf--concepts.csv'), index=False)

    # datapoints: one file per distinct 'Element'
    data_ = data[['Country Code', 'Item Code', 'Element', 'Year Code', 'Value']]
    gs = data_.groupby('Element').groups

    for k, idx in gs.items():
        cid = to_concept_id(k)
        # `groups` maps each key to the index *labels* of its rows, so the
        # rows are selected by label with .loc (the old .ix indexer was
        # removed in pandas 1.0).
        df = data_.loc[idx].copy()
        df = df.drop('Element', axis=1)
        df.columns = ['country', 'item', 'year', cid]

        # Format only the file name: formatting the joined path would fail
        # or misbehave if out_path itself contained '{' or '}'.
        fname = 'ddf--datapoints--{}--by--country--item--year.csv'.format(cid)
        path = os.path.join(out_path, fname)
        df.to_csv(path, index=False)

    get_datapackage(out_path, use_existing=True, to_disk=True)

    print('Done.')
Ejemplo n.º 4
0
    #
    #    concept_discrete = extract_concept_discrete(country, series)
    #    concept_discrete.to_csv(
    #        os.path.join(output_dir, 'ddf--concepts--discrete.csv'),
    #        index=False, encoding='utf8')
    #
    #    print('creating entities files...')
    #    entities_country = extract_entities_country(country, series)
    #    entities_country.to_csv(
    #        os.path.join(output_dir, 'ddf--entities--country.csv'),
    #        index=False, encoding='utf8')
    #
    #    print('creating datapoints...')
    #    datapoints = extract_datapoints_country_year(data)
    #    for k, v in datapoints.items():
    #        v[k] = pd.to_numeric(v[k])
    #        v.to_csv(
    #            os.path.join(output_dir,
    #                         'ddf--datapoints--'+k+'--by--country--year.csv'),
    #            index=False,
    #            encoding='utf8',
    #            # Keep 10 decimal digits. This stops pandas from
    #            # using scientific notation in the datapoints
    #            # while keeping precision. There are really
    #            # small/big numbers in this dataset.
    #            float_format='%.10f'
    #        )

    print('generating datapackage file...')
    datapackage = get_datapackage(output_dir, to_disk=True)
# -*- coding: utf-8 -*-

from ddf_utils import chef
from ddf_utils.index import get_datapackage
import patch
import os
import json
import logging

# Output directory and recipe file, given as relative paths.
out_dir = '../../'
recipe_file = '../recipes/recipe_main.yaml'

# Log at DEBUG level; each record is prefixed with the time (H:M:S) and level.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s -%(levelname)s- %(message)s',
    datefmt="%H:%M:%S",
)

if __name__ == '__main__':
    # Delete the ddf-- files already present in out_dir before regenerating.
    stale_names = [name for name in os.listdir(out_dir)
                   if name.startswith("ddf--")]
    for name in stale_names:
        os.remove(os.path.join(out_dir, name))

    # Build the recipe and run it, passing out_dir as the output path.
    recipe = chef.build_recipe(recipe_file)
    res = chef.run_recipe(recipe, serve=True, outpath=out_dir)

    # Apply the changes defined in the local `patch` module.
    patch.do_all_changes()

    datapackage = get_datapackage(out_dir, use_existing=True, to_disk=True)

    print('Done.')
Ejemplo n.º 6
0

def copy_other_files():
    """Copy entity files, concept files and datapackage.json to ``out_path``.

    Every entry of ``source_path`` is examined; an entry is copied when its
    name contains 'entities' or 'concepts', or is exactly 'datapackage.json'.
    """
    def _wanted(filename):
        # The three checks are independent, so their order does not matter.
        return (filename == 'datapackage.json'
                or 'entities' in filename
                or 'concepts' in filename)

    for filename in filter(_wanted, os.listdir(source_path)):
        shutil.copy(os.path.join(source_path, filename), out_path)


def apply_path_concepts():
    """Patch the source concepts file and save the result under ``out_path``.

    ``<source_path>/ddf--concepts.csv`` and ``./concept_patch.csv`` are passed
    to ``ddf_utils.patch.apply_patch``; whatever it returns is written to
    ``<out_path>/ddf--concepts.csv`` via ``to_csv(..., index=False)``.
    """
    from ddf_utils.patch import apply_patch

    concepts_name = 'ddf--concepts.csv'
    original_file = os.path.join(source_path, concepts_name)
    patched = apply_patch(original_file, './concept_patch.csv')
    patched.to_csv(os.path.join(out_path, concepts_name), index=False)


if __name__ == '__main__':
    # ETL steps, run in this order.
    steps = (
        datapoints_by_basomrade_gender,
        datapoints_by_municipality,
        entity_gender,
        copy_other_files,
        apply_path_concepts,
    )
    for step in steps:
        step()

    get_datapackage(out_path, to_disk=True, use_existing=True)

    print('Done.')
    # NOTE(review): the same file name appears twice in this reminder; one of
    # the two was probably meant to be a different datapoints file -- confirm.
    reminder = (
        'Please manually edit the entries in datapackage.json for '
        'ddf--datapoints--indicators--by--gender--municipality--year.csv and '
        'ddf--datapoints--indicators--by--gender--municipality--year.csv'
    )
    print(reminder)
    # Build the discrete-concepts table from `country` and `series` and
    # write it to ddf--concepts--discrete.csv in output_dir.
    concept_discrete = extract_concept_discrete(country, series)
    concept_discrete.to_csv(
        os.path.join(output_dir, 'ddf--concepts--discrete.csv'),
        index=False, encoding='utf8')

    # Build the country entities table from the same inputs and write it
    # to ddf--entities--country.csv in output_dir.
    print('creating entities files...')
    entities_country = extract_entities_country(country, series)
    entities_country.to_csv(
        os.path.join(output_dir, 'ddf--entities--country.csv'),
        index=False, encoding='utf8')

    # `datapoints` is iterated as a mapping: each key `k` names both the
    # output file and the column of `v` that is converted to numeric.
    print('creating datapoints...')
    datapoints = extract_datapoints_country_year(data)
    for k, v in datapoints.items():
        v[k] = pd.to_numeric(v[k])
        v.to_csv(
            os.path.join(output_dir,
                         'ddf--datapoints--'+k+'--by--country--year.csv'),
            index=False,
            encoding='utf8',
            # Keep 10 decimal digits. This stops pandas from
            # using scientific notation in the datapoints
            # while keeping precision. There are really
            # small/big numbers in this dataset.
            float_format='%.10f'
        )

    # The returned value is bound to `datapackage`; no later use of it is
    # visible in this fragment.
    print('generating datapackage file...')
    datapackage = get_datapackage(output_dir, to_disk=True)
Ejemplo n.º 8
0
import os
import json
import logging

# Output directory and recipe file, given as relative paths.
out_dir = '../../'
recipe_file = '../recipes/recipe_main.yaml'

# Log at DEBUG level; each record is prefixed with the time (H:M:S) and level.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s -%(levelname)s- %(message)s',
    datefmt="%H:%M:%S",
)

if __name__ == '__main__':
    # Delete the ddf-- files already present in out_dir before regenerating.
    stale_names = [name for name in os.listdir(out_dir)
                   if name.startswith("ddf--")]
    for name in stale_names:
        os.remove(os.path.join(out_dir, name))

    # Build and run the recipe, then dump the result as CSV files.
    recipe = chef.build_recipe(recipe_file)
    res = chef.run_recipe(recipe)
    print('saving result to disk...')
    chef.dish_to_csv(res, out_dir)

    # Apply the changes defined in the local `patch` module.
    patch.do_all_changes()

    # TODO: keep the older datapackage's basic info (author etc.)
    datapackage = get_datapackage(out_dir)
    package_file = os.path.join(out_dir, 'datapackage.json')
    with open(package_file, 'w', encoding='utf8') as f:
        json.dump(datapackage, f, indent=4, ensure_ascii=False)

    print('Done.')