Example #1
def economic_blocks():
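    """Group country ids per economic block; pickle each block into Redis
    under 'economic_block/<id>' and write the full mapping to S3 as JSON."""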
    csv = s3.get('redshift/attrs/attrs_bloco_economico.csv')
    df = pandas.read_csv(
        csv,
        sep=';',
        header=0,
        names=['id','name','country_id'],
        converters={
            "country_id": str
        }
    )

    economic_blocks = {}

    for _, row in df.iterrows():

        if economic_blocks.get(row["id"]):
            economic_block = economic_blocks[row["id"]]
            economic_block["countries"].append(row["country_id"])
        else:
            economic_block = {
                'name_en': row["name"],
                'name_pt': row["name"],
                'countries': [
                    row["country_id"]
                ]
            }

        economic_blocks[row['id']] = economic_block
        redis.set('economic_block/' + str(row['id']), pickle.dumps(economic_block))

    s3.put('attrs_economic_block.json', json.dumps(economic_blocks, ensure_ascii=False))

    click.echo("Economic Blocks loaded.")
Example #2
def territories():
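    """Map each municipality id to its development territory and
    microterritory; pickle each entry into Redis and write the map to S3."""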
    csv = s3.get('redshift/attrs/attrs_territorios_de_desenvolvimento.csv')
    df = pandas.read_csv(
        csv,
        sep=';',
        header=0,
        names=['territory','microterritory','municipy_id'],
        converters={
            "municipy_id": str
        }
    )

    territories = {}

    for _, row in df.iterrows():
        territory = {
            'territory': row["territory"],
            'microterritory': row["microterritory"],
            'municipy_id': row["municipy_id"]
        }

        territories[row['municipy_id']] = territory
        redis.set('territory/' + str(row['municipy_id']), pickle.dumps(territory))

    s3.put('attrs_territory.json', json.dumps(territories, ensure_ascii=False))

    click.echo("Territories loaded.")
Example #3
def attrs(attrs):
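    """Generic attribute loader: for each attr config, read its CSV (expected
    columns id, name_pt, name_en), pickle every item into Redis under
    '<name>/<id>' and build the S3 JSON payload by string concatenation."""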
    for attr in attrs:
        click.echo('Loading %s ...' % attr['name'])
        csv = s3.get('redshift/attrs/%s' % attr['csv_filename'])
        df = pandas.read_csv(
            csv,
            sep=';',
            header=0,
            converters={
                'id': str
            },
            engine='c'
        )

        items = '{'

        for _, row in df.iterrows():
            item = {
                'id': row["id"],
                'name_pt': row["name_pt"],
                'name_en': row["name_en"],
            }

            if items == '{':
                items = '{}\"{}\": {}'.format(items, row['id'], json.dumps(item, ensure_ascii=False))
            else:
                items = '{}, \"{}\": {}'.format(items, row['id'], json.dumps(item, ensure_ascii=False))

            redis.set(attr['name'] + '/' + str(row['id']), pickle.dumps(item))

        items = items + '}'

        s3.put('attrs_' + attr['name'] + '.json', items)

        click.echo(" loaded.")
Example #4
def states():
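    """Load states keyed by IBGE id, skipping rows without one; pickle each
    state into Redis and write the full mapping to S3."""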
    csv = s3.get('redshift/attrs/attrs_uf_ibge_mdic.csv')
    df = pandas.read_csv(
            csv,
            sep=';',
            header=0,
            names=['mdic_name', 'mdic_id', 'ibge_id', 'uf'],
            converters={
                "ibge_id": str
            }
        )

    states = {}

    for _, row in df.iterrows():
        if not row['ibge_id']:
            continue

        state = {
            'id': row['ibge_id'],
            'name_pt': row["mdic_name"],
            'name_en': row["mdic_name"],
            'abbr_pt': row['uf'],
            'abbr_en': row['uf']
        }

        states[row['ibge_id']] = state
        redis.set('state/' + str(row['ibge_id']), pickle.dumps(state))

    s3.put('attrs_state.json', json.dumps(states, ensure_ascii=False))

    click.echo("States loaded.")
Example #5
def regions(upload):
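    """Load regions keyed by id; skip the Redis write when upload is
    'only_s3' and the S3 write when upload is 'only_redis'."""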
    csv = s3.get('metadata/regions.csv')
    df = pandas.read_csv(
        csv,
        sep=';',
        header=0,
        names=['id', 'name_en', 'abbr_en', 'name_pt', 'abbr_pt', 'old_id'])

    regions = {}

    for _, row in df.iterrows():
        region = {
            'id': row['id'],
            'name_en': row["name_en"],
            'abbr_en': row['abbr_en'],
            'name_pt': row["name_pt"],
            'abbr_pt': row['abbr_pt'],
            'old_id': row['old_id'],
        }

        regions[row['id']] = region
        if upload != 'only_s3':
            redis.set('region/' + str(row['id']),
                      json.dumps(region, ensure_ascii=False))

    if upload != 'only_redis':
        s3.put('region.json', json.dumps(regions, ensure_ascii=False))

    click.echo("Regions loaded.")
Example #6
def economic_blocs(upload):
    csv = s3.get('metadata/economic_blocs.csv')
    df = pandas.read_csv(csv,
                         sep=';',
                         header=0,
                         names=['id', 'name', 'country_id'],
                         converters={"country_id": str})

    economic_blocs = {}

    for _, row in df.iterrows():

        if economic_blocs.get(row["id"]):
            economic_bloc = economic_blocs[row["id"]]
            economic_bloc["countries"].append(row["country_id"])
        else:
            economic_bloc = {
                'name_en': row["name"],
                'name_pt': row["name"],
                'countries': [row["country_id"]]
            }

        economic_blocs[row['id']] = economic_bloc
        if upload != 'only_s3':
            redis.set('economic_bloc/' + str(row['id']),
                      json.dumps(economic_bloc, ensure_ascii=False))

    if upload != 'only_redis':
        s3.put('economic_bloc.json',
               json.dumps(economic_blocs, ensure_ascii=False))

    click.echo("Economic Blocs loaded.")
Example #7
def territories(upload):
    csv = s3.get('metadata/development_territories.csv')
    df = pandas.read_csv(csv,
                         sep=';',
                         header=0,
                         names=['territory', 'microterritory', 'municipy_id'],
                         converters={"municipy_id": str})

    territories = {}

    for _, row in df.iterrows():
        territory = {
            'territory': row["territory"],
            'microterritory': row["microterritory"],
            'municipy_id': row["municipy_id"]
        }

        territories[row['municipy_id']] = territory
        if upload != 'only_s3':
            redis.set('territory/' + str(row['municipy_id']),
                      json.dumps(territory, ensure_ascii=False))

    if upload != 'only_redis':
        s3.put('territory.json', json.dumps(territories, ensure_ascii=False))

    click.echo("Territories loaded.")
Example #8
def inflections(upload):
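    """Load name inflection metadata (gender and plural form) keyed by id;
    write to Redis and/or S3 according to the upload flag."""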
    csv = s3.get('metadata/inflections.csv')
    df = pandas.read_csv(
        csv,
        sep=';',
        header=0,
        names=['id', 'name_en', 'name_pt', 'gender', 'plural']
    )

    inflections = {}

    for _, row in df.iterrows():
        inflection = {
            'id': row['id'],
            'name_en': row['name_en'],
            'name_pt': row['name_pt'],
            'gender': row['gender'],
            'plural': row['plural']
        }
        inflections[row['id']] = inflection

        if upload != 'only_s3':
            redis.set('inflection/' + str(row['id']), json.dumps(inflection, ensure_ascii=False))

    if upload != 'only_redis':
        s3.put('inflection.json', json.dumps(
            inflections, ensure_ascii=False))

    click.echo("Inflections loaded.")
Example #9
def establishments(upload):
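    """Cache CNES establishments (id, name_pt, name_en) in Redis; this
    loader writes nothing to S3."""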
    csv = s3.get('metadata/cnes_final.csv')
    df = pandas.read_csv(
        csv,
        sep=';',
        header=0,
        names=['id', 'name_en', 'name_pt'],
        converters={
            'id': str,
        }
    )

    for _, row in df.iterrows():

        establishment = {
            'id': row['id'],
            'name_pt': row["name_pt"],
            'name_en': row["name_en"],
        }

        if upload != 'only_s3':
            redis.set('establishment/' +
                  str(row['id']), json.dumps(establishment, ensure_ascii=False))

    click.echo("Establishment loaded.")
Example #10
def regions(upload):
    csv = s3.get('metadata/regions.csv')
    df = pandas.read_csv(
        csv,
        sep=';',
        header=0,
        names=['id', 'name_en', 'abbr_en', 'name_pt', 'abbr_pt', 'old_id']
    )

    regions = {}

    for _, row in df.iterrows():
        region = {
            'id': row['id'],
            'name_en': row["name_en"],
            'abbr_en': row['abbr_en'],
            'name_pt': row["name_pt"],
            'abbr_pt': row['abbr_pt'],
            'old_id': row['old_id'],
        }

        regions[row['id']] = region
        if upload != 'only_s3':
            redis.set('region/' + str(row['id']), json.dumps(region, ensure_ascii=False))
            
    if upload != 'only_redis':
        s3.put('region.json', json.dumps(regions, ensure_ascii=False))

    click.echo("Regions loaded.")
Example #11
def inflections():
    csv = s3.get('redshift/attrs/attrs_infleccoes.csv')
    df = pandas.read_csv(
        csv,
        sep=';',
        header=0,
        names=['id','name_en','name_pt','gender','plural']
    )

    inflections = {}

    for _, row in df.iterrows():
        inflection = {
            'id': row['id'],
            'name_en': row['name_en'],
            'name_pt': row['name_pt'],
            'gender': row['gender'],
            'plural': row['plural']
        }
        inflections[row['id']] = inflection
        redis.set('inflection/' + str(row['id']), pickle.dumps(inflection))

    s3.put('attrs_inflection.json', json.dumps(inflections, ensure_ascii=False))

    click.echo("Inflections loaded.")
Example #12
def universities(upload):
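    """Load universities (including school_type) keyed by id; write to Redis
    and/or S3 according to the upload flag."""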
    csv = s3.get('metadata/universities.csv')
    df = pandas.read_csv(
        csv,
        sep=';',
        header=0,
        names=['id', 'name_pt', 'name_en', 'school_type'],
        converters={
            "id": str
        }
    )

    items = {}

    for _, row in df.iterrows():
        item = {
            'id': row["id"],
            'name_pt': row["name_pt"],
            'name_en': row["name_en"],
            'school_type': row["school_type"],
        }

        items[row['id']] = item
        if upload != 'only_s3':
            redis.set('university/' + str(row['id']), json.dumps(item, ensure_ascii=False))

    if upload != 'only_redis':
        s3.put('university.json', json.dumps(items, ensure_ascii=False))

    click.echo("Universities loaded.")
Example #13
def regions():
    csv = s3.get('redshift/attrs/attrs_regioes.csv')
    df = pandas.read_csv(
        csv,
        sep=';',
        header=0,
        names=['id', 'name_en', 'abbr_en', 'name_pt', 'abbr_pt']
    )

    regions = {}

    for _, row in df.iterrows():
        region = {
            'id': row['id'],
            'name_en': row["name_en"],
            'abbr_en': row['abbr_en'],
            'name_pt': row["name_pt"],
            'abbr_pt': row['abbr_pt'],
        }

        regions[row['id']] = region
        redis.set('region/' + str(row['id']), pickle.dumps(region))

    s3.put('attrs_region.json', json.dumps(regions, ensure_ascii=False))

    click.echo("Regions loaded.")
Example #14
def continents():
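    """Group country ids (zero-padded to three digits) per continent; pickle
    each continent into Redis and write the full mapping to S3."""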
    csv = s3.get('redshift/attrs/attrs_continente.csv')
    df = pandas.read_csv(
        csv,
        sep=';',
        header=0,
        names=['id', 'country_id', 'name_en', 'name_pt'],
        converters={
            "country_id": lambda x: '%03d' % int(x)
        }
    )

    continents = {}

    for _, row in df.iterrows():

        if continents.get(row["id"]):
            continent = continents[row["id"]]
            continent["countries"].append(row["country_id"])
        else:
            continent = {
                'countries': [
                    row["country_id"]
                ],
                'name_en': row["name_en"],
                'name_pt': row["name_pt"]
            }

        continents[row['id']] = continent
        redis.set('continent/' + str(row['id']), pickle.dumps(continent))

    s3.put('attrs_continent.json', json.dumps(continents, ensure_ascii=False))

    click.echo("Continents loaded.")
Example #15
def attrs(attrs, upload):
    for attr in attrs:
        click.echo('Loading %s ...' % attr['name'])
        csv = s3.get('metadata/%s' % attr['csv_filename'])
        df = pandas.read_csv(
            csv,
            sep=';',
            header=0,
            converters={
                'id': str
            },
            engine='c'
        )

        items = {}

        for _, row in df.iterrows():
            item = {
                'id': row["id"],
                'name_pt': row["name_pt"],
                'name_en': row["name_en"],
            }

            items[row['id']] = item
            if upload != 'only_s3':
                redis.set(attr['name'] + '/' + str(row['id']), json.dumps(item, ensure_ascii=False))

        if upload != 'only_redis':
            s3.put(attr['name'] + '.json', json.dumps(items, ensure_ascii=False))

        click.echo(" loaded.")
Example #16
def states(upload):
    csv = s3.get('metadata/uf_ibge_mdic.csv')
    df = pandas.read_csv(
        csv,
        sep=';',
        header=0,
        names=['mdic_name', 'mdic_id', 'ibge_id', 'uf', 'old_id'],
        converters={
            "ibge_id": str
        }
    )

    states = {}

    for _, row in df.iterrows():
        if not row['ibge_id']:
            continue

        state = {
            'id': row['ibge_id'],
            'name_pt': row["mdic_name"],
            'name_en': row["mdic_name"],
            'abbr_pt': row['uf'],
            'abbr_en': row['uf'], 
            'old_id': row['old_id']
        }

        states[row['ibge_id']] = state
        if upload != 'only_s3':
            redis.set('state/' + str(row['ibge_id']), json.dumps(state, ensure_ascii=False))

    if upload != 'only_redis':
        s3.put('state.json', json.dumps(states, ensure_ascii=False))

    click.echo("States loaded.")
Example #17
def occupations(upload):
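    """Load CBO occupations: 1-character ids become occupation groups and
    4-character ids become occupation families linked to their group by the
    first digit; both mappings go to Redis and/or S3 per the upload flag."""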

    csv = s3.get('metadata/cbo.csv')
    df = pandas.read_csv(
        csv,
        sep=';',
        header=0,
        names=['id', 'name_en', 'name_pt'],
        converters={
            "id": str
        }
    )

    occupations_family = {}
    occupations_group = {}

    for _, row in df.iterrows():
        if len(row['id']) == 1:
            occupation_group = {
                'id': row['id'],
                'name_pt': row["name_pt"],
                'name_en': row["name_en"]
            }

            if upload != 'only_s3':
                redis.set('occupation_group/' +
                      str(row['id']), json.dumps(occupation_group, ensure_ascii=False))
            occupations_group[row['id']] = occupation_group

    for _, row in df.iterrows():
        if len(row['id']) == 4:
            occupation_family = {
                'id': row['id'],
                'name_pt': row["name_pt"],
                'name_en': row["name_en"],
                'occupation_group': occupations_group[row['id'][0]],
            }

            if upload != 'only_s3':
                redis.set('occupation_family/' +
                      str(row['id']), json.dumps(occupation_family, ensure_ascii=False))
            occupations_family[row['id']] = occupation_family

    if upload != 'only_redis':
        s3.put('occupation_family.json', json.dumps(
            occupations_family, ensure_ascii=False))

        s3.put('occupation_group.json', json.dumps(
            occupations_group, ensure_ascii=False))

    click.echo("Occupations loaded.")
Example #18
def countries(upload):
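    """Build a country_id -> continent map from continents.csv, then load
    countries from wld.csv and attach the matching continent (or an empty
    dict) to each one; write to Redis and/or S3 per the upload flag."""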
    csv = s3.get('metadata/continents.csv')
    df_continents = pandas.read_csv(
        csv,
        sep=';',
        header=0,
        names=['id', 'country_id', 'name_en', 'name_pt'],
        converters={
            "country_id": lambda x: '%03d' % int(x)
        }
    )

    continents = {}

    for _, row in df_continents.iterrows():
        continents[row['country_id']] = {
            'id': row["id"],
            'name_en': row["name_en"],
            'name_pt': row["name_pt"],
        }

    csv = s3.get('metadata/wld.csv')
    df = pandas.read_csv(
        csv,
        sep=';',
        header=0,
        names=['id', 'name_pt', 'name_en', 'abbreviation'],
        converters={
            "id": str
        }
    )

    countries = {}

    for _, row in df.iterrows():
        country = {
            'id': row["id"],
            'name_pt': row["name_pt"],
            'name_en': row["name_en"],
            'abbrv': row["abbreviation"],
            'continent': continents.get(row["id"], {})
        }

        countries[row['id']] = country
        if upload != 'only_s3':
            redis.set('country/' + str(row['id']), json.dumps(country, ensure_ascii=False))

    if upload != 'only_redis':
        s3.put('country.json', json.dumps(countries, ensure_ascii=False))

    click.echo("Countries loaded.")
Example #19
def hedu_course(upload):
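    """Load higher-education courses: 2-character ids become course fields
    and 6-character ids become courses linked to the field given by their
    first two characters; write to Redis and/or S3 per the upload flag."""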
    csv = s3.get('metadata/hedu_courses.csv')
    df = pandas.read_csv(
        csv,
        sep=';',
        header=0,
        names=['id', 'name_en', 'name_pt'],
        converters={
            "id": str
        }
    )

    hedu_courses = {}
    hedu_courses_field = {}

    for _, row in df.iterrows():
        if len(row['id']) == 2:
            hedu_course_field = {
                'id': row['id'],
                'name_pt': row["name_pt"],
                'name_en': row["name_en"],
            }

            if upload != 'only_s3':
                redis.set('hedu_course_field/' +
                      str(row['id']), json.dumps(hedu_course_field, ensure_ascii=False))
            hedu_courses_field[row['id']] = hedu_course_field

    for _, row in df.iterrows():
        if len(row['id']) == 6:
            hedu_course = {
                'id': row['id'],
                'name_pt': row["name_pt"],
                'name_en': row["name_en"],
                'hedu_course_field': hedu_courses_field[row['id'][:2]]
            }

            if upload != 'only_s3':
                redis.set('hedu_course/' +
                      str(row['id']), json.dumps(hedu_course, ensure_ascii=False))
            hedu_courses[row['id']] = hedu_course

    if upload != 'only_redis':
        s3.put('hedu_course.json', json.dumps(
            hedu_courses, ensure_ascii=False))

        s3.put('hedu_course_field.json', json.dumps(
            hedu_courses_field, ensure_ascii=False))

    click.echo("HEDU Courses loaded.")
Example #20
def sc_course(upload):
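    """Load SC courses: 2-character ids become course fields and 5-character
    ids become courses; the course_field is attached to each course only in
    the S3 payload."""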
    csv = s3.get('metadata/sc_courses.csv')
    df = pandas.read_csv(
        csv,
        sep=';',
        header=0,
        names=['id', 'name_en', 'name_pt'],
        converters={
            "id": str
        }
    )

    sc_courses = {}
    sc_courses_field = {}

    for _, row in df.iterrows():

        if len(row['id']) == 2:
            sc_course_field = {
                'id': row["id"],
                'name_pt': row["name_pt"],
                'name_en': row["name_en"]
            }

            if upload != 'only_s3':
                redis.set('sc_course_field/' +
                      str(row['id']), json.dumps(sc_course_field, ensure_ascii=False))
            sc_courses_field[row['id']] = sc_course_field

        elif len(row['id']) == 5:
            sc_course = {
                'id': row["id"],
                'name_pt': row["name_pt"],
                'name_en': row["name_en"]
            }

            if upload != 'only_s3':
                redis.set('sc_course/' + str(row['id']), json.dumps(sc_course, ensure_ascii=False))
            sc_courses[row['id']] = sc_course

    if upload != 'only_redis':
        for course in sc_courses:
            sc_courses[course]["course_field"] = sc_courses_field[course[:2]]

        s3.put('sc_course.json', json.dumps(sc_courses, ensure_ascii=False))
        s3.put('sc_course_field.json', json.dumps(
            sc_courses_field, ensure_ascii=False))

    click.echo("SC Courses loaded.")
Example #21
def countries():
    csv = s3.get('redshift/attrs/attrs_continente.csv')
    df_continents = pandas.read_csv(
        csv,
        sep=';',
        header=0,
        names=['id', 'country_id', 'name_en', 'name_pt'],
        converters={
            "country_id": lambda x: '%03d' % int(x)
        }
    )

    continents = {}

    for _, row in df_continents.iterrows():
        continents[row['country_id']] = {
            'id': row["id"],
            'name_en': row["name_en"],
            'name_pt': row["name_pt"],
        }

    csv = s3.get('redshift/attrs/attrs_wld.csv')
    df = pandas.read_csv(
        csv,
        sep=';',
        header=0,
        names=['id', 'name_pt', 'name_en'],
        converters={
            "id": str
        }
    )

    countries = {}

    for _, row in df.iterrows():
        country = {
            'id': row["id"],
            'name_pt': row["name_pt"],
            'name_en': row["name_en"],
            'continent': continents.get(row["id"], {})
        }

        countries[row['id']] = country
        redis.set('country/' + str(row['id']), pickle.dumps(country))

    s3.put('attrs_country.json', json.dumps(countries, ensure_ascii=False))

    click.echo("Countries loaded.")
Example #22
def hedu_course(upload):
    csv = s3.get('metadata/hedu_courses.csv')
    df = pandas.read_csv(csv,
                         sep=';',
                         header=0,
                         names=['id', 'name_en', 'name_pt'],
                         converters={"id": str})

    hedu_courses = {}
    hedu_courses_field = {}

    for _, row in df.iterrows():
        if len(row['id']) == 2:
            hedu_course_field = {
                'id': row['id'],
                'name_pt': row["name_pt"],
                'name_en': row["name_en"],
            }

            if upload != 'only_s3':
                redis.set('hedu_course_field/' + str(row['id']),
                          json.dumps(hedu_course_field, ensure_ascii=False))
            hedu_courses_field[row['id']] = hedu_course_field

    for _, row in df.iterrows():
        if len(row['id']) == 6:
            hedu_course = {
                'id': row['id'],
                'name_pt': row["name_pt"],
                'name_en': row["name_en"],
                'hedu_course_field': hedu_courses_field[row['id'][:2]]
            }

            if upload != 'only_s3':
                redis.set('hedu_course/' + str(row['id']),
                          json.dumps(hedu_course, ensure_ascii=False))
            hedu_courses[row['id']] = hedu_course

    if upload != 'only_redis':
        s3.put('hedu_course.json', json.dumps(hedu_courses,
                                              ensure_ascii=False))

        s3.put('hedu_course_field.json',
               json.dumps(hedu_courses_field, ensure_ascii=False))

    click.echo("HEDU Courses loaded.")
Example #23
def countries(upload):
    csv = s3.get('metadata/continents.csv')
    df_continents = pandas.read_csv(
        csv,
        sep=';',
        header=0,
        names=['id', 'country_id', 'name_en', 'name_pt'],
        converters={"country_id": lambda x: '%03d' % int(x)})

    continents = {}

    for _, row in df_continents.iterrows():
        continents[row['country_id']] = {
            'id': row["id"],
            'name_en': row["name_en"],
            'name_pt': row["name_pt"],
        }

    csv = s3.get('metadata/wld.csv')
    df = pandas.read_csv(csv,
                         sep=';',
                         header=0,
                         names=['id', 'name_pt', 'name_en', 'abbreviation'],
                         converters={"id": str})

    countries = {}

    for _, row in df.iterrows():
        country = {
            'id': row["id"],
            'name_pt': row["name_pt"],
            'name_en': row["name_en"],
            'abbrv': row["abbreviation"],
            'continent': continents.get(row["id"], {})
        }

        countries[row['id']] = country
        if upload != 'only_s3':
            redis.set('country/' + str(row['id']),
                      json.dumps(country, ensure_ascii=False))

    if upload != 'only_redis':
        s3.put('country.json', json.dumps(countries, ensure_ascii=False))

    click.echo("Countries loaded.")
Example #24
def occupations():

    csv = s3.get('redshift/attrs/attrs_cbo.csv')
    df = pandas.read_csv(
        csv,
        sep=';',
        header=0,
        names=['id','name_en','name_pt'],
        converters={
            "id": str
        }
    )

    occupations_family = {}
    occupations_group = {}

    for _, row in df.iterrows():
        if len(row['id']) == 1:
            occupation_group = {
                'id': row['id'],
                'name_pt': row["name_pt"],
                'name_en': row["name_en"]
            }

            redis.set('occupation_group/' + str(row['id']), pickle.dumps(occupation_group))
            occupations_group[row['id']] = occupation_group

    for _, row in df.iterrows():
        if len(row['id']) == 4:
            occupation_family = {
                'id': row['id'],
                'name_pt': row["name_pt"],
                'name_en': row["name_en"],
                'occupation_group': occupations_group[row['id'][0]],
            }

            redis.set('occupation_family/' + str(row['id']), pickle.dumps(occupation_family))
            occupations_family[row['id']] = occupation_family

    s3.put('attrs_occupation_family.json', json.dumps(occupations_family, ensure_ascii=False))

    s3.put('attrs_occupation_group.json', json.dumps(occupations_group, ensure_ascii=False))

    click.echo("Occupations loaded.")
Example #25
def hedu_course():
    csv = s3.get('redshift/attrs/attrs_hedu_course.csv')
    df = pandas.read_csv(
        csv,
        sep=';',
        header=0,
        names=['id', 'name_en', 'name_pt'],
        converters={
            "id": str
        }
    )

    hedu_courses = {}
    hedu_courses_field = {}

    for _, row in df.iterrows():
        if len(row['id']) == 2:
            hedu_course_field = {
                'id': row['id'],
                'name_pt': row["name_pt"],
                'name_en': row["name_en"],
            }

            redis.set('hedu_course_field/' + str(row['id']), pickle.dumps(hedu_course_field))
            hedu_courses_field[row['id']] = hedu_course_field

    for _, row in df.iterrows():
        if len(row['id']) == 6:
            hedu_course = {
                'id': row['id'],
                'name_pt': row["name_pt"],
                'name_en': row["name_en"],
                'hedu_course_field': hedu_courses_field[row['id'][:2]]
            }

            redis.set('hedu_course/' + str(row['id']), pickle.dumps(hedu_course))
            hedu_courses[row['id']] = hedu_course

    s3.put('attrs_hedu_course.json', json.dumps(hedu_courses, ensure_ascii=False))

    s3.put('attrs_hedu_course_field.json', json.dumps(hedu_courses_field, ensure_ascii=False))

    click.echo("HEDU Courses loaded.")
Example #26
def ports(upload):
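    """Load ports keyed by id, with name_pt and name_en built as
    '<name> - <state>'; write to Redis and/or S3 per the upload flag."""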
    csv = s3.get('metadata/ports.csv')
    df = pandas.read_csv(csv, sep=';', header=0, names=['id', 'name', 'state'])

    ports = {}

    for _, row in df.iterrows():
        port = {
            'name_pt': row["name"] + ' - ' + row["state"],
            'name_en': row["name"] + ' - ' + row["state"]
        }
        ports[row['id']] = port
        if upload != 'only_s3':
            redis.set('port/' + str(row['id']),
                      json.dumps(port, ensure_ascii=False))

    if upload != 'only_redis':
        s3.put('port.json', json.dumps(ports, ensure_ascii=False))

    click.echo("Ports loaded.")
Example #27
def sc_course():
    csv = s3.get('redshift/attrs/attrs_sc_course.csv')
    df = pandas.read_csv(
        csv,
        sep=';',
        header=0,
        names=['id', 'name_en', 'name_pt'],
        converters={
            "id": str
        }
    )

    sc_courses = {}
    sc_courses_field = {}

    for _, row in df.iterrows():

        if len(row['id']) == 2:
            sc_course_field = {
                'name_pt': row["name_pt"],
                'name_en': row["name_en"]
            }

            redis.set('sc_course_field/' + str(row['id']), pickle.dumps(sc_course_field))
            sc_courses_field[row['id']] = sc_course_field

        elif len(row['id']) == 5:
            sc_course = {
                'name_pt': row["name_pt"],
                'name_en': row["name_en"]
            }

            redis.set('sc_course/' + str(row['id']), pickle.dumps(sc_course))
            sc_courses[row['id']] = sc_course

    s3.put('attrs_sc_course.json', json.dumps(sc_courses, ensure_ascii=False))
    s3.put('attrs_sc_course_field.json', json.dumps(sc_courses_field, ensure_ascii=False))

    click.echo("SC Courses loaded.")
Example #28
def ports():

    csv = s3.get('redshift/attrs/attrs_porto.csv')
    df = pandas.read_csv(
        csv,
        sep=';',
        header=0,
        names=['id','name','state']
    )

    ports = {}

    for _, row in df.iterrows():
        port = {
            'name_pt': row["name"] + ' - ' + row["state"],
            'name_en': row["name"] + ' - ' + row["state"]
        }
        ports[row['id']] = port
        redis.set('port/' + str(row['id']), pickle.dumps(port))

    s3.put('attrs_port.json', json.dumps(ports, ensure_ascii=False))

    click.echo("Ports loaded.")
Example #29
def establishments():
    csv = s3.get('attrs/cnes_final.csv')
    df = pandas.read_csv(
        csv,
        sep=';',
        header=0,
        names=['id', 'name_en', 'name_pt'],
        converters={
            'id': str,
        }
    )

    for _, row in df.iterrows():

        establishment = {
            'id': row['id'],
            'name_pt': row["name_pt"],
            'name_en': row["name_en"],
        }

        redis.set('establishment/' + str(row['id']), pickle.dumps(establishment))

    click.echo("Establishment loaded.")
Example #30
def establishments(upload):
    csv = s3.get('metadata/cnes_final.csv')
    df = pandas.read_csv(csv,
                         sep=';',
                         header=0,
                         names=['id', 'name_en', 'name_pt'],
                         converters={
                             'id': str,
                         })

    for _, row in df.iterrows():

        establishment = {
            'id': row['id'],
            'name_pt': row["name_pt"],
            'name_en': row["name_en"],
        }

        if upload != 'only_s3':
            redis.set('establishment/' + str(row['id']),
                      json.dumps(establishment, ensure_ascii=False))

    click.echo("Establishment loaded.")
Example #31
def continents(upload):
    csv = s3.get('metadata/continents.csv')
    df = pandas.read_csv(
        csv,
        sep=';',
        header=0,
        names=['id', 'country_id', 'name_en', 'name_pt'],
        converters={
            "country_id": lambda x: '%03d' % int(x)
        }
    )

    continents = {}

    for _, row in df.iterrows():

        if continents.get(row["id"]):
            continent = continents[row["id"]]
            continent["countries"].append(row["country_id"])
        else:
            continent = {
                'countries': [
                    row["country_id"]
                ],
                'name_en': row["name_en"],
                'name_pt': row["name_pt"]
            }

        continents[row['id']] = continent
        if upload != 'only_s3':
            redis.set('continent/' + str(row['id']), json.dumps(continent, ensure_ascii=False))

    if upload != 'only_redis':
        s3.put('continent.json', json.dumps(continents, ensure_ascii=False))

    click.echo("Continents loaded.")
Example #32
def municipalities():
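    """Build municipality, microregion and mesoregion mappings from a single
    CSV; the state and region of each municipality are read back from Redis,
    so states() and regions() must have been loaded first."""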
    csv = s3.get('redshift/attrs/attrs_municipios.csv')
    df = pandas.read_csv(
        csv,
        sep=';',
        header=0,
        names=['uf_id', 'uf_name', 'mesorregiao_id', 'mesorregiao_name', 'microrregiao_id', 'microrregiao_name', 'municipio_id', 'municipio_name', 'municipio_id_mdic'],
        converters={
            "uf_id": str,
            "mesorregiao_id": str,
            "microrregiao_id": str,
            "municipio_id": str
        }
    )

    municipalities = {}
    microregions = {}
    mesoregions = {}

    for _, row in df.iterrows():
        municipality = {
            'id': row['municipio_id'],
            'name_pt': row["municipio_name"],
            'name_en': row["municipio_name"],
            'mesoregion': {
                'id': row["mesorregiao_id"],
                'name_pt': row["mesorregiao_name"],
                'name_en': row["mesorregiao_name"],
            },
            'microregion': {
                'id': row["microrregiao_id"],
                'name_pt': row["microrregiao_name"],
                'name_en': row["microrregiao_name"],
            },
            'state': pickle.loads(redis.get('state/' + row['municipio_id'][:2])),
            'region': pickle.loads(redis.get('region/' + row['municipio_id'][0])),
        }

        municipalities[row['municipio_id']] = municipality
        microregions[row['microrregiao_id']] = municipality['microregion']
        mesoregions[row['mesorregiao_id']] = municipality['mesoregion']

        redis.set('municipality/' + str(row['municipio_id']), pickle.dumps(municipality))
        redis.set('microregion/' + str(row['microrregiao_id']), pickle.dumps(municipality['microregion']))
        redis.set('mesoregion/' + str(row['mesorregiao_id']), pickle.dumps(municipality['mesoregion']))

    s3.put('attrs_municipality.json', json.dumps(municipalities, ensure_ascii=False))

    s3.put('attrs_microregion.json', json.dumps(microregions, ensure_ascii=False))

    s3.put('attrs_mesoregion.json', json.dumps(mesoregions, ensure_ascii=False))

    click.echo("Municipalities, microregions and mesoregions loaded.")
Example #33
def products():
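    """Load HS product codes: 'Seção' rows become product sections, 'Capítulo'
    rows become chapters (id without its first two characters), and 'Posição'
    rows become products linked to their section and chapter."""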
    csv = s3.get('redshift/attrs/attrs_hs.csv')
    df = pandas.read_csv(
        csv,
        sep=';',
        header=0,
        names=['id','name_pt','name_en','profundidade_id','profundidade'],
        converters={
            "id": str
        }
    )

    products = {}
    product_sections = {}
    product_chapters = {}

    for _, row in df.iterrows():
        if row['profundidade'] == 'Seção':
            product_section_id = row['id']

            product_section = {
                'id': product_section_id,
                'name_pt': row["name_pt"],
                'name_en': row["name_en"],
            }

            redis.set('product_section/' + str(product_section_id), pickle.dumps(product_section))
            product_sections[product_section_id] = product_section

        elif row['profundidade'] == 'Capítulo':
            product_chapter_id = row['id'][2:]

            product_chapter = {
                'id': product_chapter_id,
                'name_pt': row["name_pt"],
                'name_en': row["name_en"],
            }

            redis.set('product_chapter/' + str(product_chapter_id), pickle.dumps(product_chapter))
            product_chapters[product_chapter_id] = product_chapter

    for _, row in df.iterrows():
        if row['profundidade'] == 'Posição':
            product_id = row['id'][2:]
            product_section_id = row["id"][:2]
            product_chapter_id = row["id"][2:4]

            product = {
                'name_pt': row["name_pt"],
                'name_en': row["name_en"],
                'product_section': product_sections[product_section_id],
                'product_chapter': product_chapters[product_chapter_id],
            }

            products[product_id] = product
            redis.set('product/' + str(product_id), pickle.dumps(product))

    s3.put('attrs_product.json', json.dumps(products, ensure_ascii=False))

    s3.put('attrs_product_section.json', json.dumps(product_sections, ensure_ascii=False))

    s3.put('attrs_product_chapter.json', json.dumps(product_chapters, ensure_ascii=False))

    click.echo("Products loaded.")
Example #34
def municipalities(upload):
    csv = s3.get('metadata/municipalities.csv')
    df = pandas.read_csv(
        csv,
        sep=';',
        header=0,
        names=[
            'uf_id',
            'uf_name',
            'mesorregiao_id',
            'mesorregiao_name',
            'microrregiao_id',
            'microrregiao_name',
            'municipio_id',
            'municipio_name',
            'municipio_id_mdic',
            'municipio_old_id',
            'microrregiao_old_id',
            'mesorregiao_old_id',
            'state_old_id',
        ],
        converters={
            "uf_id": str,
            "mesorregiao_id": str,
            "microrregiao_id": str,
            "municipio_id": str
        }
    )

    municipalities = {}
    microregions = {}
    mesoregions = {}

    for _, row in df.iterrows():
        municipality = {
            'id': row['municipio_id'],
            'name_pt': row["municipio_name"],
            'name_en': row["municipio_name"],
            'old_id': row['municipio_old_id'],
            'mesoregion': {
                'id': row["mesorregiao_id"],
                'name_pt': row["mesorregiao_name"],
                'name_en': row["mesorregiao_name"],
                'old_id': row["mesorregiao_old_id"],
            },
            'microregion': {
                'id': row["microrregiao_id"],
                'name_pt': row["microrregiao_name"],
                'name_en': row["microrregiao_name"],
                'old_id': row["microrregiao_old_id"],
            },
            'state': json.loads(
                redis.get('state/' + row['municipio_id'][:2]).decode('utf-8')
            ),
            'region': json.loads(
                redis.get('region/' + row['municipio_id'][0]).decode('utf-8')
            ),
        }

        municipalities[row['municipio_id']] = municipality
        microregions[row['microrregiao_id']] = municipality['microregion']
        mesoregions[row['mesorregiao_id']] = municipality['mesoregion']

        if upload != 'only_s3':
            redis.set('municipality/' + str(row['municipio_id']), 
                  json.dumps(municipality, ensure_ascii=False))
            redis.set('microregion/' + str(row['microrregiao_id']),
                  json.dumps(municipality['microregion'], ensure_ascii=False))
            redis.set('mesoregion/' + str(row['mesorregiao_id']),
                  json.dumps(municipality['mesoregion'], ensure_ascii=False))

    if upload != 'only_redis':
        s3.put('municipality.json', json.dumps(
            municipalities, ensure_ascii=False))

        s3.put('microregion.json', json.dumps(
            microregions, ensure_ascii=False))

        s3.put('mesoregion.json', json.dumps(
            mesoregions, ensure_ascii=False))

    click.echo("Municipalities, microregions and mesoregions loaded.")
Example #35
def industries(upload):
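    """Load CNAE industries: 1-character ids become sections, 3-character ids
    become divisions (keyed by their last two characters), and 6-character
    ids become classes linked to their section and division; 'Undefined'
    placeholders are pre-seeded for class '-1' and section '0'."""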
    csv = s3.get('metadata/cnae.csv')
    df = pandas.read_csv(
        csv,
        sep=',',
        header=0,
        names=['id', 'name_en', 'name_pt'],
        converters={
            "id": str
        }
    )

    industry_sections = {}
    industry_divisions = {}
    industry_classes = {}

    industry_classes['-1'] = {
        'name_pt': 'Não definido',
        'name_en': 'Undefined'
    }

    industry_sections['0'] = {
        'name_pt': 'Não definido',
        'name_en': 'Undefined'
    }

    for _, row in df.iterrows():
        if len(row['id']) == 1:
            industry_section = {
                'id': row['id'],
                'name_pt': row["name_pt"],
                'name_en': row["name_en"]
            }

            if upload != 'only_s3':
                redis.set('industry_section/' +
                      str(row['id']), json.dumps(industry_section, ensure_ascii=False))
            industry_sections[row['id']] = industry_section

    for _, row in df.iterrows():
        if len(row['id']) == 3:
            division_id = row['id'][1:3]

            industry_division = {
                'id': division_id,
                'name_pt': row["name_pt"],
                'name_en': row["name_en"],
                'industry_section': row["id"][0]
            }

            if upload != 'only_s3':
                redis.set('industry_division/' + str(division_id),
                      json.dumps(industry_division, ensure_ascii=False))
            industry_divisions[division_id] = industry_division

    for _, row in df.iterrows():
        if len(row['id']) == 6:
            class_id = row["id"][1:]

            industry_classe = {
                'id': class_id,
                'name_pt': row["name_pt"],
                'name_en': row["name_en"],
                'industry_section': industry_sections[row["id"][0]],
                'industry_division': industry_divisions[row["id"][1:3]]
            }

            if upload != 'only_s3':
                redis.set('industry_class/' + str(class_id),
                      json.dumps(industry_classe, ensure_ascii=False))
            industry_classes[class_id] = industry_classe

    if upload != 'only_redis':
        s3.put('industry_class.json', json.dumps(
            industry_classes, ensure_ascii=False))

        s3.put('industry_division.json', json.dumps(
            industry_divisions, ensure_ascii=False))

        s3.put('industry_section.json', json.dumps(
            industry_sections, ensure_ascii=False))

    click.echo("Industries loaded.")
Example #36
def industries():
    csv = s3.get('redshift/attrs/attrs_cnae.csv')
    df = pandas.read_csv(
        csv,
        sep=',',
        header=0,
        names=['id','name_en','name_pt'],
        converters={
            "id": str
        }
    )

    industry_sections = {}
    industry_divisions = {}
    industry_classes = {}

    industry_classes['-1'] = {
        'name_pt': 'Não definido',
        'name_en': 'Undefined'
    }

    industry_sections['0'] = {
        'name_pt': 'Não definido',
        'name_en': 'Undefined'
    }

    for _, row in df.iterrows():
        if len(row['id']) == 1:
            industry_section = {
                'id': row['id'],
                'name_pt': row["name_pt"],
                'name_en': row["name_en"]
            }

            redis.set('industry_section/' + str(row['id']), pickle.dumps(industry_section))
            industry_sections[row['id']] = industry_section

    for _, row in df.iterrows():
        if len(row['id']) == 3:
            division_id = row['id'][1:3]

            industry_division = {
                'id': division_id,
                'name_pt': row["name_pt"],
                'name_en': row["name_en"],
                'industry_section': row["id"][0]
            }


            redis.set('industry_division/' + str(division_id), pickle.dumps(industry_division))
            industry_divisions[division_id] = industry_division

    for _, row in df.iterrows():
        if len(row['id']) == 6:
            class_id = row["id"][1:]

            industry_classe = {
                'id': class_id,
                'name_pt': row["name_pt"],
                'name_en': row["name_en"],
                'industry_section': industry_sections[row["id"][0]],
                'industry_division': industry_divisions[row["id"][1:3]]
            }

            redis.set('industry_class/' + str(class_id), pickle.dumps(industry_classe))
            industry_classes[class_id] = industry_classe

    s3.put('attrs_industry_class.json', json.dumps(industry_classes, ensure_ascii=False))

    s3.put('attrs_industry_division.json', json.dumps(industry_divisions, ensure_ascii=False))

    s3.put('attrs_industry_section.json', json.dumps(industry_sections, ensure_ascii=False))

    click.echo("Industries loaded.")