def countries(upload):
    """Load country metadata (joined with continent info) into Redis and/or S3.

    upload: 'only_s3' skips the Redis writes, 'only_redis' skips the S3
    JSON dump; any other value writes to both stores.
    """
    continents_csv = s3.get('metadata/continents.csv')
    continents_frame = pandas.read_csv(
        continents_csv, sep=';', header=0,
        names=['id', 'country_id', 'name_en', 'name_pt'],
        # Zero-pad country ids to three digits so they match the wld file.
        converters={"country_id": lambda x: '%03d' % int(x)})
    continent_by_country = {}
    for _, rec in continents_frame.iterrows():
        continent_by_country[rec['country_id']] = {
            'id': rec["id"],
            'name_en': rec["name_en"],
            'name_pt': rec["name_pt"],
        }
    wld_csv = s3.get('metadata/wld.csv')
    wld_frame = pandas.read_csv(
        wld_csv, sep=';', header=0,
        names=['id', 'name_pt', 'name_en', 'abbreviation'],
        converters={"id": str})
    collected = {}
    for _, rec in wld_frame.iterrows():
        entry = {
            'id': rec["id"],
            'name_pt': rec["name_pt"],
            'name_en': rec["name_en"],
            'abbrv': rec["abbreviation"],
            'continent': continent_by_country.get(rec["id"], {}),
        }
        collected[rec['id']] = entry
        if upload != 'only_s3':
            redis.set(f"country/{rec['id']}",
                      json.dumps(entry, ensure_ascii=False))
    if upload != 'only_redis':
        s3.put('country.json', json.dumps(collected, ensure_ascii=False))
    click.echo("Countries loaded.")
def countries():
    """Legacy loader: country attrs (with continent linkage) from the
    redshift CSVs into Redis (pickled entries) plus an S3 JSON dump."""
    continents_csv = s3.get('redshift/attrs/attrs_continente.csv')
    continents_frame = pandas.read_csv(
        continents_csv, sep=';', header=0,
        names=['id', 'country_id', 'name_en', 'name_pt'],
        # Zero-pad country ids to three digits so they match the wld file.
        converters={"country_id": lambda x: '%03d' % int(x)})
    continent_by_country = {}
    for _, rec in continents_frame.iterrows():
        continent_by_country[rec['country_id']] = {
            'id': rec["id"],
            'name_en': rec["name_en"],
            'name_pt': rec["name_pt"],
        }
    wld_csv = s3.get('redshift/attrs/attrs_wld.csv')
    wld_frame = pandas.read_csv(
        wld_csv, sep=';', header=0,
        names=['id', 'name_pt', 'name_en'],
        converters={"id": str})
    collected = {}
    for _, rec in wld_frame.iterrows():
        entry = {
            'id': rec["id"],
            'name_pt': rec["name_pt"],
            'name_en': rec["name_en"],
            'continent': continent_by_country.get(rec["id"], {}),
        }
        collected[rec['id']] = entry
        redis.set(f"country/{rec['id']}", pickle.dumps(entry))
    s3.put('attrs_country.json', json.dumps(collected, ensure_ascii=False))
    click.echo("Countries loaded.")
def territories(upload):
    """Load development-territory metadata into Redis and/or S3.

    upload: 'only_s3' skips Redis, 'only_redis' skips the S3 JSON dump.
    """
    source = s3.get('metadata/development_territories.csv')
    frame = pandas.read_csv(
        source, sep=';', header=0,
        names=['territory', 'microterritory', 'municipy_id'],
        converters={"municipy_id": str})
    by_municipy = {}
    for _, rec in frame.iterrows():
        entry = {
            'territory': rec["territory"],
            'microterritory': rec["microterritory"],
            'municipy_id': rec["municipy_id"],
        }
        by_municipy[rec['municipy_id']] = entry
        if upload != 'only_s3':
            redis.set(f"territory/{rec['municipy_id']}",
                      json.dumps(entry, ensure_ascii=False))
    if upload != 'only_redis':
        s3.put('territory.json', json.dumps(by_municipy, ensure_ascii=False))
    click.echo("Territories loaded.")
def establishments(upload):
    """Load health-establishment (CNES) names into Redis.

    Note there is no S3 JSON dump for this attribute; with
    upload == 'only_s3' this is effectively a no-op beyond reading the CSV.
    """
    source = s3.get('metadata/cnes_final.csv')
    frame = pandas.read_csv(
        source, sep=';', header=0,
        names=['id', 'name_en', 'name_pt'],
        converters={'id': str})
    for _, rec in frame.iterrows():
        payload = {
            'id': rec['id'],
            'name_pt': rec["name_pt"],
            'name_en': rec["name_en"],
        }
        if upload != 'only_s3':
            redis.set(f"establishment/{rec['id']}",
                      json.dumps(payload, ensure_ascii=False))
    click.echo("Establishment loaded.")
def regions(upload):
    """Load region metadata (with bilingual names/abbreviations and the
    legacy old_id) into Redis and/or S3 depending on ``upload``."""
    source = s3.get('metadata/regions.csv')
    frame = pandas.read_csv(
        source, sep=';', header=0,
        names=['id', 'name_en', 'abbr_en', 'name_pt', 'abbr_pt', 'old_id'])
    collected = {}
    for _, rec in frame.iterrows():
        entry = {
            'id': rec['id'],
            'name_en': rec["name_en"],
            'abbr_en': rec['abbr_en'],
            'name_pt': rec["name_pt"],
            'abbr_pt': rec['abbr_pt'],
            'old_id': rec['old_id'],
        }
        collected[rec['id']] = entry
        if upload != 'only_s3':
            redis.set(f"region/{rec['id']}",
                      json.dumps(entry, ensure_ascii=False))
    if upload != 'only_redis':
        s3.put('region.json', json.dumps(collected, ensure_ascii=False))
    click.echo("Regions loaded.")
def regions():
    """Legacy loader: region attrs from redshift into Redis (pickled)
    plus an S3 JSON dump."""
    source = s3.get('redshift/attrs/attrs_regioes.csv')
    frame = pandas.read_csv(
        source, sep=';', header=0,
        names=['id', 'name_en', 'abbr_en', 'name_pt', 'abbr_pt'])
    collected = {}
    for _, rec in frame.iterrows():
        entry = {
            'id': rec['id'],
            'name_en': rec["name_en"],
            'abbr_en': rec['abbr_en'],
            'name_pt': rec["name_pt"],
            'abbr_pt': rec['abbr_pt'],
        }
        collected[rec['id']] = entry
        redis.set(f"region/{rec['id']}", pickle.dumps(entry))
    s3.put('attrs_region.json', json.dumps(collected, ensure_ascii=False))
    click.echo("Regions loaded.")
def continents():
    """Legacy loader: continents (each aggregating its country ids) from
    redshift into Redis (pickled) plus an S3 JSON dump."""
    source = s3.get('redshift/attrs/attrs_continente.csv')
    frame = pandas.read_csv(
        source, sep=';', header=0,
        names=['id', 'country_id', 'name_en', 'name_pt'],
        converters={"country_id": lambda x: '%03d' % int(x)})
    grouped = {}
    for _, rec in frame.iterrows():
        # One row per (continent, country); accumulate countries per continent.
        entry = grouped.setdefault(rec['id'], {
            'countries': [],
            'name_en': rec["name_en"],
            'name_pt': rec["name_pt"],
        })
        entry["countries"].append(rec["country_id"])
        # Re-written on every row so the final value holds all countries.
        redis.set(f"continent/{rec['id']}", pickle.dumps(entry))
    s3.put('attrs_continent.json', json.dumps(grouped, ensure_ascii=False))
    click.echo("Continents loaded.")
def territories():
    """Legacy loader: development territories from redshift into Redis
    (pickled) plus an S3 JSON dump."""
    source = s3.get('redshift/attrs/attrs_territorios_de_desenvolvimento.csv')
    frame = pandas.read_csv(
        source, sep=';', header=0,
        names=['territory', 'microterritory', 'municipy_id'],
        converters={"municipy_id": str})
    by_municipy = {}
    for _, rec in frame.iterrows():
        entry = {
            'territory': rec["territory"],
            'microterritory': rec["microterritory"],
            'municipy_id': rec["municipy_id"],
        }
        by_municipy[rec['municipy_id']] = entry
        redis.set(f"territory/{rec['municipy_id']}", pickle.dumps(entry))
    s3.put('attrs_territory.json', json.dumps(by_municipy, ensure_ascii=False))
    click.echo("Territories loaded.")
def states():
    """Legacy loader: state (UF) attrs keyed by IBGE id into Redis
    (pickled) plus an S3 JSON dump. Rows without an IBGE id are skipped."""
    source = s3.get('redshift/attrs/attrs_uf_ibge_mdic.csv')
    frame = pandas.read_csv(
        source, sep=';', header=0,
        names=['mdic_name', 'mdic_id', 'ibge_id', 'uf'],
        converters={"ibge_id": str})
    collected = {}
    for _, rec in frame.iterrows():
        ibge_id = rec['ibge_id']
        if not ibge_id:
            continue  # some MDIC rows have no IBGE counterpart
        entry = {
            'id': ibge_id,
            'name_pt': rec["mdic_name"],
            'name_en': rec["mdic_name"],
            'abbr_pt': rec['uf'],
            'abbr_en': rec['uf'],
        }
        collected[ibge_id] = entry
        redis.set(f"state/{ibge_id}", pickle.dumps(entry))
    s3.put('attrs_state.json', json.dumps(collected, ensure_ascii=False))
    click.echo("States loaded.")
def universities(upload):
    """Load university metadata into Redis and/or S3 depending on ``upload``."""
    source = s3.get('metadata/universities.csv')
    frame = pandas.read_csv(
        source, sep=';', header=0,
        names=['id', 'name_pt', 'name_en', 'school_type'],
        converters={"id": str})
    collected = {}
    for _, rec in frame.iterrows():
        entry = {
            'id': rec["id"],
            'name_pt': rec["name_pt"],
            'name_en': rec["name_en"],
            'school_type': rec["school_type"],
        }
        collected[rec['id']] = entry
        if upload != 'only_s3':
            redis.set(f"university/{rec['id']}",
                      json.dumps(entry, ensure_ascii=False))
    if upload != 'only_redis':
        s3.put('university.json', json.dumps(collected, ensure_ascii=False))
    click.echo("Universities loaded.")
def attrs(attrs):
    """Legacy generic loader for simple id/name_pt/name_en attribute CSVs.

    For each spec in ``attrs`` (dicts with 'name' and 'csv_filename'):
    stores one pickled entry per row in Redis under '<name>/<id>' and a
    JSON map of all rows in S3 as 'attrs_<name>.json'.
    """
    for attr in attrs:
        click.echo('Loading %s ...' % attr['name'])
        csv = s3.get('redshift/attrs/%s' % attr['csv_filename'])
        df = pandas.read_csv(
            csv, sep=';', header=0,
            converters={'id': str},
            engine='c')
        # BUG FIX: the JSON document was previously assembled by manual
        # string concatenation, which produces invalid JSON for ids
        # containing quotes/backslashes and can emit duplicate keys.
        # Building a dict and serializing with json.dumps yields the same
        # bytes for well-formed input and is robust otherwise.
        items = {}
        for _, row in df.iterrows():
            item = {
                'id': row["id"],
                'name_pt': row["name_pt"],
                'name_en': row["name_en"],
            }
            items[row['id']] = item
            redis.set(attr['name'] + '/' + str(row['id']), pickle.dumps(item))
        s3.put('attrs_' + attr['name'] + '.json',
               json.dumps(items, ensure_ascii=False))
        click.echo(" loaded.")
def inflections():
    """Legacy loader: word inflections (gender/plural forms) from redshift
    into Redis (pickled) plus an S3 JSON dump."""
    source = s3.get('redshift/attrs/attrs_infleccoes.csv')
    frame = pandas.read_csv(
        source, sep=';', header=0,
        names=['id', 'name_en', 'name_pt', 'gender', 'plural'])
    collected = {}
    for _, rec in frame.iterrows():
        entry = {
            'id': rec['id'],
            'name_en': rec['name_en'],
            'name_pt': rec['name_pt'],
            'gender': rec['gender'],
            'plural': rec['plural'],
        }
        collected[rec['id']] = entry
        redis.set(f"inflection/{rec['id']}", pickle.dumps(entry))
    s3.put('attrs_inflection.json', json.dumps(collected, ensure_ascii=False))
    click.echo("Inflections loaded.")
def attrs(attrs, upload):
    """Generic loader for simple id/name_pt/name_en attribute CSVs.

    Each spec in ``attrs`` provides 'name' and 'csv_filename'. ``upload``
    selects targets: 'only_s3' skips Redis, 'only_redis' skips S3.
    """
    for spec in attrs:
        click.echo('Loading %s ...' % spec['name'])
        source = s3.get('metadata/%s' % spec['csv_filename'])
        frame = pandas.read_csv(
            source, sep=';', header=0,
            converters={'id': str},
            engine='c')
        collected = {}
        for _, rec in frame.iterrows():
            entry = {
                'id': rec["id"],
                'name_pt': rec["name_pt"],
                'name_en': rec["name_en"],
            }
            collected[rec['id']] = entry
            if upload != 'only_s3':
                redis.set(spec['name'] + '/' + str(rec['id']),
                          json.dumps(entry, ensure_ascii=False))
        if upload != 'only_redis':
            s3.put(spec['name'] + '.json',
                   json.dumps(collected, ensure_ascii=False))
        click.echo(" loaded.")
def states(upload):
    """Load state (UF) metadata keyed by IBGE id into Redis and/or S3.

    Rows without an IBGE id are skipped.
    """
    source = s3.get('metadata/uf_ibge_mdic.csv')
    frame = pandas.read_csv(
        source, sep=';', header=0,
        names=['mdic_name', 'mdic_id', 'ibge_id', 'uf', 'old_id'],
        converters={"ibge_id": str})
    collected = {}
    for _, rec in frame.iterrows():
        ibge_id = rec['ibge_id']
        if not ibge_id:
            continue  # some MDIC rows have no IBGE counterpart
        entry = {
            'id': ibge_id,
            'name_pt': rec["mdic_name"],
            'name_en': rec["mdic_name"],
            'abbr_pt': rec['uf'],
            'abbr_en': rec['uf'],
            'old_id': rec['old_id'],
        }
        collected[ibge_id] = entry
        if upload != 'only_s3':
            redis.set(f"state/{ibge_id}",
                      json.dumps(entry, ensure_ascii=False))
    if upload != 'only_redis':
        s3.put('state.json', json.dumps(collected, ensure_ascii=False))
    click.echo("States loaded.")
def regions(upload):
    """Load region metadata (bilingual names/abbreviations plus old_id)
    into Redis and/or S3 depending on ``upload``."""
    raw = s3.get('metadata/regions.csv')
    df = pandas.read_csv(
        raw, sep=';', header=0,
        names=['id', 'name_en', 'abbr_en', 'name_pt', 'abbr_pt', 'old_id'])
    all_regions = {}
    for _, row in df.iterrows():
        region = {
            'id': row['id'],
            'name_en': row["name_en"],
            'abbr_en': row['abbr_en'],
            'name_pt': row["name_pt"],
            'abbr_pt': row['abbr_pt'],
            'old_id': row['old_id'],
        }
        all_regions[row['id']] = region
        if upload != 'only_s3':
            redis.set('region/' + str(row['id']),
                      json.dumps(region, ensure_ascii=False))
    if upload != 'only_redis':
        s3.put('region.json', json.dumps(all_regions, ensure_ascii=False))
    click.echo("Regions loaded.")
def economic_blocs(upload):
    """Load economic blocs (each aggregating its member country ids) into
    Redis and/or S3 depending on ``upload``."""
    source = s3.get('metadata/economic_blocs.csv')
    frame = pandas.read_csv(
        source, sep=';', header=0,
        names=['id', 'name', 'country_id'],
        converters={"country_id": str})
    grouped = {}
    for _, rec in frame.iterrows():
        # One row per (bloc, country); accumulate members per bloc.
        entry = grouped.setdefault(rec['id'], {
            'name_en': rec["name"],
            'name_pt': rec["name"],
            'countries': [],
        })
        entry["countries"].append(rec["country_id"])
        if upload != 'only_s3':
            # Re-written on every row; final value holds all members.
            redis.set(f"economic_bloc/{rec['id']}",
                      json.dumps(entry, ensure_ascii=False))
    if upload != 'only_redis':
        s3.put('economic_bloc.json', json.dumps(grouped, ensure_ascii=False))
    click.echo("Economic Blocs loaded.")
def inflections(upload):
    """Load word inflections (gender/plural forms) into Redis and/or S3
    depending on ``upload``."""
    source = s3.get('metadata/inflections.csv')
    frame = pandas.read_csv(
        source, sep=';', header=0,
        names=['id', 'name_en', 'name_pt', 'gender', 'plural'])
    collected = {}
    for _, rec in frame.iterrows():
        entry = {
            'id': rec['id'],
            'name_en': rec['name_en'],
            'name_pt': rec['name_pt'],
            'gender': rec['gender'],
            'plural': rec['plural'],
        }
        collected[rec['id']] = entry
        if upload != 'only_s3':
            redis.set(f"inflection/{rec['id']}",
                      json.dumps(entry, ensure_ascii=False))
    if upload != 'only_redis':
        s3.put('inflection.json', json.dumps(collected, ensure_ascii=False))
    click.echo("Inflections loaded.")
def economic_blocks():
    """Legacy loader: economic blocks (each aggregating its member country
    ids) from redshift into Redis (pickled) plus an S3 JSON dump."""
    source = s3.get('redshift/attrs/attrs_bloco_economico.csv')
    frame = pandas.read_csv(
        source, sep=';', header=0,
        names=['id', 'name', 'country_id'],
        converters={"country_id": str})
    grouped = {}
    for _, rec in frame.iterrows():
        # One row per (block, country); accumulate members per block.
        entry = grouped.setdefault(rec['id'], {
            'name_en': rec["name"],
            'name_pt': rec["name"],
            'countries': [],
        })
        entry["countries"].append(rec["country_id"])
        # Re-written on every row; final value holds all members.
        redis.set(f"economic_block/{rec['id']}", pickle.dumps(entry))
    s3.put('attrs_economic_block.json',
           json.dumps(grouped, ensure_ascii=False))
    click.echo("Economic Blocks loaded.")
def countries(upload):
    """Load country metadata joined with continent info into Redis and/or S3.

    upload: 'only_s3' skips Redis, 'only_redis' skips the S3 JSON dump.
    """
    raw = s3.get('metadata/continents.csv')
    cont_df = pandas.read_csv(
        raw, sep=';', header=0,
        names=['id', 'country_id', 'name_en', 'name_pt'],
        # Country ids are zero-padded to three digits to match wld.csv.
        converters={"country_id": lambda x: '%03d' % int(x)})
    lookup = {}
    for _, row in cont_df.iterrows():
        lookup[row['country_id']] = {
            'id': row["id"],
            'name_en': row["name_en"],
            'name_pt': row["name_pt"],
        }
    raw = s3.get('metadata/wld.csv')
    wld_df = pandas.read_csv(
        raw, sep=';', header=0,
        names=['id', 'name_pt', 'name_en', 'abbreviation'],
        converters={"id": str})
    all_countries = {}
    for _, row in wld_df.iterrows():
        country = {
            'id': row["id"],
            'name_pt': row["name_pt"],
            'name_en': row["name_en"],
            'abbrv': row["abbreviation"],
            'continent': lookup.get(row["id"], {}),
        }
        all_countries[row['id']] = country
        if upload != 'only_s3':
            redis.set('country/' + str(row['id']),
                      json.dumps(country, ensure_ascii=False))
    if upload != 'only_redis':
        s3.put('country.json', json.dumps(all_countries, ensure_ascii=False))
    click.echo("Countries loaded.")
def municipalities():
    """Legacy loader: municipalities (plus derived micro/mesoregions) from
    redshift into Redis (pickled) and S3 JSON dumps.

    Relies on the legacy states()/regions() loaders having already
    populated the pickled 'state/<id>' and 'region/<id>' Redis keys.
    """
    csv = s3.get('redshift/attrs/attrs_municipios.csv')
    df = pandas.read_csv(
        csv, sep=';', header=0,
        names=['uf_id', 'uf_name', 'mesorregiao_id', 'mesorregiao_name',
               'microrregiao_id', 'microrregiao_name', 'municipio_id',
               'municipio_name', 'municipio_id_mdic'],
        converters={"uf_id": str, "mesorregiao_id": str,
                    "microrregiao_id": str, "municipio_id": str})
    municipalities = {}
    microregions = {}
    mesoregions = {}
    for _, row in df.iterrows():
        municipality = {
            'id': row['municipio_id'],
            'name_pt': row["municipio_name"],
            'name_en': row["municipio_name"],
            'mesoregion': {
                'id': row["mesorregiao_id"],
                'name_pt': row["mesorregiao_name"],
                'name_en': row["mesorregiao_name"],
            },
            'microregion': {
                'id': row["microrregiao_id"],
                'name_pt': row["microrregiao_name"],
                'name_en': row["microrregiao_name"],
            },
            # First two digits of the municipality id are the state id,
            # the first digit the region id (IBGE coding).
            'state': pickle.loads(
                redis.get('state/' + row['municipio_id'][:2])),
            'region': pickle.loads(
                redis.get('region/' + row['municipio_id'][0])),
        }
        municipalities[row['municipio_id']] = municipality
        microregions[row['microrregiao_id']] = municipality['microregion']
        mesoregions[row['mesorregiao_id']] = municipality['mesoregion']
        # BUG FIX: the key prefix was the corrupted string
        # 'muLoadIndustriesnicipality/' ("LoadIndustries" pasted into
        # "municipality"), so no reader could ever find these entries;
        # every sibling loader uses 'municipality/'.
        redis.set('municipality/' + str(row['municipio_id']),
                  pickle.dumps(municipality))
        redis.set('microregion/' + str(row['microrregiao_id']),
                  pickle.dumps(municipality['microregion']))
        redis.set('mesoregion/' + str(row['mesorregiao_id']),
                  pickle.dumps(municipality['mesoregion']))
    s3.put('attrs_municipality.json',
           json.dumps(municipalities, ensure_ascii=False))
    s3.put('attrs_microregion.json',
           json.dumps(microregions, ensure_ascii=False))
    s3.put('attrs_mesoregion.json',
           json.dumps(mesoregions, ensure_ascii=False))
    click.echo("Municipalities, microregions and mesoregions loaded.")
def occupations(upload):
    """Load CBO occupation groups (1-char ids) and families (4-char ids)
    into Redis and/or S3 depending on ``upload``.

    Groups are built in a first pass so each family can embed its group.
    """
    source = s3.get('metadata/cbo.csv')
    frame = pandas.read_csv(
        source, sep=';', header=0,
        names=['id', 'name_en', 'name_pt'],
        converters={"id": str})
    families = {}
    groups = {}
    for _, rec in frame.iterrows():
        if len(rec['id']) != 1:
            continue
        group = {
            'id': rec['id'],
            'name_pt': rec["name_pt"],
            'name_en': rec["name_en"],
        }
        if upload != 'only_s3':
            redis.set(f"occupation_group/{rec['id']}",
                      json.dumps(group, ensure_ascii=False))
        groups[rec['id']] = group
    for _, rec in frame.iterrows():
        if len(rec['id']) != 4:
            continue
        family = {
            'id': rec['id'],
            'name_pt': rec["name_pt"],
            'name_en': rec["name_en"],
            # The family's first digit identifies its group.
            'occupation_group': groups[rec['id'][0]],
        }
        if upload != 'only_s3':
            redis.set(f"occupation_family/{rec['id']}",
                      json.dumps(family, ensure_ascii=False))
        families[rec['id']] = family
    if upload != 'only_redis':
        s3.put('occupation_family.json',
               json.dumps(families, ensure_ascii=False))
        s3.put('occupation_group.json',
               json.dumps(groups, ensure_ascii=False))
    click.echo("Occupations loaded.")
def hedu_course(upload):
    """Load higher-education courses (6-char ids) and course fields
    (2-char ids) into Redis and/or S3 depending on ``upload``.

    Fields are built first so each course can embed its field.
    """
    source = s3.get('metadata/hedu_courses.csv')
    frame = pandas.read_csv(
        source, sep=';', header=0,
        names=['id', 'name_en', 'name_pt'],
        converters={"id": str})
    courses = {}
    fields = {}
    for _, rec in frame.iterrows():
        if len(rec['id']) != 2:
            continue
        field = {
            'id': rec['id'],
            'name_pt': rec["name_pt"],
            'name_en': rec["name_en"],
        }
        if upload != 'only_s3':
            redis.set(f"hedu_course_field/{rec['id']}",
                      json.dumps(field, ensure_ascii=False))
        fields[rec['id']] = field
    for _, rec in frame.iterrows():
        if len(rec['id']) != 6:
            continue
        course = {
            'id': rec['id'],
            'name_pt': rec["name_pt"],
            'name_en': rec["name_en"],
            # The first two digits of a course id identify its field.
            'hedu_course_field': fields[rec['id'][:2]],
        }
        if upload != 'only_s3':
            redis.set(f"hedu_course/{rec['id']}",
                      json.dumps(course, ensure_ascii=False))
        courses[rec['id']] = course
    if upload != 'only_redis':
        s3.put('hedu_course.json', json.dumps(courses, ensure_ascii=False))
        s3.put('hedu_course_field.json',
               json.dumps(fields, ensure_ascii=False))
    click.echo("HEDU Courses loaded.")
def sc_course(upload):
    """Load SC courses (5-char ids) and course fields (2-char ids) into
    Redis and/or S3 depending on ``upload``."""
    source = s3.get('metadata/sc_courses.csv')
    frame = pandas.read_csv(
        source, sep=';', header=0,
        names=['id', 'name_en', 'name_pt'],
        converters={"id": str})
    courses = {}
    fields = {}
    for _, rec in frame.iterrows():
        id_length = len(rec['id'])
        if id_length == 2:
            field = {
                'id': rec["id"],
                'name_pt': rec["name_pt"],
                'name_en': rec["name_en"],
            }
            if upload != 'only_s3':
                redis.set(f"sc_course_field/{rec['id']}",
                          json.dumps(field, ensure_ascii=False))
            fields[rec['id']] = field
        elif id_length == 5:
            course = {
                'id': rec["id"],
                'name_pt': rec["name_pt"],
                'name_en': rec["name_en"],
            }
            if upload != 'only_s3':
                redis.set(f"sc_course/{rec['id']}",
                          json.dumps(course, ensure_ascii=False))
            courses[rec['id']] = course
    if upload != 'only_redis':
        # NOTE(review): course_field is attached only here, after the Redis
        # writes above, so the Redis payloads never carry it and it is
        # skipped entirely when upload == 'only_redis' — confirm intended.
        for course_id in courses:
            courses[course_id]["course_field"] = fields[course_id[:2]]
        s3.put('sc_course.json', json.dumps(courses, ensure_ascii=False))
        s3.put('sc_course_field.json', json.dumps(fields, ensure_ascii=False))
    click.echo("SC Courses loaded.")
def hedu_course(upload):
    """Load higher-education course fields (2-char ids) and courses
    (6-char ids, each embedding its field) into Redis and/or S3."""
    raw = s3.get('metadata/hedu_courses.csv')
    df = pandas.read_csv(
        raw, sep=';', header=0,
        names=['id', 'name_en', 'name_pt'],
        converters={"id": str})
    all_courses = {}
    all_fields = {}
    # Pass 1: fields, so courses can reference them in pass 2.
    for _, row in df.iterrows():
        if len(row['id']) == 2:
            field = {
                'id': row['id'],
                'name_pt': row["name_pt"],
                'name_en': row["name_en"],
            }
            if upload != 'only_s3':
                redis.set('hedu_course_field/' + str(row['id']),
                          json.dumps(field, ensure_ascii=False))
            all_fields[row['id']] = field
    # Pass 2: courses; the first two digits of a course id are its field.
    for _, row in df.iterrows():
        if len(row['id']) == 6:
            course = {
                'id': row['id'],
                'name_pt': row["name_pt"],
                'name_en': row["name_en"],
                'hedu_course_field': all_fields[row['id'][:2]],
            }
            if upload != 'only_s3':
                redis.set('hedu_course/' + str(row['id']),
                          json.dumps(course, ensure_ascii=False))
            all_courses[row['id']] = course
    if upload != 'only_redis':
        s3.put('hedu_course.json', json.dumps(all_courses, ensure_ascii=False))
        s3.put('hedu_course_field.json',
               json.dumps(all_fields, ensure_ascii=False))
    click.echo("HEDU Courses loaded.")
def occupations():
    """Legacy loader: CBO occupation groups (1-char ids) and families
    (4-char ids, each embedding its group) from redshift into Redis
    (pickled) plus S3 JSON dumps."""
    source = s3.get('redshift/attrs/attrs_cbo.csv')
    frame = pandas.read_csv(
        source, sep=';', header=0,
        names=['id', 'name_en', 'name_pt'],
        converters={"id": str})
    families = {}
    groups = {}
    for _, rec in frame.iterrows():
        if len(rec['id']) != 1:
            continue
        group = {
            'id': rec['id'],
            'name_pt': rec["name_pt"],
            'name_en': rec["name_en"],
        }
        redis.set(f"occupation_group/{rec['id']}", pickle.dumps(group))
        groups[rec['id']] = group
    for _, rec in frame.iterrows():
        if len(rec['id']) != 4:
            continue
        family = {
            'id': rec['id'],
            'name_pt': rec["name_pt"],
            'name_en': rec["name_en"],
            # The family's first digit identifies its group.
            'occupation_group': groups[rec['id'][0]],
        }
        redis.set(f"occupation_family/{rec['id']}", pickle.dumps(family))
        families[rec['id']] = family
    s3.put('attrs_occupation_family.json',
           json.dumps(families, ensure_ascii=False))
    s3.put('attrs_occupation_group.json',
           json.dumps(groups, ensure_ascii=False))
    click.echo("Occupations loaded.")
def hedu_course():
    """Legacy loader: higher-education course fields (2-char ids) and
    courses (6-char ids, each embedding its field) from redshift into
    Redis (pickled) plus S3 JSON dumps."""
    source = s3.get('redshift/attrs/attrs_hedu_course.csv')
    frame = pandas.read_csv(
        source, sep=';', header=0,
        names=['id', 'name_en', 'name_pt'],
        converters={"id": str})
    courses = {}
    fields = {}
    for _, rec in frame.iterrows():
        if len(rec['id']) != 2:
            continue
        field = {
            'id': rec['id'],
            'name_pt': rec["name_pt"],
            'name_en': rec["name_en"],
        }
        redis.set(f"hedu_course_field/{rec['id']}", pickle.dumps(field))
        fields[rec['id']] = field
    for _, rec in frame.iterrows():
        if len(rec['id']) != 6:
            continue
        course = {
            'id': rec['id'],
            'name_pt': rec["name_pt"],
            'name_en': rec["name_en"],
            # The first two digits of a course id identify its field.
            'hedu_course_field': fields[rec['id'][:2]],
        }
        redis.set(f"hedu_course/{rec['id']}", pickle.dumps(course))
        courses[rec['id']] = course
    s3.put('attrs_hedu_course.json', json.dumps(courses, ensure_ascii=False))
    s3.put('attrs_hedu_course_field.json',
           json.dumps(fields, ensure_ascii=False))
    click.echo("HEDU Courses loaded.")
def ports(upload):
    """Load port metadata ("<name> - <state>" display labels) into Redis
    and/or S3 depending on ``upload``."""
    source = s3.get('metadata/ports.csv')
    frame = pandas.read_csv(
        source, sep=';', header=0,
        names=['id', 'name', 'state'])
    collected = {}
    for _, rec in frame.iterrows():
        label = rec["name"] + ' - ' + rec["state"]
        entry = {
            'name_pt': label,
            'name_en': label,
        }
        collected[rec['id']] = entry
        if upload != 'only_s3':
            redis.set(f"port/{rec['id']}",
                      json.dumps(entry, ensure_ascii=False))
    if upload != 'only_redis':
        s3.put('port.json', json.dumps(collected, ensure_ascii=False))
    click.echo("Ports loaded.")
def sc_course():
    """Legacy loader: SC courses (5-char ids) and course fields (2-char
    ids) from redshift into Redis (pickled) plus S3 JSON dumps."""
    source = s3.get('redshift/attrs/attrs_sc_course.csv')
    frame = pandas.read_csv(
        source, sep=';', header=0,
        names=['id', 'name_en', 'name_pt'],
        converters={"id": str})
    courses = {}
    fields = {}
    for _, rec in frame.iterrows():
        names = {
            'name_pt': rec["name_pt"],
            'name_en': rec["name_en"],
        }
        id_length = len(rec['id'])
        if id_length == 2:
            redis.set(f"sc_course_field/{rec['id']}", pickle.dumps(names))
            fields[rec['id']] = names
        elif id_length == 5:
            redis.set(f"sc_course/{rec['id']}", pickle.dumps(names))
            courses[rec['id']] = names
    s3.put('attrs_sc_course.json', json.dumps(courses, ensure_ascii=False))
    s3.put('attrs_sc_course_field.json',
           json.dumps(fields, ensure_ascii=False))
    click.echo("SC Courses loaded.")
def establishments(upload):
    """Load health-establishment (CNES) names into Redis.

    No S3 JSON dump exists for this attribute, so with
    upload == 'only_s3' nothing is written.
    """
    raw = s3.get('metadata/cnes_final.csv')
    df = pandas.read_csv(
        raw, sep=';', header=0,
        names=['id', 'name_en', 'name_pt'],
        converters={'id': str})
    for _, row in df.iterrows():
        record = {
            'id': row['id'],
            'name_pt': row["name_pt"],
            'name_en': row["name_en"],
        }
        if upload != 'only_s3':
            redis.set('establishment/' + str(row['id']),
                      json.dumps(record, ensure_ascii=False))
    click.echo("Establishment loaded.")
def establishments():
    """Legacy loader: health-establishment (CNES) names into Redis (pickled).

    NOTE(review): the source path is 'attrs/cnes_final.csv', while sibling
    legacy loaders read from 'redshift/attrs/...' — confirm the prefix.
    """
    source = s3.get('attrs/cnes_final.csv')
    frame = pandas.read_csv(
        source, sep=';', header=0,
        names=['id', 'name_en', 'name_pt'],
        converters={'id': str})
    for _, rec in frame.iterrows():
        entry = {
            'id': rec['id'],
            'name_pt': rec["name_pt"],
            'name_en': rec["name_en"],
        }
        redis.set(f"establishment/{rec['id']}", pickle.dumps(entry))
    click.echo("Establishment loaded.")
def ports():
    """Legacy loader: port metadata ("<name> - <state>" labels) from
    redshift into Redis (pickled) plus an S3 JSON dump."""
    source = s3.get('redshift/attrs/attrs_porto.csv')
    frame = pandas.read_csv(
        source, sep=';', header=0,
        names=['id', 'name', 'state'])
    collected = {}
    for _, rec in frame.iterrows():
        label = rec["name"] + ' - ' + rec["state"]
        entry = {
            'name_pt': label,
            'name_en': label,
        }
        collected[rec['id']] = entry
        redis.set(f"port/{rec['id']}", pickle.dumps(entry))
    s3.put('attrs_port.json', json.dumps(collected, ensure_ascii=False))
    click.echo("Ports loaded.")
def continents(upload):
    """Load continents (each aggregating its country ids) into Redis
    and/or S3 depending on ``upload``."""
    source = s3.get('metadata/continents.csv')
    frame = pandas.read_csv(
        source, sep=';', header=0,
        names=['id', 'country_id', 'name_en', 'name_pt'],
        # Zero-pad country ids to three digits (matches the wld file).
        converters={"country_id": lambda x: '%03d' % int(x)})
    grouped = {}
    for _, rec in frame.iterrows():
        # One row per (continent, country); accumulate per continent.
        entry = grouped.setdefault(rec['id'], {
            'countries': [],
            'name_en': rec["name_en"],
            'name_pt': rec["name_pt"],
        })
        entry["countries"].append(rec["country_id"])
        if upload != 'only_s3':
            # Re-written on every row; final value holds all countries.
            redis.set(f"continent/{rec['id']}",
                      json.dumps(entry, ensure_ascii=False))
    if upload != 'only_redis':
        s3.put('continent.json', json.dumps(grouped, ensure_ascii=False))
    click.echo("Continents loaded.")
def municipalities(upload):
    """Load municipalities (and their derived micro/mesoregions) into
    Redis and/or S3 depending on ``upload``.

    Requires states()/regions() to have populated the JSON-encoded
    'state/<id>' and 'region/<id>' Redis keys beforehand.
    """
    source = s3.get('metadata/municipalities.csv')
    frame = pandas.read_csv(
        source, sep=';', header=0,
        names=[
            'uf_id', 'uf_name', 'mesorregiao_id', 'mesorregiao_name',
            'microrregiao_id', 'microrregiao_name', 'municipio_id',
            'municipio_name', 'municipio_id_mdic', 'municipio_old_id',
            'microrregiao_old_id', 'mesorregiao_old_id', 'state_old_id',
        ],
        converters={"uf_id": str, "mesorregiao_id": str,
                    "microrregiao_id": str, "municipio_id": str})
    munis = {}
    micros = {}
    mesos = {}
    for _, rec in frame.iterrows():
        muni_id = rec['municipio_id']
        entry = {
            'id': muni_id,
            'name_pt': rec["municipio_name"],
            'name_en': rec["municipio_name"],
            'old_id': rec['municipio_old_id'],
            'mesoregion': {
                'id': rec["mesorregiao_id"],
                'name_pt': rec["mesorregiao_name"],
                'name_en': rec["mesorregiao_name"],
                'old_id': rec["mesorregiao_old_id"],
            },
            'microregion': {
                'id': rec["microrregiao_id"],
                'name_pt': rec["microrregiao_name"],
                'name_en': rec["microrregiao_name"],
                'old_id': rec["microrregiao_old_id"],
            },
            # First two digits of the municipality id form the state id,
            # the first digit the region id (IBGE coding).
            'state': json.loads(
                redis.get('state/' + muni_id[:2]).decode('utf-8')),
            'region': json.loads(
                redis.get('region/' + muni_id[0]).decode('utf-8')),
        }
        munis[muni_id] = entry
        micros[rec['microrregiao_id']] = entry['microregion']
        mesos[rec['mesorregiao_id']] = entry['mesoregion']
        if upload != 'only_s3':
            redis.set(f"municipality/{muni_id}",
                      json.dumps(entry, ensure_ascii=False))
            redis.set(f"microregion/{rec['microrregiao_id']}",
                      json.dumps(entry['microregion'], ensure_ascii=False))
            redis.set(f"mesoregion/{rec['mesorregiao_id']}",
                      json.dumps(entry['mesoregion'], ensure_ascii=False))
    if upload != 'only_redis':
        s3.put('municipality.json', json.dumps(munis, ensure_ascii=False))
        s3.put('microregion.json', json.dumps(micros, ensure_ascii=False))
        s3.put('mesoregion.json', json.dumps(mesos, ensure_ascii=False))
    click.echo("Municipalities, microregions and mesoregions loaded.")
def industries(upload):
    """Load CNAE industry sections (1-char), divisions (3-char ids) and
    classes (6-char ids) into Redis and/or S3 depending on ``upload``.

    Sections and divisions are built first so each class can embed both.
    Sentinel 'Undefined' entries are pre-seeded for class '-1' and
    section '0'. Note this CSV is comma-separated, unlike the others.
    """
    source = s3.get('metadata/cnae.csv')
    frame = pandas.read_csv(
        source, sep=',', header=0,
        names=['id', 'name_en', 'name_pt'],
        converters={"id": str})
    sections = {}
    divisions = {}
    classes = {}
    classes['-1'] = {'name_pt': 'Não definido', 'name_en': 'Undefined'}
    sections['0'] = {'name_pt': 'Não definido', 'name_en': 'Undefined'}
    # Pass 1: sections.
    for _, rec in frame.iterrows():
        if len(rec['id']) != 1:
            continue
        section = {
            'id': rec['id'],
            'name_pt': rec["name_pt"],
            'name_en': rec["name_en"],
        }
        if upload != 'only_s3':
            redis.set(f"industry_section/{rec['id']}",
                      json.dumps(section, ensure_ascii=False))
        sections[rec['id']] = section
    # Pass 2: divisions; digits 2-3 of the raw id form the division id,
    # the leading character is its section.
    for _, rec in frame.iterrows():
        if len(rec['id']) != 3:
            continue
        division_id = rec['id'][1:3]
        division = {
            'id': division_id,
            'name_pt': rec["name_pt"],
            'name_en': rec["name_en"],
            'industry_section': rec["id"][0],
        }
        if upload != 'only_s3':
            redis.set(f"industry_division/{division_id}",
                      json.dumps(division, ensure_ascii=False))
        divisions[division_id] = division
    # Pass 3: classes; strip the section character to get the class id.
    for _, rec in frame.iterrows():
        if len(rec['id']) != 6:
            continue
        class_id = rec["id"][1:]
        industry_class = {
            'id': class_id,
            'name_pt': rec["name_pt"],
            'name_en': rec["name_en"],
            'industry_section': sections[rec["id"][0]],
            'industry_division': divisions[rec["id"][1:3]],
        }
        if upload != 'only_s3':
            redis.set(f"industry_class/{class_id}",
                      json.dumps(industry_class, ensure_ascii=False))
        classes[class_id] = industry_class
    if upload != 'only_redis':
        s3.put('industry_class.json', json.dumps(classes, ensure_ascii=False))
        s3.put('industry_division.json',
               json.dumps(divisions, ensure_ascii=False))
        s3.put('industry_section.json',
               json.dumps(sections, ensure_ascii=False))
    click.echo("Industries loaded.")
def products():
    """Legacy loader: HS products from redshift into Redis (pickled) plus
    S3 JSON dumps.

    The CSV mixes depth levels in one file: 'Seção' rows are product
    sections, 'Capítulo' rows are chapters, 'Posição' rows are products.
    Sections/chapters are collected first so products can embed them.
    """
    source = s3.get('redshift/attrs/attrs_hs.csv')
    frame = pandas.read_csv(
        source, sep=';', header=0,
        names=['id', 'name_pt', 'name_en', 'profundidade_id', 'profundidade'],
        converters={"id": str})
    products = {}
    sections = {}
    chapters = {}
    # Pass 1: sections and chapters.
    for _, rec in frame.iterrows():
        depth = rec['profundidade']
        if depth == 'Seção':
            section_id = rec['id']
            section = {
                'id': section_id,
                'name_pt': rec["name_pt"],
                'name_en': rec["name_en"],
            }
            redis.set(f"product_section/{section_id}",
                      pickle.dumps(section))
            sections[section_id] = section
        elif depth == 'Capítulo':
            # Chapter ids drop the leading 2-digit section prefix.
            chapter_id = rec['id'][2:]
            chapter = {
                'id': chapter_id,
                'name_pt': rec["name_pt"],
                'name_en': rec["name_en"],
            }
            redis.set(f"product_chapter/{chapter_id}",
                      pickle.dumps(chapter))
            chapters[chapter_id] = chapter
    # Pass 2: products (positions), embedding section and chapter.
    for _, rec in frame.iterrows():
        if rec['profundidade'] != 'Posição':
            continue
        product_id = rec['id'][2:]
        entry = {
            'name_pt': rec["name_pt"],
            'name_en': rec["name_en"],
            'product_section': sections[rec["id"][:2]],
            'product_chapter': chapters[rec["id"][2:4]],
        }
        products[product_id] = entry
        redis.set(f"product/{product_id}", pickle.dumps(entry))
    s3.put('attrs_product.json', json.dumps(products, ensure_ascii=False))
    s3.put('attrs_product_section.json',
           json.dumps(sections, ensure_ascii=False))
    s3.put('attrs_product_chapter.json',
           json.dumps(chapters, ensure_ascii=False))
    click.echo("Products loaded.")
def industries():
    """Legacy loader: CNAE industry sections (1-char), divisions (3-char
    ids) and classes (6-char ids) from redshift into Redis (pickled)
    plus S3 JSON dumps.

    Sentinel 'Undefined' entries are pre-seeded for class '-1' and
    section '0'. Note this CSV is comma-separated, unlike the others.
    """
    source = s3.get('redshift/attrs/attrs_cnae.csv')
    frame = pandas.read_csv(
        source, sep=',', header=0,
        names=['id', 'name_en', 'name_pt'],
        converters={"id": str})
    sections = {}
    divisions = {}
    classes = {}
    classes['-1'] = {'name_pt': 'Não definido', 'name_en': 'Undefined'}
    sections['0'] = {'name_pt': 'Não definido', 'name_en': 'Undefined'}
    # Pass 1: sections.
    for _, rec in frame.iterrows():
        if len(rec['id']) != 1:
            continue
        section = {
            'id': rec['id'],
            'name_pt': rec["name_pt"],
            'name_en': rec["name_en"],
        }
        redis.set(f"industry_section/{rec['id']}", pickle.dumps(section))
        sections[rec['id']] = section
    # Pass 2: divisions; digits 2-3 of the raw id form the division id,
    # the leading character is its section.
    for _, rec in frame.iterrows():
        if len(rec['id']) != 3:
            continue
        division_id = rec['id'][1:3]
        division = {
            'id': division_id,
            'name_pt': rec["name_pt"],
            'name_en': rec["name_en"],
            'industry_section': rec["id"][0],
        }
        redis.set(f"industry_division/{division_id}", pickle.dumps(division))
        divisions[division_id] = division
    # Pass 3: classes; strip the section character to get the class id.
    for _, rec in frame.iterrows():
        if len(rec['id']) != 6:
            continue
        class_id = rec["id"][1:]
        industry_class = {
            'id': class_id,
            'name_pt': rec["name_pt"],
            'name_en': rec["name_en"],
            'industry_section': sections[rec["id"][0]],
            'industry_division': divisions[rec["id"][1:3]],
        }
        redis.set(f"industry_class/{class_id}", pickle.dumps(industry_class))
        classes[class_id] = industry_class
    s3.put('attrs_industry_class.json',
           json.dumps(classes, ensure_ascii=False))
    s3.put('attrs_industry_division.json',
           json.dumps(divisions, ensure_ascii=False))
    s3.put('attrs_industry_section.json',
           json.dumps(sections, ensure_ascii=False))
    click.echo("Industries loaded.")