def processMultiCountryFile(config):
    """Convert one multi-country Excel workbook into per-country gen-1 XLS files.

    Reads the workbook named by ``config``, iterates its top-level country
    columns, and writes one (or, for timeseries, up to two: annual + monthly)
    gen-1 XLS file per country under ``gen_1_dir + config["path"]``.

    Returns a dict mapping lowercased country name -> extracted data:
    ``(monthly, yearly, meta)`` tuples in timeseries mode, ``(data, meta)``
    otherwise. Countries whose timeseries data fails to convert are logged
    and omitted from the map.
    """
    us.log(config)
    xlFile = getXLFile(config)
    # Rows 1-2 of the sheet form a two-level column header (country / series).
    multiIndex = excelToMultiIndex(xlFile, header=[1, 2])
    cols = getTopCols(multiIndex)
    gen1Path = gen_1_dir + config["path"]
    us.mkdirs(gen1Path)
    prefix = us.format_lower_no_spaces(config["descriptor"])
    countryMap = {}
    for country in cols:
        lcountry = us.format_lower_no_spaces(country)
        us.log(country)
        if config["timeseries"]:
            try:
                (monthly, yearly, meta) = getTimeseriesDataSet(multiIndex, country, config)
                # If the source only provides monthly figures, derive the
                # annual series from them.
                if len(monthly.index) and len(yearly.index) == 0:
                    yearly = build_annual_from_monthly(monthly, country)
                countryMap[lcountry] = (monthly, yearly, meta)
                if len(yearly.index):
                    writeGen1XLS(yearly, meta, gen1Path, lcountry, prefix, "annual")
                if len(monthly.index):
                    writeGen1XLS(monthly, meta, gen1Path, lcountry, prefix, "monthly")
            except ValueError:
                # Best-effort: skip countries whose sheet cannot be parsed.
                print("********************** Unable to convert data for " + country)
                continue
        else:
            (data, meta) = getDataSet(multiIndex, country, config)
            countryMap[lcountry] = (data, meta)
            writeGen1XLS(data, meta, gen1Path, lcountry, prefix)
    return countryMap
# NOTE(review): this notebook region was collapsed onto a single physical line by
# extraction AND is truncated mid-statement (the `meta = [...]` list literal is
# never closed), so it is left byte-identical rather than reconstructed.
# NOTE(review): `mf.ix[...]` uses the long-deprecated pandas `.ix` indexer;
# when the full cell is recovered, replace with `mf.loc["Capacity"]["Source"]`.
# NOTE(review): the loop writes per-country CSVs named
# PREFIX_iso3_SUFFIX.csv into gen_2_dir; `filelist` is presumably appended to in
# the truncated remainder — confirm against the original notebook.
# <codecell> dataset[0][2] # This is what the metadata looks like # <markdowncell> # Generation 2 - Refines the rough csv data from Generation 1 into a standardized csv format common to all data sets. Prepares this data for importing to the database. # <markdowncell> # This code follows a fairly standard form, in which it loops through each country's information, writes the data to a .csv file, extracts some metadata and writes that to a .csv file, and then writes a list of all the files that it made to a file called \_PREFIX.csv # <codecell> filelist = [] us.mkdirs(config["gen_2_dir"]) for (iso3, df, mf) in dataset: us.log(iso3) try: df.columns = ["description", "value"] except Exception: # The data for St Kitts only has one column. We are excluding it for now. us.log(sys.exc_info()) continue filestem = config["prefix"] + "_" + iso3.lower() + "_" + config["suffix"] filename = filestem + ".csv" filepath = config["gen_2_dir"] + filename df.to_csv(filepath, encoding="utf8", float_format='%.3f', index=False) country = us.get_country_by_iso3(iso3) meta = [("name", "%s - %s [SIDS RCM]" % (country, config["indicator"])), ("originalsource", mf.ix["Capacity"]["Source"]),
dataset[3][2]  # This is what the metadata looks like

# <markdowncell>
# Generation 2 - Refines the rough csv data from Generation 1 into a
# standardized csv format common to all data sets. Prepares this data for
# importing to the database.

# <markdowncell>
# This code follows a fairly standard form, in which it loops through each
# country's information, writes the data to a .csv file, extracts some
# metadata and writes that to a .csv file, and then writes a list of all the
# files that it made to a file called _PREFIX.csv

# <codecell>
# NOTE(review): `keycol` and `filelist` are not used in the code visible here;
# the cell appears truncated — presumably the remainder appends to `filelist`.
# Confirm against the original notebook.
keycol = "Total"
filelist = []
us.mkdirs(config["gen_2_dir"])
for (iso3, df, mf) in dataset:
    us.log(iso3)
    try:
        # Normalize column names; some sheets carry a third "notes" column.
        if len(df.columns) == 2:
            df.columns = ["year", "value"]
        else:
            df.columns = ["year", "value", "notes"]
    except Exception:
        # The data for St Kitts only has one column. We are excluding it for now.
        us.log(sys.exc_info())
        continue
    # Write the per-country gen-2 CSV: PREFIX_iso3_SUFFIX.csv
    filestem = config["prefix"] + "_" + iso3.lower() + "_" + config["suffix"]
    filename = filestem + ".csv"
    filepath = config["gen_2_dir"] + filename
    df.to_csv(filepath, encoding="utf8", float_format='%.3f', index=False)
def build_d3_line_graph(config):
    """Assemble a D3 line-graph HTML page from `config` and write it to gen_4_dir.

    Concatenates the d3 template snippets selected by `config` (series source,
    data source, titles, units, axes) into one <script> block, wraps it in an
    HTML page, writes the page to ``config["gen_4_dir"]``, and returns the
    page text.
    """
    # NOTE(review): indentation was reconstructed from a collapsed source line;
    # the `series_param` checks are assumed to be nested under
    # `series_source == "params"` — confirm against the original notebook.
    script = []
    script.append(d3.metadata_iso3_map % (config["iso3_map"]))
    script.append(d3.metadata_series_info_map % (config["series_info_map"]))
    if "currency_year" in config:
        script.append(util_currency_converter_2005)
    if config["series_source"] == "params":
        script.append(d3.util_function_get_parameters_by_name)
        if config["series_param"] == "iso3":
            script.append(d3.js_series_name_from_iso3_parameter % (config["prefix"], config["suffix"]))
        elif config["series_param"] == "s":
            script.append(d3.js_series_name_from_s_parameter)
    if config["data_source"] == "local":
        script.append(d3.js_dataset_list % get_all_series_json(config))
    elif config["data_source"] == "query":
        script.append(d3.js_data_url_series_query)
    script.append(d3.graph_set_title % (config["title"]))
    if config["subtitle"] == "countryname":
        script.append(d3.graph_set_subtitle_country)
    else:
        script.append(d3.graph_set_subtitle % (config["subtitle"]))
    script.append(d3.graph_default_size)
    script.append(d3.graph_title_location)
    script.append(d3.graph_default_header)
    script.append(d3.include_js_d3_basic_axis)
    script.append(d3.js_d3_start_csv)
    if config["unit"] == "guess":
        script.append(d3.js_unit_guess)
    else:
        script.append(d3.js_unit_set % (config["unit"], str(config["multiplier"])))
    script.append(d3.js_d3_process_annual_csv_data)
    script.append(d3.js_d3_build_simple_xy_domains)
    script.append(d3.js_d3_svg_build_x_axis)
    if config["y_axis_label"] == "unit":
        script.append(d3.js_d3_svg_build_y_axis_unit_label)
    else:
        script.append(d3.js_d3_svg_build_y_axis_label % config["y_axis_label"])
    script.append(d3.js_d3_svg_draw_line)
    script.append(d3.svg_draw_title)
    script.append(d3.svg_draw_subtitle)
    script.append(d3.svg_draw_source)
    script.append(d3.js_d3_end_csv)
    head = build_d3_head(config["indicator"])
    body = html.body(html.script("\n".join(script)))
    page = html.html(head + "\n" + body)
    page_name = build_file_name(config)
    us.mkdirs(config["gen_4_dir"])
    # `with` guarantees the file handle is closed even if the write raises
    # (the original open/write/close leaked the handle on error).
    with open(config["gen_4_dir"] + page_name, "w", encoding="utf8") as page_file:
        page_file.write(page)
    return page
def build_d3_line_graph(config):
    """Assemble a D3 line-graph HTML page from `config` and write it to gen_4_dir.

    Concatenates the d3 template snippets selected by `config` (series source,
    data source, titles, units, axes) into one <script> block, wraps it in an
    HTML page, writes the page to ``config["gen_4_dir"]``, and returns the
    page text.
    """
    # NOTE(review): this definition is byte-identical to an earlier
    # `build_d3_line_graph` in this file; the later definition wins at import
    # time — consider removing one of them.
    # NOTE(review): indentation was reconstructed from a collapsed source line;
    # the `series_param` checks are assumed to be nested under
    # `series_source == "params"` — confirm against the original notebook.
    script = []
    script.append(d3.metadata_iso3_map % (config["iso3_map"]))
    script.append(d3.metadata_series_info_map % (config["series_info_map"]))
    if "currency_year" in config:
        script.append(util_currency_converter_2005)
    if config["series_source"] == "params":
        script.append(d3.util_function_get_parameters_by_name)
        if config["series_param"] == "iso3":
            script.append(d3.js_series_name_from_iso3_parameter % (config["prefix"], config["suffix"]))
        elif config["series_param"] == "s":
            script.append(d3.js_series_name_from_s_parameter)
    if config["data_source"] == "local":
        script.append(d3.js_dataset_list % get_all_series_json(config))
    elif config["data_source"] == "query":
        script.append(d3.js_data_url_series_query)
    script.append(d3.graph_set_title % (config["title"]))
    if config["subtitle"] == "countryname":
        script.append(d3.graph_set_subtitle_country)
    else:
        script.append(d3.graph_set_subtitle % (config["subtitle"]))
    script.append(d3.graph_default_size)
    script.append(d3.graph_title_location)
    script.append(d3.graph_default_header)
    script.append(d3.include_js_d3_basic_axis)
    script.append(d3.js_d3_start_csv)
    if config["unit"] == "guess":
        script.append(d3.js_unit_guess)
    else:
        script.append(d3.js_unit_set % (config["unit"], str(config["multiplier"])))
    script.append(d3.js_d3_process_annual_csv_data)
    script.append(d3.js_d3_build_simple_xy_domains)
    script.append(d3.js_d3_svg_build_x_axis)
    if config["y_axis_label"] == "unit":
        script.append(d3.js_d3_svg_build_y_axis_unit_label)
    else:
        script.append(d3.js_d3_svg_build_y_axis_label % config["y_axis_label"])
    script.append(d3.js_d3_svg_draw_line)
    script.append(d3.svg_draw_title)
    script.append(d3.svg_draw_subtitle)
    script.append(d3.svg_draw_source)
    script.append(d3.js_d3_end_csv)
    head = build_d3_head(config["indicator"])
    body = html.body(html.script("\n".join(script)))
    page = html.html(head + "\n" + body)
    page_name = build_file_name(config)
    us.mkdirs(config["gen_4_dir"])
    # `with` guarantees the file handle is closed even if the write raises
    # (the original open/write/close leaked the handle on error).
    with open(config["gen_4_dir"] + page_name, "w", encoding="utf8") as page_file:
        page_file.write(page)
    return page
# NOTE(review): this chunk begins mid-dict — the opening (presumably
# `sector_map = {`) is not visible here — and was collapsed onto a single
# physical line by extraction, so it is left byte-identical rather than
# reconstructed. The visible tail: finishes the sector-name -> short-code map,
# creates a gen_2 subdirectory per short code, parses the gen-1 meta CSV into
# `metamap`, merges `config` into it, renames key "6495" to "note", sets
# unit to "USD2005", and drops keys "6432"/"6495".
'Manufacturing':'mfg', 'Mining and quarrying':'mq', 'Post and telecommunications':'pt', 'Public administration, defence; compulsory social security, education, ' + 'health and social work, and other community, social and personal service activities': 'public', 'Statistical discrepancy of GDP by economic activity': 'discrep', 'Taxes on products less subsidies on products': 'taxes', 'Total value added': 'tva', 'Transport and supporting and auxiliary activities': 'tsaa', 'Transport, storage and communications': 'tsc', 'Wholesale and retail trade, and repair of goods': 'wrtrg', 'Wholesale and retail trade, repair of goods, and hotels and restaurants': 'wrtrghr'} for val in sector_map.values(): # ensure there is a directory for each of the above sets directory = config['gen_2_dir'] + val + "/" us.mkdirs(directory) # <codecell> metafile = config["gen_1_dir"] + config["indicator_id"] + "_meta.csv" metamap = gen2_cepalstat.parse_meta_file(metafile) metamap # <codecell> metamap.update(config) metamap["note"] = metamap["6495"] metamap["unit"] = "USD2005" del metamap["6432"] del metamap["6495"] metamap