Python get_index_set Examples, utils_statmart.get_index_set Python Examples

Example #1

0

Show file

File: gen2_cepalstat.py Project: spikewilliams/statmart

def build_files(df, config):
    """Write one data CSV and one metadata CSV per country found in ``df``.

    Args:
        df: DataFrame that is looked up with every value returned by
            ``us.get_index_set(df)`` and that provides the columns
            "Year", "Value", "Source" and "Notes".
        config: Mapping supplying "multiplier", "source", "prefix", "suffix",
            "gen_2_dir", "indicator", "indicator_id", "definition",
            "indicator_category" and "indicator_type".

    Returns:
        DataFrame holding the stem of every file that was written, indexed by
        country and sorted on that index. The same frame is saved (without
        index or header) as ``<gen_2_dir>_<prefix>.csv``.

    Any exception raised while handling one country is logged and that
    country is skipped; the remaining countries are still processed.
    """
    filelist = []
    countrylist = []
    for iso3 in us.get_index_set(df):
        try:
            # ``.loc`` replaces the ``.ix`` indexer, which pandas removed in 1.0.
            idf = df.loc[iso3]
            # A single matching row comes back as a Series, but we always
            # want a DataFrame.
            if isinstance(idf, pd.Series):
                idf = pd.DataFrame([idf])
            # Copy so the column assignments below never target a view of df.
            idf = idf[["Year", "Value", "Source", "Notes"]].copy()
            idf.columns = ["year", "value", "source", "note"]
            mult = config["multiplier"]
            if mult:
                if -1 <= mult <= 1 or not isinstance(mult, int):
                    # Fractional or non-int multipliers: keep the raw product.
                    idf["value"] = idf["value"].apply(lambda x: x * mult)
                else:
                    # Integer multipliers above 1 in magnitude: truncate to int
                    # and store as object dtype.
                    idf["value"] = (
                        idf["value"].apply(lambda x: int(x * mult)).astype(object)
                    )
            # Every row carries the configured source, not the per-row one.
            idf["source"] = config["source"]
            idf["note"] = idf["note"].apply(lambda x: get_notes(str(x), config))
            filestem = config["prefix"] + "_" + iso3.lower() + "_" + config["suffix"]
            filename = filestem + ".csv"
            filepath = config["gen_2_dir"] + filename
            us.log(filepath)
            idf.to_csv(filepath, encoding="utf8", index=False)

            country = us.get_country_by_iso3(iso3)
            # NOTE(review): the data file's last column is named "note" while
            # the "columns" entry below says "notes" - confirm which one
            # downstream readers expect.
            meta = [
                ("name", "%s - %s [CEPALStat]" % (country, config["indicator"])),
                ("originalsource", config["source"]),
                ("proximatesource", "CEPALStat"),
                ("dataset", config["indicator"] + " [" + config["indicator_id"] + "]"),
                ("description", config["definition"]),
                ("category", config["indicator_category"]),
                ("type", config["indicator_type"]),
                ("file", filename),
                ("filehash", us.githash(filepath)),
                ("columns", "year,value,source,notes"),
            ]

            metafile = config["gen_2_dir"] + filestem + "_meta.csv"
            pd.DataFrame(meta, columns=["key", "value"]).to_csv(
                metafile, encoding="utf8", float_format="%.3f", index=False
            )
            filelist.append([filestem])
            countrylist.append(country)
        except Exception:
            # Best effort: report the failure and carry on with the next country.
            us.log("ERROR: Failed to build data for %s" % iso3)
            us.log(sys.exc_info())
            traceback.print_tb(sys.exc_info()[2])

    fldf = pd.DataFrame(filelist, index=countrylist).sort_index()
    fldf.to_csv(
        config["gen_2_dir"] + "_" + config["prefix"] + ".csv",
        encoding="utf8",
        float_format="%.1f",
        index=False,
        header=False,
    )
    return fldf

Example #2

0

Show file

File: gen2_sidsrcm.py Project: spikewilliams/statmart

def build_files(df, config):
    """Write one data CSV and one metadata CSV per country found in ``df``.

    Args:
        df: DataFrame that is looked up with every value returned by
            ``us.get_index_set(df)`` and that provides the columns
            "Year", "Value", "Source" and "Notes".
        config: Mapping supplying "multiplier", "source", "prefix", "suffix",
            "gen_2_dir", "indicator", "indicator_id", "definition",
            "indicator_category" and "indicator_type".

    Returns:
        DataFrame holding the stem of every file that was written, indexed by
        country and sorted on that index. The same frame is saved (without
        index or header) as ``<gen_2_dir>_<prefix>.csv``.

    Any exception raised while handling one country is logged and that
    country is skipped; the remaining countries are still processed.
    """
    filelist = []
    countrylist = []
    for iso3 in us.get_index_set(df):
        try:
            # ``.loc`` replaces the ``.ix`` indexer, which pandas removed in 1.0.
            idf = df.loc[iso3]
            # A single matching row comes back as a Series, but we always
            # want a DataFrame.
            if isinstance(idf, pd.Series):
                idf = pd.DataFrame([idf])
            # Copy so the column assignments below never target a view of df.
            idf = idf[["Year", "Value", "Source", "Notes"]].copy()
            idf.columns = ["year", "value", "source", "notes"]
            mult = config["multiplier"]
            if mult:
                if -1 <= mult <= 1 or not isinstance(mult, int):
                    # Fractional or non-int multipliers: keep the raw product.
                    idf["value"] = idf["value"].apply(lambda x: x * mult)
                else:
                    # Integer multipliers above 1 in magnitude: truncate to int
                    # and store as object dtype.
                    idf["value"] = idf["value"].apply(lambda x: int(x * mult)).astype(object)
            # Every row carries the configured source, not the per-row one.
            idf["source"] = config["source"]
            idf["notes"] = idf["notes"].apply(lambda x: get_notes(str(x), config))
            filestem = config["prefix"] + "_" + iso3.lower() + "_" + config["suffix"]
            filename = filestem + ".csv"
            filepath = config["gen_2_dir"] + filename
            us.log(filepath)
            idf.to_csv(filepath, encoding="utf8", index=False)

            country = us.get_country_by_iso3(iso3)
            # NOTE(review): the labels below say "CEPALStat"; confirm that is
            # intended for this data set and not a leftover from another script.
            meta = [
                ("name", "%s - %s [CEPALStat]" % (country, config["indicator"])),
                ("originalsource", config["source"]),
                ("proximatesource", "CEPALStat"),
                ("dataset", config["indicator"] + " [" + config["indicator_id"] + "]"),
                ("description", config["definition"]),
                ("category", config["indicator_category"]),
                ("type", config["indicator_type"]),
                ("file", filename),
                ("filehash", us.githash(filepath)),
                ("columns", "year,value,source,notes"),
            ]

            metafile = config["gen_2_dir"] + filestem + "_meta.csv"
            pd.DataFrame(meta, columns=["key", "value"]).to_csv(
                metafile, encoding="utf8", float_format='%.3f', index=False
            )
            filelist.append([filestem])
            countrylist.append(country)
        except Exception:
            # Best effort: report the failure and carry on with the next country.
            us.log("ERROR: Failed to build data for %s" % iso3)
            us.log(sys.exc_info())
            traceback.print_tb(sys.exc_info()[2])

    fldf = pd.DataFrame(filelist, index=countrylist).sort_index()
    fldf.to_csv(
        config["gen_2_dir"] + "_" + config["prefix"] + ".csv",
        encoding="utf8",
        float_format='%.1f',
        index=False,
        header=False,
    )
    return fldf

Example #3

0

Show file

File: gen2_sidsrcm.py Project: spikewilliams/statmart

def get_meta_map(mf):
    """Build a dict of cleaned metadata strings from the metadata CSV.

    The CSV is expected to have "Key", "Value" and "ID" columns. The result
    contains:

    * "source"     - first "source" row, if there is one
    * "indicator"  - first "indicator" row (required; IndexError if absent)
    * "definition" - first "definition" row, if there is one
    * ``str(ID)``  - one entry per "note" row, keyed by the row's ID

    Every value is stripped of surrounding whitespace and passed through
    ``clip_period``.
    """
    metamap = {}

    # NOTE(review): the ``mf`` argument is discarded here; the data is read
    # from ``metafile``, which has to exist at module level. Confirm whether
    # the parameter was meant to be the path to read.
    mf = pd.read_csv(metafile, encoding="utf-8")

    # ``.loc`` replaces the ``.ix`` indexer, which pandas removed in 1.0.
    source = mf.loc[mf["Key"] == "source"]["Value"]
    if len(source) > 0:
        metamap["source"] = clip_period(source.iloc[0].strip())

    indicator = mf.loc[mf["Key"] == "indicator"]["Value"]
    metamap["indicator"] = clip_period(indicator.iloc[0].strip())

    definition = mf.loc[mf["Key"] == "definition"]["Value"]
    # Test the selection itself; the old check measured the literal string
    # "definition" and therefore never skipped a missing row.
    if len(definition) > 0:
        metamap["definition"] = clip_period(definition.iloc[0].strip())

    nf = mf.loc[mf["Key"] == "note"][["ID", "Value"]]
    nf = nf.set_index(["ID"])

    for index in us.get_index_set(nf):
        note = clip_period(nf.loc[index]["Value"].strip())
        metamap[str(index)] = note
    return metamap

Example #4

0

Show file

File: profiles.py Project: spikewilliams/statmart

import base64

from settings_statmart import *
import utils_statmart as us



# Settings for the profile files: the directories stored under "gen_1_dir"
# and "gen_2_dir", plus the file-name prefix and (empty) suffix.
config = {
    "gen_1_dir": statmart_facts_gen1 + "profiles/",
    "gen_2_dir": statmart_facts_gen2 + "profiles/",
    "prefix": "profile",
    "suffix": "",
}


# <codecell>

# Load the Caribbean country dict keyed by name, turn it into a transposed
# DataFrame and hand that to us.get_index_set.
countries = us.get_index_set(
    pd.DataFrame(us.load_carib_country_dict(key_column="name")).transpose()
)

# Module-level switch read by elog(); a falsy value silences it.
chatter = True


def elog(s):
    """Print ``s`` when the module-level ``chatter`` flag is truthy."""
    if not chatter:
        return
    print(s)

def formatComputerReadableString(strn):
    """Return ``strn`` trimmed, lower-cased and with spaces turned into hyphens.

    The exact name "U.S. Virgin Islands" is first expanded to
    "United States Virgin Islands".
    """
    label = "United States Virgin Islands" if strn == "U.S. Virgin Islands" else strn
    trimmed = label.strip()
    return trimmed.lower().replace(" ", "-")

# Pair every country name with its hyphenated form and sort the pairs;
# the bare name on the last line displays the result as notebook cell output.
countries = sorted(
    (name, formatComputerReadableString(name)) for name in countries
)
countries

# <codecell>

Example #5

0

Show file

File: profiles.py Project: spikewilliams/statmart

import http.client
import base64

from settings_statmart import *
import utils_statmart as us

# Settings for the profile files: the directories stored under "gen_1_dir"
# and "gen_2_dir", plus the file-name prefix and (empty) suffix.
config = dict(
    gen_1_dir=statmart_facts_gen1 + "profiles/",
    gen_2_dir=statmart_facts_gen2 + "profiles/",
    prefix="profile",
    suffix="",
)

# <codecell>

# Load the Caribbean country dict keyed by name, turn it into a transposed
# DataFrame and hand that to us.get_index_set.
countries = us.get_index_set(
    pd.DataFrame(
        us.load_carib_country_dict(key_column="name")
    ).transpose()
)

# Module-level switch consulted by elog() on every call.
chatter = True


def elog(s):
    """Echo ``s`` to stdout unless the module-level ``chatter`` flag is falsy."""
    if not chatter:
        return None
    print(s)
    return None


def formatComputerReadableString(strn):
    """Turn a display name into a lower-case, hyphen-separated string.

    "U.S. Virgin Islands" (exact match only) is spelled out as
    "United States Virgin Islands" before the conversion.
    """
    if strn == "U.S. Virgin Islands":
        strn = "United States Virgin Islands"
    lowered = strn.strip().lower()
    # Splitting on a single space and re-joining keeps runs of spaces as
    # runs of hyphens, exactly like a plain replace would.
    return "-".join(lowered.split(" "))

Example #6

0

Show file

# <codecell>

# Read "<indicator_id>_all.csv" from the gen_1_dir directory, indexed on its
# "Item" column, and transpose it so each item becomes a column.
statfile = config["gen_1_dir"] + indicator_id + "_all.csv"
df = pd.read_csv(statfile, encoding="utf-8", index_col=["Item"]).T
# Preview part of the "Construction" data (notebook cell output).
sample = df["Construction"].T
sample[11:20]

# <markdowncell>

# We need a list of sectors so we can build a map out of them. __Should any sectors be added to the data in the future, the additional fields will need to be added to the sector_map, below.__ If the additional sector fields are not added, an error will occur during the file generation step, when the sector name is not found in sector_map.

# <codecell>

# Pass the re-transposed frame to us.get_index_set to obtain the sectors;
# the bare name on the last line displays them as notebook cell output.
sectors = us.get_index_set(df.T)
sectors

# <markdowncell>

# Here is the sector_map, which maps every long name of a sector (see set listing above) to a short one for use in the file system. We also ensure that a directory exists for each sector in the map.

# <codecell>

sector_map = {'Agriculture, hunting and forestry':'aghufo',
 'Agriculture, hunting, forestry and fishing':'aghufofi',
 'Construction':'cstrn',
 'Electricity, gas and water supply':'egws',
 'Financial intermediation services indirectly measured (FISIM)':'fisim',
 'Financial intermediation, real estate, renting and business activities':'firerba',
 'Fishing':'fish',