Ejemplo n.º 1
0
def conversion(maindata, moderndata, historicaldata):
    """Pick the active dataset and drop the columns rejected by ``selectint``.

    The choice depends on the module-level ``switch``:

    * ``'modern'``: ``moderndata`` is used, minus a column named ``'1'``
      when present.
    * anything else: ``historicaldata`` is used when it holds data; otherwise
      the index of ``moderndata`` is translated through the module-level
      ``oecd2webmapper`` mapping and rows without a positive mapped code
      are dropped.

    Besides ``switch`` and ``oecd2webmapper`` the function reads the
    module-level names ``config`` and ``geolist``.

    Args:
        maindata: Not read; the name is only rebound. Kept so the signature
            stays unchanged.
        moderndata: Frame used for the modern classification. Its index is
            replaced in place when the webmapper conversion runs.
        historicaldata: Frame used for the historical classification, or an
            empty / falsy value when there is none.

    Returns:
        The selected frame without the columns listed in the second element
        of ``selectint(maindata.columns)``.
    """
    if switch == 'modern':
        if '1' in moderndata.columns:
            moderndata = moderndata.drop('1', axis=1)
        maindata = moderndata
    else:
        # ``not frame`` raises ValueError for a DataFrame, so look at
        # ``.empty`` first and fall back to plain truthiness only for
        # values that are not frames (None, '', ...).
        try:
            has_historical = not historicaldata.empty
        except AttributeError:
            has_historical = bool(historicaldata)

        if has_historical:
            maindata = historicaldata
        else:
            # Do conversion to webmapper system; -1 marks codes that have
            # no mapping so the filter below removes them.
            maindata = moderndata
            webmapperindex = []
            for code in maindata.index:
                try:
                    webmappercode = oecd2webmapper[int(code)]
                except (KeyError, IndexError, TypeError, ValueError):
                    webmappercode = -1
                webmapperindex.append(webmappercode)
            maindata.index = webmapperindex
            maindata = maindata[maindata.index > 0]

    (cfilter, notint) = selectint(maindata.columns)

    codes = selectint(maindata.index)
    # The result is not used here; the call is kept in case load_geocodes
    # has side effects (not visible from this function).
    geo = load_geocodes(config, switch, codes, maindata, geolist)
    for colname in notint:
        maindata = maindata.drop(colname, axis=1)
    return maindata
Ejemplo n.º 2
0
def conversion(maindata, moderndata, historicaldata):
    """Pick the active dataset and drop the columns rejected by ``selectint``.

    The choice depends on the module-level ``switch``:

    * ``'modern'``: ``moderndata`` is used, minus a column named ``'1'``
      when present.
    * anything else: ``historicaldata`` is used when it holds data; otherwise
      the index of ``moderndata`` is translated through the module-level
      ``oecd2webmapper`` mapping and rows without a positive mapped code
      are dropped.

    Besides ``switch`` and ``oecd2webmapper`` the function reads the
    module-level names ``config`` and ``geolist``.

    Args:
        maindata: Not read; the name is only rebound. Kept so the signature
            stays unchanged.
        moderndata: Frame used for the modern classification. Its index is
            replaced in place when the webmapper conversion runs.
        historicaldata: Frame used for the historical classification, or an
            empty / falsy value when there is none.

    Returns:
        The selected frame without the columns listed in the second element
        of ``selectint(maindata.columns)``.
    """
    if switch == 'modern':
        if '1' in moderndata.columns:
            moderndata = moderndata.drop('1', axis=1)
        maindata = moderndata
    else:
        # ``not frame`` raises ValueError for a DataFrame, so look at
        # ``.empty`` first and fall back to plain truthiness only for
        # values that are not frames (None, '', ...).
        try:
            has_historical = not historicaldata.empty
        except AttributeError:
            has_historical = bool(historicaldata)

        if has_historical:
            maindata = historicaldata
        else:
            # Do conversion to webmapper system; -1 marks codes that have
            # no mapping so the filter below removes them.
            maindata = moderndata
            webmapperindex = []
            for code in maindata.index:
                try:
                    webmappercode = oecd2webmapper[int(code)]
                except (KeyError, IndexError, TypeError, ValueError):
                    webmappercode = -1
                webmapperindex.append(webmappercode)
            maindata.index = webmapperindex
            maindata = maindata[maindata.index > 0]

    (cfilter, notint) = selectint(maindata.columns)

    codes = selectint(maindata.index)
    # The result is not used here; the call is kept in case load_geocodes
    # has side effects (not visible from this function).
    geo = load_geocodes(config, switch, codes, maindata, geolist)
    for colname in notint:
        maindata = maindata.drop(colname, axis=1)
    return maindata
Ejemplo n.º 3
0
def store_dataset(fullpath, maindata, metadata, coder):
    """Filter ``maindata`` with a fixed filter and write it to an Excel file.

    The filter is hard-coded: start year '1500', end year '2016' and an
    empty country list. The filtered data goes through ``dataset_analyzer``
    and the result is handed to ``create_excel_dataset`` together with
    ``metadata[handle]`` (``handle`` is a module-level name). The time spent
    in the analysis and export steps is printed in whole seconds.

    Args:
        fullpath: Passed through to ``create_excel_dataset``.
        maindata: Frame to filter and export.
        metadata: Mapping indexed by the module-level ``handle``.
        coder: Passed through to ``dataset_analyzer``.

    Returns:
        Whatever ``create_excel_dataset`` returns.
    """
    # Use case 1: fixed year window, no country restriction.
    datafilter = {'startyear': '1500', 'endyear': '2016', 'ctrlist': ''}
    first_year = int(datafilter['startyear'])
    last_year = int(datafilter['endyear'])
    # range() stops before last_year, so the end year itself is not listed.
    coderyears = list(range(first_year, last_year))

    datasubset, ctrlist = datasetfilter(maindata, datafilter)

    yearscolumns, notyears = selectint(maindata.columns)
    countryinfo, notcountry = selectint(maindata.index)

    started = datetime.now()
    analysis = dataset_analyzer(datasubset, coder, yearscolumns)
    (finalsubset, icoder, isyear, ctrfilter, nodata) = analysis
    if datafilter['ctrlist']:
        # Restrict the coder to the countries selected by the filter.
        icoder = pd.DataFrame(icoder.ix[ctrlist])
    datafile = create_excel_dataset(fullpath, icoder, metadata[handle],
                                    icoder.columns, coderyears, finalsubset,
                                    isyear, ctrfilter)
    elapsed = datetime.now() - started
    print("Time: " + str(elapsed.seconds) + " seconds")
    return datafile
Ejemplo n.º 4
0
def store_dataset(fullpath, maindata, metadata, coder):
    """Filter ``maindata`` with a fixed filter and write it to an Excel file.

    The filter is hard-coded: start year '1500', end year '2016' and an
    empty country list. The filtered data goes through ``dataset_analyzer``
    and the result is handed to ``create_excel_dataset`` together with
    ``metadata[handle]`` (``handle`` is a module-level name). The time spent
    in the analysis and export steps is printed in whole seconds.

    Args:
        fullpath: Passed through to ``create_excel_dataset``.
        maindata: Frame to filter and export.
        metadata: Mapping indexed by the module-level ``handle``.
        coder: Passed through to ``dataset_analyzer``.

    Returns:
        Whatever ``create_excel_dataset`` returns.
    """
    # Use case 1: fixed year window, no country restriction.
    datafilter = {'startyear': '1500', 'endyear': '2016', 'ctrlist': ''}
    first_year = int(datafilter['startyear'])
    last_year = int(datafilter['endyear'])
    # range() stops before last_year, so the end year itself is not listed.
    coderyears = list(range(first_year, last_year))

    datasubset, ctrlist = datasetfilter(maindata, datafilter)

    yearscolumns, notyears = selectint(maindata.columns)
    countryinfo, notcountry = selectint(maindata.index)

    started = datetime.now()
    analysis = dataset_analyzer(datasubset, coder, yearscolumns)
    (finalsubset, icoder, isyear, ctrfilter, nodata) = analysis
    if datafilter['ctrlist']:
        # Restrict the coder to the countries selected by the filter.
        icoder = pd.DataFrame(icoder.ix[ctrlist])
    datafile = create_excel_dataset(fullpath, icoder, metadata[handle],
                                    icoder.columns, coderyears, finalsubset,
                                    isyear, ctrfilter)
    elapsed = datetime.now() - started
    print("Time: " + str(elapsed.seconds) + " seconds")
    return datafile
Ejemplo n.º 5
0
def dataset_to_csv(config, dataset, geocoder):
    """Render ``dataset`` as tab-separated text plus a per-year aggregate.

    The years and country codes to export are the first elements returned
    by ``selectint(dataset.columns)`` and ``selectint(dataset.index)``.

    Args:
        config: Mapping; ``config['webmappercountry']`` names the geocoder
            column holding the country label.
        dataset: Frame read as ``dataset.ix[code][year]``.
        geocoder: Frame read as ``geocoder.ix[code][label column]``.

    Returns:
        Tuple ``(datastring, aggrstring)``:

        * ``datastring`` - a ``date<TAB>country...`` header followed by one
          line per year that has at least one value; cells without a value
          are written as ``NaN``.
        * ``aggrstring`` - ``date,value`` CSV holding, for each of those
          years, the sum of its values.
    """
    (years, notyears) = selectint(dataset.columns)
    (countries, notcountries) = selectint(dataset.index)

    # Header: always emit one column per country. When the geocoder has no
    # usable label the raw code is used, so the header keeps the same
    # number of columns as the data rows below.
    header = ['date']
    for code in countries:
        try:
            label = str(geocoder.ix[code][config['webmappercountry']])
        except Exception:
            label = str(code)
        header.append(label)
    lines = ["\t".join(header)]

    aggr = {}
    for year in years:
        cells = [str(year)]
        has_value = False

        for code in countries:
            try:
                value = dataset.ix[code][year]
            except Exception:
                value = ''

            # NOTE(review): falsy values, including a numeric 0, are
            # treated as missing exactly as before - confirm that zeros
            # should really be exported as NaN.
            if value and str(value) != 'nan':
                has_value = True
                cells.append(str(value))
                # Only real values enter the total; NaN used to replace or
                # reset the running sum.
                try:
                    aggr[year] = aggr[year] + value
                except KeyError:
                    aggr[year] = value
                except TypeError:
                    # Value cannot be added to the running total; skip it.
                    pass
            else:
                cells.append('NaN')

        # Include lines with values
        if has_value:
            lines.append("\t".join(cells))

    datastring = "\n".join(lines) + "\n"

    aggrstring = 'date,value\n'
    for year in sorted(aggr):
        aggrstring = aggrstring + str(year) + ',' + str(aggr[year]) + "\n"

    return (datastring, aggrstring)
Ejemplo n.º 6
0
def dataset_to_csv(config, dataset, geocoder):
    """Render ``dataset`` as tab-separated text plus a per-year aggregate.

    The years and country codes to export are the first elements returned
    by ``selectint(dataset.columns)`` and ``selectint(dataset.index)``.

    Args:
        config: Mapping; ``config['webmappercountry']`` names the geocoder
            column holding the country label.
        dataset: Frame read as ``dataset.ix[code][year]``.
        geocoder: Frame read as ``geocoder.ix[code][label column]``.

    Returns:
        Tuple ``(datastring, aggrstring)``:

        * ``datastring`` - a ``date<TAB>country...`` header followed by one
          line per year that has at least one value; cells without a value
          are written as ``NaN``.
        * ``aggrstring`` - ``date,value`` CSV holding, for each of those
          years, the sum of its values.
    """
    (years, notyears) = selectint(dataset.columns)
    (countries, notcountries) = selectint(dataset.index)

    # Header: always emit one column per country. When the geocoder has no
    # usable label the raw code is used, so the header keeps the same
    # number of columns as the data rows below.
    header = ['date']
    for code in countries:
        try:
            label = str(geocoder.ix[code][config['webmappercountry']])
        except Exception:
            label = str(code)
        header.append(label)
    lines = ["\t".join(header)]

    aggr = {}
    for year in years:
        cells = [str(year)]
        has_value = False

        for code in countries:
            try:
                value = dataset.ix[code][year]
            except Exception:
                value = ''

            # NOTE(review): falsy values, including a numeric 0, are
            # treated as missing exactly as before - confirm that zeros
            # should really be exported as NaN.
            if value and str(value) != 'nan':
                has_value = True
                cells.append(str(value))
                # Only real values enter the total; NaN used to replace or
                # reset the running sum.
                try:
                    aggr[year] = aggr[year] + value
                except KeyError:
                    aggr[year] = value
                except TypeError:
                    # Value cannot be added to the running total; skip it.
                    pass
            else:
                cells.append('NaN')

        # Include lines with values
        if has_value:
            lines.append("\t".join(cells))

    datastring = "\n".join(lines) + "\n"

    aggrstring = 'date,value\n'
    for year in sorted(aggr):
        aggrstring = aggrstring + str(year) + ',' + str(aggr[year]) + "\n"

    return (datastring, aggrstring)
Ejemplo n.º 7
0
def geocoding(switch, dataset, modern, historical):
    """Geocode ``dataset`` and collect its title/units metadata.

    Reads the module-level names ``title``, ``units``, ``config``,
    ``handle``, ``geolist`` and ``oecd2webmapper``. Nothing is loaded when
    ``title`` is empty.

    Args:
        switch: Requested classification. NOTE(review): the value is
            overridden with ``'historical'`` on entry, so the argument
            currently has no effect - confirm this is intended.
        dataset: Passed to ``loadgeocoder``.
        modern: Coder returned when the classification is ``'modern'``.
        historical: Coder returned otherwise.

    Returns:
        Tuple ``(maindata, metadata, coder)``. ``metadata`` maps ``handle``
        to ``{'title': ..., 'units': ...}``. When ``title`` is empty the
        result is ``(None, {}, None)``.
    """
    switch = 'historical'
    metadata = {}
    metadataitem = {}
    # Defaults keep the return statement valid when ``title`` is empty;
    # previously these names were unbound in that case.
    maindata = None
    coder = None
    if title:
        metadataitem['title'] = title
        metadataitem['units'] = units

        coder = modern if switch == 'modern' else historical

        (moderndata, historicaldata) = loadgeocoder(config, dataset, '')
        if switch == 'modern':
            maindata = moderndata
        else:
            # ``not frame`` raises ValueError for a DataFrame, so look at
            # ``.empty`` first and use plain truthiness only for values
            # that are not frames (None, '', ...).
            try:
                has_historical = not historicaldata.empty
            except AttributeError:
                has_historical = bool(historicaldata)

            if has_historical:
                maindata = historicaldata
            else:
                # Do conversion to webmapper system; -1 marks codes with
                # no mapping so the filter below removes them.
                maindata = moderndata
                webmapperindex = []
                for code in maindata.index:
                    try:
                        webmappercode = oecd2webmapper[int(code)]
                    except (KeyError, IndexError, TypeError, ValueError):
                        webmappercode = -1
                    webmapperindex.append(webmappercode)
                maindata.index = webmapperindex
                maindata = maindata[maindata.index > 0]

        maindata = maindata.convert_objects(convert_numeric=True)
        (cfilter, notint) = selectint(maindata.columns)

        codes = selectint(maindata.index)
        # The result is not used here; the call is kept in case
        # load_geocodes has side effects (not visible from this function).
        geo = load_geocodes(config, switch, codes, maindata, geolist)
        # Drop num if in dataframe
        if '1' in maindata.columns:
            maindata = maindata.drop('1', axis=1)
        metadata[handle] = metadataitem
    return (maindata, metadata, coder)
Ejemplo n.º 8
0
def geocoding(switch, dataset, modern, historical):
    """Geocode ``dataset`` and collect its title/units metadata.

    Reads the module-level names ``title``, ``units``, ``config``,
    ``handle``, ``geolist`` and ``oecd2webmapper``. Nothing is loaded when
    ``title`` is empty.

    Args:
        switch: Requested classification. NOTE(review): the value is
            overridden with ``'historical'`` on entry, so the argument
            currently has no effect - confirm this is intended.
        dataset: Passed to ``loadgeocoder``.
        modern: Coder returned when the classification is ``'modern'``.
        historical: Coder returned otherwise.

    Returns:
        Tuple ``(maindata, metadata, coder)``. ``metadata`` maps ``handle``
        to ``{'title': ..., 'units': ...}``. When ``title`` is empty the
        result is ``(None, {}, None)``.
    """
    switch = 'historical'
    metadata = {}
    metadataitem = {}
    # Defaults keep the return statement valid when ``title`` is empty;
    # previously these names were unbound in that case.
    maindata = None
    coder = None
    if title:
        metadataitem['title'] = title
        metadataitem['units'] = units

        coder = modern if switch == 'modern' else historical

        (moderndata, historicaldata) = loadgeocoder(config, dataset, '')
        if switch == 'modern':
            maindata = moderndata
        else:
            # ``not frame`` raises ValueError for a DataFrame, so look at
            # ``.empty`` first and use plain truthiness only for values
            # that are not frames (None, '', ...).
            try:
                has_historical = not historicaldata.empty
            except AttributeError:
                has_historical = bool(historicaldata)

            if has_historical:
                maindata = historicaldata
            else:
                # Do conversion to webmapper system; -1 marks codes with
                # no mapping so the filter below removes them.
                maindata = moderndata
                webmapperindex = []
                for code in maindata.index:
                    try:
                        webmappercode = oecd2webmapper[int(code)]
                    except (KeyError, IndexError, TypeError, ValueError):
                        webmappercode = -1
                    webmapperindex.append(webmappercode)
                maindata.index = webmapperindex
                maindata = maindata[maindata.index > 0]

        maindata = maindata.convert_objects(convert_numeric=True)
        (cfilter, notint) = selectint(maindata.columns)

        codes = selectint(maindata.index)
        # The result is not used here; the call is kept in case
        # load_geocodes has side effects (not visible from this function).
        geo = load_geocodes(config, switch, codes, maindata, geolist)
        # Drop num if in dataframe
        if '1' in maindata.columns:
            maindata = maindata.drop('1', axis=1)
        metadata[handle] = metadataitem
    return (maindata, metadata, coder)
Ejemplo n.º 9
0
def compiledataset(csvfile):
    """Load ``csvfile`` into a frame and return it with its title and units.

    The configuration comes from ``configuration()`` with ``'remote'``
    forced to ``'yes'``; the file is read by ``loaddataset_fromfile``.

    Args:
        csvfile: Passed through to ``loaddataset_fromfile``.

    Returns:
        Tuple ``(dataset, title, units)`` where ``title`` is the label of
        the second column of the loaded frame and ``units`` is taken from
        ``frame.ix[0][1]``, both read before the frame is relabelled.
    """
    # Geocoder
    config = configuration()
    config['remote'] = 'yes'

    frame = loaddataset_fromfile(config, csvfile)
    title = frame.columns[1]
    units = frame.ix[0][1]

    # Use the row at ``.ix[1]`` as column labels and the column named by
    # config['moderncode'] as the index. This relabels the loaded frame
    # itself, not a copy.
    frame.columns = frame.ix[1]
    frame.index = frame[config['moderncode']]
    dataset = frame.drop('1', axis=1) if '1' in frame.columns else frame

    intcodes, notint = selectint(dataset.columns)
    # NOTE(review): this assigns new labels (notint first, then intcodes)
    # rather than moving the columns - confirm that is the intent.
    dataset.columns = notint + intcodes
    return (dataset, title, units)
Ejemplo n.º 10
0
def compiledataset(csvfile):
    """Load ``csvfile`` into a frame and return it with its title and units.

    The configuration comes from ``configuration()`` with ``'remote'``
    forced to ``'yes'``; the file is read by ``loaddataset_fromfile``.

    Args:
        csvfile: Passed through to ``loaddataset_fromfile``.

    Returns:
        Tuple ``(dataset, title, units)`` where ``title`` is the label of
        the second column of the loaded frame and ``units`` is taken from
        ``frame.ix[0][1]``, both read before the frame is relabelled.
    """
    # Geocoder
    config = configuration()
    config['remote'] = 'yes'

    frame = loaddataset_fromfile(config, csvfile)
    title = frame.columns[1]
    units = frame.ix[0][1]

    # Use the row at ``.ix[1]`` as column labels and the column named by
    # config['moderncode'] as the index. This relabels the loaded frame
    # itself, not a copy.
    frame.columns = frame.ix[1]
    frame.index = frame[config['moderncode']]
    dataset = frame.drop('1', axis=1) if '1' in frame.columns else frame

    intcodes, notint = selectint(dataset.columns)
    # NOTE(review): this assigns new labels (notint first, then intcodes)
    # rather than moving the columns - confirm that is the intent.
    dataset.columns = notint + intcodes
    return (dataset, title, units)
Ejemplo n.º 11
0
def panel2dict(config, cleanedpaneldata, names):
    """Index a panel frame by handle and by country code.

    Args:
        config: Mapping; only ``config['webmappercode']`` is read.
        cleanedpaneldata: Frame with a ``handle`` column and, optionally, a
            ``Code`` column and/or a column named ``config['webmappercode']``.
        names: Mapping from handle to the label used in the header.

    Returns:
        Tuple ``(header, data, countries, handles, vhandles)``:

        * ``header`` - ``'Country,'`` followed by ``names[handle] + ','``
          for every distinct handle.
        * ``data`` - ``cleanedpaneldata.to_dict()``.
        * ``countries`` - code -> list of row labels carrying that code.
        * ``handles`` - handle -> list of row labels carrying that handle.
        * ``vhandles`` - row label -> handle.

    Raises:
        KeyError: If the frame has no ``handle`` column or a handle is
            missing from ``names``.
    """
    data = cleanedpaneldata.to_dict()
    codes = []
    if 'Code' in cleanedpaneldata.columns:
        codes = data['Code']
    if config['webmappercode'] in cleanedpaneldata.columns:
        # NOTE(review): this replaces the row -> code mapping with the first
        # element returned by selectint; the loop below looks ``codes`` up
        # by its own items, which presumably expects a mapping - verify.
        (codes, ncodes) = selectint(cleanedpaneldata.index)

    handles = {}
    vhandles = {}
    for row, thishandle in data['handle'].items():
        handles.setdefault(thishandle, []).append(row)
        vhandles[row] = thishandle

    # Group row labels by code. Row labels are used as given, so they may
    # be of any hashable type (the old ``i = i + 1`` required numbers).
    countries = {}
    for row in codes:
        countries.setdefault(codes[row], []).append(row)

    header = 'Country,'
    for handle in handles:
        header = header + str(names[handle]) + ','

    return (header, data, countries, handles, vhandles)
Ejemplo n.º 12
0
def panel2dict(config, cleanedpaneldata, names):
    """Index a panel frame by handle and by country code.

    Args:
        config: Mapping; only ``config['webmappercode']`` is read.
        cleanedpaneldata: Frame with a ``handle`` column and, optionally, a
            ``Code`` column and/or a column named ``config['webmappercode']``.
        names: Mapping from handle to the label used in the header.

    Returns:
        Tuple ``(header, data, countries, handles, vhandles)``:

        * ``header`` - ``'Country,'`` followed by ``names[handle] + ','``
          for every distinct handle.
        * ``data`` - ``cleanedpaneldata.to_dict()``.
        * ``countries`` - code -> list of row labels carrying that code.
        * ``handles`` - handle -> list of row labels carrying that handle.
        * ``vhandles`` - row label -> handle.

    Raises:
        KeyError: If the frame has no ``handle`` column or a handle is
            missing from ``names``.
    """
    data = cleanedpaneldata.to_dict()
    codes = []
    if 'Code' in cleanedpaneldata.columns:
        codes = data['Code']
    if config['webmappercode'] in cleanedpaneldata.columns:
        # NOTE(review): this replaces the row -> code mapping with the first
        # element returned by selectint; the loop below looks ``codes`` up
        # by its own items, which presumably expects a mapping - verify.
        (codes, ncodes) = selectint(cleanedpaneldata.index)

    handles = {}
    vhandles = {}
    for row, thishandle in data['handle'].items():
        handles.setdefault(thishandle, []).append(row)
        vhandles[row] = thishandle

    # Group row labels by code. Row labels are used as given, so they may
    # be of any hashable type (the old ``i = i + 1`` required numbers).
    countries = {}
    for row in codes:
        countries.setdefault(codes[row], []).append(row)

    header = 'Country,'
    for handle in handles:
        header = header + str(names[handle]) + ','

    return (header, data, countries, handles, vhandles)
Ejemplo n.º 13
0
# Pick the coder that matches the requested classification. ``class1`` is
# only overridden for the modern classification.
if switch == 'modern':
    coder = modern
    class1 = switch
else:
    coder = historical
activeindex = coder.index

# Load both variants of the dataset and keep the one matching ``switch``.
(moderndata, historicaldata) = loadgeocoder(config, dataset, '')
maindata = moderndata if switch == 'modern' else historicaldata

(cfilter, notint) = selectint(maindata.columns)

codes = selectint(maindata.index)
geo = load_geocodes(config, switch, codes, maindata, geolist)
# Drop every column listed in ``notint``, one at a time.
for colname in notint:
    maindata = maindata.drop(colname, axis=1)
print(title)

# Test filters
# Use case 1: years 1500-2010 restricted to four country codes.
datafilter = {
    'startyear': '1500',
    'endyear': '2010',
    'ctrlist': '528,14,18,67',
}
datasubset = datasetfilter(maindata, datafilter)
Ejemplo n.º 14
0
def dataframe_compiler(config, fullpath, handle, switch, datafilter):
    """Load a dataset, geocode and filter it, and export it to Excel.

    Steps: build the geocoder from ``config['geocoderhandle']``, load the
    dataset identified by ``handle``, select the modern or historical
    variant according to ``switch``, drop the columns rejected by
    ``selectint``, apply ``datafilter`` and hand the analysed result to
    ``create_excel_dataset``.

    Args:
        config: Configuration mapping. ``config['emptyvalues']`` is set to
            ``'no'`` as a side effect when a file is written.
        fullpath: Target directory; the file name is derived from the
            dataset title and ``switch``.
        handle: Dataset identifier passed to ``content2dataframe``.
        switch: ``'modern'`` selects the modern classification, any other
            value the historical one.
        datafilter: Mapping with ``'startyear'``, ``'endyear'`` and
            ``'ctrlist'`` entries, or a falsy value for no filtering.

    Returns:
        Tuple ``(filetitle, fullpath, finalsubset)``.
    """
    ctrlist = []
    metadata = {}

    # Load Geocoder
    (classification, dataset, title,
     units) = content2dataframe(config, config['geocoderhandle'])

    (geocoder, geolist, oecd2webmapper) = buildgeocoder(dataset, config, '')
    (modern, historical) = loadgeocoder(config, dataset, 'geocoder')

    # Years selected by the filter, or the default 1500-2015 window.
    if datafilter and datafilter.get('startyear'):
        coderyears = list(range(int(datafilter['startyear']),
                                int(datafilter['endyear'])))
    else:
        coderyears = list(range(1500, 2016))

    # Reading dataset
    (class1, dataset, title, units) = content2dataframe(config, handle)
    filetitle = title
    filetitle = re.sub(' ', '_', filetitle)
    filetitle = re.sub(r'[\(\)]', '_', filetitle)
    if filetitle:
        fullpath = "%s/%s_%s.xlsx" % (fullpath, filetitle, switch)

    coder = modern if switch == 'modern' else historical

    (moderndata, historicaldata) = loadgeocoder(config, dataset, '')
    if switch == 'modern':
        maindata = moderndata
    else:
        # Do conversion to webmapper system if there are no historical data.
        # NOTE(review): as before, only a value without an ``.empty``
        # attribute counts as "no historical data"; an empty frame is still
        # used as is - confirm whether it should be converted as well.
        try:
            historicaldata.empty
            has_frame = True
        except Exception:
            has_frame = False

        if has_frame:
            maindata = historicaldata
        else:
            maindata = moderndata
            webmapperindex = []
            for code in maindata.index:
                try:
                    webmappercode = oecd2webmapper[int(code)]
                except (KeyError, IndexError, TypeError, ValueError):
                    webmappercode = -1
                webmapperindex.append(webmappercode)
            maindata.index = webmapperindex
            # Drop not recognized locations
            maindata = maindata[maindata.index > 0]

    metadata['title'] = title if title else ''
    metadata['units'] = units if units else ''

    (cfilter, notint) = selectint(maindata.columns)

    codes = selectint(maindata.index)
    # The result is not used here; the call is kept in case load_geocodes
    # has side effects (not visible from this function).
    geo = load_geocodes(config, switch, codes, maindata, geolist)
    for colname in notint:
        maindata = maindata.drop(colname, axis=1)

    # Apply filters
    if datafilter:
        (datasubset, ctrlist) = datasetfilter(maindata, datafilter)
    else:
        # Was misspelled ``datasetset``, which left ``datasubset`` undefined.
        datasubset = maindata

    # Create Excel file out from dataframe
    (yearscolumns, notyears) = selectint(maindata.columns)

    (finalsubset, icoder, isyear, ctrfilter,
     nodata) = dataset_analyzer(datasubset, coder, yearscolumns)
    # Apply filter to countries
    if datafilter and datafilter.get('ctrlist'):
        tmpcoder = icoder.ix[ctrlist]
        icoder = pd.DataFrame(tmpcoder)

    if fullpath:
        # Empty years are never exported: the year list always comes from
        # the columns that survived the analysis.
        config['emptyvalues'] = 'no'
        (coderyears, notyears) = selectint(finalsubset.columns)
        datafile = create_excel_dataset(fullpath, icoder, metadata,
                                        icoder.columns, coderyears,
                                        finalsubset, isyear, ctrfilter)
    return (filetitle, fullpath, finalsubset)
Ejemplo n.º 15
0
def build_panel(config, switch, handles, datafilter):
    """Combine several datasets into a list of (country, year, values) rows.

    Each handle's dataset is filtered with ``datafilter``, tagged with its
    handle and concatenated into one frame. For every code/year pair of
    that frame a row ``[country, year, value per handle...]`` is built.

    Args:
        config: Configuration mapping; ``config['webmappercountry']`` names
            the geocoder column with the country label.
        switch: ``'historical'`` selects the historical geocoder, any other
            value the modern one.
        handles: Dataset handles to combine.
        datafilter: Passed to ``datasetfilter`` for every handle.

    Returns:
        Tuple ``(header, panels, metadata, totalpanel)`` where ``header``
        is ``['Country', 'Year']`` plus one title per handle and ``panels``
        is the list of rows that passed the final filter.
    """
    (geocoder, geolist, oecd2webmapper, modern,
     historical) = request_geocoder(config, '')
    (origdata, maindata, metadata) = request_datasets(
        config, switch, modern, historical, handles, geolist)

    subsets = {}
    panel = []
    logscale = ''
    for handle in handles:
        (datasubset, ctrlist) = datasetfilter(maindata[handle], datafilter)
        if datasubset.empty:
            continue

        datasubset = datasubset.dropna(how='all')
        try:
            if np.nan in datasubset.index:
                datasubset = datasubset.drop(np.nan, axis=0)
        except:
            pass

        datasubset['handle'] = handle
        metadata['url'] = 0
        panel.append(datasubset)
        subsets[handle] = datasubset
        # The returned pair is not used further in this function.
        (panelcells, originalvalues) = dataset2panel(
            config, subsets[handle], historical, logscale)

    totalpanel = pd.concat(panel)
    try:
        if np.nan in totalpanel.index:
            totalpanel = totalpanel.drop(np.nan, axis=0)
    except:
        pass

    geocoder = historical if switch == 'historical' else modern

    # Remove columns with empty years
    for colyear in totalpanel.columns:
        if totalpanel[colyear].count() == 0:
            totalpanel = totalpanel.drop(colyear, axis=1)

    (allyears, nyears) = selectint(totalpanel.columns)
    print(totalpanel.index)
    panels = []
    seen = set()
    matrix = {}
    for code in totalpanel.index:
        try:
            country = geocoder.ix[int(code)][config['webmappercountry']]
        except:
            country = ''

        for thisyear in allyears:
            thiskey = str(int(code)) + str(thisyear)
            # Every code/year pair is emitted at most once.
            if thiskey in seen:
                continue
            seen.add(thiskey)
            matrix[thiskey] = ''
            dataitem = [country, thisyear]

            for handle in handles:
                tmpframe = totalpanel.loc[totalpanel['handle'] == handle]
                try:
                    thisval = tmpframe.ix[int(code)][thisyear]
                    matrix[thiskey] = thisval
                except:
                    thisval = ''
                dataitem.append(thisval)

            # Filter out np.NaN; only the value of the last handle and the
            # last value found for this key are checked here.
            if str(thisval) != 'nan' and country and matrix[thiskey]:
                panels.append(dataitem)

    # Build header
    header = ['Country', 'Year']
    header.extend([metadata[handle]['title'] for handle in handles])

    return (header, panels, metadata, totalpanel)
Ejemplo n.º 16
0
def build_panel(config, switch, handles, datafilter):
    """Combine several datasets into a list of (country, year, values) rows.

    Each handle's dataset is filtered with ``datafilter``, tagged with its
    handle and concatenated into one frame. For every code/year pair of
    that frame a row ``[country, year, value per handle...]`` is built.

    Args:
        config: Configuration mapping; ``config['webmappercountry']`` names
            the geocoder column with the country label.
        switch: ``'historical'`` selects the historical geocoder, any other
            value the modern one.
        handles: Dataset handles to combine.
        datafilter: Passed to ``datasetfilter`` for every handle.

    Returns:
        Tuple ``(header, panels, metadata, totalpanel)`` where ``header``
        is ``['Country', 'Year']`` plus one title per handle and ``panels``
        is the list of rows that passed the final filter.
    """
    (geocoder, geolist, oecd2webmapper, modern,
     historical) = request_geocoder(config, '')
    (origdata, maindata, metadata) = request_datasets(
        config, switch, modern, historical, handles, geolist)

    subsets = {}
    panel = []
    logscale = ''
    for handle in handles:
        (datasubset, ctrlist) = datasetfilter(maindata[handle], datafilter)
        if datasubset.empty:
            continue

        datasubset = datasubset.dropna(how='all')
        try:
            if np.nan in datasubset.index:
                datasubset = datasubset.drop(np.nan, axis=0)
        except:
            pass

        datasubset['handle'] = handle
        metadata['url'] = 0
        panel.append(datasubset)
        subsets[handle] = datasubset
        # The returned pair is not used further in this function.
        (panelcells, originalvalues) = dataset2panel(
            config, subsets[handle], historical, logscale)

    totalpanel = pd.concat(panel)
    try:
        if np.nan in totalpanel.index:
            totalpanel = totalpanel.drop(np.nan, axis=0)
    except:
        pass

    geocoder = historical if switch == 'historical' else modern

    # Remove columns with empty years
    for colyear in totalpanel.columns:
        if totalpanel[colyear].count() == 0:
            totalpanel = totalpanel.drop(colyear, axis=1)

    (allyears, nyears) = selectint(totalpanel.columns)
    print(totalpanel.index)
    panels = []
    seen = set()
    matrix = {}
    for code in totalpanel.index:
        try:
            country = geocoder.ix[int(code)][config['webmappercountry']]
        except:
            country = ''

        for thisyear in allyears:
            thiskey = str(int(code)) + str(thisyear)
            # Every code/year pair is emitted at most once.
            if thiskey in seen:
                continue
            seen.add(thiskey)
            matrix[thiskey] = ''
            dataitem = [country, thisyear]

            for handle in handles:
                tmpframe = totalpanel.loc[totalpanel['handle'] == handle]
                try:
                    thisval = tmpframe.ix[int(code)][thisyear]
                    matrix[thiskey] = thisval
                except:
                    thisval = ''
                dataitem.append(thisval)

            # Filter out np.NaN; only the value of the last handle and the
            # last value found for this key are checked here.
            if str(thisval) != 'nan' and country and matrix[thiskey]:
                panels.append(dataitem)

    # Build header
    header = ['Country', 'Year']
    header.extend([metadata[handle]['title'] for handle in handles])

    return (header, panels, metadata, totalpanel)
Ejemplo n.º 17
0
    class1 = switch
else:
    activeindex = historical.index
    coder = historical

# Dataset handle to load; only the last assignment takes effect, the
# earlier ones are overwritten immediately.
handle = "hdl:10622/DIUBXI"
handle = "hdl:10622/WNGZ4A"
handle = "hdl:10622/GZ7O1K"

# Disabled alternative loaders, kept for reference:
#if remote:
#    (class1, dataset) = loaddataset_fromurl(config, handle)
#else:
#    dataset = loaddataset(handles)
# Load the dataset together with its classification, title and units.
(class1, dataset, title, units) = content2dataframe(config, handle)

# ``activeindex`` and ``coder`` are set by the if/else that precedes this
# section.
(cfilter, notint) = selectint(activeindex.values)
(moderndata, historicaldata) = loadgeocoder(config, dataset, '')
# Keep the variant of the dataset that matches ``switch``.
if switch == 'modern':
    maindata = moderndata
else:
    maindata = historicaldata

# Build the treemap structure and print it.
tree = []
tree = treemap(config, maindata, class1, cfilter, coder)
print tree

# Sample country code and year, stored as strings; not used within this
# section.
ccode = '150'
year = '2004'

# Empty string, so the ``if test:`` that follows is skipped.
test = ''
if test:
Ejemplo n.º 18
0
def dataframe_compiler(config, fullpath, handle, switch, datafilter):
    """Load a dataset, geocode and filter it, and export it to Excel.

    Steps: build the geocoder from ``config["geocoderhandle"]``, load the
    dataset identified by ``handle``, select the modern or historical
    variant according to ``switch``, drop the columns rejected by
    ``selectint``, apply ``datafilter`` and hand the analysed result to
    ``create_excel_dataset``.

    Args:
        config: Configuration mapping. ``config["emptyvalues"]`` is set to
            ``"no"`` as a side effect when a file is written.
        fullpath: Target directory; the file name is derived from the
            dataset title and ``switch``.
        handle: Dataset identifier passed to ``content2dataframe``.
        switch: ``"modern"`` selects the modern classification, any other
            value the historical one.
        datafilter: Mapping with ``"startyear"``, ``"endyear"`` and
            ``"ctrlist"`` entries, or a falsy value for no filtering.

    Returns:
        Tuple ``(filetitle, fullpath, finalsubset)``.
    """
    ctrlist = []
    metadata = {}

    # Load Geocoder
    (classification, dataset, title, units) = content2dataframe(config, config["geocoderhandle"])

    (geocoder, geolist, oecd2webmapper) = buildgeocoder(dataset, config, "")
    (modern, historical) = loadgeocoder(config, dataset, "geocoder")

    # Years selected by the filter, or the default 1500-2015 window.
    if datafilter and datafilter.get("startyear"):
        coderyears = list(range(int(datafilter["startyear"]), int(datafilter["endyear"])))
    else:
        coderyears = list(range(1500, 2016))

    # Reading dataset
    (class1, dataset, title, units) = content2dataframe(config, handle)
    filetitle = title
    filetitle = re.sub(" ", "_", filetitle)
    filetitle = re.sub(r"[\(\)]", "_", filetitle)
    if filetitle:
        fullpath = "%s/%s_%s.xlsx" % (fullpath, filetitle, switch)

    coder = modern if switch == "modern" else historical

    (moderndata, historicaldata) = loadgeocoder(config, dataset, "")
    if switch == "modern":
        maindata = moderndata
    else:
        # Do conversion to webmapper system if there are no historical data.
        # NOTE(review): as before, only a value without an ``.empty``
        # attribute counts as "no historical data"; an empty frame is still
        # used as is - confirm whether it should be converted as well.
        try:
            historicaldata.empty
            has_frame = True
        except Exception:
            has_frame = False

        if has_frame:
            maindata = historicaldata
        else:
            maindata = moderndata
            webmapperindex = []
            for code in maindata.index:
                try:
                    webmappercode = oecd2webmapper[int(code)]
                except (KeyError, IndexError, TypeError, ValueError):
                    webmappercode = -1
                webmapperindex.append(webmappercode)
            maindata.index = webmapperindex
            # Drop not recognized locations
            maindata = maindata[maindata.index > 0]

    metadata["title"] = title if title else ""
    metadata["units"] = units if units else ""

    (cfilter, notint) = selectint(maindata.columns)

    codes = selectint(maindata.index)
    # The result is not used here; the call is kept in case load_geocodes
    # has side effects (not visible from this function).
    geo = load_geocodes(config, switch, codes, maindata, geolist)
    for colname in notint:
        maindata = maindata.drop(colname, axis=1)

    # Apply filters
    if datafilter:
        (datasubset, ctrlist) = datasetfilter(maindata, datafilter)
    else:
        # Was misspelled ``datasetset``, which left ``datasubset`` undefined.
        datasubset = maindata

    # Create Excel file out from dataframe
    (yearscolumns, notyears) = selectint(maindata.columns)

    (finalsubset, icoder, isyear, ctrfilter, nodata) = dataset_analyzer(datasubset, coder, yearscolumns)
    # Apply filter to countries
    if datafilter and datafilter.get("ctrlist"):
        tmpcoder = icoder.ix[ctrlist]
        icoder = pd.DataFrame(tmpcoder)

    if fullpath:
        # Empty years are never exported: the year list always comes from
        # the columns that survived the analysis.
        config["emptyvalues"] = "no"
        (coderyears, notyears) = selectint(finalsubset.columns)
        datafile = create_excel_dataset(
            fullpath, icoder, metadata, icoder.columns, coderyears, finalsubset, isyear, ctrfilter
        )
    return (filetitle, fullpath, finalsubset)
Ejemplo n.º 19
0
# Pick the coder that matches the requested classification. ``class1`` is
# only overridden for the modern classification.
if switch == 'modern':
    coder = modern
    class1 = switch
else:
    coder = historical
activeindex = coder.index

# Load both variants of the dataset and keep the one matching ``switch``.
(moderndata, historicaldata) = loadgeocoder(config, dataset, '')
maindata = moderndata if switch == 'modern' else historicaldata

(cfilter, notint) = selectint(maindata.columns)

codes = selectint(maindata.index)
geo = load_geocodes(config, switch, codes, maindata, geolist)
# Drop every column listed in ``notint``, one at a time.
for colname in notint:
    maindata = maindata.drop(colname, axis=1)
print(title)

# Test filters
# Use case 1: years 1500-2010 restricted to four country codes.
datafilter = {
    'startyear': '1500',
    'endyear': '2010',
    'ctrlist': '528,14,18,67',
}
datasubset = datasetfilter(maindata, datafilter)