Beispiel #1
0
def store_dataset(fullpath, maindata, metadata, coder):
    """Run a fixed test filter over ``maindata`` and export it to Excel.

    The filter is hard-coded: start year '1500', end year '2016' and an
    empty 'ctrlist'.  The time spent in the analysis/export step is
    printed to stdout.

    Args:
        fullpath: Passed through to ``create_excel_dataset``.
        maindata: Object exposing ``columns`` and ``index``; handed to
            ``datasetfilter``.
        metadata: Mapping that is indexed with ``handle``.
        coder: Passed through to ``dataset_analyzer``.

    Returns:
        The value returned by ``create_excel_dataset``.
    """
    # NOTE(review): ``handle`` is neither a parameter nor a local here; it
    # has to exist as a module-level name -- confirm.

    # Use case 1.  Other 'ctrlist' values tried previously: '528', '1523'
    # and '528,14,18,67'; the empty string is the one in effect.
    datafilter = {'startyear': '1500', 'endyear': '2016', 'ctrlist': ''}
    if datafilter['startyear']:
        first = int(datafilter['startyear'])
        last = int(datafilter['endyear'])
        coderyears = list(range(first, last))  # end year is exclusive

    datasubset, ctrlist = datasetfilter(maindata, datafilter)

    yearscolumns, notyears = selectint(maindata.columns)
    countryinfo, notcountry = selectint(maindata.index)

    started = datetime.now()
    analysis = dataset_analyzer(datasubset, coder, yearscolumns)
    finalsubset, icoder, isyear, ctrfilter, nodata = analysis
    if datafilter['ctrlist']:
        # Restrict the coder to the labels returned by the filter.
        icoder = pd.DataFrame(icoder.ix[ctrlist])
    datafile = create_excel_dataset(
        fullpath, icoder, metadata[handle], icoder.columns,
        coderyears, finalsubset, isyear, ctrfilter)
    elapsed = datetime.now() - started
    print("Time: " + str(elapsed.seconds) + " seconds")
    return datafile
Beispiel #2
0
def store_dataset(fullpath, maindata, metadata, coder):
    """Filter ``maindata`` with a built-in test filter and write an Excel file.

    The filter values are fixed inside the function (start year '1500',
    end year '2016', empty 'ctrlist').  The duration of the analysis and
    export step is printed to stdout.

    Args:
        fullpath: Forwarded to ``create_excel_dataset``.
        maindata: Object with ``columns`` and ``index`` attributes; given
            to ``datasetfilter``.
        metadata: Mapping looked up with ``handle``.
        coder: Forwarded to ``dataset_analyzer``.

    Returns:
        Whatever ``create_excel_dataset`` returns.
    """
    # NOTE(review): ``handle`` is not defined in this function; it must be
    # available as a module-level name -- confirm.

    # Use case 1.  Earlier experiments used 'ctrlist' values '528', '1523'
    # and '528,14,18,67'; the empty string is the effective setting.
    datafilter = dict(startyear='1500', endyear='2016', ctrlist='')
    if datafilter['startyear']:
        year_from = int(datafilter['startyear'])
        year_to = int(datafilter['endyear'])
        coderyears = list(range(year_from, year_to))  # year_to excluded

    datasubset, ctrlist = datasetfilter(maindata, datafilter)

    yearscolumns, notyears = selectint(maindata.columns)
    countryinfo, notcountry = selectint(maindata.index)

    t_start = datetime.now()
    result = dataset_analyzer(datasubset, coder, yearscolumns)
    finalsubset, icoder, isyear, ctrfilter, nodata = result
    if datafilter['ctrlist']:
        # Keep only the coder rows for the labels the filter returned.
        icoder = pd.DataFrame(icoder.ix[ctrlist])
    datafile = create_excel_dataset(
        fullpath, icoder, metadata[handle], icoder.columns,
        coderyears, finalsubset, isyear, ctrfilter)
    t_delta = datetime.now() - t_start
    print("Time: " + str(t_delta.seconds) + " seconds")
    return datafile
Beispiel #3
0
def dataframe_compiler(config, fullpath, handle, switch, datafilter):
    """Load one dataset, filter it and export the result to an Excel file.

    Args:
        config: Configuration mapping.  ``config['geocoderhandle']`` is
            read; ``config['emptyvalues']`` is overwritten with ``'no'``
            when an export is made.
        fullpath: Output location.  When the sanitised dataset title is
            non-empty it becomes ``<fullpath>/<title>_<switch>.xlsx``.
        handle: Dataset identifier passed to ``content2dataframe``.
        switch: ``'modern'`` selects the modern geocoder and data; any
            other value selects the historical ones.
        datafilter: Mapping with 'startyear', 'endyear' and 'ctrlist'
            keys.  An empty mapping or ``None`` disables filtering.

    Returns:
        Tuple ``(filetitle, fullpath, finalsubset)``.
    """
    ctrlist = []
    metadata = {}
    # Treat a missing filter as "no filtering" so the key lookups below
    # cannot fail before the unfiltered branch is reached.
    datafilter = datafilter or {}

    # Load Geocoder
    (classification, dataset, title,
     units) = content2dataframe(config, config['geocoderhandle'])

    (geocoder, geolist, oecd2webmapper) = buildgeocoder(dataset, config, '')
    (modern, historical) = loadgeocoder(config, dataset, 'geocoder')

    # Years selected by the filter, otherwise the default selection.
    if datafilter.get('startyear'):
        coderyears = list(range(int(datafilter['startyear']),
                                int(datafilter['endyear'])))
    else:
        coderyears = list(range(1500, 2016))

    # Reading dataset
    (class1, dataset, title, units) = content2dataframe(config, handle)
    filetitle = title
    filetitle = re.sub(' ', '_', filetitle)
    filetitle = re.sub(r'[\(\)]', '_', filetitle)
    if filetitle:
        fullpath = "%s/%s_%s.xlsx" % (fullpath, filetitle, switch)

    coder = modern if switch == 'modern' else historical

    (moderndata, historicaldata) = loadgeocoder(config, dataset, '')
    if switch == 'modern':
        maindata = moderndata
    else:
        # Do conversion to webmapper system if there are no historical data.
        # NOTE(review): only a failing ``.empty`` lookup triggers the
        # conversion; an empty frame is still used as-is -- confirm intent.
        isdata = ''
        try:
            if not historicaldata.empty:
                isdata = 'yes'
        except Exception:
            isdata = 'no'

        if isdata == 'no':
            maindata = moderndata
            webmapperindex = []
            for code in maindata.index:
                try:
                    webmappercode = oecd2webmapper[int(code)]
                except Exception:
                    # Unknown or non-numeric code: mark for removal below.
                    webmappercode = -1
                webmapperindex.append(webmappercode)
            maindata.index = webmapperindex
            # Drop not recognized locations
            maindata = maindata[maindata.index > 0]
        else:
            maindata = historicaldata

    metadata['title'] = title if title else ''
    metadata['units'] = units if units else ''

    (cfilter, notint) = selectint(maindata.columns)

    codes = selectint(maindata.index)
    geo = load_geocodes(config, switch, codes, maindata, geolist)
    for colname in notint:
        maindata = maindata.drop(colname, axis=1)

    # Apply filters
    if datafilter:
        (datasubset, ctrlist) = datasetfilter(maindata, datafilter)
    else:
        # Was misspelled ``datasetset``, leaving ``datasubset`` undefined.
        datasubset = maindata

    # Create Excel file out from dataframe
    (yearscolumns, notyears) = selectint(maindata.columns)
    (countryinfo, notcountry) = selectint(maindata.index)

    (finalsubset, icoder, isyear, ctrfilter,
     nodata) = dataset_analyzer(datasubset, coder, yearscolumns)
    # Apply filter to countries
    if datafilter.get('ctrlist'):
        icoder = pd.DataFrame(icoder.ix[ctrlist])

    if fullpath:
        config['emptyvalues'] = 'no'
        if config['emptyvalues'] == 'no':
            # Export only the years present in the final subset.
            (coderyears, notyears) = selectint(finalsubset.columns)
        datafile = create_excel_dataset(fullpath, icoder, metadata,
                                        icoder.columns, coderyears,
                                        finalsubset, isyear, ctrfilter)
    return (filetitle, fullpath, finalsubset)
Beispiel #4
0
def build_panel(config, switch, handles, datafilter):
    """Combine several datasets into country/year panel rows.

    Args:
        config: Configuration mapping; ``config['webmappercountry']`` names
            the geocoder column used as the country label.
        switch: ``'historical'`` selects the historical geocoder for the
            country lookup; any other value selects the modern one.
        handles: Dataset identifiers.  One value column per handle is
            emitted, in this order.
        datafilter: Filter passed to ``datasetfilter`` for every dataset.

    Returns:
        Tuple ``(header, panels, metadata, totalpanel)``.  ``header`` is
        ``['Country', 'Year']`` followed by one title per handle,
        ``panels`` is a list of ``[country, year, value, ...]`` rows and
        ``totalpanel`` is the concatenated, cleaned frame.
    """

    def drop_nan_labels(frame):
        # Best effort: remove rows labelled NaN; leave the frame untouched
        # if the membership test or the drop fails.
        try:
            if np.nan in frame.index:
                frame = frame.drop(np.nan, axis=0)
        except Exception:
            pass
        return frame

    (geocoder, geolist, oecd2webmapper, modern,
     historical) = request_geocoder(config, '')

    (origdata, maindata, metadata) = request_datasets(config, switch, modern,
                                                      historical, handles,
                                                      geolist)
    panel = []
    logscale = ''
    for handle in handles:
        (datasubset, ctrlist) = datasetfilter(maindata[handle], datafilter)
        if datasubset.empty:
            continue
        datasubset = drop_nan_labels(datasubset.dropna(how='all'))
        datasubset['handle'] = handle
        # NOTE(review): writes a top-level 'url' key into the metadata
        # mapping for every non-empty dataset -- confirm intent.
        metadata['url'] = 0
        panel.append(datasubset)
        # Result is unused here; the call is kept as in the original.
        (panelcells, originalvalues) = dataset2panel(config, datasubset,
                                                     historical, logscale)

    totalpanel = drop_nan_labels(pd.concat(panel))

    geocoder = historical if switch == 'historical' else modern

    # Remove columns with empty years
    for colyear in totalpanel.columns:
        if totalpanel[colyear].count() == 0:
            totalpanel = totalpanel.drop(colyear, axis=1)

    (allyears, nyears) = selectint(totalpanel.columns)
    print(totalpanel.index)

    panels = []
    seen = set()
    # The per-handle slice does not depend on code or year, so it is built
    # once per handle (on first use) instead of once per cell.
    frames = {}
    for code in totalpanel.index:
        try:
            country = geocoder.ix[int(code)][config['webmappercountry']]
        except Exception:
            country = ''

        for thisyear in allyears:
            # Tuple key: concatenating the two strings is ambiguous
            # ('1' + '1500' equals '11' + '500').
            thiskey = (int(code), str(thisyear))
            if thiskey in seen:
                continue
            seen.add(thiskey)

            dataitem = [country, thisyear]
            lastfound = ''
            for handle in handles:
                if handle not in frames:
                    frames[handle] = totalpanel.loc[
                        totalpanel['handle'] == handle]
                try:
                    thisval = frames[handle].ix[int(code)][thisyear]
                    lastfound = thisval
                except Exception:
                    thisval = ''
                dataitem.append(thisval)

            # Filter out np.NaN.  Only the value of the last handle is
            # checked, and a row needs a country and a truthy last-found
            # value to be kept (unchanged from the original behaviour).
            if str(thisval) != 'nan' and country and lastfound:
                panels.append(dataitem)

    # Build header
    header = ['Country', 'Year']
    for handle in handles:
        header.append(metadata[handle]['title'])

    return (header, panels, metadata, totalpanel)
Beispiel #5
0
def build_panel(config, switch, handles, datafilter):
    """Merge the datasets named by ``handles`` into country/year rows.

    Args:
        config: Configuration mapping; ``config['webmappercountry']`` is
            the geocoder column read for the country label.
        switch: ``'historical'`` uses the historical geocoder for the
            country lookup; anything else uses the modern one.
        handles: Dataset identifiers; each contributes one value column,
            in the given order.
        datafilter: Filter handed to ``datasetfilter`` for each dataset.

    Returns:
        Tuple ``(header, panels, metadata, totalpanel)``: the header list
        (``'Country'``, ``'Year'``, then one title per handle), the list of
        ``[country, year, value, ...]`` rows, the metadata mapping and the
        concatenated, cleaned frame.
    """

    def without_nan_labels(frame):
        # Best effort: drop rows whose index label is NaN; on any failure
        # the frame is returned unchanged.
        try:
            if np.nan in frame.index:
                frame = frame.drop(np.nan, axis=0)
        except Exception:
            pass
        return frame

    (geocoder, geolist, oecd2webmapper, modern,
     historical) = request_geocoder(config, '')

    (origdata, maindata, metadata) = request_datasets(config, switch, modern,
                                                      historical, handles,
                                                      geolist)
    panel = []
    logscale = ''
    for handle in handles:
        (datasubset, ctrlist) = datasetfilter(maindata[handle], datafilter)
        if datasubset.empty:
            continue
        datasubset = without_nan_labels(datasubset.dropna(how='all'))
        datasubset['handle'] = handle
        # NOTE(review): sets a top-level 'url' key on the metadata mapping
        # for each non-empty dataset -- confirm intent.
        metadata['url'] = 0
        panel.append(datasubset)
        # Return value is not used; the call is retained from the original.
        (panelcells,
         originalvalues) = dataset2panel(config, datasubset,
                                         historical, logscale)

    totalpanel = without_nan_labels(pd.concat(panel))

    geocoder = historical if switch == 'historical' else modern

    # Remove columns with empty years
    for colyear in totalpanel.columns:
        if totalpanel[colyear].count() == 0:
            totalpanel = totalpanel.drop(colyear, axis=1)

    (allyears, nyears) = selectint(totalpanel.columns)
    print(totalpanel.index)

    panels = []
    visited = set()
    # Cache of per-handle slices: they are independent of code and year,
    # so each is computed once (lazily) rather than for every cell.
    by_handle = {}
    for code in totalpanel.index:
        try:
            country = geocoder.ix[int(code)][config['webmappercountry']]
        except Exception:
            country = ''

        for thisyear in allyears:
            # A tuple avoids the ambiguity of a concatenated string key
            # ('1' + '1500' equals '11' + '500').
            thiskey = (int(code), str(thisyear))
            if thiskey in visited:
                continue
            visited.add(thiskey)

            dataitem = [country, thisyear]
            lastfound = ''
            for handle in handles:
                if handle not in by_handle:
                    by_handle[handle] = totalpanel.loc[
                        totalpanel['handle'] == handle]
                try:
                    thisval = by_handle[handle].ix[int(code)][thisyear]
                    lastfound = thisval
                except Exception:
                    thisval = ''
                dataitem.append(thisval)

            # Filter out np.NaN.  As before, only the last handle's value
            # is tested, and the row is kept only with a country and a
            # truthy last-found value.
            if str(thisval) != 'nan' and country and lastfound:
                panels.append(dataitem)

    # Build header
    header = ['Country', 'Year']
    for handle in handles:
        header.append(metadata[handle]['title'])

    return (header, panels, metadata, totalpanel)
Beispiel #6
0
def dataframe_compiler(config, fullpath, handle, switch, datafilter):
    handles = []
    ctrlist = []
    metadata = {}
    # switch = 'historical'

    # Load Geocoder
    (classification, dataset, title, units) = content2dataframe(config, config["geocoderhandle"])

    (geocoder, geolist, oecd2webmapper) = buildgeocoder(dataset, config, "")
    (modern, historical) = loadgeocoder(config, dataset, "geocoder")

    coderyears = []
    # Default years selection
    for i in range(1500, 2016):
        coderyears.append(i)
    # Years selected
    if datafilter["startyear"]:
        coderyears = []
        for i in range(int(datafilter["startyear"]), int(datafilter["endyear"])):
            coderyears.append(i)

    # Reading dataset
    (class1, dataset, title, units) = content2dataframe(config, handle)
    filetitle = title
    filetitle = re.sub(" ", "_", filetitle)
    filetitle = re.sub(r"[\(\)]", "_", filetitle)
    if filetitle:
        fullpath = "%s/%s_%s.xlsx" % (fullpath, filetitle, switch)

    # return ('test', 'test')

    if switch == "modern":
        activeindex = modern.index
        coder = modern
        class1 = switch
    else:
        activeindex = historical.index
        coder = historical

    (moderndata, historicaldata) = loadgeocoder(config, dataset, "")
    if switch == "modern":
        maindata = moderndata
    else:
        # Do conversion to webmapper system if there are no historical data
        isdata = ""
        try:
            if not historicaldata.empty:
                isdata = "yes"
        except:
            isdata = "no"

        if isdata == "no":
            maindata = moderndata
            webmapperindex = []
            for code in maindata.index:
                try:
                    webmappercode = oecd2webmapper[int(code)]
                except:
                    webmappercode = -1
                webmapperindex.append(webmappercode)
            maindata.index = webmapperindex
            # Drop not recognized locations
            maindata = maindata[maindata.index > 0]
        else:
            maindata = historicaldata

    if title:
        metadata["title"] = title
    else:
        metadata["title"] = ""

    if units:
        metadata["units"] = units
    else:
        metadata["units"] = ""

    (cfilter, notint) = selectint(maindata.columns)

    codes = selectint(maindata.index)
    geo = load_geocodes(config, switch, codes, maindata, geolist)
    for colname in notint:
        maindata = maindata.drop(colname, axis=1)

    # Apply filters
    if datafilter:
        (datasubset, ctrlist) = datasetfilter(maindata, datafilter)
    else:
        datasetset = maindata

        # Create Excel file out from dataframe
    (yearscolumns, notyears) = selectint(maindata.columns)
    (countryinfo, notcountry) = selectint(maindata.index)

    (finalsubset, icoder, isyear, ctrfilter, nodata) = dataset_analyzer(datasubset, coder, yearscolumns)
    # return (finalsubset, fullpath, finalsubset)
    # Apply filter to countries
    if datafilter["ctrlist"]:
        tmpcoder = icoder.ix[ctrlist]
        icoder = pd.DataFrame(tmpcoder)

    # return (finalsubset.to_html(), 'test')
    if fullpath:
        config["emptyvalues"] = "no"
        if config["emptyvalues"] == "no":
            (coderyears, notyears) = selectint(finalsubset.columns)
        datafile = create_excel_dataset(
            fullpath, icoder, metadata, icoder.columns, coderyears, finalsubset, isyear, ctrfilter
        )
    return (filetitle, fullpath, finalsubset)
Beispiel #7
0
# Split the column labels; everything reported in ``notint`` is removed
# from the frame below.
cfilter, notint = selectint(maindata.columns)

codes = selectint(maindata.index)
geo = load_geocodes(config, switch, codes, maindata, geolist)
for colname in notint:
    maindata = maindata.drop(colname, axis=1)
print(title)

# Test filters
# Use case 1 (an empty 'ctrlist' was the earlier setting)
datafilter = {
    'startyear': '1500',
    'endyear': '2010',
    'ctrlist': '528,14,18,67',
}
datasubset = datasetfilter(maindata, datafilter)

# Create Excel file out from dataframe
fullpath = config['webtest'] + "/subdata_set.xlsx"
yearscolumns, notyears = selectint(maindata.columns)
countryinfo, notcountry = selectint(maindata.index)
# Placeholder metadata is used unless a real title / units value exists.
metadata = {
    'title': title if title else 'testtitle',
    'units': units if units else 'testunits',
}

# Start of the timed section.
a = datetime.now()
# Split the column labels; the labels returned in ``notint`` are dropped
# from the frame below.
cfilter, notint = selectint(maindata.columns)

codes = selectint(maindata.index)
geo = load_geocodes(config, switch, codes, maindata, geolist)
for colname in notint:
    maindata = maindata.drop(colname, axis=1)
print(title)

# Test filters
# Use case 1 (the 'ctrlist' entry used to be an empty string)
datafilter = dict(
    startyear='1500',
    endyear='2010',
    ctrlist='528,14,18,67',
)
datasubset = datasetfilter(maindata, datafilter)

# Create Excel file out from dataframe
fullpath = config['webtest'] + "/subdata_set.xlsx"
yearscolumns, notyears = selectint(maindata.columns)
countryinfo, notcountry = selectint(maindata.index)
# Fall back to placeholder metadata when title / units are not set.
metadata = {
    'title': title if title else 'testtitle',
    'units': units if units else 'testunits',
}

# Timestamp marking the start of the timed section.
a = datetime.now()