def conversion(maindata, moderndata, historicaldata):
    """Pick the modern or historical frame and drop its non-integer columns.

    The incoming ``maindata`` value is never read; it is always replaced by
    one of the other two frames.

    The function reads several names that are not parameters (``switch``,
    ``oecd2webmapper``, ``config``, ``geolist``, ``selectint`` and
    ``load_geocodes``); they must be resolvable from the enclosing scope.

    Args:
        maindata: Ignored (kept for interface compatibility).
        moderndata: Frame used when ``switch == 'modern'``, and as the
            fallback source when there is no historical data.
        historicaldata: Frame used for any other ``switch`` value.  It counts
            as "no data" when it has no ``empty`` attribute (e.g. ``None`` or
            ``''``) or when it is an empty frame.

    Returns:
        The selected frame without the columns that ``selectint`` reports as
        the second element of its result.

    Note:
        In the fallback path the index of ``moderndata`` itself is reassigned
        in place before the rows are filtered.
    """
    if switch == 'modern':
        # Column '1' is discarded when present.
        if '1' in moderndata.columns:
            moderndata = moderndata.drop('1', axis=1)
        maindata = moderndata
    elif getattr(historicaldata, 'empty', True):
        # `not historicaldata` raises ValueError for a DataFrame, so emptiness
        # is tested through the `empty` attribute instead.
        # Do conversion to webmapper system
        maindata = moderndata
        webmapperindex = []
        for code in maindata.index:
            try:
                webmappercode = oecd2webmapper[int(code)]
            except (KeyError, IndexError, TypeError, ValueError):
                # Code is not numeric or has no mapping: mark it for removal.
                webmappercode = -1
            webmapperindex.append(webmappercode)
        maindata.index = webmapperindex
        # Keep only rows with a positive code (this drops the -1 markers).
        maindata = maindata[maindata.index > 0]
    else:
        maindata = historicaldata

    (_, notint) = selectint(maindata.columns)
    codes = selectint(maindata.index)
    # Result is not used here; the call is kept in case it has side effects
    # -- TODO confirm and remove if it is pure.
    load_geocodes(config, switch, codes, maindata, geolist)

    return maindata.drop(list(notint), axis=1)
def geocoding(switch, dataset, modern, historical):
    """Load the data frames for ``dataset`` and return data, metadata and coder.

    The function reads several names that are not parameters (``title``,
    ``units``, ``handle``, ``config``, ``geolist``, ``oecd2webmapper``,
    ``loadgeocoder``, ``selectint`` and ``load_geocodes``); they must be
    resolvable from the enclosing scope.

    Args:
        switch: Requested mode.  NOTE(review): currently overridden, see the
            first statement of the body.
        dataset: Passed to ``loadgeocoder`` to obtain the modern and
            historical data frames.
        modern: Coder returned when the mode is ``'modern'``.
        historical: Coder returned for any other mode.

    Returns:
        A tuple ``(maindata, metadata, coder)`` where ``metadata`` maps
        ``handle`` to a dict that holds ``'title'`` and ``'units'`` when
        ``title`` is truthy.
    """
    # NOTE(review): the caller's `switch` is replaced unconditionally, so the
    # 'modern' branches below never run.  Kept as found -- confirm whether
    # this is a leftover debugging override.
    switch = 'historical'

    metadata = {}
    metadataitem = {}
    if title:
        metadataitem['title'] = title
        metadataitem['units'] = units

    coder = modern if switch == 'modern' else historical

    (moderndata, historicaldata) = loadgeocoder(config, dataset, '')
    if switch == 'modern':
        maindata = moderndata
    elif getattr(historicaldata, 'empty', True):
        # `not historicaldata` raises ValueError for a DataFrame, so emptiness
        # is tested through the `empty` attribute; objects without that
        # attribute (e.g. None or '') also count as "no historical data".
        # Do conversion to webmapper system
        maindata = moderndata
        webmapperindex = []
        for code in maindata.index:
            try:
                webmappercode = oecd2webmapper[int(code)]
            except (KeyError, IndexError, TypeError, ValueError):
                # Code is not numeric or has no mapping: mark it for removal.
                webmappercode = -1
            webmapperindex.append(webmappercode)
        maindata.index = webmapperindex
        # Keep only rows with a positive code (this drops the -1 markers).
        maindata = maindata[maindata.index > 0]
    else:
        maindata = historicaldata

    # NOTE(review): convert_objects belongs to an older pandas API; left
    # unchanged to match the pandas version the rest of the file targets.
    maindata = maindata.convert_objects(convert_numeric=True)

    codes = selectint(maindata.index)
    # Result is not used here; the call is kept in case it has side effects
    # -- TODO confirm and remove if it is pure.
    load_geocodes(config, switch, codes, maindata, geolist)

    # Drop num if in dataframe
    if '1' in maindata.columns:
        maindata = maindata.drop('1', axis=1)

    metadata[handle] = metadataitem
    return (maindata, metadata, coder)
def dataframe_compiler(config, fullpath, handle, switch, datafilter):
    """Compile the dataset behind ``handle`` and export it to an Excel file.

    Args:
        config: Mapping; ``config['geocoderhandle']`` is read and, when an
            output path exists, ``config['emptyvalues']`` is set to ``'no'``.
        fullpath: Output location prefix.  When the dataset has a title it is
            extended to ``"<fullpath>/<title>_<switch>.xlsx"``.
        handle: Dataset handle passed to ``content2dataframe``.
        switch: ``'modern'`` selects the modern coder and data; any other
            value selects the historical ones.
        datafilter: Mapping with the keys ``'startyear'``, ``'endyear'``
            (read only when ``'startyear'`` is truthy) and ``'ctrlist'``.

    Returns:
        A tuple ``(filetitle, fullpath, finalsubset)``: the sanitised title,
        the (possibly extended) output path and the frame produced by
        ``dataset_analyzer``.
    """
    ctrlist = []
    metadata = {}

    # Load Geocoder
    (_, dataset, title, units) = content2dataframe(config, config['geocoderhandle'])
    (_, geolist, oecd2webmapper) = buildgeocoder(dataset, config, '')
    (modern, historical) = loadgeocoder(config, dataset, 'geocoder')

    # Years selected, or the default selection; the end year is exclusive.
    if datafilter['startyear']:
        coderyears = list(range(int(datafilter['startyear']), int(datafilter['endyear'])))
    else:
        coderyears = list(range(1500, 2016))

    # Reading dataset
    (_, dataset, title, units) = content2dataframe(config, handle)
    # A missing title is treated as empty instead of crashing in re.sub.
    filetitle = re.sub(r'[ ()]', '_', title or '')
    if filetitle:
        fullpath = "%s/%s_%s.xlsx" % (fullpath, filetitle, switch)

    coder = modern if switch == 'modern' else historical

    (moderndata, historicaldata) = loadgeocoder(config, dataset, '')
    if switch == 'modern':
        maindata = moderndata
    elif getattr(historicaldata, 'empty', True):
        # Do conversion to webmapper system if there are no historical data.
        # "No data" covers both an object without an `empty` attribute and an
        # empty frame.
        maindata = moderndata
        webmapperindex = []
        for code in maindata.index:
            try:
                webmappercode = oecd2webmapper[int(code)]
            except (KeyError, IndexError, TypeError, ValueError):
                # Code is not numeric or has no mapping: mark it for removal.
                webmappercode = -1
            webmapperindex.append(webmappercode)
        maindata.index = webmapperindex
        # Drop not recognized locations
        maindata = maindata[maindata.index > 0]
    else:
        maindata = historicaldata

    metadata['title'] = title or ''
    metadata['units'] = units or ''

    (_, notint) = selectint(maindata.columns)
    codes = selectint(maindata.index)
    # Result is not used here; the call is kept in case it has side effects
    # -- TODO confirm and remove if it is pure.
    load_geocodes(config, switch, codes, maindata, geolist)
    maindata = maindata.drop(list(notint), axis=1)

    # Apply filters
    if datafilter:
        (datasubset, ctrlist) = datasetfilter(maindata, datafilter)
    else:
        # Without a filter the whole frame is analysed.
        datasubset = maindata

    # Create Excel file out from dataframe
    (yearscolumns, _) = selectint(maindata.columns)
    (finalsubset, icoder, isyear, ctrfilter, _) = dataset_analyzer(datasubset, coder, yearscolumns)

    # Apply filter to countries
    if datafilter['ctrlist']:
        # NOTE(review): `.ix` belongs to an older pandas API; left unchanged
        # to match the pandas version this file targets.
        icoder = pd.DataFrame(icoder.ix[ctrlist])

    if fullpath:
        config['emptyvalues'] = 'no'
        if config['emptyvalues'] == 'no':
            # Export only the year columns that the final subset contains.
            (coderyears, _) = selectint(finalsubset.columns)
        create_excel_dataset(fullpath, icoder, metadata, icoder.columns, coderyears,
                             finalsubset, isyear, ctrfilter)

    return (filetitle, fullpath, finalsubset)
def dataframe_compiler(config, fullpath, handle, switch, datafilter):
    """Compile the dataset behind ``handle`` and export it to an Excel file.

    Args:
        config: Mapping; ``config["geocoderhandle"]`` is read and, when an
            output path exists, ``config["emptyvalues"]`` is set to ``"no"``.
        fullpath: Output location prefix.  When the dataset has a title it is
            extended to ``"<fullpath>/<title>_<switch>.xlsx"``.
        handle: Dataset handle passed to ``content2dataframe``.
        switch: ``"modern"`` selects the modern coder and data; any other
            value selects the historical ones.
        datafilter: Mapping with the keys ``"startyear"``, ``"endyear"``
            (read only when ``"startyear"`` is truthy) and ``"ctrlist"``.

    Returns:
        A tuple ``(filetitle, fullpath, finalsubset)``: the sanitised title,
        the (possibly extended) output path and the frame produced by
        ``dataset_analyzer``.
    """
    ctrlist = []
    metadata = {}

    # Load Geocoder
    (_, dataset, title, units) = content2dataframe(config, config["geocoderhandle"])
    (_, geolist, oecd2webmapper) = buildgeocoder(dataset, config, "")
    (modern, historical) = loadgeocoder(config, dataset, "geocoder")

    # Years selected, or the default selection; the end year is exclusive.
    if datafilter["startyear"]:
        coderyears = list(range(int(datafilter["startyear"]), int(datafilter["endyear"])))
    else:
        coderyears = list(range(1500, 2016))

    # Reading dataset
    (_, dataset, title, units) = content2dataframe(config, handle)
    # A missing title is treated as empty instead of crashing in re.sub.
    filetitle = re.sub(r"[ ()]", "_", title or "")
    if filetitle:
        fullpath = "%s/%s_%s.xlsx" % (fullpath, filetitle, switch)

    coder = modern if switch == "modern" else historical

    (moderndata, historicaldata) = loadgeocoder(config, dataset, "")
    if switch == "modern":
        maindata = moderndata
    elif getattr(historicaldata, "empty", True):
        # Do conversion to webmapper system if there are no historical data.
        # "No data" covers both an object without an `empty` attribute and an
        # empty frame.
        maindata = moderndata
        webmapperindex = []
        for code in maindata.index:
            try:
                webmappercode = oecd2webmapper[int(code)]
            except (KeyError, IndexError, TypeError, ValueError):
                # Code is not numeric or has no mapping: mark it for removal.
                webmappercode = -1
            webmapperindex.append(webmappercode)
        maindata.index = webmapperindex
        # Drop not recognized locations
        maindata = maindata[maindata.index > 0]
    else:
        maindata = historicaldata

    metadata["title"] = title or ""
    metadata["units"] = units or ""

    (_, notint) = selectint(maindata.columns)
    codes = selectint(maindata.index)
    # Result is not used here; the call is kept in case it has side effects
    # -- TODO confirm and remove if it is pure.
    load_geocodes(config, switch, codes, maindata, geolist)
    maindata = maindata.drop(list(notint), axis=1)

    # Apply filters
    if datafilter:
        (datasubset, ctrlist) = datasetfilter(maindata, datafilter)
    else:
        # Without a filter the whole frame is analysed.
        datasubset = maindata

    # Create Excel file out from dataframe
    (yearscolumns, _) = selectint(maindata.columns)
    (finalsubset, icoder, isyear, ctrfilter, _) = dataset_analyzer(datasubset, coder, yearscolumns)

    # Apply filter to countries
    if datafilter["ctrlist"]:
        # NOTE(review): `.ix` belongs to an older pandas API; left unchanged
        # to match the pandas version this file targets.
        icoder = pd.DataFrame(icoder.ix[ctrlist])

    if fullpath:
        config["emptyvalues"] = "no"
        if config["emptyvalues"] == "no":
            # Export only the year columns that the final subset contains.
            (coderyears, _) = selectint(finalsubset.columns)
        create_excel_dataset(
            fullpath, icoder, metadata, icoder.columns, coderyears, finalsubset, isyear, ctrfilter
        )

    return (filetitle, fullpath, finalsubset)
coder = modern class1 = switch else: activeindex = historical.index coder = historical (moderndata, historicaldata) = loadgeocoder(config, dataset, '') if switch == 'modern': maindata = moderndata else: maindata = historicaldata (cfilter, notint) = selectint(maindata.columns) codes = selectint(maindata.index) geo = load_geocodes(config, switch, codes, maindata, geolist) for colname in notint: maindata = maindata.drop(colname, axis=1) print title # Test filters # Use case 1 datafilter = {} datafilter['startyear'] = '1500' datafilter['endyear'] = '2010' datafilter['ctrlist'] = '' datafilter['ctrlist'] = '528,14,18,67' datasubset = datasetfilter(maindata, datafilter) #print datasubset.to_html # Create Excel file out from dataframe