def store_dataset(fullpath, maindata, metadata, coder):
    """Run the fixed "use case 1" filter over ``maindata`` and export it.

    The filter is hard-coded: years 1500 up to (not including) 2016 and an
    empty country list. The filtered data is analysed with
    ``dataset_analyzer`` and handed to ``create_excel_dataset``; the time
    spent in those two steps is printed.

    Args:
        fullpath: Output path forwarded to ``create_excel_dataset``.
        maindata: Object exposing ``.columns`` and ``.index``; passed to
            ``datasetfilter``.
        metadata: Mapping indexed with ``handle`` before being forwarded.
        coder: Geocoder object forwarded to ``dataset_analyzer``.

    Returns:
        Whatever ``create_excel_dataset`` returns.
    """
    # NOTE(review): ``handle`` is neither a parameter nor a local here, so it
    # must be resolvable from an enclosing scope -- confirm with the callers.

    # Use case 1: full year span, no country restriction.
    datafilter = {'startyear': '1500', 'endyear': '2016', 'ctrlist': ''}

    if datafilter['startyear']:
        first_year = int(datafilter['startyear'])
        last_year = int(datafilter['endyear'])
        coderyears = list(range(first_year, last_year))

    datasubset, ctrlist = datasetfilter(maindata, datafilter)
    yearscolumns, notyears = selectint(maindata.columns)
    countryinfo, notcountry = selectint(maindata.index)

    started = datetime.now()
    finalsubset, icoder, isyear, ctrfilter, nodata = dataset_analyzer(
        datasubset, coder, yearscolumns)

    # Restrict the geocoder to the countries kept by the filter, if any.
    if datafilter['ctrlist']:
        icoder = pd.DataFrame(icoder.ix[ctrlist])

    datafile = create_excel_dataset(
        fullpath, icoder, metadata[handle], icoder.columns,
        coderyears, finalsubset, isyear, ctrfilter)
    elapsed = datetime.now() - started
    print("Time: " + str(elapsed.seconds) + " seconds")
    return datafile
def dataframe_compiler(config, fullpath, handle, switch, datafilter):
    """Load the dataset for ``handle``, filter it and export it to Excel.

    Args:
        config: Settings mapping. ``config['geocoderhandle']`` is read;
            ``config['emptyvalues']`` is forced to ``'no'`` and then read
            when an output path is set.
        fullpath: Output directory. When the dataset has a title it is
            replaced by ``<fullpath>/<title>_<switch>.xlsx``.
        handle: Dataset identifier passed to ``content2dataframe``.
        switch: ``'modern'`` selects the modern geocoder and data; any other
            value selects the historical ones.
        datafilter: Mapping with ``startyear``, ``endyear`` and ``ctrlist``
            entries, or a falsy value to skip filtering.

    Returns:
        Tuple ``(filetitle, fullpath, finalsubset)``.
    """
    ctrlist = []
    metadata = {}

    # Load geocoder
    (classification, dataset, title, units) = content2dataframe(
        config, config['geocoderhandle'])
    (geocoder, geolist, oecd2webmapper) = buildgeocoder(dataset, config, '')
    (modern, historical) = loadgeocoder(config, dataset, 'geocoder')

    # Years: the range requested by the filter, otherwise 1500..2015.
    if datafilter and datafilter.get('startyear'):
        coderyears = list(range(int(datafilter['startyear']),
                                int(datafilter['endyear'])))
    else:
        coderyears = list(range(1500, 2016))

    # Reading dataset
    (class1, dataset, title, units) = content2dataframe(config, handle)
    # Spaces and parentheses become underscores; a missing title yields ''.
    filetitle = re.sub(r'[ ()]', '_', title or '')
    if filetitle:
        fullpath = "%s/%s_%s.xlsx" % (fullpath, filetitle, switch)

    coder = modern if switch == 'modern' else historical

    (moderndata, historicaldata) = loadgeocoder(config, dataset, '')
    if switch == 'modern':
        maindata = moderndata
    else:
        # Fall back to the modern data only when ``historicaldata`` has no
        # ``.empty`` attribute at all.
        # NOTE(review): an empty DataFrame is still used as-is, exactly as
        # before -- confirm whether it should trigger the fallback too.
        try:
            historicaldata.empty
            no_historical = False
        except AttributeError:
            no_historical = True

        if no_historical:
            # Do conversion to webmapper system if there are no historical data
            maindata = moderndata
            webmapperindex = []
            for code in maindata.index:
                try:
                    webmappercode = oecd2webmapper[int(code)]
                except (KeyError, IndexError, TypeError, ValueError):
                    webmappercode = -1
                webmapperindex.append(webmappercode)
            maindata.index = webmapperindex
            # Drop not recognized locations
            maindata = maindata[maindata.index > 0]
        else:
            maindata = historicaldata

    metadata['title'] = title if title else ''
    metadata['units'] = units if units else ''

    (cfilter, notint) = selectint(maindata.columns)
    codes = selectint(maindata.index)
    # Result is unused here; the call is kept in case it has side effects.
    load_geocodes(config, switch, codes, maindata, geolist)
    for colname in notint:
        maindata = maindata.drop(colname, axis=1)

    # Apply filters
    if datafilter:
        (datasubset, ctrlist) = datasetfilter(maindata, datafilter)
    else:
        # Previously assigned to a misspelled name, leaving ``datasubset``
        # undefined whenever no filter was supplied.
        datasubset = maindata

    # Create Excel file out from dataframe
    (yearscolumns, notyears) = selectint(maindata.columns)
    (finalsubset, icoder, isyear, ctrfilter, nodata) = dataset_analyzer(
        datasubset, coder, yearscolumns)

    # Apply filter to countries
    if datafilter and datafilter.get('ctrlist'):
        # NOTE(review): ``.ix`` only exists in old pandas releases; kept to
        # match the pandas version this file targets.
        icoder = pd.DataFrame(icoder.ix[ctrlist])

    if fullpath:
        config['emptyvalues'] = 'no'
        if config['emptyvalues'] == 'no':
            # Export only the year columns actually present in the result.
            (coderyears, notyears) = selectint(finalsubset.columns)
        create_excel_dataset(fullpath, icoder, metadata, icoder.columns,
                             coderyears, finalsubset, isyear, ctrfilter)
    return (filetitle, fullpath, finalsubset)
def dataframe_compiler(config, fullpath, handle, switch, datafilter):
    """Load the dataset for ``handle``, filter it and export it to Excel.

    Args:
        config: Settings mapping. ``config["geocoderhandle"]`` is read;
            ``config["emptyvalues"]`` is forced to ``"no"`` and then read
            when an output path is set.
        fullpath: Output directory. When the dataset has a title it is
            replaced by ``<fullpath>/<title>_<switch>.xlsx``.
        handle: Dataset identifier passed to ``content2dataframe``.
        switch: ``"modern"`` selects the modern geocoder and data; any other
            value selects the historical ones.
        datafilter: Mapping with ``startyear``, ``endyear`` and ``ctrlist``
            entries, or a falsy value to skip filtering.

    Returns:
        Tuple ``(filetitle, fullpath, finalsubset)``.
    """
    ctrlist = []
    metadata = {}

    # Load geocoder
    (classification, dataset, title, units) = content2dataframe(
        config, config["geocoderhandle"]
    )
    (geocoder, geolist, oecd2webmapper) = buildgeocoder(dataset, config, "")
    (modern, historical) = loadgeocoder(config, dataset, "geocoder")

    # Years: the range requested by the filter, otherwise 1500..2015.
    if datafilter and datafilter.get("startyear"):
        coderyears = list(
            range(int(datafilter["startyear"]), int(datafilter["endyear"]))
        )
    else:
        coderyears = list(range(1500, 2016))

    # Reading dataset
    (class1, dataset, title, units) = content2dataframe(config, handle)
    # Spaces and parentheses become underscores; a missing title yields "".
    filetitle = re.sub(r"[ ()]", "_", title or "")
    if filetitle:
        fullpath = "%s/%s_%s.xlsx" % (fullpath, filetitle, switch)

    coder = modern if switch == "modern" else historical

    (moderndata, historicaldata) = loadgeocoder(config, dataset, "")
    if switch == "modern":
        maindata = moderndata
    else:
        # Fall back to the modern data only when ``historicaldata`` has no
        # ``.empty`` attribute at all.
        # NOTE(review): an empty DataFrame is still used as-is, exactly as
        # before -- confirm whether it should trigger the fallback too.
        try:
            historicaldata.empty
            no_historical = False
        except AttributeError:
            no_historical = True

        if no_historical:
            # Do conversion to webmapper system if there are no historical data
            maindata = moderndata
            webmapperindex = []
            for code in maindata.index:
                try:
                    webmappercode = oecd2webmapper[int(code)]
                except (KeyError, IndexError, TypeError, ValueError):
                    webmappercode = -1
                webmapperindex.append(webmappercode)
            maindata.index = webmapperindex
            # Drop not recognized locations
            maindata = maindata[maindata.index > 0]
        else:
            maindata = historicaldata

    metadata["title"] = title if title else ""
    metadata["units"] = units if units else ""

    (cfilter, notint) = selectint(maindata.columns)
    codes = selectint(maindata.index)
    # Result is unused here; the call is kept in case it has side effects.
    load_geocodes(config, switch, codes, maindata, geolist)
    for colname in notint:
        maindata = maindata.drop(colname, axis=1)

    # Apply filters
    if datafilter:
        (datasubset, ctrlist) = datasetfilter(maindata, datafilter)
    else:
        # Previously assigned to a misspelled name, leaving ``datasubset``
        # undefined whenever no filter was supplied.
        datasubset = maindata

    # Create Excel file out from dataframe
    (yearscolumns, notyears) = selectint(maindata.columns)
    (finalsubset, icoder, isyear, ctrfilter, nodata) = dataset_analyzer(
        datasubset, coder, yearscolumns
    )

    # Apply filter to countries
    if datafilter and datafilter.get("ctrlist"):
        # NOTE(review): ``.ix`` only exists in old pandas releases; kept to
        # match the pandas version this file targets.
        icoder = pd.DataFrame(icoder.ix[ctrlist])

    if fullpath:
        config["emptyvalues"] = "no"
        if config["emptyvalues"] == "no":
            # Export only the year columns actually present in the result.
            (coderyears, notyears) = selectint(finalsubset.columns)
        create_excel_dataset(
            fullpath,
            icoder,
            metadata,
            icoder.columns,
            coderyears,
            finalsubset,
            isyear,
            ctrfilter,
        )
    return (filetitle, fullpath, finalsubset)
# Test filter: years 1500..2009 for four selected countries.
# ``datafilter``, ``maindata``, ``config``, ``coder``, ``coderyears``, ``title``
# and ``units`` are expected to be defined before this point.
datafilter['startyear'] = '1500'
datafilter['endyear'] = '2010'
datafilter['ctrlist'] = '528,14,18,67'

# datasetfilter returns (subset, country list); only the subset may be passed
# on to dataset_analyzer.
(datasubset, ctrlist) = datasetfilter(maindata, datafilter)

# Create Excel file out from dataframe
fullpath = config['webtest'] + "/subdata_set.xlsx"
(yearscolumns, notyears) = selectint(maindata.columns)
(countryinfo, notcountry) = selectint(maindata.index)

# Placeholder metadata unless the dataset supplies its own title/units.
metadata = {
    'title': title if title else 'testtitle',
    'units': units if units else 'testunits',
}

started = datetime.now()
(finalsubset, icoder, isyear, ctrfilter, nodata) = dataset_analyzer(
    datasubset, coder, yearscolumns)
datafile = create_excel_dataset(fullpath, icoder, metadata, icoder.columns,
                                coderyears, finalsubset, isyear, ctrfilter)
elapsed = datetime.now() - started
print("Time: " + str(elapsed.seconds) + " seconds")
print(datafile)
# Spot check: value for location code 67 in year 1831.
print(finalsubset.ix[67][1831])
# Use case 1: years 1500..2009 for four selected countries.
# ``maindata``, ``config``, ``coder``, ``coderyears``, ``title`` and ``units``
# are expected to be defined before this point.
datafilter = {
    'startyear': '1500',
    'endyear': '2010',
    'ctrlist': '528,14,18,67',
}

# datasetfilter returns (subset, country list); only the subset may be passed
# on to dataset_analyzer.
(datasubset, ctrlist) = datasetfilter(maindata, datafilter)

# Create Excel file out from dataframe
fullpath = config['webtest'] + "/subdata_set.xlsx"
(yearscolumns, notyears) = selectint(maindata.columns)
(countryinfo, notcountry) = selectint(maindata.index)

# Placeholder metadata unless the dataset supplies its own title/units.
metadata = {
    'title': title if title else 'testtitle',
    'units': units if units else 'testunits',
}

started = datetime.now()
(finalsubset, icoder, isyear, ctrfilter, nodata) = dataset_analyzer(
    datasubset, coder, yearscolumns)
datafile = create_excel_dataset(fullpath, icoder, metadata, icoder.columns,
                                coderyears, finalsubset, isyear, ctrfilter)
elapsed = datetime.now() - started
print("Time: " + str(elapsed.seconds) + " seconds")
print(datafile)
# Spot check: value for location code 67 in year 1831.
print(finalsubset.ix[67][1831])