def conversion(maindata, moderndata, historicaldata):
    """Convert a dataset to the webmapper coding system.

    Selects the modern or historical frame depending on the module-level
    `switch`; when no historical data exists, remaps the index through the
    `oecd2webmapper` lookup and keeps only recognized (positive) codes.
    Finally drops all non-integer (metadata) columns.

    Returns the converted DataFrame.

    NOTE(review): relies on module-level `switch`, `oecd2webmapper`,
    `config`, `geolist`, `selectint` and `load_geocodes` -- confirm these
    are defined when the module is loaded.
    """
    #if config:
    if switch == 'modern':
        # The '1' column is bookkeeping, not data -- drop it if present.
        if '1' in moderndata.columns:
            moderndata = moderndata.drop('1', axis=1)
        maindata = moderndata
        # (removed: `maindata.columns = moderndata.columns` -- a no-op,
        # since maindata and moderndata are the same object here)
    else:
        # Do conversion to webmapper system
        if not historicaldata:
            maindata = moderndata
            webmapperindex = []
            for code in maindata.index:
                try:
                    webmappercode = oecd2webmapper[int(code)]
                except (KeyError, ValueError, TypeError):
                    # Unknown or non-numeric code: mark with -1 so it is
                    # filtered out just below.
                    webmappercode = -1
                webmapperindex.append(webmappercode)
            maindata.index = webmapperindex
            #maindata.columns = moderndata.columns[1]
            # Keep only recognized (positive) webmapper codes.
            maindata = maindata[maindata.index > 0]
        else:
            maindata = historicaldata
    (cfilter, notint) = selectint(maindata.columns)
    codes = selectint(maindata.index)
    geo = load_geocodes(config, switch, codes, maindata, geolist)
    # Non-integer columns hold metadata, not year values -- drop them.
    for colname in notint:
        maindata = maindata.drop(colname, axis=1)
    return maindata
def store_dataset(fullpath, maindata, metadata, coder):
    """Filter `maindata` with a hard-coded demo filter, analyze it and
    write the result to an Excel file via create_excel_dataset.

    Returns the datafile handle produced by create_excel_dataset.

    NOTE(review): depends on module-level `handle`, `datasetfilter`,
    `selectint`, `dataset_analyzer` and `create_excel_dataset`.
    """
    #fullpath = "subdata_set_last.xlsx"
    # Use case 1: demo filter covering 1500-2016, no country restriction.
    datafilter = {}
    datafilter['startyear'] = '1500'
    datafilter['endyear'] = '2016'
    #datafilter['ctrlist'] = '528,14,18,67'
    # Empty ctrlist means "no country filter".  (The original assigned
    # '528' and '1523' first -- dead stores, removed.)
    datafilter['ctrlist'] = ''
    if datafilter['startyear']:
        coderyears = list(range(int(datafilter['startyear']), int(datafilter['endyear'])))
    (datasubset, ctrlist) = datasetfilter(maindata, datafilter)
    (yearscolumns, notyears) = selectint(maindata.columns)
    (countryinfo, notcountry) = selectint(maindata.index)
    a = datetime.now()
    (finalsubset, icoder, isyear, ctrfilter, nodata) = dataset_analyzer(datasubset, coder, yearscolumns)
    if datafilter['ctrlist']:
        # Restrict the coder to the selected countries.
        tmpcoder = icoder.ix[ctrlist]
        icoder = pd.DataFrame(tmpcoder)
    datafile = create_excel_dataset(fullpath, icoder, metadata[handle], icoder.columns, coderyears, finalsubset, isyear, ctrfilter)
    b = datetime.now()
    d = b - a
    # Single-argument print() behaves identically on Python 2 and 3.
    print("Time: " + str(d.seconds) + " seconds")
    return datafile
def dataset_to_csv(config, dataset, geocoder):
    """Serialize `dataset` into a tab-separated plot table plus a CSV of
    per-year aggregates.

    Returns (datastring, aggrstring):
      datastring -- TSV with a country-name header row and one row per
                    year that has at least one real value
      aggrstring -- "date,value" CSV summing numeric values per year

    NOTE(review): depends on module-level `selectint`; `geocoder` is
    expected to map country codes to names via config['webmappercountry'].
    """
    aggr = {}
    (years, notyears) = selectint(dataset.columns)
    (countries, notcountries) = selectint(dataset.index)
    # Plot header: one column per country.
    datastring = 'date\t'
    for code in countries:
        try:
            ctr = geocoder.ix[code][config['webmappercountry']]
        except Exception:
            # BUGFIX: the original appended nothing when the geocoder
            # lookup failed, so the header had fewer columns than the
            # data rows.  Fall back to the raw code instead.
            ctr = str(code)
        datastring = datastring + str(ctr) + "\t"
    datastring = datastring[:-1] + "\n"
    for year in years:
        datastringitem = str(year) + "\t"
        isvalue = ''
        for code in countries:
            try:
                value = dataset.ix[code][year]
            except Exception:
                value = ''
            if value:
                if str(value) == 'nan':
                    value = 'NaN'
                else:
                    isvalue = 'yes'
                    # BUGFIX: only aggregate real numeric values.  The
                    # original also "added" the string 'NaN', and the
                    # resulting TypeError fell into the bare except and
                    # overwrote the running sum with 'NaN'.
                    try:
                        aggr[year] = aggr[year] + value
                    except KeyError:
                        aggr[year] = value
                datastringitem = datastringitem + str(value) + "\t"
            else:
                datastringitem = datastringitem + "NaN\t"
        datastringitem = datastringitem[:-1]
        # Include lines with values
        if isvalue:
            datastring = datastring + datastringitem + "\n"
    aggrstring = 'date,value\n'
    for year in sorted(aggr):
        aggrstring = aggrstring + str(year) + ',' + str(aggr[year]) + "\n"
    return (datastring, aggrstring)
def geocoding(switch, dataset, modern, historical):
    """Geocode `dataset` and return (maindata, metadata, coder).

    NOTE(review): the `switch` parameter is immediately overridden to
    'historical' below -- looks like a debugging leftover; confirm.
    Relies on module-level `title`, `units`, `config`, `handle`,
    `oecd2webmapper`, `geolist`, `loadgeocoder`, `selectint` and
    `load_geocodes`.
    """
    #if config:
    switch = 'historical'
    metadata = {}
    metadataitem = {}
    # Per-dataset metadata; `title` and `units` come from module scope.
    if title:
        metadataitem['title'] = title
        metadataitem['units'] = units
    # Pick the geocoder frame matching the (forced) switch.
    if switch == 'modern':
        activeindex = modern.index
        coder = modern
        class1 = switch
    else:
        activeindex = historical.index
        coder = historical
    (moderndata, historicaldata) = loadgeocoder(config, dataset, '')
    if switch == 'modern':
        maindata = moderndata
    else:
        # Do conversion to webmapper system
        if not historicaldata:
            maindata = moderndata
            webmapperindex = []
            # Remap each code through oecd2webmapper; unknown or
            # non-numeric codes become -1 and are filtered out below.
            for code in maindata.index:
                try:
                    webmappercode = oecd2webmapper[int(code)]
                except:
                    webmappercode = -1
                webmapperindex.append(webmappercode)
            maindata.index = webmapperindex
            maindata = maindata[maindata.index > 0]
        else:
            maindata = historicaldata
    # NOTE(review): convert_objects is deprecated/removed in modern pandas.
    maindata = maindata.convert_objects(convert_numeric=True)
    (cfilter, notint) = selectint(maindata.columns)
    codes = selectint(maindata.index)
    geo = load_geocodes(config, switch, codes, maindata, geolist)
    #for colname in notint:
    #maindata = maindata.drop(colname, axis=1)
    # Drop num if in dataframe
    if '1' in maindata.columns:
        maindata = maindata.drop('1', axis=1)
    #dataframe[pid] = maindata
    metadata[handle] = metadataitem
    return (maindata, metadata, coder)
def compiledataset(csvfile):
    """Load a CSV dataset file and normalize it into a DataFrame.

    Cell layout assumed by this loader: the title sits in the header row
    (column 1), the units in the first data row; row 1 supplies the real
    column labels and the modern-code column supplies the index.  The
    bookkeeping column '1' is dropped and integer (year) columns are
    moved after the non-integer (metadata) ones.

    Returns (dataset, title, units).

    NOTE(review): depends on module-level `configuration`,
    `loaddataset_fromfile` and `selectint`.
    """
    # (removed unused locals: handles, remote, metadata, and the two
    #  dead `switch = ...` stores -- switch was never read)
    config = configuration()
    config['remote'] = 'yes'
    dataset = loaddataset_fromfile(config, csvfile)
    title = dataset.columns[1]
    units = dataset.ix[0][1]
    #dataset = dataset.convert_objects(convert_numeric=True)
    dataset.columns = dataset.ix[1]
    dataset.index = dataset[config['moderncode']]
    if '1' in dataset.columns:
        dataset = dataset.drop('1', axis=1)
    #dataset = dataset[2:]
    (intcodes, notint) = selectint(dataset.columns)
    #for colname in notint:
    #    dataset = dataset.drop(colname, axis=1)
    # Metadata columns first, year columns last.
    dataset.columns = notint + intcodes
    return (dataset, title, units)
def panel2dict(config, cleanedpaneldata, names):
    """Index a cleaned panel DataFrame into lookup dicts and a CSV header.

    Returns (header, data, countries, handles, vhandles):
      header    -- 'Country,<name1>,<name2>,...' built from `names`
      data      -- cleanedpaneldata.to_dict() (column -> {row -> value})
      countries -- country code -> list of row labels
      handles   -- dataset handle -> list of row labels
      vhandles  -- row label -> dataset handle

    NOTE(review): this function is defined twice in the module; the later
    definition wins.  `selectint` (module-level) is only needed when the
    webmapper-code column is present.
    """
    data = cleanedpaneldata.to_dict()
    codes = []
    if 'Code' in cleanedpaneldata.columns:
        codes = data['Code']
    if config['webmappercode'] in cleanedpaneldata.columns:
        #(codes, notcodes) = selectint(cleanedpaneldata[config['webmappercode']])
        (codes, ncodes) = selectint(cleanedpaneldata.index)
    handlesdata = data['handle']
    handles = {}
    vhandles = {}
    for rowid in handlesdata:
        thishandle = handlesdata[rowid]
        # setdefault replaces the original bare try/except-KeyError dance.
        handles.setdefault(thishandle, []).append(rowid)
        vhandles[rowid] = thishandle
    # Group row labels per country code.  (Removed: unused `rcodes` and
    # `panelout` locals, and the dead `i = 0` / `i = i + 1` counter that
    # was clobbered by the for-loop variable each iteration.)
    countries = {}
    for rowid in codes:
        # NOTE(review): when `codes` comes from selectint this iterates
        # list *elements* and then indexes with them, same as the original.
        code = codes[rowid]
        countries.setdefault(code, []).append(rowid)
    header = 'Country,'
    for handle in handles:
        header = header + str(names[handle]) + ','
    #header = header + str('Year')
    return (header, data, countries, handles, vhandles)
def panel2dict(config, cleanedpaneldata, names):
    """Build lookup tables (countries, handles, vhandles) plus a CSV
    header line from a cleaned panel DataFrame.

    Returns (header, data, countries, handles, vhandles).
    """
    #data = cleanedpaneldata.reset_index().to_dict()
    #codes = data['Code']
    data = cleanedpaneldata.to_dict()
    cols = cleanedpaneldata.columns
    codes = []
    if 'Code' in cols:
        codes = data['Code']
    if config['webmappercode'] in cols:
        #(codes, notcodes) = selectint(cleanedpaneldata[config['webmappercode']])
        (codes, ncodes) = selectint(cleanedpaneldata.index)
    # Group row labels per dataset handle; vhandles is the reverse map.
    handlesdata = data['handle']
    handles = {}
    vhandles = {}
    for rowid in handlesdata:
        hdl = handlesdata[rowid]
        members = handles.get(hdl, [])
        members.append(rowid)
        handles[hdl] = members
        for member in members:
            vhandles[member] = hdl
    # Group row labels per country code (rcodes keeps the last row seen).
    rcodes = {}
    countries = {}
    i = 0
    for i in codes:
        code = codes[i]
        bucket = countries.get(code, [])
        bucket.append(i)
        countries[code] = bucket
        rcodes[code] = i
        i = i + 1
    panelout = {}
    # Header line: 'Country' followed by the display name of each handle.
    header = 'Country,'
    for hdl in handles:
        header = header + str(names[hdl]) + ','
    #header = header + str('Year')
    return (header, data, countries, handles, vhandles)
# Module-level test script: picks the modern or historical geocoder frame,
# drops non-year columns and runs a demo country/year filter.
# NOTE(review): relies on `switch`, `modern`, `historical`, `config`,
# `dataset`, `title`, `geolist` and helper functions (`loadgeocoder`,
# `selectint`, `load_geocodes`, `datasetfilter`) defined elsewhere.
if switch == 'modern':
    activeindex = modern.index
    coder = modern
    class1 = switch
else:
    activeindex = historical.index
    coder = historical
(moderndata, historicaldata) = loadgeocoder(config, dataset, '')
if switch == 'modern':
    maindata = moderndata
else:
    maindata = historicaldata
(cfilter, notint) = selectint(maindata.columns)
codes = selectint(maindata.index)
geo = load_geocodes(config, switch, codes, maindata, geolist)
# Non-integer columns hold metadata, not year values -- drop them.
for colname in notint:
    maindata = maindata.drop(colname, axis=1)
print title
# Test filters
# Use case 1
datafilter = {}
datafilter['startyear'] = '1500'
datafilter['endyear'] = '2010'
# NOTE(review): the empty ctrlist below is immediately overwritten by the
# demo country list -- debugging leftover.
datafilter['ctrlist'] = ''
datafilter['ctrlist'] = '528,14,18,67'
datasubset = datasetfilter(maindata, datafilter)
def dataframe_compiler(config, fullpath, handle, switch, datafilter):
    """Compile dataset `handle` into a filtered DataFrame and Excel file.

    Loads the geocoder, reads the dataset, maps its index to webmapper
    codes when no historical data exists, applies `datafilter` and writes
    the result with create_excel_dataset.

    Returns (filetitle, fullpath, finalsubset).

    NOTE(review): depends on module-level `content2dataframe`,
    `buildgeocoder`, `loadgeocoder`, `selectint`, `load_geocodes`,
    `datasetfilter`, `dataset_analyzer` and `create_excel_dataset`.
    """
    handles = []
    ctrlist = []
    metadata = {}
    #switch = 'historical'
    # Load Geocoder
    (classification, dataset, title, units) = content2dataframe(config, config['geocoderhandle'])
    (geocoder, geolist, oecd2webmapper) = buildgeocoder(dataset, config, '')
    (modern, historical) = loadgeocoder(config, dataset, 'geocoder')
    # Default years selection, replaced when the filter names a range.
    coderyears = list(range(1500, 2016))
    # .get() so an empty/partial filter dict cannot raise KeyError.
    if datafilter.get('startyear'):
        coderyears = list(range(int(datafilter['startyear']), int(datafilter['endyear'])))
    # Reading dataset
    (class1, dataset, title, units) = content2dataframe(config, handle)
    filetitle = title
    filetitle = re.sub(' ', '_', filetitle)
    filetitle = re.sub(r'[\(\)]', '_', filetitle)
    if filetitle:
        fullpath = "%s/%s_%s.xlsx" % (fullpath, filetitle, switch)
    #return ('test', 'test')
    if switch == 'modern':
        activeindex = modern.index
        coder = modern
        class1 = switch
    else:
        activeindex = historical.index
        coder = historical
    (moderndata, historicaldata) = loadgeocoder(config, dataset, '')
    if switch == 'modern':
        maindata = moderndata
    else:
        # Do conversion to webmapper system if there are no historical data
        isdata = ''
        try:
            if not historicaldata.empty:
                isdata = 'yes'
        except AttributeError:
            # historicaldata is not a DataFrame at all
            isdata = 'no'
        # NOTE(review): an *empty* DataFrame leaves isdata == '' and falls
        # through to the historical branch -- confirm that is intended.
        if isdata == 'no':
            maindata = moderndata
            webmapperindex = []
            for code in maindata.index:
                try:
                    webmappercode = oecd2webmapper[int(code)]
                except (KeyError, ValueError, TypeError):
                    webmappercode = -1
                webmapperindex.append(webmappercode)
            maindata.index = webmapperindex
            # Drop not recognized locations
            maindata = maindata[maindata.index > 0]
        else:
            maindata = historicaldata
    metadata['title'] = title if title else ''
    metadata['units'] = units if units else ''
    (cfilter, notint) = selectint(maindata.columns)
    codes = selectint(maindata.index)
    geo = load_geocodes(config, switch, codes, maindata, geolist)
    # Non-integer columns are metadata, not year values.
    for colname in notint:
        maindata = maindata.drop(colname, axis=1)
    # Apply filters
    if datafilter:
        (datasubset, ctrlist) = datasetfilter(maindata, datafilter)
    else:
        # BUGFIX: was `datasetset = maindata` (typo), which left
        # `datasubset` undefined and crashed dataset_analyzer below.
        datasubset = maindata
    # Create Excel file out from dataframe
    (yearscolumns, notyears) = selectint(maindata.columns)
    (countryinfo, notcountry) = selectint(maindata.index)
    (finalsubset, icoder, isyear, ctrfilter, nodata) = dataset_analyzer(datasubset, coder, yearscolumns)
    #return (finalsubset, fullpath, finalsubset)
    # Apply filter to countries
    if datafilter.get('ctrlist'):
        tmpcoder = icoder.ix[ctrlist]
        icoder = pd.DataFrame(tmpcoder)
    #return (finalsubset.to_html(), 'test')
    if fullpath:
        config['emptyvalues'] = 'no'
    if config.get('emptyvalues') == 'no':
        (coderyears, notyears) = selectint(finalsubset.columns)
    datafile = create_excel_dataset(fullpath, icoder, metadata, icoder.columns, coderyears, finalsubset, isyear, ctrfilter)
    return (filetitle, fullpath, finalsubset)
def build_panel(config, switch, handles, datafilter):
    """Assemble a cross-dataset panel for several dataset handles.

    Filters each dataset, concatenates them into one panel keyed by a
    'handle' column, then walks every (country, year) cell to build rows
    of [country, year, value-per-handle].

    Returns (header, panels, metadata, totalpanel).

    NOTE(review): relies on module-level `request_geocoder`,
    `request_datasets`, `datasetfilter`, `dataset2panel` and `selectint`;
    uses the deprecated pandas `.ix` indexer and a Python 2 print.
    """
    (geocoder, geolist, oecd2webmapper, modern, historical) = request_geocoder(config, '')
    (origdata, maindata, metadata) = request_datasets(config, switch, modern, historical, handles, geolist)
    (subsets, panel) = ({}, [])
    logscale = ''
    for handle in handles:
        (datasubset, ctrlist) = datasetfilter(maindata[handle], datafilter)
        if not datasubset.empty:
            # Drop all-NaN rows and any NaN index label.
            datasubset = datasubset.dropna(how='all')
            try:
                if np.nan in datasubset.index:
                    datasubset = datasubset.drop(np.nan, axis=0)
            except:
                skip = 'yes'
            # Tag every row with its source dataset handle.
            datasubset['handle'] = handle
            metadata['url'] = 0
            panel.append(datasubset)
            subsets[handle] = datasubset
            # NOTE(review): panelcells/originalvalues are computed but
            # never used -- side effects of dataset2panel unknown.
            (panelcells, originalvalues) = dataset2panel(config, subsets[handle], historical, logscale)
    totalpanel = pd.concat(panel)
    try:
        if np.nan in totalpanel.index:
            totalpanel = totalpanel.drop(np.nan, axis=0)
    except:
        skip = 'yes'
    if switch == 'historical':
        geocoder = historical
    else:
        geocoder = modern
    # Remove columns with empty years
    for colyear in totalpanel.columns:
        if totalpanel[colyear].count() == 0:
            totalpanel = totalpanel.drop(colyear, axis=1)
    (allyears, nyears) = selectint(totalpanel.columns)
    print totalpanel.index
    panels = []
    known = {}
    matrix = {}
    #return (str(totalpanel.to_html()), '', '', '')
    # One output row per unseen (country-code, year) pair; `known`
    # deduplicates pairs, `matrix` remembers the last value found.
    for code in totalpanel.index:
        try:
            country = geocoder.ix[int(code)][config['webmappercountry']]
        except:
            country = ''
        for thisyear in allyears:
            thiskey = str(int(code)) + str(thisyear)
            if thiskey not in known:
                dataitem = [country]
                dataitem.append(thisyear)
                known[thiskey] = thisyear
                matrix[thiskey] = ''
                for handle in handles:
                    tmpframe = totalpanel.loc[totalpanel['handle'] == handle]
                    try:
                        thisval = tmpframe.ix[int(code)][thisyear]
                        matrix[thiskey] = thisval
                    except:
                        thisval = ''
                    dataitem.append(thisval)
                    # Filter out np.NaN
                    # NOTE(review): this append sits inside the handle
                    # loop, so the same (partially filled) row can be
                    # appended more than once -- confirm intended.
                    if str(thisval) != 'nan':
                        if country:
                            if matrix[thiskey]:
                                panels.append(dataitem)
    # Build header
    header = ['Country', 'Year']
    for handle in handles:
        header.append(metadata[handle]['title'])
    return (header, panels, metadata, totalpanel)
class1 = switch else: activeindex = historical.index coder = historical handle = "hdl:10622/DIUBXI" handle = "hdl:10622/WNGZ4A" handle = "hdl:10622/GZ7O1K" #if remote: # (class1, dataset) = loaddataset_fromurl(config, handle) #else: # dataset = loaddataset(handles) (class1, dataset, title, units) = content2dataframe(config, handle) (cfilter, notint) = selectint(activeindex.values) (moderndata, historicaldata) = loadgeocoder(config, dataset, '') if switch == 'modern': maindata = moderndata else: maindata = historicaldata tree = [] tree = treemap(config, maindata, class1, cfilter, coder) print tree ccode = '150' year = '2004' test = '' if test:
def dataframe_compiler(config, fullpath, handle, switch, datafilter):
    """Compile dataset `handle` into a filtered DataFrame plus Excel file.

    Black-formatted duplicate of dataframe_compiler earlier in this file
    (this later definition wins at import time).  The same defects are
    fixed in this copy: the `datasetset` typo, unguarded datafilter /
    config key lookups, and over-broad bare excepts.

    Returns (filetitle, fullpath, finalsubset).
    """
    handles = []
    ctrlist = []
    metadata = {}
    # switch = 'historical'
    # Load Geocoder
    (classification, dataset, title, units) = content2dataframe(config, config["geocoderhandle"])
    (geocoder, geolist, oecd2webmapper) = buildgeocoder(dataset, config, "")
    (modern, historical) = loadgeocoder(config, dataset, "geocoder")
    # Default years selection, replaced when the filter names a range.
    coderyears = list(range(1500, 2016))
    # .get() so an empty/partial filter dict cannot raise KeyError.
    if datafilter.get("startyear"):
        coderyears = list(range(int(datafilter["startyear"]), int(datafilter["endyear"])))
    # Reading dataset
    (class1, dataset, title, units) = content2dataframe(config, handle)
    filetitle = title
    filetitle = re.sub(" ", "_", filetitle)
    filetitle = re.sub(r"[\(\)]", "_", filetitle)
    if filetitle:
        fullpath = "%s/%s_%s.xlsx" % (fullpath, filetitle, switch)
    # return ('test', 'test')
    if switch == "modern":
        activeindex = modern.index
        coder = modern
        class1 = switch
    else:
        activeindex = historical.index
        coder = historical
    (moderndata, historicaldata) = loadgeocoder(config, dataset, "")
    if switch == "modern":
        maindata = moderndata
    else:
        # Do conversion to webmapper system if there are no historical data
        isdata = ""
        try:
            if not historicaldata.empty:
                isdata = "yes"
        except AttributeError:
            # historicaldata is not a DataFrame at all
            isdata = "no"
        # NOTE(review): an *empty* DataFrame leaves isdata == "" and takes
        # the historical branch below -- confirm that is intended.
        if isdata == "no":
            maindata = moderndata
            webmapperindex = []
            for code in maindata.index:
                try:
                    webmappercode = oecd2webmapper[int(code)]
                except (KeyError, ValueError, TypeError):
                    webmappercode = -1
                webmapperindex.append(webmappercode)
            maindata.index = webmapperindex
            # Drop not recognized locations
            maindata = maindata[maindata.index > 0]
        else:
            maindata = historicaldata
    metadata["title"] = title if title else ""
    metadata["units"] = units if units else ""
    (cfilter, notint) = selectint(maindata.columns)
    codes = selectint(maindata.index)
    geo = load_geocodes(config, switch, codes, maindata, geolist)
    # Non-integer columns are metadata, not year values.
    for colname in notint:
        maindata = maindata.drop(colname, axis=1)
    # Apply filters
    if datafilter:
        (datasubset, ctrlist) = datasetfilter(maindata, datafilter)
    else:
        # BUGFIX: was `datasetset = maindata` (typo), which left
        # `datasubset` undefined and crashed dataset_analyzer below.
        datasubset = maindata
    # Create Excel file out from dataframe
    (yearscolumns, notyears) = selectint(maindata.columns)
    (countryinfo, notcountry) = selectint(maindata.index)
    (finalsubset, icoder, isyear, ctrfilter, nodata) = dataset_analyzer(datasubset, coder, yearscolumns)
    # return (finalsubset, fullpath, finalsubset)
    # Apply filter to countries
    if datafilter.get("ctrlist"):
        tmpcoder = icoder.ix[ctrlist]
        icoder = pd.DataFrame(tmpcoder)
    # return (finalsubset.to_html(), 'test')
    if fullpath:
        config["emptyvalues"] = "no"
    if config.get("emptyvalues") == "no":
        (coderyears, notyears) = selectint(finalsubset.columns)
    datafile = create_excel_dataset(
        fullpath, icoder, metadata, icoder.columns, coderyears, finalsubset, isyear, ctrfilter
    )
    return (filetitle, fullpath, finalsubset)