def indicators():
    #data = load_indicators("indicators.csv")
    config = configuration()
    pid = config['topicindex']
    if pid:
        (handles, pidslist) = pidfrompanel(pid)
        hquery = formdatasetquery(handles, '')
        datainfo = readdatasets('datasets', json.loads(hquery))
        csvio = StringIO(str(datainfo[0]['csvframe']))
        data = pd.read_csv(csvio, sep='\t', dtype='unicode',
                           quoting=csv.QUOTE_NONE)

        # Strip stray quotes from the column names
        data.columns = [re.sub(r"\"", "", item) for item in data.columns]

        # Map dataset titles to their handles
        storeddata = readdatasets('datasets', '')
        linking = {}
        for item in storeddata:
            try:
                linking[item['title']] = item['handle']
            except:
                skip = 'yes'

        data['handle'] = ''
        data = data.drop('ID', axis=1)
        for row in data.index:
            # .at instead of the deprecated .ix: the original chained
            # assignment data.ix[row]['handle'] = ... did not reliably
            # write back into the frame
            title = data.at[row, 'Name']
            data.at[row, 'handle'] = linking.get(title, '')

        # to_json, not to_csv: 'orient' is a to_json argument, and the
        # response is declared as application/json
        return Response(data.to_json(orient='records'),
                        mimetype='application/json')
    else:
        return 'No data'
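# A minimal alternative to the row loop in indicators() above, assuming the
# same 'Name' column and title->handle dict: pandas' Series.map does the
# lookup in one vectorized step. A sketch, not a drop-in replacement.
#
# import pandas as pd
#
# def link_handles(data, linking):
#     # Series.map looks up each 'Name' in the dict; missing keys yield NaN,
#     # which fillna('') converts to the empty-string default used above.
#     data['handle'] = data['Name'].map(linking).fillna('')
#     return data
#
# Example: link_handles(pd.DataFrame({'Name': ['GDP', 'X']}), {'GDP': 'hdl:1'})
# gives a handle column of ['hdl:1', ''].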
def datasets():
    config = configuration()
    (jsondata, pid) = ('', '')
    handles = []
    combineddataset = []
    resultdataset = ''
    datainfo = []
    outformat = 'json'
    if request.args.get('format'):
        outformat = request.args.get('format')
    if request.args.get('handle'):
        pid = request.args.get('handle')
    if request.args.get('latest'):
        dataset = config['defaulthandle']
        return dataset

    if pid:
        (handles, pidslist) = pidfrompanel(pid)
        hquery = formdatasetquery(handles, '')
        datainfo = readdatasets('datasets', json.loads(hquery))
        #if not datainfo:
        #    datainfo.append(pid)
        for dataset in datainfo:
            data = {}
            handle = dataset['handle']
            if outformat == 'json':
                jsondata = str(dataset['data'])
                jsondata = jsondata.replace(".0,", ",")
                json_dict = ast.literal_eval(jsondata.strip())
                data['handle'] = handle
                try:
                    data['title'] = dataset['title']
                    data['units'] = dataset['units']
                    data['datasetID'] = dataset['datasetID']
                except:
                    data['title'] = 'Title'
                    data['units'] = 'Units'
                    data['datasetID'] = 228
                data['data'] = json_dict
                combineddataset.append(data)
            elif outformat == 'csv':
                data['data'] = dataset['csvframe']
                resultdataset = data['data']

    if outformat == 'json':
        if combineddataset:
            finaldata = json.dumps(combineddataset, encoding="utf-8",
                                   sort_keys=True, indent=4)
            return Response(finaldata, mimetype='application/json')
    elif outformat == 'csv':
        return Response(resultdataset, mimetype='text/plain')
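# pidfrompanel() is defined elsewhere in this codebase. Judging from its call
# sites (the "Panel[...]" strings tested with a regex in download()), its
# contract appears to be roughly the following. This is a hypothetical sketch
# of that contract, not the actual implementation; the shape of pidslist is
# a guess.
#
# import re
#
# def pidfrompanel_sketch(pid):
#     # Split "Panel[hdl:10622/A,hdl:10622/B]" (or a plain comma-separated
#     # list) into individual handles.
#     match = re.match(r'Panel\[(.+)\]', pid)
#     inner = match.group(1) if match else pid
#     handles = [h.strip() for h in inner.split(',') if h.strip()]
#     pidslist = [{'handle': h} for h in handles]  # shape is a guess
#     return (handles, pidslist)
#
# pidfrompanel_sketch("Panel[hdl:10622/A,hdl:10622/B]")
# -> (['hdl:10622/A', 'hdl:10622/B'], [...])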
def treemap(settings=''):
    (years, ctrlist) = ([], '')
    showpanel = 'yes'
    config = configuration()
    if config['error']:
        return config['error']

    (historical, handle, handles, thisyear) = ('', '', [], '')
    if request.args.get('face'):
        facehandle = request.args.get('face')
        if facehandle not in handles:
            handles.append(facehandle)
            handle = facehandle
    if request.args.get('handle'):
        handledataset = request.args.get('handle')
        try:
            (pids, pidslist) = pidfrompanel(handledataset)
            handle = pids[0]
            handles.append(handle)
        except:
            handles.append(handledataset)
            handle = handledataset
            nopanel = 'yes'
    if request.args.get('historical'):
        historical = request.args.get('historical')
    if request.args.get('year'):
        thisyear = request.args.get('year')
    if request.args.get('hist'):
        historical = request.args.get('hist')
    if request.args.get('ctrlist'):
        ctrlist = request.args.get('ctrlist')
        if ctrlist == config['ctrlist']:
            ctrlist = ''

    mainlink = '&handle=' + str(handle)
    try:
        (title, units, years) = dpemetadata(config, handle)
    except:
        (title, units, years) = ('Panel Data', '', [])
    if historical:
        mainlink = str(mainlink) + '&historical=on'
    if thisyear:
        mainlink = str(mainlink) + '&year=' + str(thisyear)
    if ctrlist:
        mainlink = str(mainlink) + '&ctrlist=' + str(ctrlist)
    links = graphlinks(mainlink)

    apitreemap = config['apiroot'] + "/api/treemap?action=showyears&handle=" \
        + str(handles[0]) + "&ctrlist=" + str(ctrlist)
    years = load_api_data(apitreemap, 1)
    total = len(years)
    lastyear = years[-1]

    resp = make_response(render_template('treemap.html', handle=handle,
        chartlib=links['chartlib'], barlib=links['barlib'],
        panellib=links['panellib'], treemaplib=links['treemaplib'],
        q=handle, showpanel=showpanel, historical=historical, title=title,
        thisyear=thisyear, years=years, total=total, lastyear=lastyear,
        ctrlist=ctrlist))
    return resp
def panel():
    # NOTE: 'thisyear' appeared twice in the original tuple unpacking (bound
    # to 0, then 1950); the duplicate is collapsed here, keeping the
    # effective value 1950.
    (thisyear, datafilter, handle, yearmin, yearmax, ctrlist, lastyear,
     logscale) = (1950, {}, '', '1500', '2020', '', 2010, '')
    handles = []
    config = configuration()
    datafilter['startyear'] = yearmin
    datafilter['endyear'] = lastyear
    datafilter['ctrlist'] = config['ctrlist']
    #modern = moderncodes(config['modernnames'], config['apiroot'])

    if request.args.get('handle'):
        handle = str(request.args.get('handle'))
        handle = handle.replace(" ", "")
        handle = handle.replace("'", "")
        try:
            (pids, pidslist) = pidfrompanel(handle)
            handles = pids
        except:
            nopanel = 'yes'
            handles.append(handle)
    if request.args.get('face'):
        facehandle = request.args.get('face')
        if facehandle not in handles:
            handles.append(facehandle)
    if request.args.get('dataset'):
        dataset = request.args.get('dataset')
    if request.args.get('ctrlist'):
        customcountrycodes = ''
        ctrlist = request.args.get('ctrlist')
        datafilter['ctrlist'] = ctrlist
    if request.args.get('logscale'):
        logscale = request.args.get('logscale')
    if request.args.get('year'):
        thisyear = request.args.get('year')
        datafilter['startyear'] = int(thisyear)
        datafilter['endyear'] = int(thisyear)
    if request.args.get('yearmin'):
        fromyear = request.args.get('yearmin')
        datafilter['startyear'] = fromyear
    if request.args.get('yearmax'):
        toyear = request.args.get('yearmax')
        datafilter['endyear'] = toyear
    if request.args.get('hist'):
        switch = 'historical'
        if datafilter['ctrlist'] == '':
            datafilter['ctrlist'] = config['histctrlist']
    else:
        switch = 'modern'

    (geocoder, geolist, oecd2webmapper, modern, historical) = \
        request_geocoder(config, '')
    (origdata, maindata, metadata) = request_datasets(config, switch, modern,
        historical, handles, geolist)

    (subsets, subsetyears, panel) = ({}, [], [])
    for handle in handles:
        (datasubset, ctrlist) = datasetfilter(maindata[handle], datafilter)
        datasubset['handle'] = handle
        if not datasubset.empty:
            datasubset = datasubset.dropna(how='all')
            try:
                if np.nan in datasubset.index:
                    datasubset = datasubset.drop(np.nan, axis=0)
            except:
                skip = 'yes'
            for year in datasubset:
                if datasubset[year].count() == 0:
                    datasubset = datasubset.drop(year, axis=1)
            (datayears, notyears) = selectint(datasubset.columns)
            panel.append(datasubset)
            subsets[handle] = datasubset
            subsetyears.append(datayears)
    dataframe = subsets
    ctrlimit = 10

    # Trying to find the best year with most filled data values
    try:
        bestyearlist = subsetyears[0]
        for i in range(1, len(subsetyears)):
            bestyearlist = list(set(bestyearlist) & set(subsetyears[i]))
        #bestyearlist = bestyearlist.sort()
        thisyear = bestyearlist[0]
    except:
        bestyearlist = []

    allcodes = {}
    panel = []
    names = {}
    for handle in dataframe:
        try:
            names[handle] = metadata[handle]['title']
        except:
            names[handle] = 'title'
        try:
            #(dataset, codes) = paneldatafilter(dataframe[handle],
            #    int(yearmin), int(yearmax), ctrlist, handle)
            dataset = dataframe[handle]
            if not dataset.empty:
                panel.append(dataset)
        except:
            nodata = 0

    # Initialized here so the final return cannot hit an unbound name when
    # no panel could be built (the original defined 'result' only inside
    # the if-block below)
    result = ''
    if panel:
        totalpanel = pd.concat(panel)
        cleanedpanel = totalpanel.dropna(axis=1, how='any')
        cleanedpanel = totalpanel
        #return str(cleanedpanel.to_html())
        totalpanel = cleanedpanel
        if int(thisyear) <= 0:
            thisyear = totalpanel.columns[-2]
        original = {}
        if thisyear:
            # The original if/if/else chain reset geocoder to 'modern'
            # whenever switch was 'historical' (the else paired with the
            # second if); an explicit two-way branch fixes that.
            if switch in ('historical', 'hist'):
                geocoder = historical
            else:
                geocoder = modern
            result = 'Country,'
            for handle in handles:
                result = result + str(metadata[handle]['title']) + ','
            result = result[:-1]
            known = {}
            for code in totalpanel.index:
                if str(code) not in known:
                    result = result + '\n' + \
                        str(geocoder.ix[int(code)][config['webmappercountry']])
                    for handle in handles:
                        tmpframe = totalpanel.loc[totalpanel['handle'] == handle]
                        try:
                            (thisval, original) = value2scale(
                                tmpframe.ix[code][thisyear], logscale, original)
                        except:
                            thisval = 'NaN'
                        result = result + ',' + str(thisval)
                    known[str(code)] = code
            return Response(result, mimetype='text/plain')

        (allyears, notyears) = selectint(cleanedpanel.columns)
        (codes, notcodes) = selectint(cleanedpanel.index)
        cleanedpanel.index = codes
        (header, data, countries, handles, vhandles) = panel2dict(config,
            cleanedpanel, names)
        #return str(data)
        years = []
        for year in sorted(data):
            try:
                years.append(int(year))
                lastyear = year
            except:
                skip = 1

        # Return only years
        if request.args.get('showyears'):
            yearsdata = {}
            yearsdata['years'] = years
            yearsdata['latestyear'] = lastyear
            #yearsdata['data'] = data
            yearsjson = json.dumps(yearsdata, ensure_ascii=False,
                                   sort_keys=True, indent=4)
            return Response(yearsjson, mimetype='application/json')

    return Response(result, mimetype='text/plain')
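# value2scale() is defined elsewhere; from its use in panel() above (a cell
# value in, optional log switch, an 'original' dict threaded through) it
# plausibly behaves like the hypothetical sketch below. The keying of
# 'original' by the scaled value is a guess.
#
# import math
#
# def value2scale_sketch(value, logscale, original):
#     val = float(value)
#     if logscale:
#         # log10-scale positive values; remember the raw number so the
#         # caller can still show it (e.g. in tooltips or legends)
#         scaled = round(math.log10(val), 2) if val > 0 else 0
#         original[scaled] = val
#         return (scaled, original)
#     return (val, original)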
def treemapweb():
    (thisyear, datafilter, yearmin, lastyear, handles) = (0, {}, 1500, 2010, [])
    (action, switch, geodataset) = ('', 'modern', '')
    config = configuration()
    datafilter['startyear'] = yearmin
    datafilter['endyear'] = lastyear
    datafilter['ctrlist'] = ''
    handle = ''
    if request.args.get('handle'):
        handledataset = request.args.get('handle')
        try:
            (pids, pidslist) = pidfrompanel(handledataset)
            handle = pids[0]
            handles.append(handle)
        except:
            handles.append(handledataset)
            nopanel = 'yes'
    if request.args.get('face'):
        handle = request.args.get('face')
        handles.append(handle)
    if request.args.get('year'):
        thisyear = request.args.get('year')
    if request.args.get('action'):
        action = request.args.get('action')
    if request.args.get('ctrlist'):
        datafilter['ctrlist'] = request.args.get('ctrlist')
    if int(thisyear) > 0:
        datafilter['startyear'] = int(thisyear)
        datafilter['endyear'] = int(thisyear)
    if request.args.get('historical'):
        switch = 'historical'

    # Geocoder
    (classification, geodataset, title, units) = content2dataframe(config,
        config['geocoderhandle'])
    #(modern, historical) = loadgeocoder(config, geodataset, 'geocoder')
    (geocoder, geolist, oecd2webmapper, modern, historical) = \
        request_geocoder(config, '')
    if switch == 'modern':
        activeindex = modern.index
        coder = modern
        class1 = switch
    else:
        activeindex = historical.index
        coder = historical
        class1 = switch

    # Loading dataset in dataframe
    try:
        (class1, dataset, title, units) = content2dataframe(config, handle)
    except:
        return 'No dataset ' + handle
    (cfilter, notint) = selectint(activeindex.values)
    (origdata, maindata, metadata) = request_datasets(config, switch, modern,
        historical, handles, geolist)
    (subsets, panel) = ({}, [])

    # Show only available years
    if action == 'showyears':
        years = []
        datafilter['startyear'] = yearmin
        datafilter['endyear'] = lastyear
        (datasubset, ctrlist) = datasetfilter(maindata[handles[0]], datafilter)
        # Remove years without any values
        if not datafilter['ctrlist']:
            if np.nan in datasubset.index:
                datasubset = datasubset.drop(np.nan, axis=0)
        for colyear in datasubset.columns:
            if datasubset[colyear].count() == 0:
                datasubset = datasubset.drop(colyear, axis=1)
        (years, notyears) = selectint(datasubset.columns)
        # YEARS
        return Response(json.dumps(years), mimetype='application/json')

    # Process all indicators
    for handle in handles:
        (datasubset, ctrlist) = datasetfilter(maindata[handle], datafilter)
        if not datasubset.empty:
            #datasubset = datasubset.dropna(how='all')
            if not datafilter['ctrlist']:
                if np.nan in datasubset.index:
                    datasubset = datasubset.drop(np.nan, axis=0)
            panel.append(datasubset)
            subsets[handle] = datasubset

    maindata = subsets[handles[0]]
    treemapdata = buildtreemap(config, maindata, switch, cfilter, coder)
    return Response(treemapdata, mimetype='application/json')
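# The column-by-column clean-up loop above (drop year columns whose count()
# is zero, after dropping all-NaN rows) recurs in several views here. A
# one-call equivalent using pandas' own dropna, as a sketch:
#
# def drop_empty(frame):
#     # rows with no values at all, then columns with no observations
#     frame = frame.dropna(how='all')
#     return frame.dropna(axis=1, how='all')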
def tableapi():
    # years in filter
    config = configuration()
    switch = 'modern'
    datafilter = {}
    datafilter['ctrlist'] = ''
    customyear = ''
    fromyear = '1500'
    datafilter['startyear'] = fromyear
    toyear = '2012'
    datafilter['endyear'] = toyear
    customcountrycodes = ''
    (aggr, logscale, dataset, handles) = ('', '', '', [])

    # Select countries
    f = request.args
    for key in f.keys():
        if key == 'loc':
            for value in sorted(f.getlist(key)):
                if value:
                    customcountrycodes = str(customcountrycodes) + str(value) + ','
    if customcountrycodes:
        customcountrycodes = customcountrycodes[:-1]

    #handle = "F16UDU"
    # HANDLE
    if request.args.get('handle'):
        handledataset = request.args.get('handle')
        try:
            (pids, pidslist) = pidfrompanel(handledataset)
            handles.append(pids[0])
        except:
            handles.append(handledataset)
            nopanel = 'yes'
    if request.args.get('dataset'):
        dataset = request.args.get('dataset')
    if request.args.get('hist'):
        switch = 'historical'
    if request.args.get('ctrlist'):
        customcountrycodes = ''
        tmpcustomcountrycodes = request.args.get('ctrlist')
        c = tmpcustomcountrycodes.split(',')
        for ids in sorted(c):
            if ids:
                customcountrycodes = str(customcountrycodes) + str(ids) + ','
        customcountrycodes = customcountrycodes[:-1]
        datafilter['ctrlist'] = customcountrycodes
    if not customcountrycodes:
        customcountrycodes = '528'
    if request.args.get('yearmin'):
        fromyear = request.args.get('yearmin')
        datafilter['startyear'] = fromyear
    if request.args.get('yearmax'):
        toyear = request.args.get('yearmax')
        datafilter['endyear'] = toyear
    if request.args.get('aggr'):
        aggr = request.args.get('aggr')
    # Log scales switch
    if request.args.get('logscale'):
        logscale = request.args.get('logscale')

    DEBUG = 0
    old = ''
    if old:
        apifile = str(dataset) + ".json"
        jsonapi = config['apiroot'] + "/collabs/static/data/" + apifile
        dataframe = load_api_data(jsonapi, '')
        loccodes = loadcodes(dataframe)
        (ctr, header) = countryset(customcountrycodes, loccodes)
        indicator = ''
        (frame, years, values, dates, original) = createframe(indicator,
            loccodes, dataframe, customyear, fromyear, toyear, ctr,
            logscale, DEBUG)
        names = ['indicator', 'm', 'ctrcode', 'country', 'year', 'intcode',
                 'value', 'id']
        (csvdata, aggrdata) = combinedata(ctr, frame, loccodes)
    # New version is fast
    else:
        (geocoder, geolist, oecd2webmapper, modern, historical) = \
            request_geocoder(config, '')
        (origdata, maindata, metadata) = request_datasets(config, switch,
            modern, historical, handles, geolist)
        (subsets, panel) = ({}, [])
        for handle in handles:
            (datasubset, ctrlist) = datasetfilter(maindata[handle], datafilter)
            if not datasubset.empty:
                datasubset = datasubset.dropna(how='all')
                panel.append(datasubset)
                subsets[handle] = datasubset
        classification = modern
        if switch == 'historical':
            classification = historical
        (csvdata, aggrdata) = dataset_to_csv(config, subsets[handles[0]],
                                             classification)

    if aggr:
        csvdata = aggrdata
    return (csvdata, aggrdata)
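# The manual comma-concatenation of country codes above (build a string,
# then strip the trailing comma) can be expressed in one join. A minimal
# equivalent, with normalize_ctrlist as a hypothetical helper name:
#
# def normalize_ctrlist(raw):
#     # split, drop empty entries, sort, and rejoin a code list
#     return ','.join(sorted(c for c in raw.split(',') if c))
#
# normalize_ctrlist('528,,250') -> '250,528'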
def downloadzip(pid):
    DEBUG = 0
    fullpath = ''
    fullmetadata = {}
    logscale = 0
    config = configuration()
    config['remote'] = 'on'
    API_TOKEN = config['key']
    HOSTNAME = config['dataverseroot']
    cmd = "--insecure -u " + API_TOKEN + ": " + HOSTNAME \
        + "/dvn/api/data-deposit/v1.1/swordv2/statement/study/"
    tmpdir = config['tmpdir']
    filerandom = randomword(10)
    #filerandom = '12345'
    arc = "data" + filerandom + ".zip"
    filename = filerandom
    finaldir = config['path'] + '/static/tmp'
    # ToDO
    if filename:
        finaldir = str(finaldir) + '/' + str(filename)
        tmpdir = str(tmpdir) + '/' + str(filename)
    try:
        os.mkdir(tmpdir)
        os.mkdir(finaldir)
    except:
        donothing = 'ok'

    customyear = ''
    fromyear = request.args.get('y[min]')
    toyear = request.args.get('y[max]')
    historical = request.args.get('type[0]')
    (handles, pidslist) = pidfrompanel(pid)
    try:
        if pidslist:
            fullmetadata = load_fullmetadata(pidslist)
    except:
        showwarning = 1

    # Log scales switch
    if request.args.get('logscale'):
        logscale = 1

    # Select countries
    customcountrycodes = ''
    f = request.args
    for key in f.keys():
        if is_location(key):
            for value in sorted(f.getlist(key)):
                customcountrycodes = str(customcountrycodes) + str(value) + ','
    if customcountrycodes:
        customcountrycodes = customcountrycodes[:-1]

    if handles:
        if historical:
            api = config['apiroot'] + "/collabs/static/data/historical.json"
            (regions, countries, ctr2reg, webmapper, geocoder) = histo(api, '')
            hist = countries
        else:
            hist = ''
        (classification, geodataset, title, units) = content2dataframe(config,
            config['geocoderhandle'])
        #geocoder = buildgeocoder(dataset, config)
        # 'dataset' was unbound here in the original; the frame returned by
        # content2dataframe() above is the intended argument
        (modern, historical) = loadgeocoder(config, geodataset, 'geocoder')
        for handle in handles:
            #if remote:
            #    (class1, dataset) = loaddataset_fromurl(config, handle)
            #else:
            #    dataset = loaddataset(handles)
            #(cfilter, notint) = selectint(activeindex.values)
            #(moderndata, historicaldata) = loadgeocoder(dataset, '')
            # CHANGE
            #return str(dataset.index)
            (header, panelcells, codes, datahub, data, handle2ind, unit2ind,
             original) = data2panel(handles, customcountrycodes, fromyear,
                                    toyear, customyear, hist, logscale)
        filename = filename + '.xls'
        fullpath = panel2excel(finaldir, filename, header, panelcells,
                               fullmetadata)
    else:
        # Clio format download
        zipfile = get_papers(HOSTNAME, API_TOKEN, cmd, pid, tmpdir, arc,
                             finaldir)
        (alonepid, revid, cliohandle, clearpid) = findpid(pid)
        if alonepid:
            handles = [clearpid]
        for pid in handles:
            if historical:
                api = config['apiroot'] + "/collabs/static/data/historical.json"
                (regions, countries, ctr2reg, webmapper, geocoder) = histo(api, '')
                hist = countries
            else:
                hist = ''
            filename = filename + '.xls'
            # 2DEBUG
            (header, panelcells, codes, datahub, data, handle2ind, unit2ind,
             originalvalues) = data2panel(handles, customcountrycodes,
                                          fromyear, toyear, customyear, hist,
                                          logscale)
            #codes = hist
            #return str(fullmetadata)
            metadata = fullmetadata
            result = individual_dataset(finaldir, filename, handle2ind[pid],
                                        unit2ind[pid], datahub, data[pid],
                                        codes, metadata)

    try:
        for everypid in handles:
            # Download papers
            zipfile = get_papers(HOSTNAME, API_TOKEN, cmd, everypid, tmpdir,
                                 arc, finaldir)
    except:
        nopapers = 1
    compile2zip(finaldir, arc)
    filename = arc
    return filename
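# compile2zip() is defined elsewhere; from the call sites here (a directory
# plus an archive name, after which the archive is served from that
# directory) it plausibly does something like the hypothetical sketch below,
# using only the standard zipfile module:
#
# import os
# import zipfile
#
# def compile2zip_sketch(directory, arcname):
#     # bundle every file in 'directory' into 'directory/arcname'
#     archive = os.path.join(directory, arcname)
#     with zipfile.ZipFile(archive, 'w', zipfile.ZIP_DEFLATED) as zf:
#         for name in os.listdir(directory):
#             if name != arcname:  # don't zip the archive into itself
#                 zf.write(os.path.join(directory, name), arcname=name)
#     return archive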
def download():
    (classification, pid, root, switch, datafile) = ('modern', '', '',
                                                     'modern', '')
    handle = ''
    config = configuration()
    cmd = "--insecure -u " + config['key'] + ": " + config['dataverseroot'] \
        + "/dvn/api/data-deposit/v1.1/swordv2/statement/study/"
    config['remote'] = ''
    datafilter = {}
    datafilter['startyear'] = '1500'
    datafilter['endyear'] = '2010'
    datafilter['ctrlist'] = ''
    tmpdir = config['tmpdir']
    filerandom = randomword(10)
    #filerandom = '12345'
    arc = "data" + filerandom + ".zip"
    filename = filerandom
    finaldir = config['path'] + '/static/tmp'
    # ToDO
    if filename:
        finaldir = str(finaldir) + '/' + str(filename)
        tmpdir = str(tmpdir) + '/' + str(filename)
    try:
        os.mkdir(tmpdir)
        os.mkdir(finaldir)
    except:
        donothing = 'ok'

    if request.args.get('handle'):
        handle = request.args.get('handle')
    if request.args.get('type[0]') == 'historical':
        classification = request.args.get('type[0]')
        switch = classification
    if request.args.get('y[min]'):
        datafilter['startyear'] = request.args.get('y[min]')
    if request.args.get('y[max]'):
        datafilter['endyear'] = request.args.get('y[max]')

    # Select countries
    customcountrycodes = ''
    f = request.args
    for key in f.keys():
        if is_location(key):
            for value in sorted(f.getlist(key)):
                customcountrycodes = str(customcountrycodes) + str(value) + ','
    if customcountrycodes:
        customcountrycodes = customcountrycodes[:-1]
        datafilter['ctrlist'] = customcountrycodes
    if request.args.get('ctrlist'):
        datafilter['ctrlist'] = request.args.get('ctrlist')

    if request.args.get('pid'):
        pid = request.args.get('pid')
        ispanel = ''
        try:
            (pids, pidslist) = pidfrompanel(pid)
            handles = pids
            handle = pids[0]
            match = re.match(r'Panel\[(.+)\]', pid)
            if match:
                ispanel = 'yes'
        except:
            # The original fell back to 'handles = pid' and
            # 'handle = pids[0]', but 'pids' is unbound when pidfrompanel()
            # fails; treat the pid as a single handle instead
            handles = [pid]
            handle = pid
        if ispanel:
            dirforzip = ''
            for handle in handles:
                dirforzip = get_papers(config['dataverseroot'], config['key'],
                                       cmd, handle, tmpdir, arc, finaldir)
            (header, panelcells, metadata, totalpanel) = build_panel(config,
                switch, handles, datafilter)
            filename = "paneldata.xlsx"
            metadata = []
            datadir = config['webtest']
            localoutfile = panel2excel(dirforzip, filename, header,
                                       panelcells, metadata)
            arc = 'dataarchive.zip'
            compile2zip(dirforzip, arc)
            root = config['apiroot'] + "/collabs/static/tmp/" + str(arc)
            return redirect(root, code=301)

    if classification:
        outfile = "clioinfra.xlsx"
        dirforzip = get_papers(config['dataverseroot'], config['key'], cmd,
                               handle, tmpdir, arc, finaldir)
        #fullpath = config['webtest'] + "/" + str(outfile)
        fullpath = dirforzip

        # Check selection
        isselection = 'yes'
        if datafilter['startyear'] == '1500':
            if datafilter['ctrlist'] == '':
                isselection = 'yes'
        if isselection:
            (datafile, outfilefinal, finalsubset) = dataframe_compiler(config,
                fullpath, handle, classification, datafilter)
            #return datafile.to_html()
        else:
            # Copy original dataset
            source = os.listdir(tmpdir)
            for excelfile in source:
                shutil.copy(tmpdir + '/' + excelfile, dirforzip)
        #return outfilefinal
        arc = 'dataarchive.zip'
        if datafile:
            arc = "%s_%s.zip" % (datafile, switch)
        compile2zip(dirforzip, arc)
        root = config['apiroot'] + "/collabs/static/tmp/" + str(arc)
        #root = config['apiroot'] + "/collabs/static/tmp/" + str(outfile)
        return redirect(root, code=301)
    else:
        zipfile = downloadzip(pid)
        # CHANGE
        #return zipfile
        # DEBUG1
        root = config['apiroot'] + "/collabs/static/tmp/" + zipfile
        # HTML
        #resp = make_response(render_template('progress.html', download=root))
        #return "<a href=\"" + str(root) + "\">Download dataset(s) with all papers (zip archive)</a>"
        #return resp
        return redirect(root, code=301)
def mapslider():
    (title, steps, customcountrycodes, fromyear, toyear, customyear, catmax,
     histo) = ('', 0, '', '1500', '2012', '', 6, '')
    config = configuration()
    datafilter = {}
    datafilter['ctrlist'] = ''
    datafilter['startyear'] = fromyear
    datafilter['endyear'] = toyear
    if config['error']:
        return config['error']

    handleface = ''
    # Initialized here: the original left 'handle' unbound when only
    # ?dataset= was supplied, which would fail at render time
    handle = ''
    urlmatch = re.search(r'(.+)\&face', request.url)
    try:
        if urlmatch.group(0):
            thismapurl = urlmatch.group(1)
    except:
        thismapurl = request.url
    thismapurl = thismapurl.replace('http://', 'https://')

    geocoder = ''
    pids = []
    handledataset = ''
    logscale = 0
    handles = []
    datahub = {}
    dataset = ''
    warning = ''
    hist = {}
    if request.args.get('ctrlist'):
        customcountrycodes = ''
        tmpcustomcountrycodes = request.args.get('ctrlist')
        c = tmpcustomcountrycodes.split(',')
        for ids in sorted(c):
            if ids:
                customcountrycodes = str(customcountrycodes) + str(ids) + ','
        customcountrycodes = customcountrycodes[:-1]
        datafilter['ctrlist'] = customcountrycodes
    if request.args.get('dataset'):
        dataset = request.args.get('dataset')
        handles.append(dataset)
    if request.args.get('handle'):
        handledataset = request.args.get('handle')
        try:
            (pids, pidslist) = pidfrompanel(handledataset)
        except:
            nopanel = 'yes'
        handlestring = request.args.get('handle')
        ishandle = re.search(r'(hdl:\d+\/\w+)', handlestring)
        if ishandle:
            handle = ishandle.group(1)
            handle = handle.replace("'", "")
        else:
            handle = handlestring
        (dataset, revid, cliopid, clearpid) = findpid(handle)
        #handles.append(dataset)
        handles.append(handle)
        handleface = handle
    if request.args.get('logscale'):
        logscale = 1
    if request.args.get('catmax'):
        catmax = request.args.get('catmax')
    if request.args.get('yearmin'):
        fromyear = request.args.get('yearmin')
        datafilter['startyear'] = fromyear
    if request.args.get('yearmax'):
        toyear = request.args.get('yearmax')
        datafilter['endyear'] = toyear
    if request.args.get('geocoder'):
        geocoder = request.args.get('geocoder')
    if request.args.get('hist'):
        geocoder = request.args.get('hist')
        histo = 'on'
    if request.args.get('face'):
        handleface = request.args.get('face')
    if handleface:
        handles = []
        handle = handleface
        handles.append(handleface)
        try:
            pids.remove(handleface)
        except:
            nothing = 1

    historical = 0
    hubyears = []
    if config:
        switch = 'modern'
        if histo:
            switch = 'historical'
        (geocoder, geolist, oecd2webmapper, modern, historical) = \
            request_geocoder(config, '')
        (origdata, maindata, metadata) = request_datasets(config, switch,
            modern, historical, handles, geolist)
        (hubyears, notyears) = selectint(origdata.columns)
        title = metadata[handles[0]]['title']
        for handle in handles:
            (datasubset, ctrlist) = datasetfilter(maindata[handle], datafilter)
            datasubset['handle'] = handle
            if not datasubset.empty:
                datasubset = datasubset.dropna(how='all')
                (allyears, notyears) = selectint(datasubset.columns)
                for year in datasubset:
                    if datasubset[year].count() == 0:
                        datasubset = datasubset.drop(year, axis=1)
                (hubyears, notyears) = selectint(datasubset.columns)

    validyears = []
    lastyear = ''
    for year in sorted(hubyears):
        validyears.append(year)
        lastyear = year
        steps = steps + 1

    handledict = {}
    if pids:
        hquery = formdatasetquery(pids, '')
        d = readdatasets('datasets', json.loads(hquery))
        for x in d:
            thishandle = x['handle']
            handledict[thishandle] = x['title']
    #validyears.reverse()
    return make_response(render_template('mapslider.html', handle=handle,
        years=validyears, warning=warning, steps=steps, title=title,
        geocoder=histo, dataset=dataset, customcountrycodes=customcountrycodes,
        catmax=catmax, lastyear=lastyear, indicators=pids,
        thismapurl=thismapurl, handledict=handledict))
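# Both mapslider() and chartlib() derive 'thismapurl' by truncating the URL
# at '&face' with a regex, which also drops any parameters after it. An
# alternative sketch that removes only the 'face' parameter, using the
# standard library (Python 3 names; under Python 2 these helpers live in the
# urlparse and urllib modules):
#
# from urllib.parse import urlencode, urlparse, parse_qsl, urlunparse
#
# def strip_face_param(url):
#     parts = urlparse(url)
#     query = [(k, v) for (k, v) in parse_qsl(parts.query) if k != 'face']
#     return urlunparse(parts._replace(query=urlencode(query)))
#
# strip_face_param('https://host/map?handle=hdl:1&face=hdl:2')
# -> 'https://host/map?handle=hdl:1'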
def chartlib():
    (thismapurl, apilink, ctrlist, title, units, switch, hist) = ('', '', '',
        'Title', 'Units', 'modern', '')
    handleface = []
    config = configuration()
    ctrlist = config['ctrlist']
    if config['error']:
        return config['error']

    urlmatch = re.search(r'(.+)\&face', request.url)
    try:
        if urlmatch.group(0):
            thismapurl = urlmatch.group(1)
    except:
        thismapurl = request.url
    if 'sandbox' not in thismapurl:
        thismapurl = thismapurl.replace('http://', 'https://')

    handles = []
    # Keep 'pids' bound even when pidfrompanel() fails below; the original
    # referenced it unconditionally after the try/except
    pids = []
    showpanel = 'yes'
    try:
        if request.args.get('print'):
            showpanel = ''
    except:
        showpanel = 'yes'

    f = request.args
    handle = ''
    for q in f:
        value = f[q]
        if value:
            handle = str(handle) + '&' + str(q) + '=' + str(f[q])
    if request.args.get('ctrlist'):
        ctrlist = request.args.get('ctrlist')
    if request.args.get('hist'):
        switch = 'historical'
        hist = request.args.get('hist')
    if request.args.get('handle'):
        handledataset = request.args.get('handle')
        try:
            (pids, pidslist) = pidfrompanel(handledataset)
            handles.append(pids[0])
        except:
            handles.append(handledataset)
            nopanel = 'yes'
        if pids and pids[0]:
            apilink = "/api/tabledata?handle=" + str(pids[0])
            if ctrlist:
                apilink = apilink + '&ctrlist=' + ctrlist
            if request.args.get('hist'):
                apilink = apilink + '&hist=' + hist
    if request.args.get('face'):
        handles = []
        handleface = request.args.get('face')
        handles.append(handleface)
        if handleface:
            apilink = "/api/tabledata?handle=" + str(handleface)
            if ctrlist:
                apilink = apilink + '&ctrlist=' + ctrlist
            if request.args.get('hist'):
                apilink = apilink + '&hist=' + hist
        try:
            pids.remove(handleface)
        except:
            nothing = 1
        if set(pids) == set(handles):
            pids[:] = []

    links = graphlinks('&face=' + str(handles[0]) + '&hist=' + hist)
    (geocoder, geolist, oecd2webmapper, modern, historical) = \
        request_geocoder(config, '')
    # vty hist
    (origdata, maindata, metadata) = request_datasets(config, switch, modern,
        historical, handles, geolist)
    try:
        title = metadata[handles[0]]['title']
        units = metadata[handles[0]]['units']
    except:
        skip = 0

    handledict = {}
    if handles:
        handle = handles[0]
    if pids:
        try:
            if handles[1]:
                pids.remove(handles[0])
        except:
            skip = 1
        hquery = formdatasetquery(pids, '')
        d = readdatasets('datasets', json.loads(hquery))
        for x in d:
            thishandle = x['handle']
            if thishandle != handle:
                handledict[thishandle] = x['title']

    resp = make_response(render_template('chartlib.html',
        thismapurl=thismapurl, indicators=handledict, apilink=apilink,
        title=title, units=units, showpanel=showpanel, handle=handle,
        chartlib=links['chartlib'], barlib=links['barlib'],
        panellib=links['panellib'], treemaplib=links['treemaplib']))
    return resp
def statistics(settings=''):
    datafilter = {}
    (yearmin, yearmax, ctrlist, histo) = ('1500', '2020', '', '')
    datafilter['startyear'] = yearmin
    datafilter['endyear'] = yearmax
    datafilter['ctrlist'] = ''
    config = configuration()
    if config['error']:
        return config['error']

    handles = []
    if request.args.get('handle'):
        handledataset = request.args.get('handle')
        handledataset = handledataset.replace(" ", '')
        panelcheck = re.search(r'Panel', handledataset)
        if not panelcheck:
            handles.append(handledataset)
            handledataset = "Panel[" + handledataset + "]"
        else:
            (handles, pidslist) = pidfrompanel(handledataset)
    if request.args.get('dataset'):
        dataset = request.args.get('dataset')
        handles.append(dataset)
    if request.args.get('hist'):
        histo = 'on'
    if request.args.get('yearmin'):
        yearmin = request.args.get('yearmin')
        datafilter['startyear'] = yearmin
    if request.args.get('yearmax'):
        yearmax = request.args.get('yearmax')
        datafilter['endyear'] = yearmax
    if request.args.get('ctrlist'):
        ctrlist = request.args.get('ctrlist')
        datafilter['ctrlist'] = ctrlist

    old = ''
    (names, cleanedpanel) = ({}, [])
    for handle in handles:
        names[handle] = str(handle)
    if old:
        modern = moderncodes(config['modernnames'], config['apiroot'])
        jsonapi = config['apiroot'] + '/api/datasets?handle=' + str(handledataset)
        (panel, cleanedpanel, names) = loadpanel(jsonapi, yearmin, yearmax,
                                                 ctrlist)
    else:
        if histo:
            switch = 'historical'
            loadgeo = 'geocoder'
        else:
            switch = 'modern'
            loadgeo = ''
        geolist = {}
        (geocoder, geolist, oecd2webmapper, modern, historical) = \
            request_geocoder(config, loadgeo)
        (origdata, maindata, metadata) = request_datasets(config, switch,
            modern, historical, handles, geolist)
        (subsets, panel) = ({}, [])
        for handle in handles:
            (datasubset, ctrlist) = datasetfilter(maindata[handle], datafilter)
            #datasubset['handle'] = handle
            meta = metadata[handle]
            names[handle] = meta['title']
            # Try to remove years columns
            try:
                if np.nan in datasubset.index:
                    datasubset = datasubset.drop(np.nan, axis=0)
                if str(np.nan) in datasubset.columns:
                    datasubset = datasubset.drop(np.nan, axis=1)
            except:
                skip = 'yes'
            # Try to remove index columns
            try:
                if config['webmapperoecd'] in datasubset.index:
                    datasubset = datasubset.drop(config['webmapperoecd'], axis=0)
            except:
                skip = 'yes'
            if not datasubset.empty:
                datasubset['handle'] = handle
                panel.append(datasubset)
                subsets[handle] = datasubset
        cleanedpanel = pd.concat(panel)

    #(header, data, countries, handles, vhandles) = advpanel2dict(cleanedpanel)
    #data = advpanel2dict(cleanedpanel)
    #return data.to_html()
    ctrlimit = 200
    data = handle2statistics(handles, cleanedpanel, names)
    showhtml = statistics2table(data)
    return showhtml