import sys
import os
import re
import simplejson
import pandas as pd
from datetime import datetime

# Make the "../modules" directory importable before loading project code.
# NOTE(review): "__file__" is a quoted string literal here, so dirname()
# returns '' and the path resolves against the current working directory --
# confirm whether the unquoted __file__ was intended.
modules_dir = os.path.abspath(
    os.path.join(os.path.dirname("__file__"), '../modules'))
sys.path.append(modules_dir)

from config import (configuration, dataverse2indicators, load_dataverse,
                    findpid, load_metadata)
from datacompiler import dataframe_compiler

# Settings for a single timed run of dataframe_compiler.
config = configuration()
config['remote'] = 'on'

datafilter = {
    'startyear': '1500',
    'endyear': '2010',
    'ctrlist': '',
}
# Example of a restricted country list:
#datafilter['ctrlist'] = '528,14,18,67'

handle = 'hdl:10622/SO62N5'
switch = 'historical'
fullpath = '/home/dpe/tmp'

# Time the compiler call and report the elapsed whole seconds.
started = datetime.now()
(filetitle, fullpath, finalsubset) = dataframe_compiler(
    config, fullpath, handle, switch, datafilter)
elapsed = datetime.now() - started
print("Time: " + str(elapsed.seconds) + " seconds")
#print finalsubset.ix[67][1831]
def download():
    """Build a zip archive for the requested dataset(s) and redirect to it.

    Query parameters read from ``request.args``:
      * ``handle``   -- dataset handle used when no ``pid`` is supplied.
      * ``type[0]``  -- when equal to ``'historical'`` it replaces the default
        ``'modern'`` classification/switch.
      * ``y[min]`` / ``y[max]`` -- override the default 1500-2010 year range.
      * any key accepted by ``is_location`` -- its values are collected into
        a comma-separated country list.
      * ``ctrlist``  -- explicit country list; overrides the collected one.
      * ``pid``      -- dataset identifier; when it matches ``Panel[...]`` a
        panel spreadsheet is built from all handles it resolves to.

    Returns the result of ``redirect(..., code=301)`` pointing at the archive
    below ``config['apiroot'] + "/collabs/static/tmp/"``.
    """
    (classification, pid, root, switch, datafile) = ('modern', '', '', 'modern', '')
    handle = ''
    config = configuration()
    cmd = ("--insecure -u " + config['key'] + ": " + config['dataverseroot'] +
           "/dvn/api/data-deposit/v1.1/swordv2/statement/study/")
    config['remote'] = ''
    datafilter = {
        'startyear': '1500',
        'endyear': '2010',
        'ctrlist': '',
    }

    # Every request works in its own randomly named sub-directories.
    tmpdir = config['tmpdir']
    filerandom = randomword(10)
    arc = "data" + filerandom + ".zip"
    filename = filerandom
    finaldir = config['path'] + '/static/tmp'
    if filename:
        finaldir = str(finaldir) + '/' + str(filename)
        tmpdir = str(tmpdir) + '/' + str(filename)

    # Create each directory independently so that a failure on the first one
    # (e.g. it already exists) no longer prevents creating the second.
    for directory in (tmpdir, finaldir):
        try:
            os.mkdir(directory)
        except OSError:
            # Best effort, as before: an unusable directory surfaces later
            # when the helpers try to write into it.
            pass

    args = request.args
    if args.get('handle'):
        handle = args.get('handle')
    if args.get('type[0]') == 'historical':
        classification = args.get('type[0]')
        switch = classification
    if args.get('y[min]'):
        datafilter['startyear'] = args.get('y[min]')
    if args.get('y[max]'):
        datafilter['endyear'] = args.get('y[max]')

    # Select countries: gather the values of every location-type parameter.
    countrycodes = []
    for key in args.keys():
        if is_location(key):
            countrycodes.extend(str(value) for value in sorted(args.getlist(key)))
    if countrycodes:
        datafilter['ctrlist'] = ','.join(countrycodes)

    # An explicit ctrlist parameter wins over the collected codes.
    if args.get('ctrlist'):
        datafilter['ctrlist'] = args.get('ctrlist')

    if args.get('pid'):
        pid = args.get('pid')
        ispanel = False
        try:
            (pids, pidslist) = pidfrompanel(pid)
            handles = pids
            handle = pids[0]
            if re.match(r'Panel\[(.+)\]', pid):
                ispanel = True
        except Exception:
            # Fall back to the raw pid. The previous fallback read pids[0],
            # which is unbound when pidfrompanel() itself raised, so the
            # handler failed with a NameError instead of recovering.
            handles = [pid]
            handle = pid

        if ispanel:
            dirforzip = ''
            for panelhandle in handles:
                dirforzip = get_papers(config['dataverseroot'], config['key'],
                                       cmd, panelhandle, tmpdir, arc, finaldir)

            (header, panelcells, metadata, totalpanel) = build_panel(
                config, switch, handles, datafilter)
            filename = "paneldata.xlsx"
            # The metadata returned by build_panel is deliberately replaced by
            # an empty list before writing the spreadsheet (as in the original).
            metadata = []
            panel2excel(dirforzip, filename, header, panelcells, metadata)
            arc = 'dataarchive.zip'
            compile2zip(dirforzip, arc)
            root = config['apiroot'] + "/collabs/static/tmp/" + str(arc)
            return redirect(root, code=301)

    # NOTE(review): classification is always 'modern' or 'historical', so the
    # else branch below (downloadzip) is never reached -- confirm intent.
    if classification:
        dirforzip = get_papers(config['dataverseroot'], config['key'], cmd,
                               handle, tmpdir, arc, finaldir)
        fullpath = dirforzip

        # Check selection.
        # NOTE(review): every path assigns 'yes', so the compiler always runs
        # and the "copy original dataset" branch is dead code. The default
        # filter case was presumably meant to clear the flag; behaviour is
        # kept unchanged here -- confirm before altering.
        isselection = 'yes'
        if datafilter['startyear'] == '1500' and datafilter['ctrlist'] == '':
            isselection = 'yes'

        if isselection:
            (datafile, outfilefinal, finalsubset) = dataframe_compiler(
                config, fullpath, handle, classification, datafilter)
        else:
            # Copy original dataset
            for excelfile in os.listdir(tmpdir):
                shutil.copy(tmpdir + '/' + excelfile, dirforzip)

        arc = 'dataarchive.zip'
        if datafile:
            arc = "%s_%s.zip" % (datafile, switch)
        compile2zip(dirforzip, arc)
        root = config['apiroot'] + "/collabs/static/tmp/" + str(arc)
        return redirect(root, code=301)
    else:
        zipfile = downloadzip(pid)
        root = config['apiroot'] + "/collabs/static/tmp/" + zipfile
        return redirect(root, code=301)
def download():
    """Build a zip archive for the requested dataset(s) and redirect to it.

    Query parameters read from ``request.args``:
      * ``handle``   -- dataset handle used when no ``pid`` is supplied.
      * ``type[0]``  -- when equal to ``'historical'`` it replaces the default
        ``'modern'`` classification/switch.
      * ``y[min]`` / ``y[max]`` -- override the default 1500-2010 year range.
      * any key accepted by ``is_location`` -- its values are collected into
        a comma-separated country list.
      * ``ctrlist``  -- explicit country list; overrides the collected one.
      * ``pid``      -- dataset identifier; when it matches ``Panel[...]`` a
        panel spreadsheet is built from all handles it resolves to.

    Returns the result of ``redirect(..., code=301)`` pointing at the archive
    below ``config['apiroot'] + "/collabs/static/tmp/"``.
    """
    (classification, pid, root, switch, datafile) = ('modern', '', '', 'modern', '')
    handle = ''
    config = configuration()
    cmd = ("--insecure -u " + config['key'] + ": " + config['dataverseroot'] +
           "/dvn/api/data-deposit/v1.1/swordv2/statement/study/")
    config['remote'] = ''
    datafilter = {
        'startyear': '1500',
        'endyear': '2010',
        'ctrlist': '',
    }

    # Every request works in its own randomly named sub-directories.
    tmpdir = config['tmpdir']
    filerandom = randomword(10)
    arc = "data" + filerandom + ".zip"
    filename = filerandom
    finaldir = config['path'] + '/static/tmp'
    if filename:
        finaldir = str(finaldir) + '/' + str(filename)
        tmpdir = str(tmpdir) + '/' + str(filename)

    # Create each directory independently so that a failure on the first one
    # (e.g. it already exists) no longer prevents creating the second.
    for directory in (tmpdir, finaldir):
        try:
            os.mkdir(directory)
        except OSError:
            # Best effort, as before: an unusable directory surfaces later
            # when the helpers try to write into it.
            pass

    args = request.args
    if args.get('handle'):
        handle = args.get('handle')
    if args.get('type[0]') == 'historical':
        classification = args.get('type[0]')
        switch = classification
    if args.get('y[min]'):
        datafilter['startyear'] = args.get('y[min]')
    if args.get('y[max]'):
        datafilter['endyear'] = args.get('y[max]')

    # Select countries: gather the values of every location-type parameter.
    countrycodes = []
    for key in args.keys():
        if is_location(key):
            countrycodes.extend(str(value) for value in sorted(args.getlist(key)))
    if countrycodes:
        datafilter['ctrlist'] = ','.join(countrycodes)

    # An explicit ctrlist parameter wins over the collected codes.
    if args.get('ctrlist'):
        datafilter['ctrlist'] = args.get('ctrlist')

    if args.get('pid'):
        pid = args.get('pid')
        ispanel = False
        try:
            (pids, pidslist) = pidfrompanel(pid)
            handles = pids
            handle = pids[0]
            if re.match(r'Panel\[(.+)\]', pid):
                ispanel = True
        except Exception:
            # Fall back to the raw pid. The previous fallback read pids[0],
            # which is unbound when pidfrompanel() itself raised, so the
            # handler failed with a NameError instead of recovering.
            handles = [pid]
            handle = pid

        if ispanel:
            dirforzip = ''
            for panelhandle in handles:
                dirforzip = get_papers(config['dataverseroot'], config['key'],
                                       cmd, panelhandle, tmpdir, arc, finaldir)

            (header, panelcells, metadata, totalpanel) = build_panel(
                config, switch, handles, datafilter)
            filename = "paneldata.xlsx"
            # The metadata returned by build_panel is deliberately replaced by
            # an empty list before writing the spreadsheet (as in the original).
            metadata = []
            panel2excel(dirforzip, filename, header, panelcells, metadata)
            arc = 'dataarchive.zip'
            compile2zip(dirforzip, arc)
            root = config['apiroot'] + "/collabs/static/tmp/" + str(arc)
            return redirect(root, code=301)

    # NOTE(review): classification is always 'modern' or 'historical', so the
    # else branch below (downloadzip) is never reached -- confirm intent.
    if classification:
        dirforzip = get_papers(config['dataverseroot'], config['key'], cmd,
                               handle, tmpdir, arc, finaldir)
        fullpath = dirforzip

        # Check selection.
        # NOTE(review): every path assigns 'yes', so the compiler always runs
        # and the "copy original dataset" branch is dead code. The default
        # filter case was presumably meant to clear the flag; behaviour is
        # kept unchanged here -- confirm before altering.
        isselection = 'yes'
        if datafilter['startyear'] == '1500' and datafilter['ctrlist'] == '':
            isselection = 'yes'

        if isselection:
            (datafile, outfilefinal, finalsubset) = dataframe_compiler(
                config, fullpath, handle, classification, datafilter)
        else:
            # Copy original dataset
            for excelfile in os.listdir(tmpdir):
                shutil.copy(tmpdir + '/' + excelfile, dirforzip)

        arc = 'dataarchive.zip'
        if datafile:
            arc = "%s_%s.zip" % (datafile, switch)
        compile2zip(dirforzip, arc)
        root = config['apiroot'] + "/collabs/static/tmp/" + str(arc)
        return redirect(root, code=301)
    else:
        zipfile = downloadzip(pid)
        root = config['apiroot'] + "/collabs/static/tmp/" + zipfile
        return redirect(root, code=301)
#!/usr/bin/python
import json
import sys
import os
import re
import simplejson
import pandas as pd
from datetime import datetime

# Extend the import path with "../modules" so the project imports resolve.
# NOTE(review): "__file__" is a quoted string literal, so dirname() returns ''
# and the location depends on the current working directory -- confirm
# whether the unquoted __file__ was intended.
modules_dir = os.path.abspath(
    os.path.join(os.path.dirname("__file__"), '../modules'))
sys.path.append(modules_dir)

from config import (configuration, dataverse2indicators, load_dataverse,
                    findpid, load_metadata)
from datacompiler import dataframe_compiler

# Inputs for one timed dataframe_compiler run.
config = configuration()
config['remote'] = 'on'

datafilter = dict(startyear='1500', endyear='2010', ctrlist='')
# Example of a restricted country list:
#datafilter['ctrlist'] = '528,14,18,67'

handle = 'hdl:10622/SO62N5'
switch = 'historical'
fullpath = '/home/dpe/tmp'

# Measure how long the compilation takes and print the whole seconds.
started = datetime.now()
(filetitle, fullpath, finalsubset) = dataframe_compiler(
    config, fullpath, handle, switch, datafilter)
finished = datetime.now()
elapsed = finished - started
print("Time: " + str(elapsed.seconds) + " seconds")
#print finalsubset.ix[67][1831]