def main():
    """Command-line driver: fetch files from a Dataverse and load extracted data.

    Options (matching the getopt spec below):
      -H  handle of a single file to process
      -r  root handle: process the files of the datasets in the branch dataverse
      -d  override the Dataverse root URL from the configuration
      -k  override the API key from the configuration
      -D  override the dataverse (branch) name from the configuration

    Exits with status 2 on a bad command line.
    """
    config = configuration()

    try:
        myopts, args = getopt.getopt(sys.argv[1:], "H:r:d:k:D:")
    except getopt.GetoptError as e:
        print(str(e))
        # Fixed: the old usage string described options (-y/-f/-o) that the
        # getopt spec above does not accept.
        print("Usage: %s -H handle -r roothandle -d dataverseurl -k apikey -D dataversename"
              % sys.argv[0])
        sys.exit(2)

    (handle, rhandle, dataverse, customkey, customdv) = ('', '', '', '', '')
    for o, a in myopts:
        if o == '-H':
            handle = a
        if o == '-r':
            rhandle = a
        if o == '-d':
            dataverse = a
        if o == '-k':
            customkey = a
        if o == '-D':
            customdv = a

    # Defaults come from the configuration; command-line overrides win.
    path = config['path']
    root = config['dataverseroot']
    key = config['key']
    dvname = config['branch']
    if dataverse:
        root = dataverse
    if customkey:
        key = customkey
    if customdv:
        dvname = customdv

    files = []
    if rhandle:
        contentsapi = root + "/api/dataverses/" + dvname + "/contents?key=" + key
        print(contentsapi)
        newdata = load_api_data(contentsapi, '')
        for item in newdata['data']:
            # NOTE(review): each iteration replaces `files`, so only the last
            # dataverse's files survive -- preserved as-is; confirm intent.
            files = getfiles(root, item['id'], key)

    if handle:
        print(handle)
        (datahandle, datasetID, fileID) = parsehandle(handle)
        files.append(fileID)

    for fileID in files:
        fullpath = downloadfile(root, path, fileID, key)
        print(fullpath)
        (pid, revid, cliohandle, clearpid) = findpid(handle)
        if pid:
            handle = pid
            jsonfile = dataextractor(fullpath, path, pid, fileID)
            if jsonfile:
                title = 'Test'
                datasetadd(jsonfile, clearpid, handle, title)
                print(handle)
                print(clearpid)
def main():
    """Command-line driver: fetch Dataverse files, extract their data and store it.

    Options (matching the getopt spec below):
      -H  handle of a single file to process
      -r  root handle: process the files of the datasets in the branch dataverse
      -d  override the Dataverse root URL from the configuration
      -k  override the API key from the configuration
      -D  override the dataverse (branch) name from the configuration

    Exits with status 2 on a bad command line.
    """
    config = configuration()

    try:
        myopts, args = getopt.getopt(sys.argv[1:], "H:r:d:k:D:")
    except getopt.GetoptError as e:
        print(str(e))
        # Fixed: the old usage string described options (-y/-f/-o) that the
        # getopt spec above does not accept.
        print("Usage: %s -H handle -r roothandle -d dataverseurl -k apikey -D dataversename"
              % sys.argv[0])
        sys.exit(2)

    (handle, rhandle, dataverse, customkey, customdv) = ('', '', '', '', '')
    for o, a in myopts:
        if o == '-H':
            handle = a
        if o == '-r':
            rhandle = a
        if o == '-d':
            dataverse = a
        if o == '-k':
            customkey = a
        if o == '-D':
            customdv = a

    # Defaults come from the configuration; command-line overrides win.
    path = config['path']
    root = config['dataverseroot']
    key = config['key']
    dvname = config['branch']
    title = 'Title'
    units = 'Units'
    if dataverse:
        root = dataverse
    if customkey:
        key = customkey
    if customdv:
        dvname = customdv

    files = []
    if rhandle:
        contentsapi = root + "/api/dataverses/" + dvname + "/contents?key=" + key
        print(contentsapi)
        newdata = load_api_data(contentsapi, '')
        for item in newdata['data']:
            # NOTE(review): each iteration replaces `files`, so only the last
            # dataverse's files survive -- preserved as-is; confirm intent.
            files = getfiles(root, item['id'], key)

    if handle:
        print(handle)
        (datahandle, datasetID, fileID) = parsehandle(handle)
        files.append(fileID)

    for fileID in files:
        fullpath = downloadfile(root, path, fileID, key)
        print(fullpath)
        (pid, revid, cliohandle, clearpid) = findpid(handle)
        (jsonfile, csvfile) = ('', '')
        if pid:
            handle = pid
            try:
                (jsonfile, csvfile, tmptitle, tmpunits) = dataextractor(
                    fullpath, path, pid, fileID)
            except Exception:
                # Fixed: was a bare `except:`, which also swallowed
                # SystemExit/KeyboardInterrupt. On extraction failure fall
                # back to the PhantomJS-based Excel validator.
                resultfile = config['tmpdir'] + "/" + fileID
                (jsonfile, csvfile, tmptitle, tmpunits) = excelvalidator(
                    config['phantompath'], fullpath, resultfile, config['tmpdir'])

            if jsonfile:
                # Replace any previously stored dataset for this handle.
                remove = removedata('datasets', 'handle', clearpid)
                try:
                    title = str(tmptitle)
                    units = str(tmpunits)
                except Exception:
                    # str() may fail here (e.g. non-ASCII titles under
                    # Python 2); keep the 'Title'/'Units' defaults then.
                    pass
                print("ADD " + str(jsonfile))
                # NOTE(review): datasetID is only bound when -H was given;
                # the -r-only path would raise NameError here -- confirm.
                datasetadd(jsonfile, csvfile, clearpid, handle, title, units,
                           datasetID)
                print(handle)
                print(clearpid)
                print(datasetID)
icoder.columns, coderyears, finalsubset, isyear, ctrfilter) b = datetime.now() d = b - a print "Time: " + str(d.seconds) + " seconds" return datafile for ffile in onlyfiles: pid = 'clio' fullpath = mypath + '/' + ffile isexcel = re.match('(.+)\.xls', ffile) if isexcel: newfile = isexcel.group(1) + "-historical.xlsx" fulloutfile = outdir + "/" + newfile print fulloutfile pid = newfile handle = pid print handle (jsonfile, csvfile, tmptitle, tmpunits) = dataextractor(fullpath, path, pid, fileID) (dataset, title, units) = compiledataset(csvfile) switch = 'historical' (maindata, metadata, coder) = geocoding(switch, dataset, modern, historical) (moderndata, historicaldata) = loadgeocoder(config, maindata, '') maindata = conversion(maindata, moderndata, historicaldata) print metadata[handle]['title'] outfile = store_dataset(fulloutfile, maindata, metadata, coder) print outfile
if datafilter['ctrlist']: tmpcoder = icoder.ix[ctrlist] icoder = pd.DataFrame(tmpcoder) datafile = create_excel_dataset(fullpath, icoder, metadata[handle], icoder.columns, coderyears, finalsubset, isyear, ctrfilter) b = datetime.now() d = b - a print "Time: " + str(d.seconds) + " seconds" return datafile for ffile in onlyfiles: pid = 'clio' fullpath = mypath + '/' + ffile isexcel = re.match('(.+)\.xls', ffile) if isexcel: newfile = isexcel.group(1) + "-historical.xlsx" fulloutfile = outdir + "/" + newfile print fulloutfile pid = newfile handle = pid print handle (jsonfile, csvfile, tmptitle, tmpunits) = dataextractor(fullpath, path, pid, fileID) (dataset, title, units) = compiledataset(csvfile) switch = 'historical' (maindata, metadata, coder) = geocoding(switch, dataset, modern, historical) (moderndata, historicaldata) = loadgeocoder(config, maindata, '') maindata = conversion(maindata, moderndata, historicaldata) print metadata[handle]['title'] outfile = store_dataset(fulloutfile, maindata, metadata, coder) print outfile
def main():
    """Entry point: download files from a Dataverse, extract data, store results.

    Options (matching the getopt spec below):
      -H  handle of a single file to process
      -r  root handle: process the files of the datasets in the branch dataverse
      -d  override the Dataverse root URL from the configuration
      -k  override the API key from the configuration
      -D  override the dataverse (branch) name from the configuration

    Exits with status 2 on a bad command line.
    """
    config = configuration()

    try:
        myopts, args = getopt.getopt(sys.argv[1:], "H:r:d:k:D:")
    except getopt.GetoptError as e:
        print(str(e))
        # Fixed: the previous usage string advertised options (-y/-f/-o)
        # that the getopt spec above does not accept.
        print("Usage: %s -H handle -r roothandle -d dataverseurl -k apikey -D dataversename"
              % sys.argv[0])
        sys.exit(2)

    (handle, rhandle, dataverse, customkey, customdv) = ('', '', '', '', '')
    for o, a in myopts:
        if o == '-H':
            handle = a
        if o == '-r':
            rhandle = a
        if o == '-d':
            dataverse = a
        if o == '-k':
            customkey = a
        if o == '-D':
            customdv = a

    # Configuration supplies the defaults; CLI flags override them.
    path = config['path']
    root = config['dataverseroot']
    key = config['key']
    dvname = config['branch']
    title = 'Title'
    units = 'Units'
    if dataverse:
        root = dataverse
    if customkey:
        key = customkey
    if customdv:
        dvname = customdv

    files = []
    if rhandle:
        contentsapi = root + "/api/dataverses/" + dvname + "/contents?key=" + key
        print(contentsapi)
        newdata = load_api_data(contentsapi, '')
        for item in newdata['data']:
            # NOTE(review): `files` is reassigned on every pass, so only the
            # last dataverse's files remain -- preserved as-is; confirm intent.
            files = getfiles(root, item['id'], key)

    if handle:
        print(handle)
        (datahandle, datasetID, fileID) = parsehandle(handle)
        files.append(fileID)

    for fileID in files:
        fullpath = downloadfile(root, path, fileID, key)
        print(fullpath)
        (pid, revid, cliohandle, clearpid) = findpid(handle)
        (jsonfile, csvfile) = ('', '')
        if pid:
            handle = pid
            try:
                (jsonfile, csvfile, tmptitle, tmpunits) = dataextractor(
                    fullpath, path, pid, fileID)
            except Exception:
                # Fixed: was a bare `except:`, which also swallowed
                # SystemExit/KeyboardInterrupt. On extraction failure fall
                # back to the PhantomJS-based Excel validator.
                resultfile = config['tmpdir'] + "/" + fileID
                (jsonfile, csvfile, tmptitle, tmpunits) = excelvalidator(
                    config['phantompath'], fullpath, resultfile, config['tmpdir'])

            if jsonfile:
                # Drop any previously stored dataset for this handle first.
                remove = removedata('datasets', 'handle', clearpid)
                try:
                    title = str(tmptitle)
                    units = str(tmpunits)
                except Exception:
                    # str() may fail here (e.g. non-ASCII titles under
                    # Python 2); keep the 'Title'/'Units' defaults then.
                    pass
                print("ADD " + str(jsonfile))
                # NOTE(review): datasetID is only bound when -H was given;
                # the -r-only path would raise NameError here -- confirm.
                datasetadd(jsonfile, csvfile, clearpid, handle, title, units,
                           datasetID)
                print(handle)
                print(clearpid)
                print(datasetID)