# Shared imports for the download/sync helpers below (Python 2 era:
# urllib2, commands). config, datacommons, and filezip are project-local
# modules; username/password are assumed module-level credentials, and
# get_host()/consolidate() are project helpers defined elsewhere.
import os
import json
import commands
import urllib2
import dateutil.parser
from glob import glob
from datetime import timedelta
from subprocess import call
from pymongo import Connection  # pre-2.4 pymongo API

import config
import datacommons
import filezip


def usgs_get_sitedata(sites, type='instantaneous', params="{'format':'json'}", data_provider='USGS'):
    '''Fetch site data from the USGS web service cataloged for data_provider.
    Returns raw RDB text, or a JSON string of {source: {url, data}}.'''
    dcommons = datacommons.toolkit(username, password)
    records = dcommons.get_data('ows', {'spec': {'data_provider': data_provider}, 'fields': ['sources']})
    sources = records[0]['sources']
    result = {}
    # Example requests:
    # http://waterservices.usgs.gov/nwis/iv/?format=json&sites=07230000&period=P1D&parameterCd=00060,00065
    # http://waterservices.usgs.gov/nwis/iv/?format=json&sites=07230000&startDT=2013-02-01&endDT=2013-02-22&parameterCd=00060,00065
    for source, val in sources.items():
        if source == type:
            # params is a single-quoted JSON string; normalize and parse it
            param = json.loads(params.replace("'", '"'))
            temp = ''
            for k, v in param.items():
                temp = temp + k + '=' + v + '&'
            url = val['url'] + temp + 'sites=' + sites
            # Verify the remote file exists before downloading
            urlcheck = commands.getoutput("wget --spider '" + url + "' 2>&1| grep 'Remote file exists'")
            if urlcheck:
                try:
                    res = urllib2.urlopen(url)
                    if param['format'] == 'rdb':
                        return res.read()
                    data = json.loads(res.read())
                    result[source] = {'url': url, 'data': data}
                except Exception:
                    # Skip sources that fail to download or parse
                    pass
    return json.dumps(result, indent=2)
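# Usage sketch (hypothetical period/parameters; the 'ows' catalog must hold a
# USGS record whose 'instantaneous' source URL ends with '?'):
def _demo_usgs_get_sitedata():
    # One day of discharge (00060) and gage height (00065) for gauge 07230000
    return usgs_get_sitedata('07230000', type='instantaneous',
                             params="{'format':'json','period':'P1D','parameterCd':'00060,00065'}")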
def owrb_sync_geojson(data_type='groundwater', database=config.owrb_database, tmp_fldr='/data/owrb/', data_provider='OWRB', delete=True):
    '''Load OWRB shapefiles, convert them to GeoJSON, and store them on the
    static web server. Catalog the location so it is available to applications.'''
    dcommons = datacommons.toolkit(username, password)
    records = dcommons.get_data('ows', {'spec': {'data_provider': data_provider}}, showids=True)
    sources = records[0]['sources']
    result = {}
    if data_type not in sources:
        return json.dumps({'status': 'Error - unknown data_type',
                           'available data_types': list(sources)}, indent=2)
    for source, val in sources[data_type].items():
        # Download and unpack the zipped shapefile
        url = val['url']
        res = urllib2.urlopen(url)
        file_dl = tmp_fldr + source + '.zip'
        output = open(file_dl, 'wb')
        output.write(res.read())
        output.close()
        call(['unzip', '-o', file_dl, '-d', tmp_fldr + source])
        ows_url = []
        for shpfile in glob(tmp_fldr + source + '/*.shp'):
            outfile = shpfile.split('.')[0] + '.json'
            fname = os.path.basename(outfile)
            if os.path.exists(outfile):
                call(['rm', outfile])
            # Reproject to web mercator, convert to GeoJSON, then publish
            call(['ogr2ogr', '-f', 'GeoJSON', '-t_srs', 'EPSG:3857', outfile, shpfile])
            call(['scp', outfile, "[email protected]:/static/OklahomaWaterSurvey/OWRB/geojson/" + fname])
            data_url = 'http://static.cybercommons.org/OklahomaWaterSurvey/OWRB/geojson/' + fname
            ows_url.append(data_url)
        sources[data_type][source]['ows_url'] = ows_url
        result[source] = {'source': source, 'url': url, 'geojson': ows_url}
    # Persist the updated catalog record, now carrying the GeoJSON URLs
    dcommons.save('ows', records[0])
    return json.dumps(result, indent=2)
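# Usage sketch: mirror the cataloged OWRB groundwater shapefiles as GeoJSON.
# Assumes unzip, ogr2ogr, and passwordless scp to the static host are
# available, and that /data/owrb/ is writable.
def _demo_owrb_sync_geojson():
    return owrb_sync_geojson(data_type='groundwater')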
def save_csv(urls, path, name):
    dcommons = datacommons.toolkit(username, password)
    data, ordercol, head = filezip.meso2json(urls)
    f1 = open(os.path.join(path, name), 'w')
    f1.write(filezip.csvfile_processor(data, cols=ordercol, header=head))
    f1.close()
    host = get_host(dcommons)
    return os.path.join(path.replace(host['base_directory'], host['url']), name)
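# Usage sketch (hypothetical URL and path): convert downloaded Mesonet MTS
# files to one CSV under the host web root; the return value is its public URL.
def _demo_save_csv_mesonet():
    mts = ['http://www.mesonet.org/index.php/dataMdfMts/dataController/getFile/20130201acme/mts/TEXT/']
    return save_csv(mts, '/data/downloads', 'acme_20130201.csv')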
def save(path, source, data_items=[]):
    '''Base function for all source imports in the Download module.'''
    dcommons = datacommons.toolkit(username, password)
    consol_data = consolidate(data_items)
    sourcepath = os.path.join(path, 'OWRB', 'Monitor_Wells')
    call(['mkdir', '-p', sourcepath])
    urls = []
    database = config.owrb_database
    collection = config.owrb_MonitorWells_collection
    host = get_host(dcommons)
    urlbase = host['base_directory']
    # Alternate OWRB endpoint (unused):
    # owrb_url = "http://test.oklahomawatersurvey.org/mongo/db_find/ows/owrb_monitoring_wells/{'spec':{'site':'%s'},'field':['']}/?outtype=csv"
    meso_url = "http://www.mesonet.org/index.php/meteogram/data/owrb_text//stid/%s/year/%s/month/%s/day/%s/timelen/%sd/product/GH20/type/csv"
    db = Connection(config.mongo_host)
    for key, value in consol_data.items():
        if value['query']['webservice_type'] == 'mesonet':
            # Mesonet-hosted well: look up its Mesonet station id, then pull a
            # window of day_count days ending on endDT.
            filename = 'OWRB_MonitoringWell_mesonet%s.csv' % value['query']['sites']
            sitedata = db[database]['owrb_monitor_sites'].find_one({'WELL_ID': value['query']['sites']})
            mesosite = sitedata['mesonetID']
            start = dateutil.parser.parse(value['query']['startDT'])
            end = dateutil.parser.parse(value['query']['endDT'])
            day_count = (end - start).days + 1
            url = meso_url % (mesosite, end.strftime('%Y'), end.strftime('%m'),
                              end.strftime('%d'), day_count)
            f1 = open(os.path.join(sourcepath, filename), 'w')
            res = urllib2.urlopen(url)
            f1.write(res.read())
            f1.close()
        else:
            # Well measured directly by OWRB: export rows from the local MongoDB collection
            filename = 'OWRB_MonitoringWell_%s.csv' % value['query']['sites']
            f1 = open(os.path.join(sourcepath, filename), 'w')
            f1.write('site,date,measurement,unit,status,project\n')
            temp_tmpl = '%s,%s,%s,%s,%s,%s\n'
            for row in db[database][collection].find({'site': value['query']['sites']}).sort([('sort_date', -1)]):
                f1.write(temp_tmpl % (row['site'], row['observed_date'], row['value'],
                                      row['unit'], row['status'], row['project']))
            f1.close()
        urls.append(os.path.join(sourcepath.replace(urlbase, host['url']), filename))
    return urls
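# Usage sketch (hypothetical well ids/dates), assuming consolidate() keys the
# raw items by site and leaves each 'query' dict intact:
def _demo_save_monitor_wells():
    items = [{'query': {'sites': '110000', 'webservice_type': 'mesonet',
                        'startDT': '2013-02-01', 'endDT': '2013-02-22'}},
             {'query': {'sites': '120000', 'webservice_type': 'owrb'}}]
    return save('/data/downloads', 'OWRB', data_items=items)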
def save_sitedata(name, path, query, data_provider='USGS-Tools-TypeSet', default_format='rdb'):
    '''Load data from a USGS web service and store it on the local NGINX web
    server. Returns the URL of the saved file.'''
    # Load source web-service metadata from the data catalog
    dcommons = datacommons.toolkit(username, password)
    sources = dcommons.get_data('ows', {'spec': {'data_provider': data_provider}})[0]
    # Get host information (NGINX root and URL) from the metadata catalog
    host = None
    hosts = dcommons.get_data('ows', {'spec': {'data_provider': 'APP_HOSTS'}, 'fields': ['sources']})[0]['sources']
    for item in (item for item in hosts if item['host'] == os.uname()[1]):
        host = item
    if not host:
        raise Exception('No host specified; please update the catalog')
    sites = query['sites']
    params = query.copy()
    if 'format' not in params:
        params['format'] = default_format  # default output format
    params.pop('sites')
    # Look up the web-service endpoint for the requested query type
    metadata = sources[query['webservice_type']]
    qtype = query['webservice_type']
    params.pop('webservice_type')
    if qtype == 'qw':
        # The water-quality service does not take these parameters
        params.pop('parameterCd')
        params.pop('format')
    temp = ''
    for k, v in params.items():
        if temp == '':
            temp = '%s=%s' % (k, v)
        else:
            temp = '%s&%s=%s' % (temp, k, v)
    if qtype == 'qw':
        url = metadata['webservice'] + temp
    else:
        url = metadata['webservice'] + temp + '&sites=' + sites
    # Verify the remote file exists before downloading
    urlcheck = commands.getoutput("wget --spider '" + url + "' 2>&1| grep 'Remote file exists'")
    if not urlcheck:
        raise Exception('URL ERROR: ' + url)
    res = urllib2.urlopen(url)
    filename = '%s.txt' % name
    f1 = open(os.path.join(path, filename), 'w')
    f1.write(res.read())
    f1.close()
    urlbase = host['base_directory']
    return os.path.join(path.replace(urlbase, host['url']), filename)
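# Usage sketch (hypothetical query; 'dv' stands in for whichever non-'qw' key
# the USGS-Tools-TypeSet catalog record defines, and the running host must
# appear in the APP_HOSTS record):
def _demo_save_sitedata_usgs():
    q = {'webservice_type': 'dv', 'sites': '07230000',
         'startDT': '2013-02-01', 'endDT': '2013-02-22', 'parameterCd': '00060'}
    return save_sitedata('usgs_07230000', '/data/downloads', q)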
def save_reports(path, query):
    '''Download the report files listed in query['special'] and return their
    public URLs.'''
    # Load host information from the metadata catalog
    dcommons = datacommons.toolkit(username, password)
    host = get_host(dcommons)
    urlbase = host['base_directory']
    rpts = query['special']
    newpath = '%s/%s' % (path, 'reports')
    call(['mkdir', '-p', newpath])
    urls = []
    for key, val in rpts.items():
        call(['wget', '-P', newpath, val])
        name = val.split('/')[-1]
        # Files land under newpath, so derive the public URL from it
        urls.append(os.path.join(newpath.replace(urlbase, host['url']), name))
    return urls
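# Usage sketch (hypothetical report URL): query['special'] maps report names to
# downloadable files.
def _demo_save_reports():
    q = {'special': {'annual': 'http://example.org/reports/annual_report.pdf'}}
    return save_reports('/data/downloads', q)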
def save_csv(url, path, query):
    '''Convert a saved USGS RDB file to CSV next to the source file. Returns
    the public URL of the CSV, or None for 'ad' queries.'''
    if query['webservice_type'] == 'ad':
        return None
    dcommons = datacommons.toolkit(username, password)
    if query['webservice_type'] != 'qw':
        data, ordercol, head = filezip.rdb2json(url)
    else:
        # Water-quality ('qw') sources use the skip='no' parsing path
        data, ordercol, head = filezip.rdb2json(url, skip='no')
    fileName, fileExtension = os.path.splitext(url.split('/')[-1])
    filename = fileName + '.csv'
    f1 = open(os.path.join(path, filename), 'w')
    f1.write(filezip.csvfile_processor(data, cols=ordercol, header=head))
    f1.close()
    host = get_host(dcommons)
    return os.path.join(path.replace(host['base_directory'], host['url']), filename)
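# Usage sketch (hypothetical URL/path): derive a CSV from a previously saved
# RDB file; 'dv' again stands in for any non-'qw', non-'ad' webservice_type.
def _demo_save_csv_rdb():
    return save_csv('http://downloads.example.org/usgs_07230000.txt',
                    '/data/downloads', {'webservice_type': 'dv'})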
def save(path, source, data_items=[]):
    '''Base function for all source imports in the Download module.
    Variant of save() above that reads from config.owrb_well_collection.'''
    dcommons = datacommons.toolkit(username, password)
    consol_data = consolidate(data_items)
    sourcepath = os.path.join(path, 'OWRB', 'Monitor_Wells')
    call(['mkdir', '-p', sourcepath])
    urls = []
    database = config.owrb_database
    collection = config.owrb_well_collection
    host = get_host(dcommons)
    urlbase = host['base_directory']
    meso_url = "http://www.mesonet.org/index.php/meteogram/data/owrb_text//stid/%s/year/%s/month/%s/day/%s/timelen/%sd/product/GH20/type/csv"
    db = Connection(config.mongo_host)
    for key, value in consol_data.items():
        if value['query']['webservice_type'] == 'mesonet':
            filename = 'OWRB_MonitoringWell_mesonet%s.csv' % value['query']['sites']
            sitedata = db[database]['owrb_monitor_sites'].find_one({'WELL_ID': value['query']['sites']})
            mesosite = sitedata['mesonetID']
            start = dateutil.parser.parse(value['query']['startDT'])
            end = dateutil.parser.parse(value['query']['endDT'])
            day_count = (end - start).days + 1
            url = meso_url % (mesosite, end.strftime('%Y'), end.strftime('%m'),
                              end.strftime('%d'), day_count)
            f1 = open(os.path.join(sourcepath, filename), 'w')
            res = urllib2.urlopen(url)
            f1.write(res.read())
            f1.close()
        else:
            filename = 'OWRB_MonitoringWell_%s.csv' % value['query']['sites']
            f1 = open(os.path.join(sourcepath, filename), 'w')
            f1.write('site,date,measurement,unit,status,project\n')
            temp_tmpl = '%s,%s,%s,%s,%s,%s\n'
            for row in db[database][collection].find({'site': value['query']['sites']}).sort([('sort_date', -1)]):
                f1.write(temp_tmpl % (row['site'], row['observed_date'], row['value'],
                                      row['unit'], row['status'], row['project']))
            f1.close()
        urls.append(os.path.join(sourcepath.replace(urlbase, host['url']), filename))
    return urls
def save(path, source, data_items=[]):
    '''Base function for all source imports in the Download module.'''
    dcommons = datacommons.toolkit(username, password)
    counties = consolidate(data_items)
    sourcepath = os.path.join(path, source)
    call(['mkdir', '-p', sourcepath])
    database = config.owrb_database
    collection = config.owrb_welllog_collection
    # Query the well-log collection for all requested counties, fetched as CSV
    url = ("http://test.oklahomawatersurvey.org/mongo/db_find/" + database + "/" + collection +
           "/{'spec':{'COUNTY':{'$in':" + str(counties).replace("', '", "','") + "}}}/?outtype=csv")
    res = urllib2.urlopen(url)
    filename = 'OWRB_WellLogs.csv'
    f1 = open(os.path.join(sourcepath, filename), 'w')
    f1.write(res.read())
    f1.close()
    host = get_host(dcommons)
    urlbase = host['base_directory']
    urls = os.path.join(sourcepath.replace(urlbase, host['url']), filename)
    return urls
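# Usage sketch (hypothetical counties), assuming consolidate() reduces
# data_items to a list of county names matched against the COUNTY field:
def _demo_save_well_logs():
    return save('/data/downloads', 'OWRB_WellLogs',
                data_items=['CLEVELAND', 'OKLAHOMA'])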
def save_sitedata(path, query):
    url_tmpl = 'http://www.mesonet.org/index.php/dataMdfMts/dataController/getFile/%s%s/mts/TEXT/'
    # Load host information from the metadata catalog
    dcommons = datacommons.toolkit(username, password)
    host = get_host(dcommons)
    urlbase = host['base_directory']
    start = dateutil.parser.parse(query['startDT'])
    end = dateutil.parser.parse(query['endDT'])
    site = query['site_no'].lower()
    day_count = (end - start).days + 1
    # Build one MTS URL per day in the requested range
    rpts = []
    for single_date in (start + timedelta(n) for n in range(day_count)):
        rpts.append(url_tmpl % (single_date.strftime('%Y%m%d'), site))
    newpath = '%s/%s' % (path, query['sites'])
    call(['mkdir', '-p', newpath])
    urls = []
    for url in rpts:
        name = url.split('/')[-4]
        result_path = '%s/%s.txt' % (newpath, name)
        call(['wget', '-O', result_path, url])
        urls.append(result_path.replace(urlbase, host['url']))
    return urls
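# Usage sketch (hypothetical station/dates): one MTS text file per day for the
# ACME station; note the query needs both 'site_no' (lower-cased into the URL)
# and 'sites' (names the output folder).
def _demo_save_sitedata_mesonet():
    q = {'site_no': 'ACME', 'sites': 'ACME',
         'startDT': '2013-02-01', 'endDT': '2013-02-03'}
    return save_sitedata('/data/downloads', q)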