def sites_occ_update(database=site_database, collection=config.occ_site_collection):  #, delete=True
    '''**************************
    Task to update OCC Sites
    '''
    db = Connection(mongoHost)
    #backup collection
    now = datetime.now()
    #get rtree spatial index and data object
    idx, data = gis_tools.ok_watershed_aquifer_rtree()
    for rec in db[database][collection].find():
        #set watershed and aquifer
        try:
            x, y = gis_tools.transform_point(rec['Lat'], rec['Long'])
            hits = list(idx.intersection((x, y, x, y)))
            aPoint = Point(x, y)
            rec['watersheds'] = []
            rec['aquifers'] = []
            rec['huc_8'] = ""
            rec['huc_4'] = ""
            row_data = set_geo(rec, aPoint, hits, data)
            #Save site data
            db[database][collection].save(row_data)
        except Exception as err:
            pass
    return {'source': 'occ',
            'database': database,
            'collection': collection,
            'record_count': db[database][collection].count()}
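#The tasks in this module share the same spatial lookup pattern: project the site
#coordinates, query the rtree index with a degenerate (point) bounding box to get
#candidate feature ids, then hand the candidates to set_geo() (defined elsewhere in
#this module) for exact shapely containment tests that fill in the watershed and
#aquifer fields. Below is a minimal sketch of that candidate-then-contains step,
#assuming the data object maps a feature id to a dict holding a shapely 'geom';
#the real structure returned by gis_tools.ok_watershed_aquifer_rtree() may differ.
def _point_lookup_sketch(idx, data, x, y):
    from shapely.geometry import Point
    point = Point(x, y)
    #bbox query only narrows to candidates whose bounding boxes touch the point
    hits = list(idx.intersection((x, y, x, y)))
    #exact point-in-polygon test on each candidate
    return [data[i] for i in hits if data[i]['geom'].contains(point)]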
def owrb_set_geo_latest(database=config.owrb_database,
                        collection=config.owrb_site_collection,
                        collection_data=config.owrb_well_collection):
    '''Task to set watershed/aquifer fields and last activity date for OWRB sites'''
    db = Connection(config.mongo_host)
    #get rtree spatial index and data object
    idx, data = gis_tools.ok_watershed_aquifer_rtree()
    total_ct = 0
    update_ct = 0
    for site in db[database][collection].find():
        site['watersheds'] = []
        site['aquifers'] = []
        site['huc_4'] = ''
        site['huc_8'] = ''
        site['aquifer'] = ''
        site['last_activity'] = ''
        total_ct += 1
        #set watershed and aquifer
        try:
            try:
                #latest observation date for this well
                dates = db[database][collection_data].find(
                    {'site': str(site['WELL_ID'])}).distinct('observed_date')
                days = set([])
                for day in dates:
                    date_object = datetime.strptime(day, '%m/%d/%Y %I:%M %p')
                    days.add(date_object.strftime("%Y-%m-%d"))
                #sorted() returns the new list; list.sort() would return None
                ldays = sorted(days)
                site['last_activity'] = ldays[-1]
            except:
                pass
            x, y = gis_tools.transform_point(site['LATITUDE'], site['LONGITUDE'])
            hits = list(idx.intersection((x, y, x, y)))
            aPoint = Point(x, y)
            row_data = set_geo(site, aPoint, hits, data)
            update_ct += 1
            db[database][collection].save(row_data)
        except:
            db[database][collection].save(site)
    return {'total_count': total_ct, 'updated_count': update_ct}
def sites_usgs_wq(database=site_database, collection='usgs_wq_site', delete=True):
    ''' Task to update USEPA and USGS Water Quality sites '''
    db = Connection(mongoHost)
    #backup collection
    now = datetime.now()
    collection_backup = "%s_%s" % (collection, now.strftime("%Y_%m_%d_%H%M%S"))
    #get rtree spatial index and data object
    idx, geo_data = gis_tools.ok_watershed_aquifer_rtree()
    #if delete:
    #    db[database][collection].remove()
    url_site = config.wqp_site
    sites = urllib2.urlopen(url_site)
    output = StringIO.StringIO(sites.read())
    head = output.readline()
    head = head.replace('/', '-').strip('\r\n').split(',')
    reader = csv.DictReader(output, head)
    #set up metadata dataframe from the statewide WQP result download
    if os.path.isfile("%s/temp.zip" % config.wqp_tmp):
        os.remove("%s/temp.zip" % config.wqp_tmp)
    if os.path.isfile("%s/Result.csv" % config.wqp_tmp):
        os.remove("%s/Result.csv" % config.wqp_tmp)
    url_results = config.wqp_result_ok_all
    location = gis_tools.save_download(url_results, "%s/temp.zip" % config.wqp_tmp, compress='zip')
    df = pd.read_csv("%s/Result.csv" % location, error_bad_lines=False)
    metadata_types = set([])
    metadata_analytes = set([])
    for rec in reader:
        rec['watersheds'] = []
        rec['aquifers'] = []
        #strip non-printable characters and stray inverted question marks
        rec["MonitoringLocationDescriptionText"] = filter(
            lambda x: x in string.printable, rec["MonitoringLocationDescriptionText"])
        rec["MonitoringLocationDescriptionText"] = rec["MonitoringLocationDescriptionText"].replace("\\u00bf***", "")
        rec["MonitoringLocationDescriptionText"] = rec["MonitoringLocationDescriptionText"].replace("\u00bf***", "")
        rec["MonitoringLocationDescriptionText"] = rec["MonitoringLocationDescriptionText"].replace("\\u00bf", "")
        rec["MonitoringLocationDescriptionText"] = rec["MonitoringLocationDescriptionText"].replace("\u00bf", "")
        metadata_types.add(rec['MonitoringLocationTypeName'].strip(' \t\n\r'))
        try:
            dfloc = df[df.MonitoringLocationIdentifier == rec['MonitoringLocationIdentifier']]
            if len(dfloc.index) != 0:
                rec['last_activity'] = dfloc['ActivityStartDate'].max()
                data = []
                grouped = dfloc.groupby('CharacteristicName')
                for idxs, row in grouped.agg([np.min, np.max]).iterrows():
                    metadata_analytes.add(idxs)
                    try:
                        pcode = "%05d" % row['USGSPCode']['amin']
                    except:
                        pcode = ''
                    data.append({'name': idxs,
                                 'begin_date': row['ActivityStartDate']['amin'],
                                 'end_date': row['ActivityStartDate']['amax'],
                                 'parm_cd': pcode,
                                 'units': row['ResultMeasure/MeasureUnitCode']['amin']})
                rec['parameter'] = data
            else:
                #if no data, skip site
                continue
        except:
            #if no data skip; may need to look at this in the future
            continue
        try:
            x, y = gis_tools.transform_point(rec['LatitudeMeasure'], rec['LongitudeMeasure'])
            hits = list(idx.intersection((x, y, x, y)))
            aPoint = Point(x, y)
            rec = set_geo(rec, aPoint, hits, geo_data)
        except:
            #Legacy code inserted sites without lat/lon and hid errors in code;
            #decided to just skip instead. May change back in the future
            continue
        #insert only when geo and parameters are set
        db[database][collection_backup].insert(rec)
    #update the catalog metadata record
    rec = db[database]['catalog'].find_one({'metadata_source': 'wqp'})
    if rec:
        rec['types'] = list(metadata_types)      #store as lists so mongo can encode them
        rec['parameters'] = list(metadata_analytes)
    else:
        rec = {'metadata_source': 'wqp',
               'types': list(metadata_types),
               'parameters': list(metadata_analytes)}
    db[database]['catalog'].save(rec)
    return json.dumps({'source': 'usgs_wq',
                       'url': url_site,
                       'database': database,
                       'collection': collection_backup}, indent=2)
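#Small, self-contained illustration of the pandas aggregation used above to build
#the per-analyte 'parameter' metadata: group the WQP results by CharacteristicName
#and take the min/max ActivityStartDate. The sample values are made up; the real
#frame comes from the downloaded Result.csv. Not called anywhere; kept as a reference.
def _wqp_parameter_agg_sketch():
    import pandas as pd
    import numpy as np
    sample = pd.DataFrame({
        'CharacteristicName': ['Nitrate', 'Nitrate', 'pH'],
        'ActivityStartDate': ['2010-05-01', '2012-07-15', '2011-01-10'],
        'ResultMeasure/MeasureUnitCode': ['mg/l', 'mg/l', 'std units'],
    })
    grouped = sample.groupby('CharacteristicName').agg([np.min, np.max])
    params = []
    for name, row in grouped.iterrows():
        #the 'amin'/'amax' column labels come from the np.min/np.max function names
        params.append({'name': name,
                       'begin_date': row['ActivityStartDate']['amin'],
                       'end_date': row['ActivityStartDate']['amax'],
                       'units': row['ResultMeasure/MeasureUnitCode']['amin']})
    return params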
def sites_usgs_update(database=site_database,
                      collection=config.usgs_site_collection,
                      ws_url=config.usgs_site_url):  #, delete=True
    '''**************************
    Task to update USGS Sites
    '''
    db = Connection(mongoHost)
    #backup collection
    now = datetime.now()
    collection_backup = "%s_%s" % (collection, now.strftime("%Y_%m_%d_%H%M%S"))
    url = ws_url
    f1_i = urllib2.urlopen(url + 'inactive')
    f2_a = urllib2.urlopen(url + 'active')
    f1_in = StringIO.StringIO(f1_i.read())
    f2_act = StringIO.StringIO(f2_a.read())
    #skip the '#' comment lines and read the tab-delimited header row from both files
    temp = '#'
    head = ''
    while temp[0] == "#":
        temp = f2_act.readline()
        f1_in.readline()
        if temp[0] != '#':
            head = temp.strip('\r\n').split('\t')
            head.append('status')
    #skip the column-width row that follows the header
    f2_act.readline()
    f1_in.readline()
    #get rtree spatial index and data object
    idx, data = gis_tools.ok_watershed_aquifer_rtree()
    #USGS Active sites
    for row in f2_act:
        temp = row.strip('\r\n').split('\t')
        temp.append('Active')
        rec = dict(zip(head, temp))
        rec['watersheds'] = []
        rec['aquifers'] = []
        try:
            row_data = rec
            #set webservices
            try:
                (row_data['webservice'], row_data['parameter'],
                 row_data['last_activity']) = get_webservice(row_data['site_no'], db)
            except:
                row_data['webservice'] = []
                row_data['parameter'] = []
                row_data['last_activity'] = ''
            #Check if data is available
            if len(row_data['parameter']) == 0:
                continue
            #set watershed and aquifer
            try:
                x, y = gis_tools.transform_point(rec['dec_lat_va'], rec['dec_long_va'])
                hits = list(idx.intersection((x, y, x, y)))
                aPoint = Point(x, y)
                row_data = set_geo(rec, aPoint, hits, data)
            except:
                pass
            #Save site data
            db[database][collection_backup].insert(row_data)
        except:
            #Legacy code inserted sites without lat/lon and hid errors in code;
            #decided to just pass instead. May change back in the future
            pass
    #USGS Inactive sites
    for row in f1_in:
        temp = row.strip('\r\n').split('\t')
        temp.append('Inactive')
        rec = dict(zip(head, temp))
        rec['watersheds'] = []
        rec['aquifers'] = []
        try:
            row_data = rec
            #set webservices
            try:
                (row_data['webservice'], row_data['parameter'],
                 row_data['last_activity']) = get_webservice(row_data['site_no'], db)
            except:
                row_data['webservice'] = []
                row_data['parameter'] = []
                row_data['last_activity'] = ''
            #Check if data is available
            if len(row_data['parameter']) == 0:
                continue
            #set watershed and aquifer
            x, y = gis_tools.transform_point(rec['dec_lat_va'], rec['dec_long_va'])
            hits = list(idx.intersection((x, y, x, y)))
            aPoint = Point(x, y)
            row_data = set_geo(rec, aPoint, hits, data)
            #save site data
            db[database][collection_backup].insert(row_data)
        except:
            #Legacy code inserted sites without lat/lon and hid errors in code;
            #decided to just pass instead. May change back in the future
            pass
    return {'source': 'usgs',
            'url': [url + 'inactive', url + 'active'],
            'database': database,
            'collection': collection_backup,
            'record_count': db[database][collection_backup].count()}
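#Small illustration of the USGS RDB header handling used in sites_usgs_update():
#the site files begin with '#' comment lines, followed by a tab-delimited header
#row and then a one-line column-width row (e.g. '5s', '15s', ...) that must also
#be skipped before the data rows start. The RDB text below is made up; the real
#files are fetched from config.usgs_site_url.
def _rdb_header_sketch():
    import StringIO
    rdb = ("# comment line\n"
           "# another comment line\n"
           "agency_cd\tsite_no\tstation_nm\n"
           "5s\t15s\t50s\n"
           "USGS\t07164500\tARKANSAS RIVER AT TULSA OK\n")
    f = StringIO.StringIO(rdb)
    line = '#'
    head = []
    while line[0] == '#':
        line = f.readline()
        if line[0] != '#':
            head = line.strip('\r\n').split('\t')
    f.readline()  #skip the column-width row
    return [dict(zip(head, row.strip('\r\n').split('\t'))) for row in f]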