Example #1
0
def sites_occ_update(database=site_database, collection=config.occ_site_collection):  #,delete=True):
    '''**************************
        Task to update OCC 
        Sites
    '''
    db = Connection(mongoHost)
    #backup collection
    now = datetime.now()
    
    #get rtree spatial index and data object
    idx, data = gis_tools.ok_watershed_aquifer_rtree()
    for rec in db[database][collection].find():
        #set watershed and aquifer
        try:
            x, y = gis_tools.transform_point(rec['Lat'], rec['Long'])
            hits = list(idx.intersection((x, y, x, y)))  #, objects=True)) #[0]  #[0].object
            aPoint = Point(x, y)
            rec['watersheds'] = []
            rec['aquifers'] = []
            rec['huc_8'] = ""
            rec['huc_4'] = ""
            row_data = set_geo(rec, aPoint, hits, data)
            #Save site data
            db[database][collection].save(row_data)
        except Exception as err:
            pass
    return {'source': 'occ', 'database': database, 'collection': collection,
            'record_count': db[database][collection].count()}
Example #2
0
def sites_occ_update(database=site_database,
                     collection=config.occ_site_collection):  #,delete=True):
    '''**************************
        Task to update OCC 
        Sites
    '''
    db = Connection(mongoHost)
    #backup collection
    now = datetime.now()

    #get rtree spatial index and data object
    idx, data = gis_tools.ok_watershed_aquifer_rtree()
    for rec in db[database][collection].find():
        #set watershed and aquifer
        try:
            x, y = gis_tools.transform_point(rec['Lat'], rec['Long'])
            hits = list(idx.intersection(
                (x, y, x, y)))  #, objects=True)) #[0]  #[0].object
            aPoint = Point(x, y)
            rec['watersheds'] = []
            rec['aquifers'] = []
            rec['huc_8'] = ""
            rec['huc_4'] = ""
            row_data = set_geo(rec, aPoint, hits, data)
            #Save site data
            db[database][collection].save(row_data)
        except Exception as err:
            pass
    return {
        'source': 'occ',
        'database': database,
        'collection': collection,
        'record_count': db[database][collection].count()
    }
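
A note on the spatial lookup both versions share: a point query against an R-tree is expressed as a degenerate bounding box (x, y, x, y). Below is a minimal, self-contained sketch of that pattern using the rtree and shapely packages; the polygons dict is a hypothetical stand-in for the index built by gis_tools.ok_watershed_aquifer_rtree(), whose internals are not shown in this code.

from rtree import index
from shapely.geometry import Point, box

#hypothetical stand-in for the watershed/aquifer index: id -> geometry
polygons = {0: box(0, 0, 10, 10), 1: box(5, 5, 15, 15)}
idx = index.Index()
for pid, geom in polygons.items():
    idx.insert(pid, geom.bounds)

x, y = 7.0, 7.0
#a point is queried as a zero-area bounding box (minx, miny, maxx, maxy)
hits = list(idx.intersection((x, y, x, y)))
#bounding-box hits are candidates only; an exact containment test against
#the real geometries (presumably what set_geo() does) confirms them
aPoint = Point(x, y)
matches = [pid for pid in hits if polygons[pid].contains(aPoint)]
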
Example #3
0
def owrb_set_geo_latest(database=config.owrb_database, collection=config.owrb_site_collection,
                        collection_data=config.owrb_well_collection):
    db = Connection(config.mongo_host)

    #get rtree spatial index and data object
    idx, data = gis_tools.ok_watershed_aquifer_rtree()

    total_ct = 0
    update_ct = 0

    for site in db[database][collection].find():
        site['watersheds'] = []
        site['aquifers'] = []
        site["huc_4"] = ''
        site["huc_8"] = ''
        site["aquifer"] = ''
        site['last_activity'] = ''
        total_ct += 1

        #set watershed and aquifer
        try:
            try:
                dates = db[database][collection_data].find({'site': str(site['WELL_ID'])}).distinct('observed_date')
                days = set([])
                for day in dates:
                    date_object = datetime.strptime(day, '%m/%d/%Y %I:%M %p')
                    days.add(date_object.strftime("%Y-%m-%d"))
                ldays = sorted(days)
                site['last_activity'] = ldays[-1]
            except:
                pass

            x, y = gis_tools.transform_point(site['LATITUDE'], site['LONGITUDE'])
            hits = list(idx.intersection((x, y, x, y)))
            aPoint = Point(x, y)
            row_data = set_geo(site, aPoint, hits, data)
            update_ct += 1
            db[database][collection].save(row_data)
        except:
            db[database][collection].save(site)
    return {'total_count': total_ct, 'updated_count': update_ct}
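
The last_activity logic above works because reformatting each timestamp as ISO "%Y-%m-%d" makes lexicographic order agree with chronological order, so the last element of the sorted set is the most recent day. A standalone sketch with fabricated dates:

from datetime import datetime

dates = ['03/15/2012 01:30 PM', '11/02/2011 09:00 AM', '03/15/2012 08:00 AM']
days = set([])
for day in dates:
    #ISO-formatted days sort lexicographically in chronological order
    days.add(datetime.strptime(day, '%m/%d/%Y %I:%M %p').strftime('%Y-%m-%d'))
last_activity = sorted(days)[-1]  #'2012-03-15'
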
Example #4
0
def sites_usgs_wq(database=site_database, collection='usgs_wq_site', delete=True):
    '''

        Task to update USEPA and USGS 
        Water Quality sites

    '''
    db = Connection(mongoHost)
    #backup collection
    now = datetime.now()
    collection_backup = "%s_%s" % (collection, now.strftime("%Y_%m_%d_%H%M%S"))

    #get rtree spatial index and data object
    idx, geo_data = gis_tools.ok_watershed_aquifer_rtree()
    #if delete:
    #    db[database][collection].remove()
    url_site = config.wqp_site
    sites = urllib2.urlopen(url_site)
    output = StringIO.StringIO(sites.read())
    head = output.readline()
    head = head.replace('/', '-').strip('\r\n').split(',')
    reader = csv.DictReader(output, head)

    # set up metadata dataframe
    if os.path.isfile("%s/temp.zip" % config.wqp_tmp):
        os.remove("%s/temp.zip" % config.wqp_tmp)
    if os.path.isfile("%s/Result.csv" % config.wqp_tmp):
        os.remove("%s/Result.csv" % config.wqp_tmp)
    url_results = config.wqp_result_ok_all
    location = gis_tools.save_download(url_results, "%s/temp.zip" % config.wqp_tmp, compress='zip')
    df = pd.read_csv("%s/Result.csv" % (location), error_bad_lines=False)

    metadata_types = set([])
    metadata_analytes = set([])
    for rec in reader:
        rec['watersheds'] = []
        rec['aquifers'] = []
        rec["MonitoringLocationDescriptionText"] = filter(lambda x: x in string.printable,
                                                                  rec["MonitoringLocationDescriptionText"])
        rec["MonitoringLocationDescriptionText"] = rec["MonitoringLocationDescriptionText"].replace("\\u00bf", "")
        rec["MonitoringLocationDescriptionText"] = rec["MonitoringLocationDescriptionText"].replace("\u00bf", "")
        rec["MonitoringLocationDescriptionText"] = rec["MonitoringLocationDescriptionText"].replace("\\u00bf***", "")
        rec["MonitoringLocationDescriptionText"] = rec["MonitoringLocationDescriptionText"].replace("\u00bf***", "")
        rec["MonitoringLocationDescriptionText"] = rec["MonitoringLocationDescriptionText"].replace("\u00bf", "")
        metadata_types.add(rec['MonitoringLocationTypeName'].strip(' \t\n\r'))
        try:
            dfloc = df[df.MonitoringLocationIdentifier == rec['MonitoringLocationIdentifier']]
            #if len(df.index) != 0:
            #    rec['last_activity'] = df['ActivityStartDate'].max()
            #url = config.wqp_result % (rec['MonitoringLocationIdentifier'])
            #page = urllib2.urlopen(url)
            #df = pd.read_csv(page)
            if len(dfloc.index) != 0:
                rec['last_activity'] = dfloc['ActivityStartDate'].max()
                data = []
                grouped = dfloc.groupby('CharacteristicName')
                for idxs, row in grouped.agg([np.min, np.max]).iterrows():
                    metadata_analytes.add(idxs)
                    try:
                        pcode = "%05d" % row['USGSPCode']['amin']
                    except:
                        pcode = ''
                    data.append({'name': idxs, 'begin_date': row['ActivityStartDate']['amin'],
                                 'end_date': row['ActivityStartDate']['amax'],
                                 'parm_cd': pcode,
                                 'units': row['ResultMeasure/MeasureUnitCode']['amin']})
                rec['parameter'] = data
            else:
                #if no data skip site
                continue
        except:
            #if no data, skip; may need to revisit this in the future
            continue
        try:
            x, y = gis_tools.transform_point(rec['LatitudeMeasure'], rec['LongitudeMeasure'])
            hits = list(idx.intersection((x, y, x, y)))
            aPoint = Point(x, y)
            rec = set_geo(rec, aPoint, hits, geo_data)
        except:
            #Legacy code inserted sites without lat/lon, which masked errors;
            #now such records are skipped instead. May change back in the future
            continue
        #insert when geo and parameters set
        db[database][collection_backup].insert(rec)
    rec = db[database]['catalog'].find_one({'metadata_source': 'wqp'})
    if rec:
        rec['types'] = list(metadata_types)
        rec['parameters'] = list(metadata_analytes)
    else:
        rec = {'metadata_source': 'wqp', 'types': list(metadata_types), 'parameters': list(metadata_analytes)}
    db[database]['catalog'].save(rec)
    return json.dumps({'source': 'usgs_wq', 'url': url_site, 'database': database, 'collection': collection_backup},
                      indent=2)
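
The per-parameter summary above relies on grouped.agg([np.min, np.max]) producing a column MultiIndex in which every original column gains one sub-column per aggregate. With numpy's functions those sub-columns are named 'amin' and 'amax' in the pandas versions this code targets; newer pandas releases map them to 'min'/'max', so treat the names below as version-dependent. A small sketch of the access pattern on invented data:

import numpy as np
import pandas as pd

dfloc = pd.DataFrame({
    'CharacteristicName': ['Nitrate', 'Nitrate', 'pH'],
    'ActivityStartDate': ['2010-01-05', '2012-06-01', '2011-03-10'],
    'USGSPCode': [618, 618, 400],
})
data = []
grouped = dfloc.groupby('CharacteristicName')
for idxs, row in grouped.agg([np.min, np.max]).iterrows():
    data.append({'name': idxs,
                 'begin_date': row['ActivityStartDate']['amin'],
                 'end_date': row['ActivityStartDate']['amax'],
                 'parm_cd': "%05d" % row['USGSPCode']['amin']})  #e.g. '00618'
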
Example #5
0
def sites_usgs_update(database=site_database, collection=config.usgs_site_collection,
                      ws_url=config.usgs_site_url):  #,delete=True):
    '''**************************
        Task to update USGS 
        Sites
    '''
    db = Connection(mongoHost)
    #backup collection
    now = datetime.now()
    collection_backup = "%s_%s" % (collection, now.strftime("%Y_%m_%d_%H%M%S"))
    #try:
    #    db[database][collection].rename(collection_backup)
    #except:
    #    raise #pass
    url = ws_url
    f1_i = urllib2.urlopen(url + 'inactive')
    f2_a = urllib2.urlopen(url + 'active')
    f1_in = StringIO.StringIO(f1_i.read())
    f2_act = StringIO.StringIO(f2_a.read())
    temp = '#'
    head = []
    while temp[0] == "#":
        temp = f2_act.readline()
        f1_in.readline()
        if temp[0] != '#':
            head = temp.strip('\r\n').split('\t')
    head.append('status')
    f2_act.readline()
    f1_in.readline()

    #get rtree spatial index and data object
    idx, data = gis_tools.ok_watershed_aquifer_rtree()

    #USGS Active sites
    for row in f2_act:
        temp = row.strip('\r\n').split('\t')
        temp.append('Active')
        rec = dict(zip(head, temp))
        rec['watersheds'] = []
        rec['aquifers'] = []
        try:
            row_data = rec
            #set webservices
            try:
                row_data['webservice'], row_data['parameter'], row_data['last_activity'] = get_webservice(
                    row_data['site_no'], db)
            except:
                row_data['webservice'] = []
                row_data['parameter'] = []
                row_data['last_activity'] = ''

            #Check if data is available
            if len(row_data['parameter']) == 0:
                continue

            #set watershed and aquifer
            try:
                x, y = gis_tools.transform_point(rec['dec_lat_va'], rec['dec_long_va'])
                hits = list(idx.intersection((x, y, x, y)))  #, objects=True)) #[0]  #[0].object
                aPoint = Point(x, y)
                row_data = set_geo(rec, aPoint, hits, data)
            except:
                pass

            #Save site data
            db[database][collection_backup].insert(row_data)
        except:
            #Legacy code inserted sites without lat/lon, which masked errors;
            #now such records are skipped instead. May change back in the future
            pass
            #db[database][collection_backup].insert(rec)

    for row in f1_in:
        temp = row.strip('\r\n').split('\t')
        temp.append('Inactive')
        rec = dict(zip(head, temp))
        rec['watersheds'] = []
        rec['aquifers'] = []
        try:
            row_data = rec
            #set webservices
            try:
                row_data['webservice'], row_data['parameter'], row_data['last_activity'] = get_webservice(
                    row_data['site_no'], db)
            except:
                row_data['webservice'] = []
                row_data['parameter'] = []
                row_data['last_activity'] = ''
            #Check if data is available
            if len(row_data['parameter']) == 0:
                continue
            x, y = gis_tools.transform_point(rec['dec_lat_va'], rec['dec_long_va'])
            hits = list(idx.intersection((x, y, x, y)))  #, objects=True)) #[0]  #[0].object
            aPoint = Point(x, y)
            row_data = set_geo(rec, aPoint, hits, data)

            #save site data
            db[database][collection_backup].insert(row_data)
        except:
            #Legacy code inserted sites without lat/lon, which masked errors;
            #now such records are skipped instead. May change back in the future
            pass
            #db[database][collection_backup].insert(rec)

    return {'source': 'usgs', 'url': [url + 'inactive', url + 'active'], 'database': database,
            'collection': collection_backup, 'record_count': db[database][collection_backup].count()}
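
The header loop above encodes the USGS RDB layout: comment lines begin with '#', the first non-comment line holds the tab-separated column names, and the line after it is a column-format row (e.g. '15s') that must be skipped. A minimal sketch of that parsing against a fabricated payload:

import StringIO

payload = ("# USGS site service\n"
           "# (comment lines)\n"
           "site_no\tstation_nm\tdec_lat_va\tdec_long_va\n"
           "15s\t50s\t16s\t16s\n"
           "07159100\tExample Creek\t35.47\t-97.52\n")
f = StringIO.StringIO(payload)
temp = '#'
while temp[0] == '#':
    temp = f.readline()
head = temp.strip('\r\n').split('\t')
f.readline()  #skip the column-format row
sites = [dict(zip(head, row.strip('\r\n').split('\t'))) for row in f]
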
Example #6
0
def sites_usgs_wq(database=site_database,
                  collection='usgs_wq_site',
                  delete=True):
    '''

        Task to update USEPA and USGS 
        Water Quality sites

    '''
    db = Connection(mongoHost)
    #backup collection
    now = datetime.now()
    collection_backup = "%s_%s" % (collection, now.strftime("%Y_%m_%d_%H%M%S"))

    #get rtree spatial index and data object
    idx, geo_data = gis_tools.ok_watershed_aquifer_rtree()
    #if delete:
    #    db[database][collection].remove()
    url_site = config.wqp_site
    sites = urllib2.urlopen(url_site)
    output = StringIO.StringIO(sites.read())
    head = output.readline()
    head = head.replace('/', '-').strip('\r\n').split(',')
    reader = csv.DictReader(output, head)

    # set up metadata dataframe
    if os.path.isfile("%s/temp.zip" % config.wqp_tmp):
        os.remove("%s/temp.zip" % config.wqp_tmp)
    if os.path.isfile("%s/Result.csv" % config.wqp_tmp):
        os.remove("%s/Result.csv" % config.wqp_tmp)
    url_results = config.wqp_result_ok_all
    location = gis_tools.save_download(url_results,
                                       "%s/temp.zip" % config.wqp_tmp,
                                       compress='zip')
    df = pd.read_csv("%s/Result.csv" % (location), error_bad_lines=False)

    metadata_types = set([])
    metadata_analytes = set([])
    for rec in reader:
        rec['watersheds'] = []
        rec['aquifers'] = []
        rec["MonitoringLocationDescriptionText"] = filter(
            lambda x: x in string.printable,
            rec["MonitoringLocationDescriptionText"])
        rec["MonitoringLocationDescriptionText"] = rec[
            "MonitoringLocationDescriptionText"].replace("\\u00bf", "")
        rec["MonitoringLocationDescriptionText"] = rec[
            "MonitoringLocationDescriptionText"].replace("\u00bf", "")
        rec["MonitoringLocationDescriptionText"] = rec[
            "MonitoringLocationDescriptionText"].replace("\\u00bf***", "")
        rec["MonitoringLocationDescriptionText"] = rec[
            "MonitoringLocationDescriptionText"].replace("\u00bf***", "")
        rec["MonitoringLocationDescriptionText"] = rec[
            "MonitoringLocationDescriptionText"].replace("\u00bf", "")
        metadata_types.add(rec['MonitoringLocationTypeName'].strip(' \t\n\r'))
        try:
            dfloc = df[df.MonitoringLocationIdentifier ==
                       rec['MonitoringLocationIdentifier']]
            #if len(df.index) != 0:
            #    rec['last_activity'] = df['ActivityStartDate'].max()
            #url = config.wqp_result % (rec['MonitoringLocationIdentifier'])
            #page = urllib2.urlopen(url)
            #df = pd.read_csv(page)
            if len(dfloc.index) != 0:
                rec['last_activity'] = dfloc['ActivityStartDate'].max()
                data = []
                grouped = dfloc.groupby('CharacteristicName')
                for idxs, row in grouped.agg([np.min, np.max]).iterrows():
                    metadata_analytes.add(idxs)
                    try:
                        pcode = "%05d" % row['USGSPCode']['amin']
                    except:
                        pcode = ''
                    data.append({
                        'name':
                        idxs,
                        'begin_date':
                        row['ActivityStartDate']['amin'],
                        'end_date':
                        row['ActivityStartDate']['amax'],
                        'parm_cd':
                        pcode,
                        'units':
                        row['ResultMeasure/MeasureUnitCode']['amin']
                    })
                rec['parameter'] = data
            else:
                #if no data skip site
                continue
        except:
            #if no data, skip; may need to revisit this in the future
            continue
        try:
            x, y = gis_tools.transform_point(rec['LatitudeMeasure'],
                                             rec['LongitudeMeasure'])
            hits = list(idx.intersection((x, y, x, y)))
            aPoint = Point(x, y)
            rec = set_geo(rec, aPoint, hits, geo_data)
        except:
            #Legacy code inserted sites without lat/lon, which masked errors;
            #now such records are skipped instead. May change back in the future
            continue
        #insert when geo and parameters set
        db[database][collection_backup].insert(rec)
    rec = db[database]['catalog'].find_one({'metadata_source': 'wqp'})
    if rec:
        rec['types'] = list(metadata_types)
        rec['parameters'] = list(metadata_analytes)
    else:
        rec = {
            'metadata_source': 'wqp',
            'types': list(metadata_types),
            'parameters': list(metadata_analytes)
        }
    db[database]['catalog'].save(rec)
    return json.dumps(
        {
            'source': 'usgs_wq',
            'url': url_site,
            'database': database,
            'collection': collection_backup
        },
        indent=2)
Example #7
0
def sites_usgs_update(database=site_database,
                      collection=config.usgs_site_collection,
                      ws_url=config.usgs_site_url):  #,delete=True):
    '''**************************
        Task to update USGS 
        Sites
    '''
    db = Connection(mongoHost)
    #backup collection
    now = datetime.now()
    collection_backup = "%s_%s" % (collection, now.strftime("%Y_%m_%d_%H%M%S"))
    #try:
    #    db[database][collection].rename(collection_backup)
    #except:
    #    raise #pass
    url = ws_url
    f1_i = urllib2.urlopen(url + 'inactive')
    f2_a = urllib2.urlopen(url + 'active')
    f1_in = StringIO.StringIO(f1_i.read())
    f2_act = StringIO.StringIO(f2_a.read())
    temp = '#'
    head = []
    while temp[0] == "#":
        temp = f2_act.readline()
        f1_in.readline()
        if temp[0] != '#':
            head = temp.strip('\r\n').split('\t')
    head.append('status')
    f2_act.readline()
    f1_in.readline()

    #get rtree spatial index and data object
    idx, data = gis_tools.ok_watershed_aquifer_rtree()

    #USGS Active sites
    for row in f2_act:
        temp = row.strip('\r\n').split('\t')
        temp.append('Active')
        rec = dict(zip(head, temp))
        rec['watersheds'] = []
        rec['aquifers'] = []
        try:
            row_data = rec
            #set webservices
            try:
                row_data['webservice'], row_data['parameter'], row_data[
                    'last_activity'] = get_webservice(row_data['site_no'], db)
            except:
                row_data['webservice'] = []
                row_data['parameter'] = []
                row_data['last_activity'] = ''

            #Check if data is available
            if len(row_data['parameter']) == 0:
                continue

            #set watershed and aquifer
            try:
                x, y = gis_tools.transform_point(rec['dec_lat_va'],
                                                 rec['dec_long_va'])
                hits = list(idx.intersection(
                    (x, y, x, y)))  #, objects=True)) #[0]  #[0].object
                aPoint = Point(x, y)
                row_data = set_geo(rec, aPoint, hits, data)
            except:
                pass

            #Save site data
            db[database][collection_backup].insert(row_data)
        except:
            #Legacy code inserted sites without lat/lon, which masked errors;
            #now such records are skipped instead. May change back in the future
            pass
            #db[database][collection_backup].insert(rec)

    for row in f1_in:
        temp = row.strip('\r\n').split('\t')
        temp.append('Inactive')
        rec = dict(zip(head, temp))
        rec['watersheds'] = []
        rec['aquifers'] = []
        try:
            row_data = rec
            #set webservices
            try:
                row_data['webservice'], row_data['parameter'], row_data[
                    'last_activity'] = get_webservice(row_data['site_no'], db)
            except:
                row_data['webservice'] = []
                row_data['parameter'] = []
                row_data['last_activity'] = ''
            #Check if data is available
            if len(row_data['parameter']) == 0:
                continue
            x, y = gis_tools.transform_point(rec['dec_lat_va'],
                                             rec['dec_long_va'])
            hits = list(idx.intersection(
                (x, y, x, y)))  #, objects=True)) #[0]  #[0].object
            aPoint = Point(x, y)
            row_data = set_geo(rec, aPoint, hits, data)

            #save site data
            db[database][collection_backup].insert(row_data)
        except:
            #Legacy code inserted sites without lat/lon, which masked errors;
            #now such records are skipped instead. May change back in the future
            pass
            #db[database][collection_backup].insert(rec)

    return {
        'source': 'usgs',
        'url': [url + 'inactive', url + 'active'],
        'database': database,
        'collection': collection_backup,
        'record_count': db[database][collection_backup].count()
    }
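
A closing note: every task here uses pymongo APIs that were later removed (Connection, Collection.save, Collection.count). If this code were ported to pymongo 3+/4+, the equivalents would look roughly like the sketch below; the host, database, and collection names are placeholders, not values taken from this code.

from pymongo import MongoClient

client = MongoClient('localhost', 27017)  #replaces Connection(mongoHost)
coll = client['water_sites']['occ_site']  #placeholder database/collection
doc = {'_id': 'site-001', 'huc_8': '11100302'}
#save() was split into insert_one()/replace_one(); count() became count_documents()
coll.replace_one({'_id': doc['_id']}, doc, upsert=True)
record_count = coll.count_documents({})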