def visitProcess(visit, pattern, folder, dryrun):
    """
    Process the deletion for each individual visit
    :param visit: visit ID to process
    :param pattern: regex pattern matched against file names
    :param folder: optional API folder name; None means visit-level files
    :param dryrun: when True, only log what would be deleted
    :return: number of files that matched (deleted, or flagged in dryrun)
    """
    log = Logger("DELETE")
    total = 0
    try:
        if folder is None:
            files = APIGet('visits/{}/files'.format(visit))
        else:
            files = APIGet('visits/{}/folders/{}/files'.format(visit, folder))

        for fileObj in files:
            name = fileObj['name']
            if re.match(pattern, name):
                total += 1
                dryruntext = ""
                if not dryrun:
                    APIDelete(fileObj['url'], absolute=True)
                else:
                    dryruntext = "(dryrun)"
                log.info("{} DELETED: {}".format(dryruntext, fileObj['url']))
    except MissingException:
        log.debug("No files found for visit {}".format(visit))

    return total
def fileDeleter(args):
    log = Logger("APIFileDeleter")

    if not args.dryrun:
        foldertext = ""
        if args.folder is not None:
            foldertext = "FROM FOLDER '{}' ".format(args.folder)
        print("\n\n\nTHIS WILL DELETE ALL VISIT FILES ON THE API {}THAT MATCH THE PATTERN: '{}'".format(foldertext, args.pattern))
        if not query_yes_no("Are you sure?") or not query_yes_no("\nNo, seriously! You're about to delete data on CM.org. Are you sure!?? (hint: BE SURE!!!)"):
            print("wise choice")
            return
    else:
        log.info("== DRYRUN DETECTED ==")

    # Get all the visits we know about
    visitsraw = APIGet('visits')
    visitids = [v['id'] for v in visitsraw]
    visitids.sort()

    total = 0
    for vid in visitids:
        total += visitProcess(vid, args.pattern, args.folder, args.dryrun)

    print("Total Found: {}".format(total))
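# Hedged usage sketch (not part of the original script): a small helper showing how
# fileDeleter can be driven in dry-run mode with an argparse-style namespace. The
# helper name and the pattern value are illustrative assumptions; the attribute
# names (pattern, folder, dryrun) mirror how the functions above read them.
def example_dryrun_delete():
    import argparse

    example_args = argparse.Namespace(
        pattern=r'.*\.tmp$',  # hypothetical regex for the files to match
        folder=None,          # None means visit-level files rather than a named folder
        dryrun=True)          # True: only log what would be deleted

    fileDeleter(example_args)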
def loadVisitData(watershedName, apiWatershedName, visitData):

    apiWatersheds = APIGet('watersheds/' + apiWatershedName.lower())

    for site in apiWatersheds['sites']:
        apiSites = APIGet(site['url'], True)

        for visit in apiSites['visits']:
            # if len(visitData) > 1:
            #     return
            apiVisit = APIGet(visit['url'], absolute=True)

            visitDict = {
                'Watershed': watershedName,
                'Site': apiVisit['siteName'],
                'VisitID': apiVisit['id'],
                'Organization': apiVisit['organizationName'],
                'SampleYear': apiVisit['sampleYear'],
                'ChannelUnits': {}
            }
            visitData[apiVisit['id']] = visitDict

            try:
                apiChannelUnits = APIGet('visits/{0}/measurements/Channel Unit'.format(visit['id']))
                for apiCU in apiChannelUnits['values']:
                    cu = apiCU['value']
                    visitDict['ChannelUnits'][cu['ChannelUnitNumber']] = {
                        'Tier1': cu['Tier1'],
                        'Tier2': cu['Tier2']
                    }
            except MissingException:
                print("Skipping {0} visit {1} because no channel unit data".format(watershedName, visit['id']))

            try:
                # Get the average site width and total reach length from the visit topo metrics
                apiTopoMetrics = APIGet('visits/{0}/metricschemas/QA - Topo Visit Metrics/metrics'.format(visit['id']))
                visitDict['AverageSiteWidth'] = getTopoMetricValue(apiTopoMetrics, 'WetWdth_Avg')
                visitDict['TotalReachLength'] = getTopoMetricValue(apiTopoMetrics, 'Lgth_Wet')
            except MissingException:
                visitDict['AverageSiteWidth'] = None
                visitDict['TotalReachLength'] = None
def getAllVisits(siteID):
    log = Logger('Visits')
    log.info("Getting all visits for site: {}".format(siteID))

    # Strip whitespace, underscores and hyphens so the site ID matches the API's format
    mangledSiteID = re.sub(r'[\s_-]', '', siteID)

    siteData = APIGet('sites/{}'.format(mangledSiteID))

    if 'visits' not in siteData or len(siteData['visits']) == 0:
        raise MissingException("No visits found for site `{}`.".format(siteID))

    return [visit for visit in siteData['visits'] if visit['sampleDate'] is not None]
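# Hedged usage sketch (illustrative, not from the original source): getAllVisits
# raises MissingException when a site has no usable visits, so callers typically
# guard the call. The site ID below is a made-up placeholder and the helper name
# is an assumption.
def example_list_site_visits():
    try:
        for visit in getAllVisits('CBW05583-028079'):  # hypothetical site ID
            print(visit['sampleDate'])
    except MissingException as e:
        print('No usable visits: {}'.format(e))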
def downloadAPIData(visitID):
    apiData = {}

    for name, URL in apiCalls.items():
        try:
            apiData[name] = APIGet('visits/{0}/{1}'.format(visitID, URL))
        except MissingException:
            # Not every measurement exists for every visit, so missing data is not fatal here
            pass
            # if not (name == 'LargeWoodyDebris' or name == 'LargeWoodyPiece' or name == 'WoodyDebrisJam'):
            #     raise MissingException("Missing API Data {}".format(URL))

    # if 'LargeWoodyDebris' not in apiData and 'LargeWoodyPiece' not in apiData:
    #     raise MissingException('Missing large wood API data')

    return apiData
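# Hedged usage sketch (illustrative): downloadAPIData returns whichever subset of
# the module-level `apiCalls` endpoints exists for the visit; absent measurements
# are simply missing keys. The visit ID and the helper name are placeholders.
def example_fetch_visit_api_data():
    api_data = downloadAPIData(1234)  # hypothetical visit ID
    for name in sorted(api_data.keys()):
        print('Fetched measurement: {}'.format(name))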
def loadChannelUnitsFromAPI(vid):

    apiUnits = APIGet('visits/{}/measurements/Channel Unit'.format(vid))
    dUnits = {}

    for nodUnit in apiUnits['values']:
        value = nodUnit['value']
        nCUNumber = int(value['ChannelUnitNumber'])
        tier1 = value['Tier1']
        tier2 = value['Tier2']
        segment = value['ChannelSegmentID']

        dUnits[nCUNumber] = (tier1, tier2, segment)

    log = Logger("Channel Units")
    log.info("{0} channel units loaded from the API for visit {1}".format(len(dUnits), vid))

    return dUnits
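# Hedged usage sketch (illustrative): the dict returned by loadChannelUnitsFromAPI is
# keyed by channel unit number with (tier1, tier2, segment) tuples as values. The
# visit ID and the helper name are placeholders.
def example_print_channel_units():
    units = loadChannelUnitsFromAPI(1234)  # hypothetical visit ID
    for cu_number in sorted(units.keys()):
        tier1, tier2, segment = units[cu_number]
        print('CU {}: {} / {} (segment {})'.format(cu_number, tier1, tier2, segment))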
def topomover(jsonfile):
    log = Logger("TopoMover")

    visitsraw = APIGet('visits')
    visitsreorg = {v['id']: v for v in visitsraw}
    visitids = [v['id'] for v in visitsraw]
    visitids.sort()

    # Load the inventory
    inventory = {}
    if os.path.isfile(jsonfile):
        try:
            with open(jsonfile, "r") as f:
                inventory = json.load(f)
        except Exception:
            # A missing or unreadable inventory just means we start from scratch
            log.warning("Could not read inventory file {}. Starting fresh.".format(jsonfile))

    counter = 0
    for vid in visitids:
        strvid = str(vid)
        APIVisit = visitsreorg[vid]

        if strvid not in inventory:
            inventory[strvid] = {}

        # Decide if there's anything to do: process when we have no record of this visit
        # or when the API's date is newer than the one stored in the inventory
        if APIDATEFIELD not in inventory[strvid] \
                or APIstrtodate(APIVisit[APIDATEFIELD]) > APIstrtodate(inventory[strvid][APIDATEFIELD]):
            processZipFile(inventory[strvid], APIVisit)
        else:
            log.info("Nothing to do")

        counter += 1
        log.info("STATUS: {:d}% {:d}/{:d}".format(100 * counter // len(visitids), counter, len(visitids)))

    with open(jsonfile, "w+") as f:
        json.dump(inventory, f, indent=4, sort_keys=True)
def download_file(file_dict, folder):
    log = Logger('Download')

    if not file_dict['name']:
        log.warning('Missing file name in folder {}'.format(folder))
        return

    if not file_dict['downloadUrl'] or file_dict['downloadUrl'].lower() == '?download':
        log.warning('Missing download URL in folder {}'.format(folder))
        return

    file_path = os.path.join(folder, file_dict['name'])

    if not os.path.isdir(folder):
        os.makedirs(folder)

    # Write file info as JSON alongside the file itself
    with open(os.path.splitext(file_path)[0] + '.json', 'w') as json_file:
        json.dump(file_dict, json_file)

    # Skip files that already exist, unless they are zero bytes, in which case remove them
    if os.path.isfile(file_path):
        if os.stat(file_path).st_size == 0:
            log.warning('Removing zero byte file {}'.format(file_path))
            os.remove(file_path)
        else:
            return

    # Download the missing file
    with open(file_path, 'w+b') as f:
        response = APIGet(file_dict['downloadUrl'], absolute=True)
        f.write(response.content)

    log.info('Downloaded missing file {}'.format(file_path))
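# Hedged usage sketch (illustrative): download_file expects the file dict in the shape
# the API returns, with at least 'name' and 'downloadUrl' keys. The dict contents,
# target folder and helper name below are placeholders, not real API data.
def example_download_single_file():
    fake_file_dict = {
        'name': 'example.pdf',                      # hypothetical file name
        'downloadUrl': 'https://example.com/file',  # hypothetical absolute URL
    }
    download_file(fake_file_dict, os.path.join('/tmp', 'champ_example'))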
def main():
    # parse command line options
    parser = argparse.ArgumentParser()
    parser.add_argument('visitID', help='the visit id of the site to use (no spaces)', type=str)
    parser.add_argument('outputfolder', help='Output folder', type=str)
    parser.add_argument('substrate_values', nargs='+',
                        help="one or more percentiles of grain size to calculate. 50 for D50, 84 for D84, etc.",
                        type=int)
    parser.add_argument('--out_channel_roughness_value', help="i.e. 4000.0", type=float, default=4000.0)
    parser.add_argument('--ocular_estimates', help="(optional) local json file of ocular estimates")
    parser.add_argument('--datafolder',
                        help='(optional) local folder containing TopoMetrics Riverscapes projects',
                        type=str)
    parser.add_argument('--env', "-e", help="(optional) local env file", type=str)
    parser.add_argument('--verbose', help='Get more information in your logs.', action='store_true', default=False)
    args = parser.parse_args()

    if not all([args.visitID, args.outputfolder, args.substrate_values, args.out_channel_roughness_value]):
        print("ERROR: Missing arguments")
        parser.print_help()
        sys.exit(1)

    if args.env:
        setEnvFromFile(args.env)

    # The results live in an "outputs" folder inside the output folder
    resultsFolder = os.path.join(args.outputfolder, "outputs")

    # Initiate the log file
    logg = Logger("Program")
    logfile = os.path.join(resultsFolder, "substrate_raster.log")
    logg.setup(logPath=logfile, verbose=args.verbose)

    # Fiona debug-level loggers can cause problems
    logging.getLogger("Fiona").setLevel(logging.ERROR)
    logging.getLogger("fiona").setLevel(logging.ERROR)
    logging.getLogger("fiona.collection").setLevel(logging.ERROR)
    logging.getLogger("shapely.geos").setLevel(logging.ERROR)
    logging.getLogger("rasterio").setLevel(logging.ERROR)

    try:
        # Make some folders if we need to:
        if not os.path.isdir(args.outputfolder):
            os.makedirs(args.outputfolder)
        if not os.path.isdir(resultsFolder):
            os.makedirs(resultsFolder)

        # If we need to go get our own topodata.zip file and unzip it we do this
        if args.datafolder is None:
            topoDataFolder = os.path.join(args.outputfolder, "inputs")
            if not os.path.isdir(topoDataFolder):
                os.makedirs(topoDataFolder)
            fileJSON, projectFolder = downloadUnzipTopo(args.visitID, topoDataFolder)
        # otherwise just pass in a path to existing data
        else:
            projectFolder = args.datafolder

        # Load the ocular substrate estimates, either from the API or from a local json file
        if args.ocular_estimates is None:
            dict_ocular = APIGet("visits/{}/measurements/Substrate%20Cover".format(str(args.visitID)))
        else:
            with open(args.ocular_estimates, 'rt') as f_ocular:
                dict_ocular = json.load(f_ocular)

        # Map channel unit IDs to channel unit numbers and attach the numbers to the ocular estimates
        dict_units = APIGet("visits/{}/measurements/Channel%20Unit".format(str(args.visitID)))
        dict_unitkey = {x['value']['ChannelUnitID']: x['value']['ChannelUnitNumber'] for x in dict_units['values']}
        for estimate in dict_ocular['values']:
            estimate['value']['ChannelUnitNumber'] = dict_unitkey[estimate['value']['ChannelUnitID']]

        generate_substrate_raster(projectFolder, resultsFolder, args.substrate_values, dict_ocular,
                                  args.out_channel_roughness_value)

    except (DataException, MissingException, NetworkException) as e:
        # Exception class prints the relevant information
        traceback.print_exc(file=sys.stdout)
        sys.exit(e.returncode)
    except AssertionError as e:
        logg.error(str(e))
        traceback.print_exc(file=sys.stdout)
        sys.exit(1)
    except Exception as e:
        logg.error(str(e))
        traceback.print_exc(file=sys.stdout)
        sys.exit(1)

    sys.exit(0)
def runAuxMetrics(xmlfile, outputDirectory, visit_id):
    log = Logger("Validation")

    # Make a big object we can pass around
    try:
        visit = APIGet("visits/{}".format(visit_id))
    except MissingException:
        raise MissingException("Visit Not Found in API")

    protocol = visit["protocol"]
    iteration = str(visit["iterationID"] + 2010)

    # {key: urlslug} dict
    measurekeys = {
        "snorkelFish": "Snorkel Fish",
        "snorkelFishBinned": "Snorkel Fish Count Binned",
        "snorkelFishSteelheadBinned": "Snorkel Fish Count Steelhead Binned",
        "channelUnits": "Channel Unit",
        "largeWoodyPieces": "Large Woody Piece",
        "largeWoodyDebris": "Large Woody Debris",
        "woodyDebrisJams": "Woody Debris Jam",
        "jamHasChannelUnits": "Jam Has Channel Unit",
        "riparianStructures": "Riparian Structure",
        "pebbles": "Pebble",
        "pebbleCrossSections": "Pebble Cross-Section",
        "channelConstraints": "Channel Constraints",
        "channelConstraintMeasurements": "Channel Constraint Measurements",
        "bankfullWidths": "Bankfull Width",
        "driftInverts": "Drift Invertebrate Sample",
        "driftInvertResults": "Drift Invertebrate Sample Results",
        "sampleBiomasses": "Sample Biomasses",
        "undercutBanks": "Undercut Banks",
        "solarInputMeasurements": "Daily Solar Access Meas",
        "discharge": "Discharge",
        "waterChemistry": "Water Chemistry",
        "poolTailFines": "Pool Tail Fines",
    }

    visitobj = {
        "visit_id": visit_id,
        "visit": visit,  # reuse the visit we already fetched above
        "iteration": iteration,
        "protocol": protocol,
    }

    log.info("Visit " + str(visit_id) + " - " + protocol + ": " + iteration)

    # Populate our measurements from the API
    for key, url in measurekeys.items():
        try:
            visitobj[key] = APIGet("visits/{0}/measurements/{1}".format(visit_id, url))
        except MissingException:
            visitobj[key] = None

    log.info("Writing Metrics for Visit {0} XML File".format(visit_id))

    # do metric calcs
    visitMetrics = calculateMetricsForVisit(visitobj)
    channelUnitMetrics = calculateMetricsForChannelUnitSummary(visitobj)
    tier1Metrics = calculateMetricsForTier1Summary(visitobj)
    structureMetrics = calculateMetricsForStructureSummary(visitobj)

    # write these files
    # dMetricsArg, visitID, sourceDir, xmlFilePath, modelEngineRootNode, modelVersion
    writeMetricsToXML(
        {
            "VisitMetrics": visitMetrics,
            "ChannelUnitMetrics": channelUnitMetrics,
            "Tier1Metrics": tier1Metrics,
            "StructureMetrics": structureMetrics
        },
        visit_id, "", xmlfile, "AuxMetrics", __version__)
def metric_downloader(workbench, outputfolder):

    log = Logger("Measurement Downloader")

    conn = sqlite3.connect(workbench)
    curs = conn.cursor()

    visits = {}
    for row in curs.execute(
            'SELECT WatershedName, SiteName, VisitYear, V.VisitID'
            ' FROM CHaMP_Visits V'
            ' INNER JOIN CHaMP_Sites S ON V.SiteID = S.SiteID'
            ' INNER JOIN CHaMP_Watersheds W ON S.WatershedID = W.WatershedID'
            ' WHERE V.ProgramID IN (1, 5, 6)'
            ' AND W.WatershedID IN (15, 32)'  # NOT IN ("Asotin", "Big-Navarro-Garcia (CA)", "CHaMP Training")
            ' ORDER BY WatershedName, visitYear'):

        if row[0] not in visits:
            visits[row[0]] = []

        visits[row[0]].append({'VisitID': row[3], 'Year': row[2], 'Site': row[1]})

    watersheds = list(visits.keys())
    watersheds.sort()
    curs.close()

    for watershed in watersheds:
        p = ProgressBar(end=len(visits[watershed]), width=20, fill='=', blank='.',
                        format='[%(fill)s>%(blank)s] %(progress)s%%')

        for visit in visits[watershed]:
            p += 1
            print(p)

            visit_path = os.path.join(outputfolder, str(visit['Year']),
                                      watershed.replace(' ', ''),
                                      visit['Site'].replace(' ', ''),
                                      'VISIT_{}'.format(visit['VisitID']))

            measurements = APIGet("visits/{0}/measurements".format(visit['VisitID']))

            for meas in measurements:
                if not os.path.isdir(visit_path):
                    os.makedirs(visit_path)

                meas_path = os.path.join(visit_path, '{}.json'.format(meas['name'].replace(' ', '')))
                data = APIGet(meas['url'], True)

                with open(meas_path, 'w') as outfile:
                    json.dump(data, outfile)

    print('Process completed')
def champ_topo_checker(workbench, folder):
    log = Logger('CHaMP Files')
    log.setup(logPath=os.path.join(folder, datetime.now().strftime("%Y%m%d-%H%M%S") + '_champ_files.log'))

    dbCon = sqlite3.connect(workbench)
    dbCurs = dbCon.cursor()
    dbCurs.execute('SELECT WatershedName, VisitYear, SiteName, VisitID' +
                   ' FROM vwVisits WHERE ProgramID = 1 AND ProtocolID IN (2030, 416, 806, 1966, 2020, 1955, 1880, 10036, 9999)' +
                   ' ORDER BY VisitYear, WatershedName')

    for row in dbCurs.fetchall():
        watershed = row[0]
        visit_year = row[1]
        site = row[2]
        visitID = row[3]

        visit_path = os.path.join(folder, str(visit_year),
                                  watershed.replace(' ', ''),
                                  site.replace(' ', ''),
                                  'VISIT_{}'.format(visitID))

        log.info('Processing {}'.format(visit_path))

        if not os.path.isdir(visit_path):
            os.makedirs(visit_path)

        try:
            visit_data = APIGet('visits/{}'.format(visitID))

            # Write visit information to json file
            with open(os.path.join(visit_path, 'visit_info.json'), 'w') as json_file:
                json.dump(visit_data, json_file)

            # Loop over the two lists of folders per visit: field folders and visit folders
            for api_key, local_folder in {'fieldFolders': 'Field Folders', 'folders': 'Visit Folders'}.items():
                if api_key in visit_data and isinstance(visit_data[api_key], list):
                    for folder_name in visit_data[api_key]:
                        field_folder_path = os.path.join(visit_path, local_folder, folder_name['name'])
                        field_folder_data = APIGet(folder_name['url'], True)

                        if isinstance(field_folder_data, dict) and 'files' in field_folder_data:
                            for file_dict in field_folder_data['files']:
                                download_file(file_dict, field_folder_path)

            # Get all the miscellaneous files for the visit
            for file_dict in visit_data['files']:
                download_file(file_dict, os.path.join(visit_path, 'Files'))

        except Exception as e:
            log.error('Error for visit {}: {}'.format(visitID, e))

    log.info('Process Complete')
def champ_topo_checker(workbench, folder):
    log = Logger('Topo Checker')
    log.setup(logPath=os.path.join(folder, datetime.now().strftime("%Y%m%d-%H%M%S") + '_topo_checker.log'))

    dbCon = sqlite3.connect(workbench)
    dbCurs = dbCon.cursor()
    dbCurs.execute('SELECT WatershedName, VisitYear, SiteName, VisitID' +
                   ' FROM vwVisits WHERE ProgramID = 1 AND ProtocolID IN (2030, 416, 806, 1966, 2020, 1955, 1880, 10036, 9999)')

    file_exists = 0
    file_zero = 0
    file_download = []
    file_errors = []

    for row in dbCurs.fetchall():
        watershed = row[0]
        visit_year = row[1]
        site = row[2]
        visitID = row[3]

        topo_path = os.path.join(folder, str(visit_year),
                                 watershed.replace(' ', ''), site,
                                 'VISIT_{}'.format(visitID),
                                 'Field Folders', 'Topo', 'TopoData.zip')

        download_needed = False
        if os.path.isfile(topo_path):
            file_exists += 1

            if os.stat(topo_path).st_size == 0:
                file_zero += 1
                download_needed = True
        else:
            download_needed = True

        if not download_needed:
            continue

        file_download.append(topo_path)

        try:
            topoFieldFolders = APIGet('visits/{}/fieldFolders/Topo'.format(visitID))
            topo_file = next(f for f in topoFieldFolders['files'] if f['componentTypeID'] == 181)
            downloadUrl = topo_file['downloadUrl']
        except Exception as e:
            log.warning('No topo data for visit information {}: {}'.format(visitID, topo_path))
            file_errors.append(topo_path)
            continue

        # Download the file
        if not os.path.isdir(os.path.dirname(topo_path)):
            os.makedirs(os.path.dirname(topo_path))

        with open(topo_path, 'w+b') as f:
            response = APIGet(downloadUrl, absolute=True)
            f.write(response.content)

        log.info('Downloaded {}'.format(topo_path))

    log.info('Existing files: {}'.format(file_exists))
    log.info('Zero byte files: {}'.format(file_zero))
    log.info('Download files: {}'.format(len(file_download)))
    log.info('Download errors: {}'.format(len(file_errors)))