def download_and_statsOneFile(product, date, directory, admin_level, mask_level, admin_specified=None, mask_specified=None, save=False) -> bool:
    """Download a product/date from S3, run statsOneFile on each file, then clean up.

    Wrapper for statsOneFile in case downloading is required.

    Parameters
    ----------
    product, date : str
        Which imagery product and date to pull from S3.
    directory : str
        Local directory the files are downloaded into.
    admin_level, mask_level : str
        Passed through to statsOneFile.
    admin_specified, mask_specified : str, optional
        Passed through to statsOneFile.
    save : bool
        If False (default), every downloaded file is removed afterwards,
        whether or not processing succeeded.

    Returns
    -------
    bool
        True on success; False on any failure (the exception is logged, not raised).
    """
    paths = ()
    try:
        log.info(f"Downloading {product} {date}")
        downloader = glam.Downloader()
        paths = downloader.pullFromS3(product, date, directory)
        log.info(f"Processing {product} {date}")
        for p in paths:
            # Explicit check instead of `assert`: asserts are stripped under
            # `python -O`, which would silently ignore stats failures.
            if not statsOneFile(file_path=p, admin_level=admin_level, mask_level=mask_level, admin_specified=admin_specified, mask_specified=mask_specified):
                raise RuntimeError(f"statsOneFile failed for {p}")
        return True
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit still propagate.
        log.exception(f"{product} {date} FAILED")
        return False
    finally:
        # Downloaded files are temporary unless the caller asked to keep them.
        if not save:
            for p in paths:
                os.remove(p)
def clean():
    """Command-line entry point: remove redundant chirps-prelim and/or NRT data.

    A preliminary/NRT file is redundant once a final file of the same or later
    date has been fully processed (completed=1 in product_status). For each
    requested product, the latest completed date is read from the database and
    every S3 entry dated on or before it is purged.
    """
    parser = argparse.ArgumentParser(
        description="Remove redundant chirps-prelim and/or NRT data")
    parser.add_argument("-c", "--chirps", action='store_true',
                        help="Clean chirps-prelim data specifically")
    parser.add_argument("-n", "--nrt", action='store_true',
                        help="Clean NRT NDVI data specifically")
    args = parser.parse_args()
    doChirps = args.chirps
    doNrt = args.nrt
    # if the user doesn't specify one product, assume they want both
    if (not doChirps) and (not doNrt):
        doChirps = True
        doNrt = True
    downloader = glam.Downloader()

    def _latest_completed(connection, product):
        """Return the most recent completed date (datetime.date) for `product`.

        `product` is always one of the fixed literals below, never user input,
        so direct interpolation into the SQL string is safe here.
        """
        return connection.execute(
            f"SELECT MAX(date) FROM product_status WHERE product='{product}' AND completed=1;"
        ).fetchone()[0]  # gets datetime.date object

    def _purge_superseded(product, cutoff):
        """Purge every S3 entry of `product` dated on or before `cutoff`."""
        for t in downloader.getAllS3(product):
            # t[1] is the file date as "YYYY-MM-DD"
            d_object = datetime.strptime(t[1], "%Y-%m-%d").date()
            if d_object <= cutoff:
                # NOTE(review): hard-coded credential; updateData() reads
                # os.environ['glam_purge_key'] instead -- consider unifying.
                glam.purge(*t, auth_key='geoglam!23')
                log.info(f"{product} {t[1]} <- purged")
            else:
                log.debug(f"{product} {t[1]} <- preserved")

    if doChirps:
        with downloader.engine.begin() as connection:
            latestChirps = _latest_completed(connection, 'chirps')
        log.info(
            f"Latest Chirps file: {latestChirps.strftime('%Y-%m-%d')} / {latestChirps.strftime('%Y.%j')}"
        )
        _purge_superseded('chirps-prelim', latestChirps)

    if doNrt:
        with downloader.engine.begin() as connection:
            latestMod09 = _latest_completed(connection, 'MOD09Q1')
            latestMyd09 = _latest_completed(connection, 'MYD09Q1')
        # NRT NDVI is redundant once BOTH 8-day products have caught up or passed it;
        # original behavior keyed off the later of the two, preserved here.
        latest8Day = max(latestMod09, latestMyd09)
        log.info(
            f"Latest 8-day NDVI file: {latest8Day.strftime('%Y-%m-%d')} / {latest8Day.strftime('%Y.%j')}"
        )
        _purge_superseded('MOD13Q4N', latest8Day)
def fillArchive():
    """Command-line entry point: pull any missing files from S3 to local archive.

    With --list_missing, only prints the missing (product, date) tuples;
    otherwise downloads each one into the given directory.
    """
    parser = argparse.ArgumentParser(
        description="pull any missing files from S3 to local archive")
    parser.add_argument(
        "directory",
        help="Path to directory where files of given product are stored")
    parser.add_argument("-l", "--list_missing", action='store_true',
                        help="List missing files and exit without downloading")
    args = parser.parse_args()
    downloader = glam.Downloader()
    # Materialize once: the original `len([t for t in missing])` would exhaust
    # a generator here, leaving nothing for the download loop below.
    missing = list(downloader.listMissing(args.directory))
    total = len(missing)
    if args.list_missing:
        for t in missing:
            print(t)
        log.info("Done. Missing files not downloaded.")
    else:
        for i, t in enumerate(missing, start=1):
            log.info(f"Pulling {t} | {i} of {total}")
            downloader.pullFromS3(*t, args.directory)
        log.info(f"Done. {args.directory} is up-to-date with S3.")
def main():
    """Command-line entry point: generate new statistics, in parallel.

    Two modes, selected by the internal -META flag:

    * Without -META (the user-facing mode): build one shell command per file
      (existing on disk and/or missing from S3), write them to a command file,
      and launch them through GNU parallel in the background.
    * With -META (set only by the commands this function generates): process a
      single file (or product/date pair, with -d) and exit.
    """
    ## parse arguments
    parser = argparse.ArgumentParser(
        description="Generate new statistics, in parallel")
    parser.add_argument(
        "file_directory",
        help="Directory where imagery files are / will be stored")
    parser.add_argument(
        "-al", "--admin_level", default="ALL",
        choices=["ALL", "GAUL", "BRAZIL"],
        help="Run statistics for only a subset of administrative divisions")
    parser.add_argument(
        "-as", "--admin_specified", choices=glam.admins,
        help="Run statistics for a single administrative division")
    parser.add_argument("-ml", "--mask_level", default="ALL",
                        choices=["ALL", "BRAZIL", "CROPMONITOR", "NOMASK"],
                        help="Run statistics for only a subset of crop masks")
    parser.add_argument("-ms", "--mask_specified", choices=glam.crops,
                        help="Run statistics for a single crop mask")
    parser.add_argument("-c", "--cores", required=True,
                        help="How many cores to use")
    parser.add_argument("-l", "--logfile",
                        help="Where to write output log messages")
    parser.add_argument(
        "-d", "--download_files", action='store_true',
        help="Download any files not currently on disk. Pull them from S3 before running stats. Requires product flag."
    )
    parser.add_argument(
        '-p', "--product",
        help="Which product to pull. Required if --download_files is set.")
    parser.add_argument("-s", "--save_results", action='store_true',
                        help="If set, does not delete downloaded files")
    parser.add_argument(
        "-mo", "--missing_only", action='store_true',
        help="Generate statistics only for files NOT currently on disk in file_directory"
    )
    parser.add_argument("-META", metavar='metastring', default=None,
                        action='store',
                        help="Do not set this flag. For internal use only.")
    args = parser.parse_args()

    ## META stores file path or "product.date"; set only by generated commands
    if args.META:
        if args.download_files:
            # Split on the FIRST dot only, so a date that itself contains
            # dots (e.g. "%Y.%j") is not truncated.
            product, date = args.META.split(".", 1)
            download_and_statsOneFile(product, date, args.file_directory,
                                      args.admin_level, args.mask_level,
                                      args.admin_specified,
                                      args.mask_specified, args.save_results)
        else:
            # META is a path to a file already on disk
            statsOneFile(args.META, args.admin_level, args.mask_level,
                         args.admin_specified, args.mask_specified)
        sys.exit()

    ## confirm argument validity
    # Plain `if` checks instead of assert: asserts are stripped under
    # `python -O`, which would silently skip this validation.
    # download_files requires product
    if args.download_files and not args.product:
        log.error("If download_files is set, a product must be specified.")
        sys.exit()
    # no overlap of BRAZIL crops and GAUL admins
    if args.mask_level == "BRAZIL" and args.admin_level == "GAUL":
        log.error("Can't generate Brazil crop stats for Gaul regions")
        sys.exit()
    # don't set both _level and _specified
    if args.admin_specified and (args.admin_level != "ALL"):
        log.error("Do not set both --admin_level and --admin_specified")
        sys.exit()
    if args.mask_specified and (args.mask_level != "ALL"):
        log.error("Do not set both --mask_level and --mask_specified")
        sys.exit()
    # warn the user if arguments are being ignored
    if not args.download_files:
        if args.save_results:
            log.warning("--download_files not set; --save_results flag ignored")
        if args.missing_only:
            log.warning("--download_files not set; --missing_only flag ignored")

    if not args.logfile:
        args.logfile = getUniqueFilename(TEMP_DIR, "gns_log.txt")

    # Pass-through flags shared by every generated command line
    base_command = (f"python {os.path.abspath(__file__)} {args.file_directory}"
                    f" -al {args.admin_level} -ml {args.mask_level} -c {args.cores}")
    flag_suffix = ""
    if args.save_results:
        flag_suffix += " -s"
    if args.admin_specified:
        flag_suffix += f" -as {args.admin_specified}"
    if args.mask_specified:
        flag_suffix += f" -ms {args.mask_specified}"

    lines = []
    ## perform stats on existing files
    extant = getAllTiffs(args.file_directory)
    if not args.missing_only:
        for f in extant:
            # append (not +=): `lines += line` would extend the list with
            # the string's individual characters
            lines.append(f"{base_command} -META '{f}'{flag_suffix}\n")

    ## download and process new files if requested
    if args.download_files:
        downloader = glam.Downloader()
        missing = downloader.listMissing(args.file_directory)
        if len(missing) == 0:
            log.info(
                f"No missing files; all available files on S3 are also in {args.file_directory}"
            )
        else:
            for t in missing:
                product, date = t
                meta = f"{product}.{date}"
                lines.append(f"{base_command} -d -META {meta}{flag_suffix} \n")
        downloader = None

    command_file = getUniqueFilename(TEMP_DIR, "gns_commands.txt")
    with open(command_file, 'w') as wf:
        wf.writelines(lines)
    # Run through a real shell: the previous argv-list form passed "&>" and
    # "&" as literal arguments (shell=False), so output was never redirected
    # and the job was never backgrounded. "> file 2>&1" is used instead of
    # the bash-only "&>" because sh may be a POSIX shell.
    shell_command = (
        f'nohup sh -c "cat {command_file} | parallel -j {args.cores}"'
        f' > {args.logfile} 2>&1 &'
    )
    print(shell_command)
    subprocess.call(shell_command, shell=True)
def updateData():
    """Command-line entry point: update GLAM system imagery data.

    Builds a work list of imagery -- either the items the database flags as
    missing (via glam.ToDoList), or every *.tif in --input_directory -- then
    for each item: download (unless a directory was given), ingest (unless
    --stats), upload statistics (unless --ingest), optionally update the
    anomaly baseline, and remove downloaded files (unless --output_directory
    is set). Failures on one item are logged and the loop continues.
    """
    ## parse arguments
    parser = argparse.ArgumentParser(
        description="Update GLAM system imagery data")
    parser.add_argument("-a", "--ancillary", action='store_true',
                        help="Only do ancillary data, not NDVI")
    parser.add_argument("-n", "--ndvi", action='store_true',
                        help="Only do NDVI data, not ancillary")
    parser.add_argument('-p', '--product', default=None, required=False,
                        choices=octvi.supported_products + glam.ancillary_products,
                        help="Only update the specified product")
    parser.add_argument("-ml", "--mask_level", default="ALL",
                        choices=["ALL", "BRAZIL", "CROPMONITOR", "NOMASK"],
                        help="Run statistics for only a subset of crop masks")
    parser.add_argument(
        "-al", "--admin_level", default="ALL",
        choices=["ALL", "GAUL", "BRAZIL"],
        help="Run statistics for only a subset of administrative regions")
    parser.add_argument("-i", "--ingest", action='store_true',
                        help="Ingest only, no stats generation")
    parser.add_argument('-s', "--stats", action='store_true',
                        help="Stats generation only, no ingest")
    parser.add_argument(
        '-nab', "--no_anomaly_baseline", action='store_true',
        help="Do not generate anomaly baseline for downloaded data")
    parser.add_argument(
        '-l', '--list_missing', action='store_true',
        help="Print list of missing imagery; do not download, ingest, or generate statistics"
    )
    parser.add_argument(
        "-id", "--input_directory", action="store",
        help="Run over a directory of existing files, rather than checking for new data"
    )
    parser.add_argument(
        '-u', "--universal", action='store_true',
        help="Run over all files, not just those that are flagged as missing")
    parser.add_argument(
        '-od', "--output_directory", default=None,
        help="Save downloaded files to a directory on disk rather than deleting them."
    )
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Display more messages; print traceback on failure")
    args = parser.parse_args()

    ## confirm exclusivity
    # NOTE(review): these assert-based checks are stripped under `python -O`,
    # which would silently disable the mutual-exclusion validation.
    try:
        if args.ancillary:
            assert not args.ndvi
            assert not args.product
        elif args.ndvi:
            assert not args.ancillary
            assert not args.product
        elif args.product:
            assert not args.ancillary
            assert not args.ndvi
    except AssertionError:
        raise glam.BadInputError(
            "--ancillary, --product, and --ndvi are mutually exclusive")
    try:
        if args.ingest:
            assert not args.stats
        elif args.stats:
            assert not args.ingest
    except AssertionError:
        raise glam.BadInputError("--ingest and --stats are mutually exclusive")
    try:
        if args.universal:
            assert not args.list_missing
        elif args.list_missing:
            assert not args.universal
    except AssertionError:
        raise glam.BadInputError(
            "--list_missing and --universal are mutually exclusive")
    if args.output_directory is not None and args.product is None:
        raise glam.BadInputError(
            "Use of --output_directory requires that --product be set")

    ## verbosity stuff
    def speak(message, cutoff=1):
        # Log at INFO when verbosity reaches `cutoff`, otherwise at DEBUG
        if args.verbose >= cutoff:
            log.info(message)
        else:
            log.debug(message)
    speak(f"Running with verbosity level {args.verbose}")

    ## set anomaly baseline script name
    # Helper script expected to live next to this file
    anomaly_script = os.path.join(os.path.dirname(__file__),
                                  "add_file_to_anomaly_baseline.py")

    ## get toDoList or directory listing
    # toDoList: items come from the database; presumably (product, date)
    # tuples -- TODO confirm against glam.ToDoList
    if not args.input_directory:
        missing = glam.ToDoList()
        if not args.universal:
            missing.filterUnavailable()
        downloader = glam.Downloader()
        if args.output_directory is not None:
            tempDir = args.output_directory
        else:
            # Default scratch directory next to this script
            tempDir = os.path.join(os.path.dirname(__file__), "temp")
            try:
                os.mkdir(tempDir)
            except FileExistsError:
                pass
    # directory listing: items are (product, date, (path,)) triples built
    # from the files already on disk
    else:
        dirFiles = glob.glob(os.path.join(args.input_directory, "*.tif"))
        missing = []
        for f in dirFiles:
            img = glam.getImageType(f)(f)
            missing.append((img.product, img.date, tuple([img.path])))

    try:
        j = 0
        # len() via comprehension: `missing` may be a ToDoList rather than a
        # plain list -- TODO confirm it supports re-iteration
        l = len([f for f in missing])
        for f in missing:
            j += 1
            product = f[0]
            # skip items excluded by the product-type filters
            if product in octvi.supported_products and args.ancillary:
                continue
            if product in glam.ancillary_products and args.ndvi:
                continue
            if args.product and product != args.product:
                continue
            if args.list_missing:
                # listing mode: print and move on, no processing
                print("{0} {1}".format(*f))
                continue
            # NOTE(review): when f is a 3-tuple (input_directory mode), *f
            # contributes three values, so {2}/{3} render the paths tuple and
            # j instead of "j of l" -- verify intended output.
            log.info("{0} {1}, {2} of {3}".format(*f, j, l))
            try:
                # no directory given; pull from source
                if not args.input_directory:
                    if product in octvi.supported_products:
                        # CHECKSUM!!!!! Current threshold for NDVI mosaic: 1GB
                        # Re-download until the mosaic exceeds the size
                        # threshold, at most three tries.
                        pathSize = 0
                        tries = 1
                        sizeThreshold = 1000000000
                        while pathSize < sizeThreshold:  # threshold
                            # Remove the previous undersized attempt.
                            # NOTE(review): on the first pass `paths` is not
                            # yet bound; the bare except swallows that
                            # NameError along with FileNotFoundError.
                            try:
                                os.remove(paths[0])
                            except:
                                pass
                            if tries > 3:  # don't try more than three times
                                raise glam.UnavailableError(
                                    "File size less than 1GB after 3 tries")
                            paths = downloader.pullFromSource(*f, tempDir)
                            try:
                                pathSize = os.path.getsize(paths[0])
                                if pathSize < sizeThreshold:
                                    log.warning(
                                        f"File size of {pathSize} bytes below threshold"
                                    )
                            except IndexError:
                                # empty paths tuple: nothing was downloaded
                                raise glam.UnavailableError("No file detected")
                            tries += 1  # increment tries
                    else:
                        paths = downloader.pullFromSource(*f, tempDir)
                    # check that at least one file was downloaded
                    if len(paths) < 1:
                        raise glam.UnavailableError("No file detected")
                    speak("-downloaded")
                # directory provided; use paths on disk
                else:
                    paths = f[2]
                # iterate over file paths
                for p in paths:
                    speak(p)
                    image = glam.getImageType(p)(p)
                    # A final chirps file supersedes its preliminary
                    # counterpart; purge the prelim only on a full run
                    # (no ingest/stats restriction, no level subsetting).
                    if (image.product == 'chirps') and (not args.stats) and (
                            not args.ingest) and (
                            args.mask_level == "ALL") and (args.admin_level == "ALL"):
                        speak("-purging corresponding chirps-prelim product")
                        try:
                            glam.purge('chirps-prelim', image.date,
                                       os.environ['glam_purge_key'])
                        except KeyError:
                            # purge credential comes from the environment
                            log.warning(
                                "glam_purge_key not set. Chirps preliminary product not purged."
                            )
                    image.setStatus('downloaded', True)
                    speak(f"-collection: {image.collection}", 2)
                    if not args.stats:
                        image.ingest()
                        image.setStatus('processed', True)
                        speak("--ingested")
                    if not args.ingest:
                        image.uploadStats(crop_level=args.mask_level,
                                          admin_level=args.admin_level)
                        image.setStatus('statGen', True)
                        speak("--stats generated")
                        # generate anomaly baselines
                        # (skipped for products with no baseline)
                        if (not args.no_anomaly_baseline) and (
                                not (image.product in ["mera-2", "chirps-prelim", "MOD13Q4N"])):
                            anomaly_args = [
                                "python", anomaly_script, p, "-n", "20"
                            ]
                            try:
                                subprocess.call(anomaly_args)
                                speak("--anomaly baseline updated")
                            except:
                                log.exception(
                                    "Failed to generate anomaly baseline")
                    if args.output_directory is None:
                        # downloaded file no longer needed
                        os.remove(p)
                        speak("--file removed")
            except glam.UnavailableError:
                log.info("(No file available)")
            except:
                # best-effort loop: log and continue with the next item;
                # full traceback only when verbose
                if args.verbose > 0:
                    log.exception("(FAILED)")
                else:
                    log.error("(FAILED)")
    finally:
        # clear the scratch directory unless the user supplied the files
        # or asked to keep downloads
        if not args.input_directory and args.output_directory is None:
            for f in glob.glob(os.path.join(tempDir, "*")):
                os.remove(f)