Example #1
def download_and_statsOneFile(product,
                              date,
                              directory,
                              admin_level,
                              mask_level,
                              admin_specified=None,
                              mask_specified=None,
                              save=False) -> bool:
    """Wrapper for statsOneFile in case downloading is required"""
    paths = ()
    try:
        log.info(f"Downloading {product} {date}")
        downloader = glam.Downloader()
        paths = downloader.pullFromS3(product, date, directory)
        log.info(f"Processing {product} {date}")
        for p in paths:
            assert statsOneFile(file_path=p,
                                admin_level=admin_level,
                                mask_level=mask_level,
                                admin_specified=admin_specified,
                                mask_specified=mask_specified)
        downloader = None
        return True
    except Exception:
        log.exception(f"{product} {date} FAILED")
        return False
    finally:
        if not save:
            for p in paths:
                os.remove(p)
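
A minimal usage sketch of the wrapper above, assuming the module can be imported with its glam, log, and statsOneFile dependencies available; the product, date, and directory values are purely illustrative.

# Hypothetical direct call; product, date, and directory are illustrative values.
ok = download_and_statsOneFile(product="MOD09Q1",
                               date="2020-01-01",
                               directory="/tmp/glam_archive",
                               admin_level="ALL",
                               mask_level="ALL",
                               save=False)
if not ok:
    log.error("Stats generation failed; see the exception logged above")
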
def clean():
    parser = argparse.ArgumentParser(
        description="Remove redundant chirps-prelim and/or NRT data")
    parser.add_argument("-c",
                        "--chirps",
                        action='store_true',
                        help="Clean chirps-prelim data specifically")
    parser.add_argument("-n",
                        "--nrt",
                        action='store_true',
                        help="Clean NRT NDVI data specifically")
    args = parser.parse_args()

    doChirps = args.chirps
    doNrt = args.nrt

    # if the user doesn't specify one product, assume they want both
    if (not doChirps) and (not doNrt):
        doChirps = True
        doNrt = True

    downloader = glam.Downloader()

    if doChirps:
        with downloader.engine.begin() as connection:
            latestChirps = connection.execute(
                "SELECT MAX(date) FROM product_status WHERE product='chirps' AND completed=1;"
            ).fetchone()[0]  # gets datetime.date object
        log.info(
            f"Latest Chirps file: {latestChirps.strftime('%Y-%m-%d')} / {latestChirps.strftime('%Y.%j')}"
        )
        allPrelim = downloader.getAllS3('chirps-prelim')
        for ct in allPrelim:
            d_object = datetime.strptime(ct[1], "%Y-%m-%d").date()
            if d_object <= latestChirps:
                glam.purge(*ct, auth_key='geoglam!23')
                log.info(f"chirps-prelim {ct[1]} <- purged")
            else:
                log.debug(f"chirps-prelim {ct[1]} <- preserved")

    if doNrt:
        with downloader.engine.begin() as connection:
            latestMod09 = connection.execute(
                "SELECT MAX(date) FROM product_status WHERE product='MOD09Q1' AND completed=1;"
            ).fetchone()[0]  # gets datetime.date object
            latestMyd09 = connection.execute(
                "SELECT MAX(date) FROM product_status WHERE product='MYD09Q1' AND completed=1;"
            ).fetchone()[0]  # gets datetime.date object
        latest8Day = max(latestMod09, latestMyd09)
        log.info(
            f"Latest 8-day NDVI file: {latest8Day.strftime('%Y-%m-%d')} / {latest8Day.strftime('%Y.%j')}"
        )
        allNrt = downloader.getAllS3('MOD13Q4N')
        for nt in allNrt:
            d_object = datetime.strptime(nt[1], "%Y-%m-%d").date()
            if d_object <= latest8Day:
                glam.purge(*nt, auth_key='geoglam!23')
                log.info(f"MOD13Q4N {nt[1]} <- purged")
            else:
                log.debug(f"MOD13Q4N {nt[1]} <- preserved")
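
Because clean() reads its options straight from the command line, a quick way to exercise it from Python is to stage sys.argv before the call; the argv[0] value is a placeholder and the flags are the ones defined above.

import sys

# Purge only superseded chirps-prelim dates.
sys.argv = ["clean", "--chirps"]
clean()

# With no flags, both chirps-prelim and NRT NDVI (MOD13Q4N) are purged.
sys.argv = ["clean"]
clean()
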
def fillArchive():
    parser = argparse.ArgumentParser(
        description="pull any missing files from S3 to local archive")
    parser.add_argument(
        "directory",
        help="Path to directory where files of given product are stored")
    parser.add_argument("-l",
                        "--list_missing",
                        action='store_true',
                        help="List missing files and exit without downloading")
    args = parser.parse_args()
    downloader = glam.Downloader()
    missing = downloader.listMissing(args.directory)
    l = len(missing)
    if args.list_missing:
        for t in missing:
            print(t)
        log.info("Done. Missing files not downloaded.")
    else:
        i = 0
        for t in missing:
            i += 1
            log.info(f"Pulling {t} | {i} of {l}")
            downloader.pullFromS3(*t, args.directory)
        log.info(f"Done. {args.directory} is up-to-date with S3.")
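
fillArchive() follows the same argparse pattern; here is a sketch of its two modes using the same sys.argv staging, with an archive path that is illustrative only.

import sys

# List files present on S3 but missing locally, without downloading anything.
sys.argv = ["fillArchive", "/data/glam/archive", "--list_missing"]
fillArchive()

# Pull every missing file so the local archive matches S3.
sys.argv = ["fillArchive", "/data/glam/archive"]
fillArchive()
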
Example #4
def main():
    ## parse arguments
    parser = argparse.ArgumentParser(
        description="Generate new statistics, in parallel")
    parser.add_argument(
        "file_directory",
        help="Directory where imagery files are / will be stored")
    parser.add_argument(
        "-al",
        "--admin_level",
        default="ALL",
        choices=["ALL", "GAUL", "BRAZIL"],
        help="Run statistics for only a subset of administrative divisions")
    parser.add_argument(
        "-as",
        "--admin_specified",
        choices=glam.admins,
        help="Run statistics for a single administrative division")
    parser.add_argument("-ml",
                        "--mask_level",
                        default="ALL",
                        choices=["ALL", "BRAZIL", "CROPMONITOR", "NOMASK"],
                        help="Run statistics for only a subset of crop masks")
    parser.add_argument("-ms",
                        "--mask_specified",
                        choices=glam.crops,
                        help="Run statistics for a single crop mask")
    parser.add_argument("-c",
                        "--cores",
                        required=True,
                        help="How many cores to use")
    parser.add_argument("-l",
                        "--logfile",
                        help="Where to write output log messages")
    parser.add_argument(
        "-d",
        "--download_files",
        action='store_true',
        help=
        "Download any files not currently on disk. Pull them from S3 before running stats. Requires product flag."
    )
    parser.add_argument(
        '-p',
        "--product",
        help="Which product to pull. Required if --download_files is set.")
    parser.add_argument("-s",
                        "--save_results",
                        action='store_true',
                        help="If set, does not delete downloaded files")
    parser.add_argument(
        "-mo",
        "--missing_only",
        action='store_true',
        help=
        "Generate statistics only for files NOT currently on disk in file_directory"
    )
    parser.add_argument("-META",
                        metavar='metastring',
                        default=None,
                        action='store',
                        help="Do not set this flag. For internal use only.")

    args = parser.parse_args()

    ## META stores file path or product/date as json
    if args.META:
        meta_parts = args.META.split(".")
        if args.download_files:
            product = meta_parts[0]
            date = meta_parts[1]
            #print(product)
            #print(date)
            download_and_statsOneFile(product, date, args.file_directory,
                                      args.admin_level, args.mask_level,
                                      args.admin_specified,
                                      args.mask_specified, args.save_results)
        else:
            in_file = args.META
            #print(in_file)
            statsOneFile(in_file, args.admin_level, args.mask_level,
                         args.admin_specified, args.mask_specified)
        sys.exit()

    ## confirm argument validity
    # download_files requires product
    try:
        if args.download_files:
            assert args.product
    except AssertionError:
        log.error("If download_files is set, a product must be specified.")
        sys.exit()
    # no overlap of BRAZIL crops and GAUL admins
    if args.mask_level == "BRAZIL" and args.admin_level == "GAUL":
        log.error("Can't generate Brazil crop stats for Gaul regions")
        sys.exit()
    # don't set both _level and _specified
    if args.admin_specified and (args.admin_level != "ALL"):
        log.error("Do not set both --admin_level and --admin_specified")
        sys.exit()
    if args.mask_specified and (args.mask_level != "ALL"):
        log.error("Do not set both --mask_level and --mask_specified")
        sys.exit()
    # warn the user if arguments are being ignored
    if not args.download_files:
        try:
            assert not args.save_results
        except AssertionError:
            log.warning(
                "--download_files not set; --save_results flag ignored")
        try:
            assert not args.missing_only
        except AssertionError:
            log.warning(
                "--download_files not set; --missing_only flag ignored")
    if not args.logfile:
        args.logfile = getUniqueFilename(TEMP_DIR, "gns_log.txt")

    lines = []
    ## perform stats on existing files
    extant = getAllTiffs(args.file_directory)
    if not args.missing_only:
        for f in extant:
            line = f"python {os.path.abspath(__file__)} {args.file_directory} -al {args.admin_level} -ml {args.mask_level} -c {args.cores} -META '{f}'"
            if args.save_results:
                line += " -s"
            if args.admin_specified:
                line += f" -as {args.admin_specified}"
            if args.mask_specified:
                line += f" -ms {args.mask_specified}"
            line += "\n"
            lines.append(line)

    ## download and process new files if requested
    if args.download_files:
        downloader = glam.Downloader()
        onDisk = [getProductDateTuple(f) for f in extant]
        #available = downloader.getAllS3(product=args.product)
        #missing = [t for t in available if t not in onDisk]
        missing = downloader.listMissing(args.file_directory)
        if len(missing) == 0:
            log.info(
                f"No missing files; all available files on S3 are also in {args.file_directory}"
            )
        else:
            for t in missing:
                product, date = t
                meta = f"{product}.{date}"
                line = f"python {os.path.abspath(__file__)} {args.file_directory} -al {args.admin_level} -ml {args.mask_level} -c {args.cores} -d -META {meta}"
                if args.save_results:
                    line += " -s"
                if args.admin_specified:
                    line += f" -as {args.admin_specified}"
                if args.mask_specified:
                    line += f" -ms {args.mask_specified}"
                line += "\n"
                lines.append(line)

            downloader = None
    command_file = getUniqueFilename(TEMP_DIR, "gns_commands.txt")

    with open(command_file, 'w') as wf:
        wf.writelines(lines)
    # Launch all commands through GNU parallel in the background, detached from
    # this process, with combined output going to the log file.
    shell_call = (f"nohup sh -c 'cat {command_file} | parallel -j {args.cores}'"
                  f" > {args.logfile} 2>&1 &")
    print(shell_call)
    subprocess.call(shell_call, shell=True)
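
A sketch of the -META round trip that main() relies on, assuming the script is saved as generate_new_stats.py (a hypothetical file name), GNU parallel is installed, and the path, product, and date are illustrative.

import subprocess

# Parent run: writes one worker command per file to gns_commands.txt and
# hands the whole batch to GNU parallel in the background.
subprocess.call([
    "python", "generate_new_stats.py", "/data/glam/MOD09Q1",
    "-c", "8", "-d", "-p", "MOD09Q1"
])

# Worker run, as written into gns_commands.txt: -META carries "product.date",
# so main() short-circuits into download_and_statsOneFile() and exits.
subprocess.call([
    "python", "generate_new_stats.py", "/data/glam/MOD09Q1",
    "-c", "8", "-d", "-META", "MOD09Q1.2020-01-01"
])
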
def updateData():
    ## parse arguments
    parser = argparse.ArgumentParser(
        description="Update GLAM system imagery data")
    parser.add_argument("-a",
                        "--ancillary",
                        action='store_true',
                        help="Only do ancillary data, not NDVI")
    parser.add_argument("-n",
                        "--ndvi",
                        action='store_true',
                        help="Only do NDVI data, not ancillary")
    parser.add_argument('-p',
                        '--product',
                        default=None,
                        required=False,
                        choices=octvi.supported_products +
                        glam.ancillary_products,
                        help="Only update the specified product")
    parser.add_argument("-ml",
                        "--mask_level",
                        default="ALL",
                        choices=["ALL", "BRAZIL", "CROPMONITOR", "NOMASK"],
                        help="Run statistics for only a subset of crop masks")
    parser.add_argument(
        "-al",
        "--admin_level",
        default="ALL",
        choices=["ALL", "GAUL", "BRAZIL"],
        help="Run statistics for only a subset of administrative regions")
    parser.add_argument("-i",
                        "--ingest",
                        action='store_true',
                        help="Ingest only, no stats generation")
    parser.add_argument('-s',
                        "--stats",
                        action='store_true',
                        help="Stats generation only, no ingest")
    parser.add_argument(
        '-nab',
        "--no_anomaly_baseline",
        action='store_true',
        help="Do not generate anomaly baseline for downloaded data")
    parser.add_argument(
        '-l',
        '--list_missing',
        action='store_true',
        help=
        "Print list of missing imagery; do not download, ingest, or generate statistics"
    )
    parser.add_argument(
        "-id",
        "--input_directory",
        action="store",
        help=
        "Run over a directory of existing files, rather than checking for new data"
    )
    parser.add_argument(
        '-u',
        "--universal",
        action='store_true',
        help="Run over all files, not just those that are flagged as missing")
    parser.add_argument(
        '-od',
        "--output_directory",
        default=None,
        help=
        "Save downloaded files to a directory on disk rather than deleting them."
    )
    parser.add_argument(
        '-v',
        '--verbose',
        action='count',
        default=0,
        help="Display more messages; print traceback on failure")
    args = parser.parse_args()

    ## confirm exclusivity
    try:
        if args.ancillary:
            assert not args.ndvi
            assert not args.product
        elif args.ndvi:
            assert not args.ancillary
            assert not args.product
        elif args.product:
            assert not args.ancillary
            assert not args.ndvi
    except AssertionError:
        raise glam.BadInputError(
            "--ancillary, --product, and --ndvi are mutually exclusive")
    try:
        if args.ingest:
            assert not args.stats
        elif args.stats:
            assert not args.ingest
    except AssertionError:
        raise glam.BadInputError("--ingest and --stats are mutually exclusive")
    try:
        if args.universal:
            assert not args.list_missing
        elif args.list_missing:
            assert not args.universal
    except AssertionError:
        raise glam.BadInputError(
            "--list_missing and --universal are mutually exclusive")
    if args.output_directory is not None and args.product is None:
        raise glam.BadInputError(
            "Use of --output_directory requires that --product be set")

    ## verbosity stuff
    def speak(message, cutoff=1):
        if args.verbose >= cutoff:
            log.info(message)
        else:
            log.debug(message)

    speak(f"Running with verbosity level {args.verbose}")

    ## set anomaly baseline script name
    anomaly_script = os.path.join(os.path.dirname(__file__),
                                  "add_file_to_anomaly_baseline.py")

    ## get toDoList or directory listing
    # toDoList
    if not args.input_directory:
        missing = glam.ToDoList()
        if not args.universal:
            missing.filterUnavailable()
        downloader = glam.Downloader()
        if args.output_directory is not None:
            tempDir = args.output_directory
        else:
            tempDir = os.path.join(os.path.dirname(__file__), "temp")
        try:
            os.mkdir(tempDir)
        except FileExistsError:
            pass
    # directory listing
    else:
        dirFiles = glob.glob(os.path.join(args.input_directory, "*.tif"))
        missing = []
        for f in dirFiles:
            img = glam.getImageType(f)(f)
            missing.append((img.product, img.date, tuple([img.path])))
    try:
        j = 0
        l = len([f for f in missing])
        for f in missing:
            j += 1
            product = f[0]
            if product in octvi.supported_products and args.ancillary:
                continue
            if product in glam.ancillary_products and args.ndvi:
                continue
            if args.product and product != args.product:
                continue
            if args.list_missing:
                print("{0} {1}".format(*f))
                continue
            log.info("{0} {1}, {2} of {3}".format(f[0], f[1], j, l))
            try:
                # no directory given; pull from source
                if not args.input_directory:
                    if product in octvi.supported_products:
                        # Size check: current threshold for an NDVI mosaic is 1 GB
                        pathSize = 0
                        tries = 1
                        sizeThreshold = 1000000000
                        paths = []
                        while pathSize < sizeThreshold:
                            # remove the undersized file from the previous attempt, if any
                            if paths:
                                try:
                                    os.remove(paths[0])
                                except OSError:
                                    pass
                            if tries > 3:  # don't try more than three times
                                raise glam.UnavailableError(
                                    "File size less than 1GB after 3 tries")
                            paths = downloader.pullFromSource(*f, tempDir)
                            try:
                                pathSize = os.path.getsize(paths[0])
                                if pathSize < sizeThreshold:
                                    log.warning(
                                        f"File size of {pathSize} bytes below threshold"
                                    )
                            except IndexError:
                                raise glam.UnavailableError("No file detected")
                            tries += 1  # increment tries
                    else:
                        paths = downloader.pullFromSource(*f, tempDir)
                        # check that at least one file was downloaded
                        if len(paths) < 1:
                            raise glam.UnavailableError("No file detected")
                        speak("-downloaded")
                # directory provided; use paths on disk
                else:
                    paths = f[2]
                # iterate over file paths
                for p in paths:
                    speak(p)
                    image = glam.getImageType(p)(p)
                    if (image.product == 'chirps' and not args.stats
                            and not args.ingest and args.mask_level == "ALL"
                            and args.admin_level == "ALL"):
                        speak("-purging corresponding chirps-prelim product")
                        try:
                            glam.purge('chirps-prelim', image.date,
                                       os.environ['glam_purge_key'])
                        except KeyError:
                            log.warning(
                                "glam_purge_key not set. Chirps preliminary product not purged."
                            )
                    image.setStatus('downloaded', True)
                    speak(f"-collection: {image.collection}", 2)
                    if not args.stats:
                        image.ingest()
                        image.setStatus('processed', True)
                        speak("--ingested")
                    if not args.ingest:
                        image.uploadStats(crop_level=args.mask_level,
                                          admin_level=args.admin_level)
                        image.setStatus('statGen', True)
                        speak("--stats generated")
                    # generate anomaly baselines
                    if not args.no_anomaly_baseline and image.product not in (
                            "mera-2", "chirps-prelim", "MOD13Q4N"):
                        anomaly_args = [
                            "python", anomaly_script, p, "-n", "20"
                        ]
                        try:
                            subprocess.call(anomaly_args)
                            speak("--anomaly baseline updated")
                        except Exception:
                            log.exception(
                                "Failed to generate anomaly baseline")
                    if args.output_directory is None:
                        os.remove(p)
                        speak("--file removed")
            except glam.UnavailableError:
                log.info("(No file available)")
            except Exception:
                if args.verbose > 0:
                    log.exception("(FAILED)")
                else:
                    log.error("(FAILED)")
    finally:
        if not args.input_directory and args.output_directory is None:
            for f in glob.glob(os.path.join(tempDir, "*")):
                os.remove(f)
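
Finally, a hedged sketch of driving updateData() the same way; "chirps" is assumed to be among the registered product choices, and both invocations are illustrative.

import sys

# Dry run: print the missing chirps imagery without downloading or ingesting.
sys.argv = ["updateData", "--product", "chirps", "--list_missing"]
updateData()

# NDVI only, skip the anomaly baselines, and log more verbosely.
sys.argv = ["updateData", "--ndvi", "--no_anomaly_baseline", "-v"]
updateData()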