Example #1
def run_pynotebook(pyfile: str):
    """ Wraps the execution of a python3 script

    Parameters
    ----------
    pyfile : str
        The path and filename of the python3 script to run.
    """
    log.info(f'Running {pyfile}')

    try:
        result = subprocess.run(['python3', pyfile],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT,
                                text=True)
        print(result.stdout)
        if result.returncode != 0:
            log.error(f'{pyfile} returned non zero exit ...')
            traceback.print_stack()
            raise signals.FAIL()

    except signals.FAIL:
        # Re-raise the signal unchanged so the FAIL above is not
        # caught by the generic handler and logged a second time
        raise
    except Exception:
        log.exception(f'{pyfile} caused an exception ...')
        traceback.print_stack()
        raise signals.FAIL()

    return
Example #2
    def remote_mount(self, hosts: list):
        """ Create the symbolic link on each host

        Parameters
        ----------
        hosts : list of str
          The list of remote hosts
        """

        for host in hosts:
            result = subprocess.run(['ssh', host, 'sudo', 'rm', '-Rf', self.mountpath],
                                    stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
            if result.returncode != 0:
                print(result.stdout)
                log.warning(f'Unable to remove {self.mountpath} on {host}')

            try:
                result = subprocess.run(['ssh', host, 'sudo', 'ln', '-s', self.mount, self.mountpath],
                                        stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

                if result.returncode != 0:
                    print(result.stdout)
                    log.error(f'Unable to mount scratch disk on {host}')
                    raise signals.FAIL()

            except signals.FAIL:
                raise
            except Exception:
                log.exception(f'Unable to mount scratch disk on {host}')
                traceback.print_stack()
                raise signals.FAIL()
        return
Example #3
    def remote_mount(self, hosts: list):
        """ Mount this FSx disk on remote hosts 

        Parameters
        ----------
        hosts : list of str
          The list of remote hosts
        """

        # TODO: synchronization issue if two new jobs are started at the same time
        #       We need to make sure that the FSx disk is already spun up otherwise this will fail
        for host in hosts:
            try:
                result = subprocess.run(
                    ['ssh', host, 'sudo', 'mount', '-t', 'lustre', '-o',
                     'noatime,flock', f'{self.dnsname}@tcp:/{self.mountname}',
                     self.mountpath],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT)
                if result.returncode != 0:
                    print(result.stdout)
                    log.error(f'unable to mount scratch disk on host: {host}')
                    raise signals.FAIL()
            except signals.FAIL:
                raise
            except Exception:
                log.exception(f'unable to mount scratch disk on host: {host}')
                traceback.print_stack()
                raise signals.FAIL()
        return
Example #4
def create_scratch(provider: str,
                   configfile: str,
                   mountpath: str = '/ptmp') -> ScratchDisk:
    """ Provides a high speed scratch disk if available. Creates and mounts the disk.

    Parameters
    ----------
    provider : str
      Name of an implemented provider.

    configfile : str
      The Job configuration file.

    mountpath : str
      The path where the disk will be mounted. Defaults to '/ptmp'.

    Returns
    -------
    scratch : ScratchDisk
      Returns the ScratchDisk object

    """

    if provider == 'FSx':
        scratch = FSxScratchDisk(configfile)
    elif provider == 'NFS':
        scratch = NFSScratchDisk(configfile)
    elif provider == 'Local':
        log.error('Coming soon ...')
        raise signals.FAIL()
    else:
        log.error('Unsupported provider')
        raise signals.FAIL()

    scratch.create(mountpath)
    return scratch
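
A hedged usage sketch of this factory from calling code; the config path is illustrative, and pairing it with delete() in a try/finally (see Example #19) is an assumption about intended use:

scratch = create_scratch('FSx', configfile='configs/job.config', mountpath='/ptmp')
try:
    ...  # stage data on the fast scratch disk at /ptmp
finally:
    scratch.delete()  # release the disk when the job is done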
Example #5
    def _verify_workspace_requirements(self, workspace, families):
        """ Check that an existing workspace family mets the required families

        Parameters
        ----------
        workspace: dict
            Dictionary representation of workspace details.
        families: dict
            Dictionary of family to version number with the required metadata
            families and versions. A ``None`` value means `"latest"`.

        Returns
        -------
        None

        Raises
        ------
        prefect.signals.PrefectStateSignal
            A :py:class:`prefect.signals.FAIL` signal when the verification fails.

        """
        for name, version in families.items():
            if name not in workspace['families']:
                raise signals.FAIL('Workspace exists but does not have '
                                   'the required families')
            # version == None means that we want the latest.
            # No check is done but we could implement something more strict later
            # Compare against the version recorded in the workspace, not the
            # requirement itself (families[name] is just `version` again).
            if version is not None and version > workspace['families'][name]:
                raise signals.FAIL('Workspace does not meet family version '
                                   'requirement')
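
A small worked example of the check with hypothetical data: the first call passes (None means latest, so no version check), while the second raises signals.FAIL because the workspace's standard family is at version 3 but version 5 is required:

workspace = {'families': {'base': 1, 'standard': 3}}

self._verify_workspace_requirements(workspace, {'base': None})   # passes
self._verify_workspace_requirements(workspace, {'standard': 5})  # raises FAIL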
Example #6
def forecast_run(cluster: Cluster, job: Job):
    """ Run the forecast

    Parameters
    ----------
    cluster : Cluster
        The cluster to run on
    job : Job
        The job to run
    """
    PPN = cluster.getCoresPN()

    # Easier to read
    CDATE = job.CDATE
    HH = job.HH
    OFS = job.OFS
    NPROCS = job.NPROCS
    OUTDIR = job.OUTDIR
    #EXEC = job.EXEC

    runscript = f"{curdir}/fcst_launcher.sh"

    try:
        HOSTS = cluster.getHostsCSV()
    except Exception as e:
        log.exception('In driver: exception retrieving list of hostnames: ' +
                      str(e))
        raise signals.FAIL()

    try:

        if OFS == "adnoc":
            time.sleep(60)
            result = subprocess.run([runscript, CDATE, HH, OUTDIR, str(NPROCS), str(PPN), HOSTS, OFS, job.EXEC], \
                                    stderr=subprocess.STDOUT)
        else:
            result = subprocess.run([runscript, CDATE, HH, OUTDIR, str(NPROCS), str(PPN), HOSTS, OFS], \
                                    stderr=subprocess.STDOUT)

        if result.returncode != 0:
            log.exception(f'Forecast failed ... result: {result.returncode}')
            raise signals.FAIL()

    except Exception as e:
        log.exception('In driver: Exception during subprocess.run :' + str(e))
        raise signals.FAIL()

    log.info('Forecast finished successfully')

    curfcst = f"{job.COMROT}/current.fcst"
    with open(curfcst, 'w') as cf:
        cf.write(f"{OFS}.{CDATE}{HH}\n")

    return
Example #7
def run_workflow(parametrised_workflow: Tuple[Flow, Dict[str, Any]]) -> None:
    """
    Run a workflow.

    Parameters
    ----------
    parametrised_workflow : tuple (prefect.Flow, dict)
        Workflow to run, and parameters to run it with.

    Notes
    -----

    The workflow will run once, starting immediately. If the workflow has a
    schedule, the schedule will be ignored.
    """
    workflow, parameters = parametrised_workflow
    prefect.context.logger.info(
        f"Running workflow '{workflow.name}' with parameters {parameters}.")
    state = workflow.run(parameters=parameters, run_on_schedule=False)
    if state.is_successful():
        prefect.context.logger.info(
            f"Workflow '{workflow.name}' ran successfully with parameters {parameters}."
        )
    else:
        raise signals.FAIL(
            f"Workflow '{workflow.name}' failed when run with parameters {parameters}."
        )
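
Callers pass the flow and its parameters as a single tuple; a minimal sketch of building that argument for a hypothetical flow (run_workflow reads a logger from prefect.context, so it is normally invoked from within a task or flow run):

from prefect import Flow, Parameter

with Flow('greet') as greet_flow:
    name = Parameter('name')

run_workflow((greet_flow, {'name': 'world'}))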
Example #8
def signal_task(message):
    if message == 'go!':
        raise signals.SUCCESS(message='going!')
    elif message == 'stop!':
        raise signals.FAIL(message='stopping!')
    elif message == 'skip!':
        raise signals.SKIP(message='skipping!')
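
A minimal sketch of how such a task behaves in a flow, assuming the Prefect 0.x/1.x API that prefect.engine.signals belongs to; prefect.task wraps the plain function above:

from prefect import task, Flow

signal = task(signal_task)

with Flow('signal-demo') as flow:
    outcome = signal('stop!')

state = flow.run()                        # the flow run ends Failed
print(state.result[outcome].is_failed())  # True: the task raised signals.FAIL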
Example #9
def _acquire(mountpath: str):
    tries = 0
    maxtries = 3
    delay = 0.1

    lockpath = f'{_LOCKROOT}{mountpath}'
    lockfile = f'{lockpath}/.lockctl'
    if not os.path.exists(lockpath):
        os.makedirs(lockpath)

    while tries < maxtries:
        try:
            # os.O_CREAT | os.O_EXCL makes creation atomic: the open fails if
            # the lockfile already exists, avoiding a check-then-create race
            fd = os.open(lockfile, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
            os.close(fd)
            return
        except FileExistsError:
            # Some other process is holding the lock; retry after a delay
            time.sleep(delay)
            tries += 1

    log.error(
        f'ERROR: Unable to obtain lock on {lockfile}. You may need to delete it.'
    )
    traceback.print_stack()
    raise signals.FAIL()
Example #10
    def cluster(self, configfile):
        """ Creates a new Cluster object

        Parameters
        ----------
        configfile : string
            Full path and filename of a JSON configuration file for this cluster.

        Returns
        -------
        newcluster : Cluster
            Returns a new instance of a Cluster implementation.
        """

        cfdict = self.readconfig(configfile)

        provider = cfdict['platform']

        if provider == 'AWS':

            log.info(f'Attempting to make a new cluster : {provider}')
            try:
                newcluster = AWSCluster(configfile)
            except Exception as e:
                log.exception('Could not create cluster: ' + str(e))
                raise signals.FAIL()
        elif provider == 'Local':
            newcluster = LocalCluster(configfile)
        else:
            # Guard against an unbound newcluster when the platform is unknown
            log.error(f'Unsupported platform: {provider}')
            raise signals.FAIL()

        log.info(f"Created new {provider} cluster")
        return newcluster
Example #11
    def run(self, threshold: int) -> int:
        r = Random()
        v = r.randint(a=self.min, b=self.max)
        if v > threshold:
            raise signals.FAIL(message=f'{v} is greater than {threshold}')
        self.logger.info(f'Value is {v}')
        return v
Example #12
def task_3():
    logger = prefect.context.get("logger")
    interval = randrange(0, 60)
    logger.info(interval)
    time.sleep(interval)
    if interval > 50:
        logger.info("Failing flow...")
        raise signals.FAIL()
Example #13
def execute_query(client, table_name):
    logger = prefect.context.get("logger")
    logger.info(f"Table Name: {table_name}")
    if table_name == "Users":
        time.sleep(9)
        raise signals.FAIL(
            message="TableNotFound: The table specified does not exist.")
    else:
        time.sleep(9)
    return table_name
Example #14
def _release(mountpath: str):

    try:
        os.remove(f'{_LOCKROOT}{mountpath}/.lockctl')
        #print('lock released')
    except Exception as e:
        log.exception(
            f'ERROR: error releasing lock {_LOCKROOT}{mountpath}/.lockctl')
        raise signals.FAIL()
    return
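
Paired with _acquire from Example #9, the expected calling pattern is a try/finally so the lock is always released; a minimal sketch (the guarded work is hypothetical):

def _with_lock(mountpath: str):
    _acquire(mountpath)      # raises signals.FAIL if the lock cannot be taken
    try:
        ...                  # work that needs exclusive access to mountpath
    finally:
        _release(mountpath)  # always remove the .lockctl file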
Example #15
def subprocesscall(cmd, stdout=None):
    print('SHELL CMD ----------------------------------')
    print(cmd)
    print('--------- ----------------------------------')
    if stdout is None:
        rslt = subprocess.call(cmd, shell=True)
    else:
        with open(stdout, 'w') as sout:
            rslt = subprocess.call(cmd, stdout=sout, shell=True)
    if rslt != 0:
        raise signals.FAIL(message=f'{cmd} returned non zero result {rslt}')
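
Usage is straightforward; when a stdout path is given, the command's output is redirected into that file (paths here are illustrative):

subprocesscall('echo hello')                        # output goes to the console
subprocesscall('ls -l /tmp', stdout='/tmp/ls.log')  # output captured to a file
# Either call raises signals.FAIL on a non-zero exit code.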
Example #16
    def run(
        self,
        df: pd.DataFrame = None,
        expectations_path: str = None,
        keep_output: bool = None,
        **kwargs,
    ):

        ge_project_path = str(Path(expectations_path).parent)

        batch_kwargs = self._get_batch_kwargs(df)
        context = self._get_ge_context_local(ge_project_path)

        self.logger.info("Beginning validation run...")

        try:
            results = super().run(
                batch_kwargs=batch_kwargs,  # input data
                context=context,  # ~project config
                **kwargs,
            )
        except signals.FAIL as e:
            results = e.state.result

        # Show summary of results
        n_successful, n_expectations = self._get_stats_from_results(results)
        status = "success" if results.success else "failure"
        level = logging.INFO if results.success else logging.ERROR
        self.logger.log(
            msg=f"Validation finished with status '{status}'. {n_successful}/{n_expectations} test(s) passed.",
            level=level,
        )

        validation_id = list(results["run_results"])[0]
        url_dicts = context.get_docs_sites_urls(resource_identifier=validation_id)
        validation_site_url = url_dicts[0]["site_url"]

        if keep_output:
            docs_msg = f"To explore the docs, visit {validation_site_url}"
            docs_msg += " or the 'Artifacts' tab on the Prefect flow run dashboard."
            self.logger.info(docs_msg)

        else:
            docs_path = os.path.join(ge_project_path, "uncommitted")
            checkpoints_path = os.path.join(ge_project_path, "checkpoints")

            shutil.rmtree(docs_path)
            shutil.rmtree(checkpoints_path)

        if not results.success:
            raise signals.FAIL(result=results)

        return results
Example #17
def storage_init(provider: str) -> StorageService:
    """Class factory that returns an implementation of StorageService.

    StorageService is the abstract base class that provides a generic interface for
    multiple cloud platforms.

    Parameters
    ----------
    provider : str
      Name of an implemented provider.

    Returns
    -------
    service : StorageService
      Returns a specific implementation of the StorageService interface.

    Raises
    ------
    signals.FAIL
      Raised if `provider` is not supported.

    Notes
    -----
    The following providers are implemented:
      AWS S3 - S3Storage

    """

    if provider == 'AWS':
        service = S3Storage()

    elif provider == 'Local':
        log.error('Coming soon ...')
        raise signals.FAIL()
    else:
        log.error('Unsupported provider')
        raise signals.FAIL()

    return service
Example #18
    def run(self,
            query: str,
            dialect: str = 'postgresql',
            workspace_id: Optional[int] = None,
            id_column: Optional[str] = None) -> List[ResultSetType]:
        """ Perform the Quetzal SQL query

        Parameters
        ----------
        query: str
            Quetzal query.
        dialect: str
            Dialect used to express the `query`.
        workspace_id: int
            Workspace where the query should be executed. If not set, it uses
            the global workspace.
        id_column: str
            Name of the column on the query that represents a Quetzal file id.

        Returns
        -------
        results
            A list of dictionaries, one for each result row.

        """
        if not query:
            raise signals.FAIL('Query is empty')

        self.logger.info('Querying Quetzal at %s with SQL (dialect %s)=\n%s',
                         self.client.configuration.host, dialect, query)
        rows, total = helpers.query(self.client, workspace_id, query, dialect)

        # Handle results
        self.logger.info('Query gave %d results', total)

        # Shuffle the results
        if self.shuffle:
            random.shuffle(rows)

        # Only keep N results
        if self.limit is not None and total > self.limit:
            rows = rows[:self.limit]
            total = len(rows)
            self.logger.info('Query was limited to %d results', total)

        if self._as_file_adapter:
            for i, row in enumerate(rows):
                rows[i] = QuetzalFile.retrieve(file_id=row['id'],
                                               workspace_id=workspace_id)

        return rows
Example #19
    def delete(self):
        """ Delete this FSx disk """

        log.debug(f'Attempting to delete FSx disk at {self.mountpath}')
        log.debug(f'This processes lockid: {self.lockid}')

        ScratchDiskModule.removelock(self.mountpath, self.lockid)

        # Is the disk in use by anyone else? There is a potential for a race condition here.
        # If another process is blocking on entering the mutex to add a lock, this process will still remove the disk
        # TODO: possibly make __acquire non-blocking
        if ScratchDiskModule.haslocks(self.mountpath):
            log.info(
                f'FSx disk at {self.mountpath} is currently in use. Unable to remove it.'
            )
            return

        log.info(f'Unmounting FSx disk at {self.mountpath} ...')
        try:
            # umount -f = force, -l = lazy
            result = subprocess.run(['sudo', 'umount', '-fl', self.mountpath],
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT)
            if result.returncode != 0:
                print(result.stdout)
                log.error(
                    f'error while unmounting scratch disk at {self.mountpath} ...'
                )
        except Exception:
            log.exception(
                f'Exception while unmounting scratch disk at {self.mountpath} ...'
            )

        # Remove the AWS FSx resource
        client = boto3.client('fsx', region_name=self.region)
        try:
            response = client.delete_file_system(
                FileSystemId=self.filesystemid)
            if response['Lifecycle'] == 'DELETING':
                log.info(f'FSx disk {self.filesystemid} is DELETING')
                self.status = 'deleted'
            else:
                log.error(
                    f'Something went wrong when deleting the FSx disk {self.filesystemid} ... manually check the status'
                )
                self.status = 'error'

        except ClientError as e:
            log.exception('ClientError exception in AWSScratch.delete. ' +
                          str(e))
            raise signals.FAIL()
Example #20
def mkdirs(pdir: str, mode=0o775) -> None:
    # utility function to create a directory (recursively)
    if os.path.exists(pdir):
        #print(f"Directory {pdir} already exists")
        return
    # make output directory
    try:
        oumask = os.umask(0o777 - mode)  #0o002
        os.makedirs(pdir, exist_ok=True, mode=mode)
        #os.chmod(pdir, mode)
        print(f"Directory {pdir} created successfully")
    except OSError:
        # The signal must be raised, not just instantiated
        raise signals.FAIL(message=f"Directory {pdir} can not be created")
    finally:
        os.umask(oumask)
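
The 0o777 - mode expression computes the umask that yields mode; because mode's bits are a subset of 0o777, subtraction is the same as bit-clearing. A quick check of the default case:

mode = 0o775
assert 0o777 - mode == 0o002   # the umask set before makedirs
assert 0o777 & ~mode == 0o002  # equivalent bitwise form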
Example #21
def get_baseline(job: Job, sshuser=None):
    """ Retrieve operational forecast files for comparison to quasi-operational forecasts

    Parameters
    ----------
    job : Job
        The Job object.

    sshuser : str
        The user and host to use for retrieving data from a remote server. Required for LiveOcean.
    """

    cdate = job.CDATE
    ofs = job.OFS
    vdir = job.VERIFDIR
    hh = job.HH

    if ofs == 'liveocean':
        try:
            util.get_baseline_lo(cdate, vdir, sshuser)
        except Exception:
            log.exception('Retrieving baselines failed ...')
            raise signals.FAIL()
    elif ofs in util.nosofs_models:
        script = f"{curdir}/scripts/getNomadsProd.sh"

        result = subprocess.run([script, ofs, cdate, hh, vdir],
                                stderr=subprocess.STDOUT)
        if result.returncode != 0:
            log.error(
                f'Retrieving baselines failed ... result: {result.returncode}')
            raise signals.FAIL()
    else:
        log.error(f'{ofs} is not supported')
        raise signals.FAIL()
    return
Example #22
def cluster_start(cluster):
    """ Start the cluster

    Parameters
    ----------
    cluster : Cluster
        The cluster to start.
    """
    log.info('Starting ' + str(cluster.nodeCount) + ' instances ...')
    log.info('Waiting for nodes to start ...')
    try:
        cluster.start()
    except Exception as e:
        log.exception('In driver: Exception while creating nodes :' + str(e))
        raise signals.FAIL()
    return
Example #23
    def ramdisk_to_nfs(self, delete_outs1=False):
        """
        Move outputs from local RAMDISK (srcdir) to NFS (dstdir),
        delete other files used in RAMDISK.
        """
        p = self.p
        nfs = p.get('nfs', '')
        ramdisk = p.get('ramdisk', '')
        inps = self.inps
        inps1 = self._inps
        tmps1 = self._tmps
        outs1 = self._outs
        if (not ramdisk) or (nfs == ramdisk):  # no op
            return
        delete = []
        host = socket.gethostname()
        for k, src in outs1.items():
            if src.startswith(ramdisk):
                dst = src.replace(ramdisk, nfs)
                try:
                    mkdirs(os.path.dirname(dst))
                    if src != dst:
                        shutil.copyfile(src, dst)
                        os.chmod(dst, 0o660)
                        if delete_outs1:
                            os.unlink(src)
                        else:
                            delete.append((host, src))
                        print('********** RAMDISK => NFS *********************************')
                        print(f'{host}:{src}=>{dst}')
                        print('***********************************************************')
                except Exception:
                    # A bare `except:` would also swallow KeyboardInterrupt/SystemExit
                    raise signals.FAIL(
                        message=f'shutil.copy {src} to {dst} failed.')
        # delete tmps1 regardless
        deletefiles(tmps1.values())
        # delete inps1 if not original
        tgts = [inps1[k] for k in inps1 if inps[k] != inps1[k]]
        deletefiles(tgts)
        # delete later
        self.param.delete = self.param.get('delete', []) + delete
Example #24
    def check_files(self):
        skip = False
        force = self.p.get('force', False)
        signalskip = self.p.get('signalskip', True)
        # check input exists
        for k, f in self.inps.items():
            if not os.path.exists(f):
                raise signals.FAIL(
                    message=f'input file {k}:{f} does not exist')
        done = all(os.path.exists(x) for x in self.outs.values())
        # if force delete tmps and outs
        if force or (not done):  # delete all existing
            deletefiles(list(self.tmps.values()) + list(self.outs.values()))
        # if all outs exist then skip
        elif done:
            deletefiles(list(self.tmps.values()))
            if signalskip:
                raise signals.SKIP(
                    message=f'all outputs {self.outs} already exist')
            skip = True
        return skip
Example #25
    def create(self, mountpath: str = '/ptmp'):
        """ The NFS disk is assumed to be mounted, just create a symlink

        Parameters
        ----------
        mountpath : str
            The path where the disk will be mounted. Default is '/ptmp' (optional).
        """

        self.lockid = ScratchDiskModule.addlock(mountpath)

        # TODO: maybe, create an additional EFS drive to use as /ptmp
        self.mountpath = mountpath

        if self._mountexists():
            log.info("Scratch disk already exists...")
            return

        elif ScratchDiskModule.get_lockcount(self.mountpath) == 1:
            # Mount does not exist, but another process might be creating it 
            # We just created a lock for this, so lock count must be == 1 if we are the only one starting it

            # Now mount it
            log.info("Creating symbolic link ...")

            # TODO: Check to make sure it is not in use
            subprocess.run(['sudo', 'rm', '-Rf', self.mountpath],
                           stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

            result = subprocess.run(['sudo', 'ln', '-s', self.mount, self.mountpath],
                                    stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

            if result.returncode != 0:
                print(result.stdout)
                log.error('error attempting to create link to scratch disk ...')
                raise signals.FAIL()

            self.status = 'available'
        return
Example #26
def ptmp2com(job: Job):
    """ Transfer completed run from scratch disk to com 

    Parameters
    ----------
    job : Job
        The Job object with CDATE, PTMP, and COMROT attributes set.
    """

    # It takes 20 minutes to copy liveocean data from ptmp to /com 132GB
    # If done in the cluster ~$5.18 of compute cost, do it in the head node instead
    # NOS does it in the forecast script and renames the files in the process
    if job.OFS == "liveocean":
        fdate = util.lo_date(job.CDATE)
        ptmp = f'{job.PTMP}/liveocean/{fdate}/*'
        comout = job.COMROT + '/liveocean/' + fdate

        if debug:
            print(f"ptmp: {ptmp}, comout: {comout}")

        try:
            cmd = f'cp -pf {ptmp} {comout}'
            result = subprocess.run(cmd,
                                    universal_newlines=True,
                                    shell=True,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT)
            if result.returncode != 0:
                log.error(result.stdout)
                log.error(f'error copying data from {ptmp} to {comout}')
        except Exception:
            # result may be unbound here if subprocess.run itself raised
            log.exception(f'exception copying data from {ptmp} to {comout}')
            raise signals.FAIL()
    else:
        log.info("Skipping ... NOSOFS does this in the forecast script")

    return
Example #27
def post_graph(appointment: ExternalAppointmentStruct) -> Dict:
    logger = prefect.context.get("logger")
    logger.info(f"Starting post_graph")
    s = ExternalAppointmentUpdateSummaryStruct()
    external_appointment_schema = ExternalAppointmentStructSchema()
    external_appointment_update_schema = ExternalAppointmentUpdateSummaryStructSchema(
    )

    json_data = external_appointment_schema.dump(appointment)

    summary: ExternalAppointmentUpdateSummaryStruct
    logger.info("About to post")
    summary, err = common.post_to_endpoint(1,
                                           json_data,
                                           '/api/external_appointment/update',
                                           external_appointment_update_schema,
                                           commit=False)
    logger.info("Finished posting")
    if err:
        raise signals.FAIL(message=str(err))

    return summary
Example #28
def get_forcing(job: Job, sshuser=None):
    """ Retrieve operational moddel forcing data and initial conditions

    Parameters
    ----------
    job : Job
        The Job object.

    sshuser : str
        The user and host to use for retrieving data from a remote server. Required for LiveOcean.
    """

    cdate = job.CDATE
    ofs = job.OFS
    comrot = job.COMROT
    hh = job.HH

    comdir = job.OUTDIR  # ex: /com/liveocean/f2020.MM.DD

    if ofs == 'liveocean':

        frcdir = job.COMROT + '/liveocean'
        try:
            util.get_ICs_lo(cdate, frcdir, sshuser)
        except Exception as e:
            log.exception(
                'Problem encountered with downloading forcing data ...')
            raise signals.FAIL()

    # ROMS models
    elif ofs in ('cbofs', 'dbofs', 'tbofs', 'gomofs', 'ciofs'):
        #comdir = f"{comrot}/{ofs}.{cdate}"
        script = f"{curdir}/scripts/getICsROMS.sh"

        result = subprocess.run([script, cdate, hh, ofs, comdir],
                                stderr=subprocess.STDOUT)
        if result.returncode != 0:
            log.error(
                f'Retrieving ICs failed ... result: {result.returncode}')
            raise signals.FAIL()

    # FVCOM models
    elif ofs in ('ngofs', 'nwgofs', 'negofs', 'leofs', 'sfbofs', 'lmhofs'):
        #comdir = f"{comrot}/{ofs}.{cdate}"
        script = f"{curdir}/scripts/getICsFVCOM.sh"

        result = subprocess.run([script, cdate, hh, ofs, comdir],
                                stderr=subprocess.STDOUT)
        if result.returncode != 0:
            log.error(
                f'Retrieving ICs failed ... result: {result.returncode}')
            raise signals.FAIL()
    # Coupled WRF/ROMS
    elif ofs == 'wrfroms':
        #comdir = f"{comrot}/{ofs}/{cdate}"
        script = f"{curdir}/scripts/getICsWRFROMS.sh"

        result = subprocess.run([script, cdate, comdir],
                                stderr=subprocess.STDOUT)
        if result.returncode != 0:
            log.error(
                f'Retrieving ICs failed ... result: {result.returncode}')
            raise signals.FAIL()
    else:
        log.error(f'Unsupported forecast: {ofs}')
        raise signals.FAIL()

    return
Example #29
def handler(signal_received, frame):
    print('SIGINT or CTRL-C detected. Exiting gracefully')
    raise signals.FAIL()
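
A handler with this (signal_received, frame) signature is registered through Python's standard signal module; aliasing the import avoids clashing with prefect's signals name:

import signal as stdsignal

# Invoke handler (above) on Ctrl-C / SIGINT, turning it into a FAIL signal
stdsignal.signal(stdsignal.SIGINT, handler)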
Example #30
    def run(
        self,
        checkpoint_name: str = None,
        ge_checkpoint: Checkpoint = None,
        checkpoint_kwargs: dict = None,
        context: ge.DataContext = None,
        assets_to_validate: list = None,
        batch_kwargs: dict = None,
        expectation_suite_name: str = None,
        context_root_dir: str = None,
        runtime_environment: Optional[dict] = None,
        run_name: str = None,
        run_info_at_end: bool = True,
        disable_markdown_artifact: bool = False,
        validation_operator: str = "action_list_operator",
        evaluation_parameters: Optional[dict] = None,
    ):
        """
        Task run method.

        Args:
            - checkpoint_name (str, optional): the name of a pre-configured checkpoint; should match the
                filename of the checkpoint without the extension. Either checkpoint_name or
                checkpoint_config is required when using the Great Expectations v3 API.
            - ge_checkpoint (Checkpoint, optional): an in-memory GE `Checkpoint` object used to perform
                validation. If not provided then `checkpoint_name` will be used to load the specified
                checkpoint.
            - checkpoint_kwargs (Dict, optional): A dictionary whose keys match the parameters of
                `CheckpointConfig` which can be used to update and populate the task's Checkpoint at
                runtime.
            - context (DataContext, optional): an in-memory GE `DataContext` object. e.g.
                `ge.data_context.DataContext()` If not provided then `context_root_dir` will be used to
                look for one.
            - assets_to_validate (list, optional): A list of assets to validate when running the
                validation operator. Only used in the Great Expectations v2 API
            - batch_kwargs (dict, optional): a dictionary of batch kwargs to be used when validating
                assets. Only used in the Great Expectations v2 API
            - expectation_suite_name (str, optional): the name of an expectation suite to be used when
                validating assets. Only used in the Great Expectations v2 API
            - context_root_dir (str, optional): the absolute or relative path to the directory holding
                your `great_expectations.yml`
            - runtime_environment (dict, optional): a dictionary of great expectation config key-value
                pairs to overwrite your config in `great_expectations.yml`
            - run_name (str, optional): the name of this Great Expectations validation run; defaults to
                the task slug
            - run_info_at_end (bool, optional): add run info to the end of the artifact generated by this
                task. Defaults to `True`.
            - disable_markdown_artifact (bool, optional): toggle the posting of a markdown artifact from
                this task. Defaults to `False`.
            - evaluation_parameters (Optional[dict], optional): the evaluation parameters to use when
                running validation. For more information, see
                [example](https://docs.prefect.io/api/latest/tasks/great_expectations.html#rungreatexpectationsvalidation)
                and
                [docs](https://docs.greatexpectations.io/en/latest/reference/core_concepts/evaluation_parameters.html).
            - validation_operator (str, optional): configure the actions to be executed after running
                validation. Defaults to `action_list_operator`.

        Raises:
            - 'signals.FAIL' if the validation was not a success

        Returns:
            - result
                ('great_expectations.validation_operators.types.validation_operator_result.ValidationOperatorResult'):
                The Great Expectations metadata returned from the validation if the v2 (batch_kwargs) API
                is used.

                ('great_expectations.checkpoint.checkpoint.CheckpointResult'):
                The Great Expectations metadata returned from running the provided checkpoint if a
                checkpoint name is provided.

        """

        if version.parse(ge.__version__) < version.parse("0.13.8"):
            self.logger.warning(
                f"You are using great_expectations version {ge.__version__} which may cause "
                "errors in this task. Please upgrade great_expectations to 0.13.8 or later."
            )

        runtime_environment = runtime_environment or dict()
        checkpoint_kwargs = checkpoint_kwargs or dict()

        # Load context if not provided directly
        if not context:
            context = ge.DataContext(
                context_root_dir=context_root_dir,
                runtime_environment=runtime_environment,
            )

        # Check that the parameters are mutually exclusive
        if (sum(
                bool(x) for x in [
                    (expectation_suite_name and batch_kwargs),
                    assets_to_validate,
                    checkpoint_name,
                    ge_checkpoint,
                ]) != 1):
            raise ValueError(
                "Exactly one of expectation_suite_name + batch_kwargs, assets_to_validate, "
                "checkpoint_name, or ge_checkpoint is required to run validation."
            )

        results = None
        # If there is a checkpoint or checkpoint name provided, run the checkpoint.
        # Checkpoints are the preferred deployment of validation configuration.
        if ge_checkpoint or checkpoint_name:
            ge_checkpoint = ge_checkpoint or context.get_checkpoint(
                checkpoint_name)
            results = ge_checkpoint.run(
                evaluation_parameters=evaluation_parameters,
                run_id={
                    "run_name": run_name or prefect.context.get("task_slug")
                },
                **checkpoint_kwargs,
            )
        else:
            # If assets are not provided directly through `assets_to_validate` then they need be loaded
            #   get batch from `batch_kwargs` and `expectation_suite_name`
            if not assets_to_validate:
                assets_to_validate = [
                    context.get_batch(batch_kwargs, expectation_suite_name)
                ]

            # Run validation operator
            results = context.run_validation_operator(
                validation_operator,
                assets_to_validate=assets_to_validate,
                run_id={
                    "run_name": run_name or prefect.context.get("task_slug")
                },
                evaluation_parameters=evaluation_parameters,
            )

        # Generate artifact markdown
        if not disable_markdown_artifact:
            validation_results_page_renderer = (
                ge.render.renderer.ValidationResultsPageRenderer(
                    run_info_at_end=run_info_at_end))
            rendered_content_list = validation_results_page_renderer.render_validation_operator_result(
                # This also works with a CheckpointResult because of duck typing.
                # The passed in object needs a list_validation_results method that
                # returns a list of ExpectationSuiteValidationResult.
                validation_operator_result=results)
            markdown_artifact = " ".join(
                ge.render.view.DefaultMarkdownPageView().render(
                    rendered_content_list))

            create_markdown_artifact(markdown_artifact)

        if not results.success:
            raise signals.FAIL(result=results)

        return results
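
The signature above matches the run method of Prefect's RunGreatExpectationsValidation task; a hedged sketch of invoking it with a pre-configured checkpoint (checkpoint and directory names are illustrative):

from prefect import Flow
from prefect.tasks.great_expectations import RunGreatExpectationsValidation

validate = RunGreatExpectationsValidation()

with Flow('ge-validation') as flow:
    validate(
        checkpoint_name='my_checkpoint',        # hypothetical checkpoint name
        context_root_dir='great_expectations',  # dir containing great_expectations.yml
    )

flow.run()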