Example #1
def plot_flow(postconf, postjobfile) -> Flow:
    """ Provides a Prefect Flow for a plotting workflow. Also creates mpegs of sequential plots.
    Plots and mpegs are uploaded to the cloud.

    Parameters
    ----------
    postconf : str
        Path to the JSON configuration file for the Cluster to create

    postjobfile : str
        Path to the JSON configuration file for the plotting Job

    Returns
    -------
    plotflow : prefect.Flow
    """

    with Flow('plotting') as plotflow:
        #####################################################################
        # POST Processing
        #####################################################################

        # Initialize the post-processing machine (cluster) from its config
        postmach = ctasks.cluster_init(postconf)

        # Setup the post job
        plotjob = tasks.job_init(postmach, postjobfile)

        # Start the machine
        pmStarted = ctasks.cluster_start(postmach)

        # Push the env, install required libs on post machine
        # TODO: install all of the 3rd party dependencies on AMI
        pushPy = ctasks.push_pyEnv(postmach, upstream_tasks=[pmStarted])

        # Start a dask scheduler on the new post machine
        daskclient: Client = ctasks.start_dask(postmach,
                                               upstream_tasks=[pmStarted])

        # Get the list of files from the job-specified directory
        FILES = jtasks.ncfiles_from_Job(plotjob)

        # Make plots
        plots = jtasks.daskmake_plots(daskclient, FILES, plotjob)
        plots.set_upstream([daskclient])

        # Make movies
        mpegs = jtasks.daskmake_mpegs(daskclient,
                                      plotjob,
                                      upstream_tasks=[plots])
        # Initialize cloud storage for the upload (assumed: mirrors diff_plot_flow
        # below; 'provider' is a module-level setting).
        storage_service = tasks.storage_init(provider)
        mp4tocloud = tasks.save_to_cloud(plotjob,
                                         storage_service, ['*.mp4'],
                                         public=True)
        mp4tocloud.set_upstream(mpegs)

        closedask = ctasks.dask_client_close(daskclient,
                                             upstream_tasks=[mpegs])
        pmTerminated = ctasks.cluster_terminate(
            postmach, upstream_tasks=[mpegs, closedask])

        # Inject notebook
        notebook = 'cloudflow/inject/patrick/sandbot_current_fcst.py'
        injected = tasks.fetchpy_and_run(plotjob, storage_service, notebook)

        #######################################################################

    return plotflow
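A minimal sketch of how a caller might build and run this flow with Prefect 1.x, using the local config paths that appear in Example #3 below:

# Config paths as shown in Example #3 below
postconf = 'cluster/configs/local.post'
postjobfile = 'job/jobs/plots.local.job'

plotflow = plot_flow(postconf, postjobfile)

# Prefect 1.x: Flow.run() executes the task graph locally and returns the final State
state = plotflow.run()
print('plotting flow succeeded:', state.is_successful())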
Example #2
def diff_plot_flow(postconf, postjobfile, sshuser=None) -> Flow:
    """ Provides a Prefect Flow for a plotting workflow that plots the difference between the experimental or
    quasi-operational forecast and the official operational forecast. Also creates mpegs of sequential plots.
    Plots and mpegs are uploaded to the cloud.

    Parameters
    ----------
    postconf : str
        Path to the JSON configuration file for the Cluster to create

    postjobfile : str
        Path to the JSON configuration file for the plotting Job

    sshuser : str
        The optional user and host to use for retrieving data from a remote server.

    Returns
    -------
    diff_plotflow : prefect.Flow
    """

    with Flow('diff plotting') as diff_plotflow:
        #####################################################################
        # POST Processing
        #####################################################################

        # Read the post machine config
        postmach = ctasks.cluster_init(postconf)

        # Setup the post job
        plotjob = tasks.job_init(postmach, postjobfile)

        # Retrieve the baseline operational forecast data
        getbaseline = jtasks.get_baseline(plotjob, sshuser)

        # Start the machine
        pmStarted = ctasks.cluster_start(postmach,
                                         upstream_tasks=[getbaseline])

        # Push the env, install required libs on post machine
        # TODO: install all of the 3rd party dependencies on AMI
        pushPy = ctasks.push_pyEnv(postmach, upstream_tasks=[pmStarted])

        # Start a dask scheduler on the new post machine
        daskclient: Client = ctasks.start_dask(postmach,
                                               upstream_tasks=[pushPy])

        # Get the list of files from the job-specified directory
        FILES = jtasks.ncfiles_from_Job(plotjob)
        BASELINE = jtasks.baseline_from_Job(plotjob,
                                            upstream_tasks=[getbaseline])

        # Make plots
        plots = jtasks.daskmake_diff_plots(daskclient, FILES, BASELINE,
                                           plotjob)
        plots.set_upstream([daskclient, getbaseline])

        storage_service = tasks.storage_init(provider)
        #pngtocloud = tasks.save_to_cloud(plotjob, storage_service, ['*diff.png'], public=True)
        #pngtocloud.set_upstream(plots)

        # Make movies
        mpegs = jtasks.daskmake_mpegs(daskclient,
                                      plotjob,
                                      diff=True,
                                      upstream_tasks=[plots])
        mp4tocloud = tasks.save_to_cloud(plotjob,
                                         storage_service, ['*diff.mp4'],
                                         public=True)
        mp4tocloud.set_upstream(mpegs)

        closedask = ctasks.dask_client_close(daskclient,
                                             upstream_tasks=[mpegs])
        pmTerminated = ctasks.cluster_terminate(
            postmach, upstream_tasks=[mpegs, closedask])

        #######################################################################
        # This will add Kenny's script
        # https://ioos-cloud-sandbox.s3.amazonaws.com/cloudflow/inject/kenny/cloud_sandbot.py
        #notebook = 'cloudflow/inject/kenny/cloud_sandbot.py'
        notebook = 'cloudflow/inject/patrick/sandbot_current_fcst.py'
        injected = tasks.fetchpy_and_run(plotjob, storage_service, notebook)

    return diff_plotflow
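Both flows above declare task ordering in two equivalent ways in Prefect 1.x: by passing upstream_tasks= when a task is called inside the Flow context, and by calling .set_upstream() on the returned task afterward. A minimal, self-contained sketch of the idiom with dummy tasks (not part of cloudflow):

from prefect import Flow, task

@task
def start_machine():
    return 'machine-id'

@task
def make_plots(machine):
    return ['plot1.png', 'plot2.png']

@task
def upload(files):
    print('uploading', files)

with Flow('dependency idioms') as demo:
    mach = start_machine()

    # Passing a task's result creates both a data and an execution dependency.
    plots = make_plots(mach)

    # A pure ordering dependency (no data passed) via the keyword form...
    up = upload(plots, upstream_tasks=[mach])

    # ...or, equivalently, declared after the call:
    # up.set_upstream(mach)

demo.run()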
Example #3
fcstconf = 'cluster/configs/local.config'
fcstjobfile = 'job/jobs/liveocean.job'
postconf = 'cluster/configs/local.post'
postjobfile = 'job/jobs/plots.local.job'

# This is used for obtaining liveocean forcing data
sshuser = '******'

with Flow('plot only') as plotonly:
    # Start a machine
    postmach = ctasks.cluster_init(postconf)
    pmStarted = ctasks.cluster_start(postmach)

    # Push the env, install required libs on post machine
    # TODO: install all of the 3rd party dependencies on AMI
    pushPy = ctasks.push_pyEnv(postmach, upstream_tasks=[pmStarted])

    # Start a dask scheduler on the new post machine
    daskclient: Client = ctasks.start_dask(postmach, upstream_tasks=[pmStarted])

    # Setup the post job
    postjob = tasks.job_init(postmach, postjobfile, upstream_tasks=[pmStarted])

    # Get the list of files from the post job's specified directory
    FILES = jtasks.ncfiles_from_Job(postjob)

    # Make plots
    plots = jtasks.daskmake_plots(daskclient, FILES, postjob)
    plots.set_upstream([daskclient])

    closedask = ctasks.dask_client_close(daskclient, upstream_tasks=[plots])
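
    # Sketch (not in the original excerpt): plot_flow and diff_plot_flow above end by
    # terminating the post machine once the dask client has closed; the same pattern
    # would presumably apply here.
    pmTerminated = ctasks.cluster_terminate(postmach, upstream_tasks=[plots, closedask])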