예제 #1
0
def daily_cron(step):
    """
    Build an ecflow Cron that fires every *step* minutes over a whole day.

    Parameters:
        step: repeat interval in minutes; valid range is 1..60.

    Returns:
        An ecflow.Cron spanning 00:00 to the last slot before midnight,
        or None when *step* is out of range.
    """
    # Reject non-positive steps as well: step == 0 would yield an invalid
    # finish slot TimeSlot(23, 60) and a zero increment.
    if step <= 0 or step > 60:
        return None
    # TimeSlot minutes must be 0..59, so express the increment as
    # (hours, minutes); step == 60 becomes a valid hourly TimeSlot(1, 0).
    time_series = ecflow.TimeSeries(ecflow.TimeSlot(0, 0),
                                    ecflow.TimeSlot(23, 60 - step),
                                    ecflow.TimeSlot(step // 60, step % 60),
                                    False)
    cron = ecflow.Cron()
    cron.set_time_series(time_series)
    return cron
예제 #2
0
# Stage the task wrapper script into the suite folder.
shutil.copyfile("%s/init.ecf" % run_path, "%s/init.ecf" % suite_folder)

# Start a fresh suite definition.
defs = ecflow.Defs()
suite = defs.add_suite(suite_name)

# make the suite a real clock rather than hybrid
# so that date information follows the system clock
# rather than being fixed at the original submission date
hybrid = False
clock = ecflow.Clock(hybrid)
suite.add_clock(clock)

# If a run time "HH:MM" was given, schedule the suite daily at that time
# via a cron attribute (a cron re-queues automatically each day).
if runtime != "":
    cron = ecflow.Cron()
    h, m = runtime.split(":")
    time_series = ecflow.TimeSeries(ecflow.TimeSlot(int(h), int(m)))
    cron.set_time_series(time_series)
    suite.add_cron(cron)
    # Earlier approach kept for reference: a plain time + date attribute
    # would run once rather than repeating daily.
    #suite.add_time(runtime)
    #suite.add_date(0,0,0)

# Suite-level variables consumed by the .ecf task wrappers.
suite.add_variable("ECF_INCLUDE", working_folder)
suite.add_variable("ECF_HOME", working_folder)
suite.add_variable("TOOLS_HOME", "%s/.." % run_path)
suite.add_variable("OUTPUT_FOLDER", output_folder)

suite.add_variable("INPUT_FOLDER", input_folder)
suite.add_variable("VERBOSE_FLAG", verbose_flag)
suite.add_variable("DIFFERENCE_THRESHOLD_FACTOR", difference_threshold_factor)

# Begin of modifications, 06.08.2012 (cgjd)
def build_suite():
    """
    Build the ecflow suite.

    Creates the suite definition (cron-driven submitter, processing /
    dearchiving / archiving families, one MPMD family per tarfile),
    saves it to '<mysuite>.def', and creates the matching log-directory
    trees on the local and remote machines.
    """
    logger.info('Building suite.')

    # ========================
    # GENERAL SUITE PROPERTIES
    # ========================

    defs = ecflow.Defs()
    suite = defs.add_suite(mysuite)

    # Set suite level variables
    set_vars(suite)

    # Set default status; the suite is started manually later.
    suite.add_defstatus(ecflow.DState.suspended)

    # Define thread limits
    suite.add_limit("mpmd_threads", mpmd_threads_number)
    suite.add_limit("serial_threads", serial_threads_number)

    # ========================
    # ADD CRON JOB
    # ========================

    # The submitter task re-runs every minute, all day.
    start = ecflow.TimeSlot(0, 0)
    finish = ecflow.TimeSlot(23, 59)
    incr = ecflow.TimeSlot(0, 1)
    time_series = ecflow.TimeSeries(start, finish, incr, False)
    cron = ecflow.Cron()
    cron.set_time_series(time_series)
    fam_submit = suite.add_family('queue_submitter')
    submit = fam_submit.add_task('submit')
    submit.add_cron(cron)
    fam_submit.add_variable('ECF_JOB_CMD', ecgate_job_cmd)

    # ========================
    # DEFINE TOPLEVEL FAMILIES
    # ========================

    fam_proc = suite.add_family('proc')
    fam_dearch = suite.add_family('dearchiving')

    # The archiving task re-runs every 5 minutes, all day.
    start = ecflow.TimeSlot(0, 0)
    finish = ecflow.TimeSlot(23, 55)
    incr = ecflow.TimeSlot(0, 5)
    time_series = ecflow.TimeSeries(start, finish, incr, False)
    cron = ecflow.Cron()
    cron.set_time_series(time_series)
    fam_arch = suite.add_family('archiving')
    storedata = fam_arch.add_task('storedata')
    storedata.add_cron(cron)
    fam_arch.add_variable('ECF_JOB_CMD', serial_job_cmd)

    # Activate thread limits
    fam_proc.add_inlimit('mpmd_threads')
    fam_dearch.add_inlimit('serial_threads')

    # Define job commands
    fam_proc.add_variable('ECF_JOB_CMD', mpmd_job_cmd)
    fam_dearch.add_variable('ECF_JOB_CMD', serial_job_cmd)

    # ===============================
    # DEFINE DYNAMIC FAMILIES & TASKS
    # ===============================

    dearch_interval = interval_value
    io_hiding_offset = io_offset_value
    dearch_counter = 0
    tar_counter = 0

    fam_tar = None
    fam_chunk = None

    mpmd_families = list()
    tarfiles_within_current_interval = list()

    # Make sure dearchiving interval is at least one
    # greater than IO hiding offset.
    if dearch_interval - io_hiding_offset < 1:
        raise ValueError('Dearchiving interval must be at least one greater '
                         'than IO hiding offset.')

    # connect to database and get_sats list
    db = AvhrrGacDatabase(dbfile=gacdb_sqlite_file)

    if args.satellites:
        satellites = args.satellites
    else:
        if args.ignoresats:
            satellites = db.get_sats(start_date=args.sdate,
                                     end_date=args.edate,
                                     ignore_sats=args.ignoresats)
        else:
            satellites = db.get_sats(start_date=args.sdate,
                                     end_date=args.edate)

    # -- loop over satellites
    for sat in satellites:

        # create sat family
        fam_sat = fam_proc.add_family(sat)

        # add satellite variable
        fam_sat.add_variable("SATELLITE", sat)

        # get years list
        years = db.get_years(sat)

        # -- loop over years for satellite
        for year in years:

            if args.userdatelimit:
                # Skip years outside the user-given date window; clamp the
                # first/last year to the exact start/end dates.
                if year < args.sdate.year or year > args.edate.year:
                    continue
                # create family year for satellite
                fam_year = fam_sat.add_family(str(year))
                # start and end date for year & satellite
                if year == args.sdate.year:
                    sd = datetime.date(year, args.sdate.month, args.sdate.day)
                else:
                    sd = datetime.date(year, 1, 1)
                if year == args.edate.year:
                    ed = datetime.date(year, args.edate.month, args.edate.day)
                else:
                    ed = datetime.date(year, 12, 31)
            else:
                # create family year for satellite
                fam_year = fam_sat.add_family(str(year))
                # start and end date for year & satellite
                sd = datetime.date(year, 1, 1)
                ed = datetime.date(year, 12, 31)

            # get tarfile list
            tarfiles = db.get_tarfiles(start_date=sd, end_date=ed,
                                       sats=[sat], include_blacklisted=False,
                                       strict=False)

            # -- loop over tarfiles for year & satellite
            for tarfil in tarfiles:

                logger.info("Working on: {0}".format(tarfil))

                # split tarfilename "NOAA7_1985_01.tar"
                tarbase = os.path.basename(tarfil)
                tarmonth = ((tarbase.split("."))[0].split("_"))[2]
                taryear = ((tarbase.split("."))[0].split("_"))[1]

                # calendar.monthrange(year, month)
                #   Returns weekday of first day of the month and number of days
                #   in month, for the specified year and month.
                mr = monthrange(int(taryear), int(tarmonth))
                first_tar_date = datetime.date(int(taryear), int(tarmonth), 1)
                last_tar_date = datetime.date(int(taryear), int(tarmonth), mr[1])
                date_str = first_tar_date.strftime("%Y%m%d") + \
                           '_' + last_tar_date.strftime("%Y%m%d")

                # Start a new dearchiving chunk every dearch_interval tarfiles.
                if tar_counter % dearch_interval == 0:
                    if fam_chunk:
                        # Add all collected tarfiles to the
                        # current dearchiving family
                        fam_chunk.add_variable('TARFILES',
                                ' '.join(tarfiles_within_current_interval))

                        # Reset list of tarfiles within current interval
                        tarfiles_within_current_interval = []

                    # Create new family for dearchiving the next chunk of data.
                    fam_chunk = fam_dearch.add_family('chunk{0}'.
                            format(dearch_counter))
                    add_dearchiving_tasks(fam_chunk)
                    fam_chunk.add_variable("ECF_TRIES", 2)
                    dearch_counter += 1

                    # Make it wait for the current MPMD family minus a possible
                    # offset in order to hide IO time behind computation time.
                    # The first IO chunk (fam_tar still None) has no trigger.
                    if fam_tar:
                        add_trigger(fam_chunk,
                                mpmd_families[tar_counter - io_hiding_offset - 1])

                # Create one MPMD family for each tar_range_archive
                fam_tar = fam_year.add_family('{0}'.format(tarmonth))
                tar_counter += 1

                # add start and end day of fam_tar
                add_family_variables(fam_tar,
                                     first_tar_date.strftime("%Y%m%d"),
                                     last_tar_date.strftime("%Y%m%d"))

                # Make it wait for the current dearchiving family.
                add_trigger(fam_tar, fam_chunk)

                # Add MPMD tasks to each tarfile family
                add_mpmd_tasks(fam_tar)

                # Save the created family for later use
                mpmd_families.append(fam_tar)
                tarfiles_within_current_interval.append(tarfil)

    # -- end of loop over satellites

    # Add last chunk of collected tarfiles to the last dearchiving family.
    # fam_chunk is still None when the database returned no tarfiles at all;
    # guard against that instead of raising AttributeError.
    if fam_chunk is not None:
        fam_chunk.add_variable('TARFILES',
                ' '.join(tarfiles_within_current_interval))

    # close database connection
    db.close()

    # ============================
    # CREATE SUITE DEFINITION FILE
    # ============================

    # Check job creation
    defs.check_job_creation()

    # Save suite to file
    suite_def_file = mysuite + '.def'
    logger.info('Saving suite definition to file: {0}'.format(suite_def_file))
    defs.save_as_defs(suite_def_file)

    # ======================
    # CREATE LOG DIRECTORIES
    # ======================

    logger.info('Creating log directories on both the local and '
                'the remote machine.')

    # Create a tree of all families in the suite
    # (i.e. families, subfamilies, subsubfamilies etc)
    tree = familytree(suite)

    # Create corresponding log-directory tree:
    # 1.) Local machine
    for node in tree:
        dirname = os.path.join(ecf_out_dir, node)
        if not os.path.isdir(dirname):
            os.makedirs(dirname)

    # 2.) Remote machine
    ssh = SSHClient(user=remote_user_name, host=remote_host_name)
    for node in tree:
        remote_dir = os.path.join(remote_log_dir, node)
        ssh.mkdir(remote_dir, batch=True)  # batch=True appends this mkdir
        # call to the command batch.

    # Create all remote directories in one step (is much faster)
    ssh.execute_batch()
예제 #4
0
def build_suite():
    """
    Build the ecflow suite.

    Creates the suite definition (cron-driven submitter plus monthly
    dearchiving / processing / make_tarfile / archiving families for the
    date range args.sdate..args.edate), saves it to '<mysuite>.def', and
    creates the matching log-directory trees on the local and remote
    machines.
    """
    logger.info('Building suite.')

    # ========================
    # GENERAL SUITE PROPERTIES
    # ========================

    defs = ecflow.Defs()
    suite = defs.add_suite(mysuite)

    # Set suite level variables
    set_vars(suite)

    # Set default status; the suite is started manually later.
    suite.add_defstatus(ecflow.DState.suspended)

    # Define thread limits
    suite.add_limit("mpmd_threads", mpmd_threads_number)
    suite.add_limit("serial_threads", serial_threads_number)

    # ========================
    # ADD CRON JOB
    # ========================

    # The submitter task re-runs every minute, all day.
    start = ecflow.TimeSlot(0, 0)
    finish = ecflow.TimeSlot(23, 59)
    incr = ecflow.TimeSlot(0, 1)
    time_series = ecflow.TimeSeries(start, finish, incr, False)
    cron = ecflow.Cron()
    cron.set_time_series(time_series)
    fam_submit = suite.add_family('queue_submitter')
    submit = fam_submit.add_task('submit')
    submit.add_cron(cron)
    fam_submit.add_variable('ECF_JOB_CMD', ecgate_job_cmd)

    # ========================
    # DEFINE TOPLEVEL FAMILIES
    # ========================

    fam_dearch = suite.add_family('dearchiving')
    fam_proc = suite.add_family('processing')
    fam_make = suite.add_family('make_tarfile')
    fam_arch = suite.add_family('archiving')

    # Activate thread limits
    fam_dearch.add_inlimit('serial_threads')
    fam_proc.add_inlimit('mpmd_threads')
    fam_make.add_inlimit('serial_threads')
    fam_arch.add_inlimit('serial_threads')

    # Define job commands
    fam_dearch.add_variable('ECF_JOB_CMD', serial_job_cmd)
    fam_proc.add_variable('ECF_JOB_CMD', mpmd_job_cmd)
    fam_make.add_variable('ECF_JOB_CMD', serial_job_cmd)
    fam_arch.add_variable('ECF_JOB_CMD', serial_job_cmd)

    # ===============================
    # DEFINE DYNAMIC FAMILIES & TASKS
    # ===============================

    # One iteration per calendar month in the requested range.
    for mm in rrule(MONTHLY, dtstart=args.sdate, until=args.edate):

        yearstr = mm.strftime("%Y")
        monthstr = mm.strftime("%m")
        first_day = "01"
        last_day = calendar.monthrange(int(yearstr), int(monthstr))[1]
        yyyymm = yearstr + monthstr
        start_date = yearstr + monthstr + first_day
        end_date = yearstr + monthstr + str(last_day)

        if args.ignore_months:
            if int(monthstr) in args.ignore_months:
                continue

        # Create the per-year families once; for every later month of the
        # same year add_fam raises RuntimeError (family already exists),
        # which we deliberately swallow so the existing families are reused.
        # NOTE(review): this assumes the first month of each year succeeds —
        # if the very first add_fam call ever raised, fam_year_* would be
        # unbound below; confirm add_fam only raises for duplicates.
        try:
            # dearchiving family
            fam_year_dearch = add_fam(fam_dearch, yearstr)

            # processing family
            fam_year_proc = add_fam(fam_proc, yearstr)
            add_trigger(fam_year_proc, fam_year_dearch)

            # make yearly tarfile family
            fam_year_make = add_fam(fam_make, yearstr)
            fam_year_make.add_variable("YEAR", yearstr)
            add_make_tarfile_task(fam_year_make)
            add_trigger(fam_year_make, fam_year_proc)

        except RuntimeError:
            pass

        # dearchiving family (monthly)
        fam_month_dearch = add_fam(fam_year_dearch, monthstr)
        fam_month_dearch.add_variable("YYYYMM", yyyymm)
        fam_month_dearch.add_variable("START_DATE", start_date)
        fam_month_dearch.add_variable("END_DATE", end_date)
        fam_month_dearch.add_variable("NDAYS", last_day)
        add_dearchiving_task(fam_month_dearch)

        # processing family (monthly)
        fam_month_proc = add_fam(fam_year_proc, monthstr)
        fam_month_proc.add_variable("YYYYMM", yyyymm)
        fam_month_proc.add_variable("SY", yearstr)
        fam_month_proc.add_variable("EY", yearstr)
        fam_month_proc.add_variable("SM", monthstr)
        fam_month_proc.add_variable("EM", monthstr)
        fam_month_proc.add_variable("SD", first_day)
        fam_month_proc.add_variable("ED", last_day)
        add_mpmd_tasks(fam_month_proc)

    # create 1 tarball containing time series tarfiles
    add_archiving_task(fam_arch)
    add_trigger(fam_arch, fam_make)

    # ============================
    # CREATE SUITE DEFINITION FILE
    # ============================

    # Check job creation
    defs.check_job_creation()

    # Save suite to file
    suite_def_file = mysuite + '.def'
    logger.info('Saving suite definition to file: {0}'.format(suite_def_file))
    defs.save_as_defs(suite_def_file)

    # ======================
    # CREATE LOG DIRECTORIES
    # ======================

    logger.info('Creating log directories on both the local and '
                'the remote machine.')

    # Create a tree of all families in the suite
    # (i.e. families, subfamilies, subsubfamilies etc)
    tree = familytree(suite)

    # Create corresponding log-directory tree:
    # 1.) Local machine
    for node in tree:
        dirname = os.path.join(ecf_out_dir, node)
        if not os.path.isdir(dirname):
            os.makedirs(dirname)

    # 2.) Remote machine
    ssh = SSHClient(user=remote_user_name, host=remote_host_name)
    for node in tree:
        remote_dir = os.path.join(remote_log_dir, node)
        ssh.mkdir(remote_dir, batch=True)  # batch=True appends this mkdir
        # call to the command batch.

    # Create all remote directories in one step (is much faster)
    ssh.execute_batch()