import ecflow


def daily_cron(step):
    """Return a Cron firing every `step` minutes across the day, or None
    if `step` is outside the supported range of 1..60."""
    # Reject nonsensical steps: a zero or negative increment is invalid,
    # as is anything above one hour. (The original guard allowed step == 0,
    # which would produce an invalid TimeSlot(23, 60) finish and a zero
    # increment.)
    if step <= 0 or step > 60:
        return None
    # Split the increment into hours/minutes so a 60-minute step maps to
    # TimeSlot(1, 0) instead of an illegal minute value of 60.
    time_series = ecflow.TimeSeries(ecflow.TimeSlot(0, 0),
                                    ecflow.TimeSlot(23, 60 - step),
                                    ecflow.TimeSlot(step // 60, step % 60),
                                    False)
    cron = ecflow.Cron()
    cron.set_time_series(time_series)
    return cron
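# Usage sketch for daily_cron() (illustrative only; the "demo" names below
# are invented for this example and are not part of the original script).
# daily_cron(15) yields a cron that fires at 00:00, 00:15, ..., 23:45.
def _demo_daily_cron():
    demo_defs = ecflow.Defs()
    demo_suite = demo_defs.add_suite("cron_demo")
    demo_task = demo_suite.add_task("poll")
    cron = daily_cron(15)
    if cron is not None:
        demo_task.add_cron(cron)
    return demo_defs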
shutil.copyfile("%s/init.ecf" % run_path, "%s/init.ecf" % suite_folder)

defs = ecflow.Defs()
suite = defs.add_suite(suite_name)

# Give the suite a real clock rather than a hybrid one, so that date
# information follows the system clock rather than being fixed at the
# original submission date.
hybrid = False
clock = ecflow.Clock(hybrid)
suite.add_clock(clock)

if runtime != "":
    cron = ecflow.Cron()
    h, m = runtime.split(":")
    time_series = ecflow.TimeSeries(ecflow.TimeSlot(int(h), int(m)))
    cron.set_time_series(time_series)
    suite.add_cron(cron)
    # suite.add_time(runtime)
    # suite.add_date(0, 0, 0)

suite.add_variable("ECF_INCLUDE", working_folder)
suite.add_variable("ECF_HOME", working_folder)
suite.add_variable("TOOLS_HOME", "%s/.." % run_path)
suite.add_variable("OUTPUT_FOLDER", output_folder)
suite.add_variable("INPUT_FOLDER", input_folder)
suite.add_variable("VERBOSE_FLAG", verbose_flag)
suite.add_variable("DIFFERENCE_THRESHOLD_FACTOR", difference_threshold_factor)
# begin of modification 06.08.2012 cgjd
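# The block above trusts `runtime` to be a well-formed "HH:MM" string. A
# stricter variant could validate it first; this is a sketch of one way to
# do that (an addition for illustration, not part of the original script).
def parse_runtime(runtime):
    """Return (hour, minute) for an "HH:MM" string, or None if malformed."""
    try:
        h, m = runtime.split(":")
        h, m = int(h), int(m)
    except ValueError:
        return None
    if not (0 <= h <= 23 and 0 <= m <= 59):
        return None
    return h, m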
# Imports needed by this snippet (the remaining names, e.g. logger, args,
# mysuite, come from the surrounding script).
import datetime
import os
from calendar import monthrange

import ecflow


def build_suite():
    """
    Build the ecflow suite.
    """
    logger.info('Building suite.')

    # ========================
    # GENERAL SUITE PROPERTIES
    # ========================
    defs = ecflow.Defs()
    suite = defs.add_suite(mysuite)

    # Set suite level variables
    set_vars(suite)

    # Set default status
    suite.add_defstatus(ecflow.DState.suspended)

    # Define thread limits
    suite.add_limit("mpmd_threads", mpmd_threads_number)
    suite.add_limit("serial_threads", serial_threads_number)

    # ========================
    # ADD CRON JOB
    # ========================
    start = ecflow.TimeSlot(0, 0)
    finish = ecflow.TimeSlot(23, 59)
    incr = ecflow.TimeSlot(0, 1)
    time_series = ecflow.TimeSeries(start, finish, incr, False)
    cron = ecflow.Cron()
    cron.set_time_series(time_series)
    fam_submit = suite.add_family('queue_submitter')
    submit = fam_submit.add_task('submit')
    submit.add_cron(cron)
    fam_submit.add_variable('ECF_JOB_CMD', ecgate_job_cmd)

    # ========================
    # DEFINE TOPLEVEL FAMILIES
    # ========================
    fam_proc = suite.add_family('proc')
    fam_dearch = suite.add_family('dearchiving')

    start = ecflow.TimeSlot(0, 0)
    finish = ecflow.TimeSlot(23, 55)
    incr = ecflow.TimeSlot(0, 5)
    time_series = ecflow.TimeSeries(start, finish, incr, False)
    cron = ecflow.Cron()
    cron.set_time_series(time_series)
    fam_arch = suite.add_family('archiving')
    storedata = fam_arch.add_task('storedata')
    storedata.add_cron(cron)
    fam_arch.add_variable('ECF_JOB_CMD', serial_job_cmd)

    # Activate thread limits
    fam_proc.add_inlimit('mpmd_threads')
    fam_dearch.add_inlimit('serial_threads')

    # Define job commands
    fam_proc.add_variable('ECF_JOB_CMD', mpmd_job_cmd)
    fam_dearch.add_variable('ECF_JOB_CMD', serial_job_cmd)

    # ===============================
    # DEFINE DYNAMIC FAMILIES & TASKS
    # ===============================
    dearch_interval = interval_value
    io_hiding_offset = io_offset_value
    dearch_counter = 0
    tar_counter = 0
    fam_tar = None
    fam_chunk = None
    mpmd_families = list()
    tarfiles_within_current_interval = list()

    # Make sure the dearchiving interval is at least one greater than
    # the IO hiding offset.
    if not (dearch_interval - io_hiding_offset) >= 1:
        raise ValueError('Dearchiving interval must be at least one greater '
                         'than IO hiding offset.')

    # Connect to the database and get the satellite list.
    db = AvhrrGacDatabase(dbfile=gacdb_sqlite_file)
    if args.satellites:
        satellites = args.satellites
    else:
        if args.ignoresats:
            satellites = db.get_sats(start_date=args.sdate,
                                     end_date=args.edate,
                                     ignore_sats=args.ignoresats)
        else:
            satellites = db.get_sats(start_date=args.sdate,
                                     end_date=args.edate)

    # -- loop over satellites
    for sat in satellites:
        # create sat family
        fam_sat = fam_proc.add_family(sat)
        # add satellite variable
        fam_sat.add_variable("SATELLITE", sat)
        # get years list
        years = db.get_years(sat)

        # -- loop over years for satellite
        for year in years:
            if args.userdatelimit:
                if year < args.sdate.year or year > args.edate.year:
                    continue
                # create family year for satellite
                fam_year = fam_sat.add_family(str(year))
                # start and end date for year & satellite
                if year == args.sdate.year:
                    sd = datetime.date(year, args.sdate.month, args.sdate.day)
                else:
                    sd = datetime.date(year, 1, 1)
                if year == args.edate.year:
                    ed = datetime.date(year, args.edate.month, args.edate.day)
                else:
                    ed = datetime.date(year, 12, 31)
            else:
                # create family year for satellite
                fam_year = fam_sat.add_family(str(year))
                # start and end date for year & satellite
                sd = datetime.date(year, 1, 1)
                ed = datetime.date(year, 12, 31)

            # get tarfile list
            tarfiles = db.get_tarfiles(start_date=sd, end_date=ed,
                                       sats=[sat], include_blacklisted=False,
                                       strict=False)

            # -- loop over tarfiles for year & satellite
            for tarfil in tarfiles:
                logger.info("Working on: {0}".format(tarfil))

                # split tarfilename, e.g. "NOAA7_1985_01.tar"
                tarbase = os.path.basename(tarfil)
                tarmonth = ((tarbase.split("."))[0].split("_"))[2]
                taryear = ((tarbase.split("."))[0].split("_"))[1]

                # calendar.monthrange(year, month) returns the weekday of
                # the first day of the month and the number of days in
                # the month, for the specified year and month.
                mr = monthrange(int(taryear), int(tarmonth))
                first_tar_date = datetime.date(int(taryear), int(tarmonth), 1)
                last_tar_date = datetime.date(int(taryear), int(tarmonth),
                                              mr[1])
                date_str = first_tar_date.strftime("%Y%m%d") + '_' + \
                    last_tar_date.strftime("%Y%m%d")

                if tar_counter % dearch_interval == 0:
                    if fam_chunk:
                        # Add all collected tarfiles to the
                        # current dearchiving family.
                        fam_chunk.add_variable(
                            'TARFILES',
                            ' '.join(tarfiles_within_current_interval))
                        # Reset list of tarfiles within current interval.
                        tarfiles_within_current_interval = []

                    # Create new family for dearchiving the next chunk
                    # of data.
                    fam_chunk = fam_dearch.add_family(
                        'chunk{0}'.format(dearch_counter))
                    add_dearchiving_tasks(fam_chunk)
                    fam_chunk.add_variable("ECF_TRIES", 2)
                    dearch_counter += 1

                    # Make it wait for the current MPMD family minus a
                    # possible offset in order to hide IO time behind
                    # computation time.
                    if fam_tar:
                        add_trigger(
                            fam_chunk,
                            mpmd_families[tar_counter - io_hiding_offset - 1])
                    else:
                        # There is no trigger for the first IO chunk.
                        pass

                # Create one MPMD family for each tar_range_archive.
                fam_tar = fam_year.add_family('{0}'.format(tarmonth))
                tar_counter += 1

                # Add start and end day of fam_tar.
                add_family_variables(fam_tar,
                                     first_tar_date.strftime("%Y%m%d"),
                                     last_tar_date.strftime("%Y%m%d"))

                # Make it wait for the current dearchiving family.
                add_trigger(fam_tar, fam_chunk)

                # Add MPMD tasks to each tarfile family.
                add_mpmd_tasks(fam_tar)

                # Save the created family for later use.
                mpmd_families.append(fam_tar)
                tarfiles_within_current_interval.append(tarfil)

    # -- end of loop over satellites

    # Add the last chunk of collected tarfiles to the last
    # dearchiving family.
    fam_chunk.add_variable('TARFILES',
                           ' '.join(tarfiles_within_current_interval))

    # Close the database connection.
    db.close()

    # ============================
    # CREATE SUITE DEFINITION FILE
    # ============================
    # Check job creation
    defs.check_job_creation()

    # Save suite to file
    suite_def_file = mysuite + '.def'
    logger.info('Saving suite definition to file: {0}'.format(suite_def_file))
    defs.save_as_defs(suite_def_file)

    # ======================
    # CREATE LOG DIRECTORIES
    # ======================
    logger.info('Creating log directories on both the local and '
                'the remote machine.')

    # Create a tree of all families in the suite
    # (i.e. families, subfamilies, subsubfamilies etc.)
    tree = familytree(suite)

    # Create the corresponding log-directory tree:
    # 1.) Local machine
    for node in tree:
        dirname = os.path.join(ecf_out_dir, node)
        if not os.path.isdir(dirname):
            os.makedirs(dirname)

    # 2.) Remote machine
    ssh = SSHClient(user=remote_user_name, host=remote_host_name)
    for node in tree:
        remote_dir = os.path.join(remote_log_dir, node)
        # batch=True appends this mkdir call to the command batch.
        ssh.mkdir(remote_dir, batch=True)

    # Create all remote directories in one step (much faster).
    ssh.execute_batch()
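# The helpers used above (add_trigger, add_dearchiving_tasks, add_mpmd_tasks,
# familytree, SSHClient, ...) are defined elsewhere in the original sources.
# Minimal sketches of two of them, inferred from how they are called here
# (assumptions, not the original implementations):

def add_trigger(node, trigger_node):
    # Make `node` wait until `trigger_node` has completed.
    node.add_trigger('{0} == complete'.format(
        trigger_node.get_abs_node_path()))


def familytree(node, paths=None):
    # Recursively collect the paths of all families below `node`,
    # e.g. ['suite/proc', 'suite/proc/NOAA7', 'suite/proc/NOAA7/1985', ...].
    if paths is None:
        paths = []
    for child in node.nodes:
        if isinstance(child, ecflow.Family):
            paths.append(child.get_abs_node_path().lstrip('/'))
            familytree(child, paths)
    return paths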
# Additional imports needed by this variant of the snippet.
import calendar

from dateutil.rrule import rrule, MONTHLY


def build_suite():
    """
    Build the ecflow suite.
    """
    logger.info('Building suite.')

    # ========================
    # GENERAL SUITE PROPERTIES
    # ========================
    defs = ecflow.Defs()
    suite = defs.add_suite(mysuite)

    # Set suite level variables
    set_vars(suite)

    # Set default status
    suite.add_defstatus(ecflow.DState.suspended)

    # Define thread limits
    suite.add_limit("mpmd_threads", mpmd_threads_number)
    suite.add_limit("serial_threads", serial_threads_number)

    # ========================
    # ADD CRON JOB
    # ========================
    start = ecflow.TimeSlot(0, 0)
    finish = ecflow.TimeSlot(23, 59)
    incr = ecflow.TimeSlot(0, 1)
    time_series = ecflow.TimeSeries(start, finish, incr, False)
    cron = ecflow.Cron()
    cron.set_time_series(time_series)
    fam_submit = suite.add_family('queue_submitter')
    submit = fam_submit.add_task('submit')
    submit.add_cron(cron)
    fam_submit.add_variable('ECF_JOB_CMD', ecgate_job_cmd)

    # ========================
    # DEFINE TOPLEVEL FAMILIES
    # ========================
    fam_dearch = suite.add_family('dearchiving')
    fam_proc = suite.add_family('processing')
    fam_make = suite.add_family('make_tarfile')
    fam_arch = suite.add_family('archiving')

    # Activate thread limits
    fam_dearch.add_inlimit('serial_threads')
    fam_proc.add_inlimit('mpmd_threads')
    fam_make.add_inlimit('serial_threads')
    fam_arch.add_inlimit('serial_threads')

    # Define job commands
    fam_dearch.add_variable('ECF_JOB_CMD', serial_job_cmd)
    fam_proc.add_variable('ECF_JOB_CMD', mpmd_job_cmd)
    fam_make.add_variable('ECF_JOB_CMD', serial_job_cmd)
    fam_arch.add_variable('ECF_JOB_CMD', serial_job_cmd)

    # ===============================
    # DEFINE DYNAMIC FAMILIES & TASKS
    # ===============================
    for mm in rrule(MONTHLY, dtstart=args.sdate, until=args.edate):
        yearstr = mm.strftime("%Y")
        monthstr = mm.strftime("%m")
        act_date = datetime.date(int(yearstr), int(monthstr), 1)
        first_day = "01"
        last_day = calendar.monthrange(int(yearstr), int(monthstr))[1]
        yyyymm = yearstr + monthstr
        start_date = yearstr + monthstr + first_day
        end_date = yearstr + monthstr + str(last_day)

        if args.ignore_months:
            if int(monthstr) in args.ignore_months:
                continue

        try:
            # dearchiving family
            fam_year_dearch = add_fam(fam_dearch, yearstr)

            # processing family
            fam_year_proc = add_fam(fam_proc, yearstr)
            add_trigger(fam_year_proc, fam_year_dearch)

            # make yearly tarfile family
            fam_year_make = add_fam(fam_make, yearstr)
            fam_year_make.add_variable("YEAR", yearstr)
            add_make_tarfile_task(fam_year_make)
            add_trigger(fam_year_make, fam_year_proc)
        except RuntimeError:
            # The yearly families already exist; keep using them for the
            # remaining months of this year.
            pass

        # dearchiving family
        fam_month_dearch = add_fam(fam_year_dearch, monthstr)
        fam_month_dearch.add_variable("YYYYMM", yyyymm)
        fam_month_dearch.add_variable("START_DATE", start_date)
        fam_month_dearch.add_variable("END_DATE", end_date)
        fam_month_dearch.add_variable("NDAYS", last_day)
        add_dearchiving_task(fam_month_dearch)

        # processing family
        fam_month_proc = add_fam(fam_year_proc, monthstr)
        fam_month_proc.add_variable("YYYYMM", yyyymm)
        fam_month_proc.add_variable("SY", yearstr)
        fam_month_proc.add_variable("EY", yearstr)
        fam_month_proc.add_variable("SM", monthstr)
        fam_month_proc.add_variable("EM", monthstr)
        fam_month_proc.add_variable("SD", first_day)
        fam_month_proc.add_variable("ED", last_day)
        add_mpmd_tasks(fam_month_proc)

    # Create one tarball containing the time-series tarfiles.
    add_archiving_task(fam_arch)
    add_trigger(fam_arch, fam_make)

    # ============================
    # CREATE SUITE DEFINITION FILE
    # ============================
    # Check job creation
    defs.check_job_creation()

    # Save suite to file
    suite_def_file = mysuite + '.def'
    logger.info('Saving suite definition to file: {0}'.format(suite_def_file))
    defs.save_as_defs(suite_def_file)

    # ======================
    # CREATE LOG DIRECTORIES
    # ======================
    logger.info('Creating log directories on both the local and '
                'the remote machine.')

    # Create a tree of all families in the suite
    # (i.e. families, subfamilies, subsubfamilies etc.)
    tree = familytree(suite)

    # Create the corresponding log-directory tree:
    # 1.) Local machine
    for node in tree:
        dirname = os.path.join(ecf_out_dir, node)
        if not os.path.isdir(dirname):
            os.makedirs(dirname)

    # 2.) Remote machine
    ssh = SSHClient(user=remote_user_name, host=remote_host_name)
    for node in tree:
        remote_dir = os.path.join(remote_log_dir, node)
        # batch=True appends this mkdir call to the command batch.
        ssh.mkdir(remote_dir, batch=True)

    # Create all remote directories in one step (much faster).
    ssh.execute_batch()
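# add_fam() is not shown in this file. Given the try/except RuntimeError
# around its first per-year use above, it is presumably a thin wrapper along
# these lines (a sketch, not the original code): ecflow raises RuntimeError
# when a child with the same name already exists, which is what makes the
# already-created-this-year case fall through to the existing families.
def add_fam(node, name):
    return node.add_family(str(name))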