def merge_update_state(self, MERGE_INFO_CONTENTS):

    # Read MERGE.LOG contents, then check LOG & DONE files for each
    # TRAINOPT row to detect state transitions. When all split jobs for
    # a row are done, sum the NUM_SNE / NUM_SPECTRA / CPU_MINUTES stats
    # from the YAML files and store them in the row.
    #
    # Returns (row_split_list, row_merge_list, n_state_change); the
    # first element is always [] since there is no SPLIT table.

    submit_info_yaml = self.config_prep['submit_info_yaml']
    output_dir       = self.config_prep['output_dir']
    script_dir       = submit_info_yaml['SCRIPT_DIR']
    n_job_split      = submit_info_yaml['N_JOB_SPLIT']

    # local aliases for the MERGE-table column indices
    COLNUM_STATE    = COLNUM_MERGE_STATE
    COLNUM_TRAINOPT = COLNUM_TRAIN_MERGE_TRAINOPT
    COLNUM_NLC      = COLNUM_TRAIN_MERGE_NLC
    COLNUM_NSPEC    = COLNUM_TRAIN_MERGE_NSPEC
    COLNUM_CPU      = COLNUM_TRAIN_MERGE_CPU

    # init outputs of function
    n_state_change     = 0
    row_list_merge_new = []
    row_list_merge     = MERGE_INFO_CONTENTS[TABLE_MERGE]

    # keynames_for_job_stats returns 3 keynames :
    #   {base}, {base}_sum, {base}_list
    key_nlc, key_nlc_sum, key_nlc_list = \
        self.keynames_for_job_stats('NUM_SNE')
    key_nspec, key_nspec_sum, key_nspec_list = \
        self.keynames_for_job_stats('NUM_SPECTRA')
    key_cpu, key_cpu_sum, key_cpu_list = \
        self.keynames_for_job_stats('CPU_MINUTES')
    key_list = [key_nlc, key_nspec, key_cpu]

    # enumerate replaces the original hand-maintained nrow_check counter
    for irow, row in enumerate(row_list_merge):
        row_list_merge_new.append(row)   # default output is same as input

        trainopt        = row[COLNUM_TRAINOPT]   # e.g., TRAINOPT001
        search_wildcard = f"{trainopt}*"

        # strip off row info
        STATE = row[COLNUM_STATE]

        # nothing to do if this row already reached a terminal state
        if STATE in (SUBMIT_STATE_DONE, SUBMIT_STATE_FAIL):
            continue

        NEW_STATE = STATE

        # get list of LOG, DONE, and YAML files
        log_list, done_list, yaml_list = \
            util.get_file_lists_wildcard(script_dir, search_wildcard)

        # careful to count only the files that are NOT None
        NLOG  = sum(x is not None for x in log_list)
        NDONE = sum(x is not None for x in done_list)

        if NLOG > 0:
            NEW_STATE = SUBMIT_STATE_RUN
        if NDONE == n_job_split:
            NEW_STATE = SUBMIT_STATE_DONE
            job_stats = self.get_job_stats(script_dir,
                                           log_list, yaml_list, key_list)

            # NOTE(review): the row is only rewritten once all split jobs
            # are DONE, so the transient RUN state is never persisted to
            # the MERGE table row -- confirm this is intended.
            row[COLNUM_STATE] = NEW_STATE
            row[COLNUM_NLC]   = job_stats[key_nlc_sum]
            row[COLNUM_NSPEC] = job_stats[key_nspec_sum]
            row[COLNUM_CPU]   = job_stats[key_cpu_sum]

            row_list_merge_new[irow] = row   # update new row
            n_state_change += 1

    # first return arg (row_split) is null since there is
    # no need for a SPLIT table
    return [], row_list_merge_new, n_state_change
def merge_update_state(self, MERGE_INFO_CONTENTS):

    # Read MERGE.LOG contents, then check LOG & DONE files for each
    # wfit row to detect state transitions. When all split jobs for a
    # row are done, sum the Ndof / CPU_MINUTES stats from the YAML
    # files, check for job failures, and store results in the row.
    #
    # Returns (row_list_dict, n_state_change); row_list_dict carries
    # 'row_split_list' (always [] since there is no SPLIT table),
    # 'row_merge_list', and an empty 'row_extra_list'.

    submit_info_yaml = self.config_prep['submit_info_yaml']
    output_dir       = self.config_prep['output_dir']
    script_dir       = submit_info_yaml['SCRIPT_DIR']
    n_job_split      = submit_info_yaml['N_JOB_SPLIT']

    # local aliases for the MERGE-table column indices
    COLNUM_STATE   = COLNUM_MERGE_STATE
    COLNUM_DIROPT  = COLNUM_WFIT_MERGE_DIROPT
    COLNUM_COVOPT  = COLNUM_WFIT_MERGE_COVOPT
    COLNUM_WFITOPT = COLNUM_WFIT_MERGE_WFITOPT
    COLNUM_NDOF    = COLNUM_WFIT_MERGE_NDOF
    COLNUM_CPU     = COLNUM_WFIT_MERGE_CPU

    # keynames_for_job_stats returns 3 keynames :
    #   {base}, {base}_sum, {base}_list
    key_ndof, key_ndof_sum, key_ndof_list = \
        self.keynames_for_job_stats('Ndof')
    key_cpu, key_cpu_sum, key_cpu_list = \
        self.keynames_for_job_stats('CPU_MINUTES')
    key_list = [key_ndof, key_cpu]

    row_list_merge = MERGE_INFO_CONTENTS[TABLE_MERGE]

    # init outputs of function
    n_state_change     = 0
    row_list_merge_new = []

    # enumerate replaces the original hand-maintained nrow_check counter
    for irow, row in enumerate(row_list_merge):
        row_list_merge_new.append(row)   # default output is same as input

        # strip off row info
        STATE           = row[COLNUM_STATE]
        prefix          = self.wfit_prefix(row)
        search_wildcard = f"{prefix}*"

        # nothing to do if this row already reached a terminal state
        if STATE in (SUBMIT_STATE_DONE, SUBMIT_STATE_FAIL):
            continue

        NEW_STATE = STATE

        # get list of LOG, DONE, and YAML files
        log_list, done_list, yaml_list = \
            util.get_file_lists_wildcard(script_dir, search_wildcard)

        # careful to count only the files that are NOT None
        NLOG  = sum(x is not None for x in log_list)
        NDONE = sum(x is not None for x in done_list)

        if NLOG > 0:
            NEW_STATE = SUBMIT_STATE_RUN
        if NDONE == n_job_split:
            NEW_STATE = SUBMIT_STATE_DONE

            wfit_stats = self.get_job_stats(script_dir,
                                            log_list, yaml_list, key_list)

            # check for failures in snlc_fit jobs.
            nfail = wfit_stats['nfail']
            if nfail > 0:
                NEW_STATE = SUBMIT_STATE_FAIL

            row[COLNUM_STATE] = NEW_STATE
            row[COLNUM_NDOF]  = wfit_stats[key_ndof_sum]
            row[COLNUM_CPU]   = wfit_stats[key_cpu_sum]

            row_list_merge_new[irow] = row   # update new row
            n_state_change += 1              # assume nevt changes

    # - - - - - - -
    # The first return arg (row_split) is null since there is
    # no need for a SPLIT table
    row_list_dict = {
        'row_split_list': [],
        'row_merge_list': row_list_merge_new,
        'row_extra_list': []
    }
    return row_list_dict, n_state_change
def merge_update_state(self, MERGE_INFO_CONTENTS):

    # Read MERGE.LOG contents, then check LOG & DONE files for each
    # TRAINOPT row to detect state transitions. Since the training jobs
    # write no YAML stats file, success is checked with the
    # get_train_status kluge instead of get_job_stats.
    #
    # Returns (row_split_list, row_merge_list, n_state_change); the
    # first element is always [] since there is no SPLIT table.

    submit_info_yaml = self.config_prep['submit_info_yaml']
    output_dir       = self.config_prep['output_dir']
    script_dir       = submit_info_yaml['SCRIPT_DIR']
    n_job_split      = submit_info_yaml['N_JOB_SPLIT']

    # local aliases for the MERGE-table column indices
    COLNUM_STATE    = COLNUM_MERGE_STATE
    COLNUM_TRAINOPT = COLNUM_TRAIN_MERGE_TRAINOPT
    COLNUM_NEVT     = COLNUM_TRAIN_MERGE_NEVT
    COLNUM_CPU      = COLNUM_TRAIN_MERGE_CPU

    # init outputs of function
    n_state_change     = 0
    row_list_merge_new = []
    row_list_merge     = MERGE_INFO_CONTENTS[TABLE_MERGE]

    # enumerate replaces the original hand-maintained nrow_check counter
    for irow, row in enumerate(row_list_merge):
        row_list_merge_new.append(row)   # default output is same as input

        trainopt        = row[COLNUM_TRAINOPT]   # e.g., TRAINOPT001
        search_wildcard = f"{trainopt}*"

        # strip off row info
        STATE = row[COLNUM_STATE]

        # nothing to do if this row already reached a terminal state
        if STATE in (SUBMIT_STATE_DONE, SUBMIT_STATE_FAIL):
            continue

        NEW_STATE = STATE

        # get list of LOG, DONE, and YAML files
        log_list, done_list, yaml_list = \
            util.get_file_lists_wildcard(script_dir, search_wildcard)

        # careful to count only the files that are NOT None
        NLOG  = sum(x is not None for x in log_list)
        NDONE = sum(x is not None for x in done_list)

        if NLOG > 0:
            NEW_STATE = SUBMIT_STATE_RUN
        if NDONE == n_job_split:
            NEW_STATE = SUBMIT_STATE_DONE

            # since there is no YAML file to examine, we have a
            # kluge check on success
            success, tproc = self.get_train_status(trainopt)
            if not success:
                # NOTE(review): assumes log_list[0] is a valid LOG file
                # here (all DONE files exist) -- confirm it cannot be None
                self.check_for_failure(log_list[0], -1, +1)
                NEW_STATE = SUBMIT_STATE_FAIL

            row[COLNUM_STATE] = NEW_STATE
            row[COLNUM_NEVT]  = 0        # ??? fill this later
            row[COLNUM_CPU]   = tproc

            row_list_merge_new[irow] = row   # update new row
            n_state_change += 1

    # - - - - - - -
    # The first return arg (row_split) is null since there is
    # no need for a SPLIT table
    return [], row_list_merge_new, n_state_change
def merge_update_state(self, MERGE_INFO_CONTENTS):

    # Called from base to
    # read MERGE.LOG, check LOG & DONE files.
    # Return update row list MERGE tables.
    # For lsst alerts, also update COMPRESS table.
    #
    # When all split jobs for a data-unit row are done, sum the NEVT /
    # HOSTGAL-z / WALLTIME (and optionally NOBS_ALERT) stats from the
    # YAML files, and store an events-per-second rate instead of CPU.
    #
    # Returns (row_list_dict, n_state_change); 'row_split_list' is
    # always [] since there is no SPLIT table.

    submit_info_yaml = self.config_prep['submit_info_yaml']
    output_dir       = self.config_prep['output_dir']
    script_dir       = submit_info_yaml['SCRIPT_DIR']
    n_job_split      = submit_info_yaml['N_JOB_SPLIT']
    output_format    = submit_info_yaml['OUTPUT_FORMAT']
    out_lsst_alert   = (output_format == OUTPUT_FORMAT_LSST_ALERTS)

    # local aliases for the MERGE-table column indices
    COLNUM_STATE       = COLNUM_MERGE_STATE
    COLNUM_DATAUNIT    = COLNUM_MKDATA_MERGE_DATAUNIT
    COLNUM_NEVT        = COLNUM_MKDATA_MERGE_NEVT
    COLNUM_NEVT_SPECZ  = COLNUM_MKDATA_MERGE_NEVT_SPECZ
    COLNUM_NEVT_PHOTOZ = COLNUM_MKDATA_MERGE_NEVT_PHOTOZ
    COLNUM_NOBS_ALERT  = COLNUM_MKDATA_MERGE_NOBS_ALERT
    COLNUM_RATE        = COLNUM_MKDATA_MERGE_RATE
    if out_lsst_alert:
        # alert table has an extra NOBS_ALERT column before RATE
        COLNUM_RATE += 1

    # init outputs of function
    n_state_change     = 0
    row_list_merge_new = []
    row_list_merge     = MERGE_INFO_CONTENTS[TABLE_MERGE]

    # keynames_for_job_stats returns 3 keynames :
    #   {base}, {base}_sum, {base}_list
    key_nall, key_nall_sum, key_nall_list = \
        self.keynames_for_job_stats('NEVT_ALL')
    key_nspecz, key_nspecz_sum, key_nspecz_list = \
        self.keynames_for_job_stats('NEVT_HOSTGAL_SPECZ')
    key_nphotz, key_nphotz_sum, key_nphotz_list = \
        self.keynames_for_job_stats('NEVT_HOSTGAL_PHOTOZ')
    key_tproc, key_tproc_sum, key_tproc_list = \
        self.keynames_for_job_stats('WALLTIME')
    key_list = [key_nall, key_nspecz, key_nphotz, key_tproc]

    if out_lsst_alert:
        key_nalert, key_nalert_sum, key_nalert_list = \
            self.keynames_for_job_stats('NOBS_ALERT')
        key_list += [key_nalert]

    # enumerate replaces the original hand-maintained nrow_check counter
    for irow, row in enumerate(row_list_merge):
        row_list_merge_new.append(row)   # default output is same as input

        data_unit       = row[COLNUM_DATAUNIT]
        search_wildcard = f"{data_unit}*"

        # strip off row info
        STATE = row[COLNUM_STATE]

        # nothing to do if this row already reached a terminal state
        if STATE in (SUBMIT_STATE_DONE, SUBMIT_STATE_FAIL):
            continue

        NEW_STATE = STATE

        # get list of LOG, DONE, and YAML files
        log_list, done_list, yaml_list = \
            util.get_file_lists_wildcard(script_dir, search_wildcard)

        # careful to count only the files that are NOT None
        NLOG  = sum(x is not None for x in log_list)
        NDONE = sum(x is not None for x in done_list)

        if NLOG > 0:
            NEW_STATE = SUBMIT_STATE_RUN
        if NDONE == n_job_split:
            NEW_STATE = SUBMIT_STATE_DONE
            job_stats = self.get_job_stats(script_dir,
                                           log_list, yaml_list, key_list)

            row[COLNUM_STATE]       = NEW_STATE
            row[COLNUM_NEVT]        = job_stats[key_nall_sum]
            row[COLNUM_NEVT_SPECZ]  = job_stats[key_nspecz_sum]
            row[COLNUM_NEVT_PHOTOZ] = job_stats[key_nphotz_sum]

            if out_lsst_alert:
                row[COLNUM_NOBS_ALERT] = job_stats[key_nalert_sum]
                n_tmp = row[COLNUM_NOBS_ALERT]
            else:
                n_tmp = row[COLNUM_NEVT]

            # load N/sec instead of CPU time
            t_proc = job_stats[key_tproc_sum]
            rate   = 0.0
            if t_proc > 0.0:
                rate = n_tmp / t_proc
            row[COLNUM_RATE] = float(f"{rate:.1f}")   # keep 1 decimal place

            row_list_merge_new[irow] = row   # update new row
            n_state_change += 1

    # - - - - - - - - - - -
    # check for optional extra table
    row_extra_list = []
    if out_lsst_alert:
        row_extra_list = self.compress_update_state(MERGE_INFO_CONTENTS)

    # first return arg (row_split) is null since there is
    # no need for a SPLIT table
    row_list_dict = {
        'row_split_list': [],
        'row_merge_list': row_list_merge_new,
        'row_extra_list': row_extra_list,
        'table_names': [TABLE_SPLIT, TABLE_MERGE, TABLE_COMPRESS]
    }
    return row_list_dict, n_state_change