def process(data_type, study_id, center_id, center_ids, dataset_type_id, out_file, output_format, server_url):
    # print("data_type = {}, study_id = {}, out_file = {}".format(data_type, study_id, out_file))

    # get URL of the API server
    from utils import ConfigData
    main_cfg = ConfigData('configs/main_config.yaml')
    api_server_url = main_cfg.get_value('SAMPLEINFO_CLI_URL')

    if server_url:
        # print('server_url: {}'.format(api_server_url))
        click.echo('server_url: {}'.format(api_server_url))

    if check_data_type_value(data_type):
        api_url, err_msg = identify_api_url(api_server_url, data_type, study_id,
                                            center_id, center_ids, dataset_type_id)
    else:
        api_url = ''
        err_msg = 'Unexpected data_type value ({}) was provided. Run --help for the list of expected values.'\
            .format(data_type)

    if len(err_msg) == 0:
        if len(api_url) > 0:
            # access api and retrieve the data
            response = requests.get(api_url)
            # print("data_type = {}, study_id = {}, out_file = {}".format(data_type, stu)
            # print(response.status_code)
            json_parsed = output_data(response.json(), out_file, output_format)
        else:
            print('Error: Cannot identify the database call for the given parameters.')
    else:
        # report an error
        print('Error: {}'.format(err_msg))
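# Hypothetical invocation of process() above; the argument values below are made-up
# examples and are not taken from the project configuration or its API:
# process(data_type='study', study_id='123', center_id=None, center_ids=None,
#         dataset_type_id=None, out_file='study_123.json', output_format='json',
#         server_url=True)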
def __init__(self, filepath, conf_source, log_obj, file_type=None, file_delim=None):
    # setup default parameters
    if file_type is None:
        file_type = 1
    if file_delim is None:
        file_delim = ','  # '\t'

    File.__init__(self, filepath, file_type, file_delim)
    self.conf_src = ConfigData('', conf_source)
    self.logger = log_obj
    self.map = {}  # it will hold a dict where key is an aliquot id and value is the relative path to the file

    # set file properties before loading it
    self.file_delim = self.conf_src.get_value('file_delim') \
        if self.conf_src.get_value('file_delim') else self.file_delim
    self.header_row_num = self.conf_src.get_value('header_row_num') \
        if self.conf_src.get_value('header_row_num') else self.header_row_num

    # load the file
    self.get_file_content()
def send_yagmail(emails_to, subject, message, email_from=None, attachment_path=None,
                 smtp_server=None, smtp_server_port=None):
    root_dir = cm.get_project_root()
    cnf_path = str(root_dir.joinpath(gc.MAIN_CONFIG_FILE))
    m_cfg = ConfigData(cnf_path)

    if not email_from:
        email_from = m_cfg.get_value('Email/default_from_email')
    if not smtp_server:
        smtp_server = m_cfg.get_value('Email/smtp_server')
    if not smtp_server_port:
        smtp_server_port = m_cfg.get_value('Email/smtp_server_port')

    # receiver = emails_to  # '[email protected], [email protected], [email protected]'
    body = message
    filename = attachment_path  # 'test.png'

    yag = yagmail.SMTP(email_from, host=smtp_server, smtp_skip_login=True, smtp_ssl=False,
                       soft_email_validation=False, port=smtp_server_port)
    yag.send(
        to=emails_to,
        subject=subject,
        contents=body,
        attachments=filename,
    )
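# A minimal usage sketch for send_yagmail(), assuming the main config file provides the
# 'Email/*' values read above; the recipient address and attachment path are hypothetical:
if __name__ == '__main__':
    send_yagmail(
        emails_to=['[email protected]'],
        subject='Processing report',
        message='Processing finished; see the attached log.',
        attachment_path='logs/latest_run.log',  # hypothetical path; pass None to send without an attachment
    )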
def load_configuration(fl_class, loc_cfg_path):
    # load global configuration
    # m_cfg = ConfigData(gc.MAIN_CONFIG_FILE)
    m_logger_name = gc.MAIN_LOG_NAME  # m_cfg.get_value('Logging/main_log_name')
    m_logger = logging.getLogger(m_logger_name)

    m_logger.debug('Loading Global config file {} for file: {}'.format(
        gc.MAIN_CONFIG_FILE, fl_class.filepath))
    StudyConfig.config_glb = ConfigData(gc.MAIN_CONFIG_FILE)

    m_logger.info('Loading Study config file {} for file: {}'.format(
        loc_cfg_path, fl_class.filepath))

    # load local configuration
    try:
        StudyConfig.config_loc = ConfigData(loc_cfg_path)
    except Exception as ex:
        m_logger.error('Error "{}" occurred during loading study config file "{}"\n{}'.format(
            ex, loc_cfg_path, traceback.format_exc()))
        # raise
        return False

    # load global logging setting
    StudyConfig.study_logger_name = gc.FILE_LOG_NAME  # StudyConfig.config_glb.get_value(gc.STUDY_LOGGER_NAME_CFG_PATH)
    StudyConfig.study_logging_level = StudyConfig.config_glb.get_value(gc.STUDY_LOGGING_LEVEL_CFG_PATH)

    return True
def load_project_config_into_main(self, project):
    # load project specific "project_config" config file
    cfg_project = ConfigData(gc.CONFIG_FILE_PROJECT.replace('{project}', project))
    if cfg_project.loaded:
        # if cfg_project was loaded, update it with the environment specific settings (from project_location config)
        cfg_project_updated = self.update_cfg_dictionary_with_location_details(
            gc.CONFIG_FILE_PROJECT_LOCATION, self.project, cfg_project.get_whole_dictionary())
        # update main config with the outcome of the previous updates
        self.conf_main.update(cfg_project_updated)
def convert_sub_aliq_to_aliquot(sa, assay):
    aliquot = sa
    fl_cfg_dict = ConfigData(gc.CONFIG_FILE_DICTIONARY)
    assay_postfixes = fl_cfg_dict.get_value('assay_sub_aliquot_postfix/' + assay)  # get_item_by_key

    if assay_postfixes is not None:
        for assay_postfix in assay_postfixes:
            apf_len = len(assay_postfix)
            if sa[-apf_len:] == assay_postfix:
                aliquot = sa[:len(sa) - apf_len]
                break  # exit loop if a match was found

    return aliquot
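# Illustration of the postfix stripping done by convert_sub_aliq_to_aliquot(); this is a
# self-contained sketch with hypothetical postfixes - the real ones come from the
# 'assay_sub_aliquot_postfix/<assay>' entry of the dictionary config file:
def strip_sub_aliquot_postfix(sub_aliquot, postfixes=('_R1', '_D1')):
    # return the aliquot id once a matching postfix is removed from the end of the sub-aliquot id
    for postfix in postfixes:
        if sub_aliquot.endswith(postfix):
            return sub_aliquot[:-len(postfix)]
    return sub_aliquot

# strip_sub_aliquot_postfix('AB1234_R1') -> 'AB1234'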
def key_exists_in_dict(key, section):
    fl_cfg_dict = ConfigData(gc.CONFIG_FILE_DICTIONARY)
    key = replace_unacceptable_chars(key, gc.ASSAY_CHARS_TO_REPLACE)
    try:
        v = fl_cfg_dict.get_item_by_key(section + "/" + key)
        if v is not None:
            return True
        else:
            return False
    except Exception:
        return False
def get_dict_value(key, section):
    fl_cfg_dict = ConfigData(gc.CONFIG_FILE_DICTIONARY)
    # replace spaces and slashes with "_"
    key = replace_unacceptable_chars(key, gc.ASSAY_CHARS_TO_REPLACE)
    try:
        v = fl_cfg_dict.get_item_by_key(section + "/" + key)
        if v is not None:
            return v
        else:
            return key
    except Exception:
        return key
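# Self-contained sketch of the lookup-with-fallback behavior shared by key_exists_in_dict()
# and get_dict_value(): unknown keys simply fall through unchanged. The dictionary content
# below is hypothetical; the real one lives in gc.CONFIG_FILE_DICTIONARY:
_demo_dictionary = {'assay/rnaseq': 'RNAseq'}

def lookup_or_echo(key, section, dictionary=_demo_dictionary):
    # return the mapped value when the section/key pair is known, otherwise return the key itself
    return dictionary.get(section + '/' + key, key)

# lookup_or_echo('rnaseq', 'assay')  -> 'RNAseq'
# lookup_or_echo('unknown', 'assay') -> 'unknown'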
def prepare_form(self, form_name):
    forms_location = Path(gc.SUBMISSION_FORMS_DIR + '/' + form_name + '/' + self.req_obj.project)

    # identify paths for json and config (yaml) files
    fl_path_json_common = forms_location / (form_name + '.json')
    fl_path_json_assay = forms_location / (form_name + '_' + str(self.req_obj.assay).lower() + '.json')
    fl_path_json_schema = forms_location / (form_name + '_schema.json')
    fl_path_cfg_common = forms_location / (form_name + '.yaml')

    # fl_path_json_common = Path(gc.SUBMISSION_FORMS_DIR + '/' + form_name + '/' + form_name + '.json')
    # fl_path_json_assay = Path(gc.SUBMISSION_FORMS_DIR + '/' + form_name + '/' + form_name + '_' +
    #                           str(self.req_obj.assay).lower() + '.json')
    # fl_path_json_schema = Path(gc.SUBMISSION_FORMS_DIR + '/' + form_name + '/' + form_name + '_schema.json')
    # fl_path_cfg_common = Path(gc.SUBMISSION_FORMS_DIR + '/' + form_name + '/' + form_name + '.yaml')

    # check the value assigned to the current request's data_source_forms_assignment
    # and select assay config file accordingly
    if self.req_obj.data_source_forms_assignment == 'file':
        fl_path_cfg_assay = forms_location / (form_name + '_' + str(self.req_obj.assay).lower() + '.yaml')
    elif self.req_obj.data_source_forms_assignment == 'db':
        fl_path_cfg_assay = forms_location / (form_name + '_' + str(self.req_obj.assay).lower() + '_db.yaml')
    else:
        # data_source_forms_assignment = 'db' will be treated as a default assignment
        fl_path_cfg_assay = forms_location / (form_name + '_' + str(self.req_obj.assay).lower() + '.yaml')

    # check if assay specific json exists; if yes - use it, if not - use common one
    if cm.file_exists(fl_path_json_assay):
        fl_path_json = fl_path_json_assay
    else:
        fl_path_json = fl_path_json_common

    # load json and config files
    self.fl_json = FileJson(fl_path_json, self.req_obj.error, self.req_obj.logger)
    self.fl_json_schema = FileJson(fl_path_json_schema, self.req_obj.error, self.req_obj.logger)
    self.fl_cfg_common = ConfigData(fl_path_cfg_common)
    self.fl_cfg_assay = ConfigData(fl_path_cfg_assay)
    # self.fl_cfg_dict = ConfigData(gc.CONFIG_FILE_DICTIONARY)

    # print(self.fl_json.json_data)
    # loop through all json keys and fill those with associated data
    self.get_json_keys(self.fl_json.json_data)
    # print(self.fl_json.json_data)

    # validate final json file against json schema (if present)
    self.validate_json(self.fl_json, self.fl_json_schema)
def load_assay_conf(self, assay, project):
    assay_cfg_path = gc.CONFIG_FILE_ASSAY.replace('{project}', project)
    cfg_assay = ConfigData(assay_cfg_path)
    assay_config = cfg_assay.get_value(assay.upper())
    if assay_config:
        self.logger.info(
            "Configuration for the {} assay was loaded from the assay config file: {}. "
            .format(assay.upper(), assay_cfg_path))
    else:
        _str = "Configuration for the {} assay CANNOT be loaded from the assay config file: {}. " \
               "Aborting execution.".format(assay.upper(), assay_cfg_path)
        self.logger.error(_str)
        self.error.add_error(_str)
    return assay_config
def update_cfg_dictionary_with_location_details(self, location_path, project, cfg_to_update):
    cfg_location = ConfigData(location_path.replace('{project}', project))
    if cfg_location.loaded:
        self.logger.info('Local config file "{}" was loaded and is being used.'.format(cfg_location.cfg_path))
        cfg_to_update = cm.update_dictionary_matching_keys(
            cfg_to_update, cfg_location.get_whole_dictionary())
    else:
        _str = 'Local config file "{}" was NOT loaded. Aborting processing of the current request file.'\
            .format(cfg_location.cfg_path)
        self.logger.error(_str)
        self.error.add_error(_str)
    return cfg_to_update
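# Sketch of the merge performed above; cm.update_dictionary_matching_keys() is assumed to
# override only keys that already exist in the config being updated (an assumption for
# illustration, not the project helper itself):
def update_matching_keys(base_cfg, overrides):
    # copy location-specific values over keys that are already present in the base config
    for key, value in overrides.items():
        if key in base_cfg:
            base_cfg[key] = value
    return base_cfg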
def __init__(self, api_cfg_file, log_obj):
    self.loaded = False
    # set logger object
    self.logger = log_obj
    self.dataset = None
    # set error object
    self.error = ApiError(self)

    self.logger.info('Start processing API call for the following config file: {}'.format(api_cfg_file))

    # load config file for the current api process
    cfg_file_path = gc.CONFIGS_DIR + api_cfg_file
    self.api_cfg = ConfigData(cfg_file_path)
    if not self.api_cfg.loaded:
        _str = 'Cannot load the config file: "{}"'.format(cfg_file_path)
        self.logger.error(_str)
        self.error.add_error(_str)
        return

    # get values from the config file
    self.api_name = self.api_cfg.get_value('API/name')
    self.api_url = self.api_cfg.get_value('API/url')
    self.post_fields = self.api_cfg.get_value('API/post_fields')

    # verify if "eval" is present in any of the post fields and perform the evaluation, if needed
    if self.post_fields:
        for pf in self.post_fields:
            self.post_fields[pf] = cm.eval_cfg_value(self.post_fields[pf], self.logger, self.error)

    # if no errors were generated during init, set loaded = True
    if not self.error.errors_exist():
        self.loaded = True
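# Shape of the per-API config consumed above, sketched as a Python dict; the keys mirror
# the get_value() calls ('API/name', 'API/url', 'API/post_fields') while the concrete
# values are hypothetical:
_example_api_cfg = {
    'API': {
        'name': 'example_api',
        'url': 'https://example.org/api/endpoint',
        'post_fields': {
            'study_id': '123',  # values may contain an "eval" expression handled by cm.eval_cfg_value
        },
    },
}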
class Inquiry(File):
    def __init__(self, filepath, conf_main=None, file_type=2, sheet_name=''):
        # load_configuration (main_cfg_obj) # load global and local configurations
        File.__init__(self, filepath, file_type)
        self.sheet_name = sheet_name  # .strip()

        if conf_main:
            self.conf_main = conf_main
        else:
            self.conf_main = ConfigData(gc.CONFIG_FILE_MAIN)

        self.error = InquiryError(self)
        self.log_handler = None
        self.logger = self.setup_logger(self.wrkdir, self.filename)
        self.logger.info('Start working with Download Inquiry file {}'.format(filepath))

        self.inq_match_arr = []
        self.columns_arr = []
        self.inq_sources = {}
        self.inq_line_sources = {}

        # load common for all programs dictionary config
        self.conf_dict = DictConfigData(gc.CONFIG_FILE_DICTIONARY)
        if not self.conf_dict.loaded:
            # disqualify the current inquiry file
            _str = 'Aborting processing of the inquiry file - the following common dictionary config file cannot ' \
                   'be loaded: {}.'.format(gc.CONFIG_FILE_DICTIONARY)
            self.error.add_error(_str)
            self.logger.error(_str)
            return

        # save inquiry file structure into dedicated variables
        self.file_structure_by_col_num = self.conf_dict.get_inqury_file_structure('by_col_num')
        self.file_structure_by_col_name = self.conf_dict.get_inqury_file_structure('by_col_name')

        self.processed_folder = gc.INQUIRY_PROCESSED_DIR
        # if a relative path provided, convert it to the absolute address based on the application working dir
        if not os.path.isabs(self.processed_folder):
            self.processed_folder = Path(self.wrkdir) / self.processed_folder
        else:
            self.processed_folder = Path(self.processed_folder)

        self.download_request_path = None
        self.disqualified_items = {}
        self.disqualified_inquiry_path = ''  # will store path to an inquiry file with disqualified sub-aliquots

        if not self.sheet_name or len(self.sheet_name) == 0:
            # if sheet name was not passed as a parameter, try to get it from config file
            self.sheet_name = gc.INQUIRY_EXCEL_WK_SHEET_NAME  # 'wk_sheet_name'
        # print (self.sheet_name)
        self.logger.info('Data will be loaded from worksheet: "{}"'.format(self.sheet_name))

        self.conf_process_entity = None
        self.db_access = DBAccess(self.logger, self.conf_main, self.error)

        self.get_file_content()

    def get_file_content(self):
        if not self.columns_arr or not self.lines_arr:
            self.columns_arr = []
            self.lines_arr = []
            if cm.file_exists(self.filepath):
                self.logger.debug('Loading file content of "{}"'.format(self.filepath))
                with xlrd.open_workbook(self.filepath) as wb:
                    if not self.sheet_name or len(self.sheet_name) == 0:
                        # by default retrieve the first sheet in the excel file
                        sheet = wb.sheet_by_index(0)
                    else:
                        # if sheet name was provided
                        sheets = wb.sheet_names()  # get list of all sheets
                        if self.sheet_name in sheets:
                            # if given sheet name in the list of available sheets, load the sheet
                            sheet = wb.sheet_by_name(self.sheet_name)
                        else:
                            # report an error if given sheet name not in the list of available sheets
                            _str = ('Given worksheet name "{}" was not found in the file "{}". '
                                    'Verify that the worksheet name exists in the file.'
                                    ).format(self.sheet_name, self.filepath)
                            self.error.add_error(_str)
                            self.logger.error(_str)

                            self.lines_arr = None
                            self.loaded = False
                            return self.lines_arr

                    sheet.cell_value(0, 0)

                    lines = []  # will hold content of the inquiry file as an array of arrays (rows)
                    columns = []
                    for i in range(sheet.ncols):
                        column = []
                        for j in range(sheet.nrows):
                            if i == 0:
                                lines.append([])  # adds an array for each new row in the inquiry file
                            # print(sheet.cell_value(i, j))
                            cell = sheet.cell(j, i)
                            cell_value = cell.value
                            # take care of numbers and dates received from Excel and converted to float by default
                            if cell.ctype == 2 and int(cell_value) == cell_value:
                                # the value is an integer
                                cell_value = str(int(cell_value))
                            elif cell.ctype == 2:
                                # the value is a float
                                cell_value = str(cell_value)
                            # convert date back to human readable date format
                            # print ('cell_value = {}'.format(cell_value))
                            if cell.ctype == 3:
                                cell_value_date = xlrd.xldate_as_datetime(cell_value, wb.datemode)
                                cell_value = cell_value_date.strftime("%Y-%m-%d")
                            column.append(cell_value)  # adds value to the current column array
                            # lines[j].append('"' + cell_value + '"')  # adds value in "csv" format for a current row
                            lines[j].append(cell_value)
                        # self.columns_arr.append(','.join(column))
                        columns.append(column)  # adds a column to a list of columns

                    # populate lines_arr and columns_arr properties
                    self.lines_arr = lines
                    self.columns_arr = columns

                    # populate lineList value as required for the base class
                    self.lineList = []
                    for ln in lines:
                        self.lineList.append(','.join(map(str, ln)))

                    wb.unload_sheet(sheet.name)

                # perform validation of the current inquiry file
                self.validate_inquiry_file()

                if self.error.exist():
                    # report that errors exist
                    self.loaded = False
                    # print(self.error.count)
                    # print(self.error.get_errors_to_str())
                    _str = 'Errors ({}) were identified during validation of the inquiry. \nError(s): {}'.format(
                        self.error.count, self.error.get_errors_to_str())
                else:
                    self.loaded = True
            else:
                _str = 'Loading content of the file "{}" failed since the file does not appear to exist.'.format(
                    self.filepath)
                self.error.add_error(_str)
                self.logger.error(_str)

                self.columns_arr = None
                self.lines_arr = None
                self.loaded = False
        return self.lineList

    def validate_inquiry_file(self):
        self.logger.info('Start validating the current inquiry file "{}".'.format(self.filepath))
        row_count = 1
        failed_cnt = 0
        valid_aliquot_flag = self.conf_main.get_value('Validate/aliquot_id_vs_manifest')
        valid_inquiry_values_flag = self.conf_main.get_value('Validate/inquiry_values_vs_dictionary')
        inquiry_min_number_columns = self.conf_main.get_value('Validate/inquiry_min_number_columns')
        inquiry_validate_number_columns = self.conf_main.get_value('Validate/inquiry_validate_number_columns')

        if not inquiry_min_number_columns or not isinstance(inquiry_min_number_columns, int):
            inquiry_min_number_columns = 6  # set a default value if it is not provided in the config file
        if not inquiry_validate_number_columns or not isinstance(inquiry_validate_number_columns, int):
            inquiry_validate_number_columns = 6  # set a default value if it is not provided in the config file

        for row in self.lines_arr:
            if row_count == self.header_row_num:  # 1
                # skip the first row as it is a header
                row_count += 1
                continue

            sub_al = 'ND'  # set default value
            assay = ''  # set blank value as default
            valid_aliquot_performed = False
            skip_final_check = False

            # check if the inquiry file contains the min number of columns
            if len(row) < inquiry_min_number_columns:
                # disqualify the current inquiry file
                _str = 'The current inquiry file has {} columns while {} are expected and will be disqualified.' \
                    .format(len(row), inquiry_min_number_columns)
                self.error.add_error(_str)
                self.logger.error(_str)
                return

            # create a local DictConfigData object and copy there a dictionary object
            conf_dict = DictConfigData(None, self.conf_dict.get_dictionary_copy())

            # get sub-aliquot value before looping through all fields, so it can be used for reporting errors
            # also get program_code assigned to the row
            program_code = self.get_inquiry_value_by_field_name('program_code', row)
            sub_al = self.get_inquiry_value_by_field_name('sub-aliquot', row, False)

            # validate program_code value
            if conf_dict.key_exists_in_dict(str(program_code).lower(), 'program_code'):
                # update dictionary config (conf_dict) with the program specific settings loaded into conf_dict_program
                conf_dict_program_path = gc.CONFIG_FILE_DICTIONARY_PROGRAM\
                    .replace('{program}', conf_dict.get_dict_value(str(program_code).lower(), 'program_code'))
                conf_dict_program = ConfigData(conf_dict_program_path)
                conf_dict.update(conf_dict_program.get_whole_dictionary())
            else:
                _str = 'Unexpected value "{}" was provided for "program_code" (line #{})' \
                    .format(program_code, row_count)
                self.logger.critical(_str)
                # disqualify an inquiry file row, if unexpected value was provided
                self.disqualify_inquiry_item(sub_al, _str, row)
                failed_cnt += 1
                skip_final_check = True

            if not skip_final_check:
                # go through fields and validate the provided values
                for i in range(len(row)):
                    if i + 1 > inquiry_validate_number_columns:
                        # if number of columns in the inquiry file > expected maximum, exit the loop
                        break
                    col_category = conf_dict.get_dict_value(str(i + 1), 'inquiry_file_structure')
                    if col_category in ('program_code', 'sub-aliquot'):
                        # no checking is needed for the listed fields, proceed further
                        continue
                    elif col_category == 'db_center_id':
                        # get center id value and validate it
                        db_center_id = row[i]
                        # validate center_code or center_id value
                        self.logger.info('Start validation of center value "{}" provided for the current row'
                                         .format(db_center_id))
                        db = DBAccess(self.logger, self.conf_main, self.error)  # create DBAccess object
                        db.open_connection()
                        # test center value assuming center code was provided
                        dataset = db.validate_center_code(db_center_id, program_code, 'code', 'code')
                        _str_err_out1, center_id_out1 = self.check_validation_dataset_outcome(
                            dataset, 'center_id', 'center_code')
                        if center_id_out1:
                            # center id was returned, meaning center was validated fine
                            db_center_id = center_id_out1
                        else:
                            # if center code was not validated at first attempt, validate it assuming the center id was given
                            dataset = db.validate_center_code(db_center_id, program_code, 'id', 'code')
                            _str_err_out2, center_id_out2 = self.check_validation_dataset_outcome(
                                dataset, 'center_id', 'center_id')
                            if center_id_out2:
                                # center id was validated at the 2nd attempt, ignore the 1st validation attempt
                                db_center_id = center_id_out2
                            else:
                                # center validation attempts failed, report both failures
                                _str = 'Provided center value cannot be interpreted either as a code or an id; ' \
                                       'here are both validation outcomes: ' + \
                                       ' | '.join([_str_err_out1, _str_err_out2])
                                self.logger.warning(_str)
                                self.disqualify_inquiry_item(sub_al, _str, row)
                                failed_cnt += 1
                                skip_final_check = True
                                break

                        # if the aliquot validation is required, validate the sub-aliquot value using the db_center_id value
                        if valid_aliquot_flag:
                            # aliquot id validation is required
                            valid_aliquot_performed = True  # flag that aliquot validation was done
                            if isinstance(db_center_id, int):  # db_center_id.isnumeric():
                                # since center is numeric, proceed here
                                # get aliquot id based on the earlier verified assay value and given sub_aliquot id
                                aliquot = conf_dict.convert_sub_aliq_to_aliquot(sub_al, assay)
                                valid_status, valid_desc = self.db_access.validate_aliquot_id(aliquot, db_center_id)
                                if valid_status != 'OK':
                                    # disqualify an inquiry file row, if returned status is not OK
                                    _str = 'No match was found for the aliquot id "{}" (row #{}) in the manifest dataset ' \
                                           'of the database. DB response => Status: "{}"; Description: "{}".'\
                                        .format(aliquot, row_count, valid_status, valid_desc)
                                    self.logger.warning(_str)
                                    self.disqualify_inquiry_item(sub_al, _str, row)
                                    failed_cnt += 1
                                    skip_final_check = True
                                    break
                            else:
                                # report unexpected center id value
                                _str = 'Unexpected value "{}" was provided for "db_center_id" (line #{}, column #{}). This is a ' \
                                       'critical error because this value is required (based on the configuration setting ' \
                                       '"Validate/aliquot_id_vs_manifest") to validate the provided aliquot id "{}"' \
                                    .format(db_center_id, row_count, i + 1, sub_al)
                                self.logger.warning(_str)
                                # disqualify an inquiry file row, if unexpected value was provided
                                self.disqualify_inquiry_item(sub_al, _str, row)
                                failed_cnt += 1
                                skip_final_check = True
                                # break
                        else:
                            self.logger.info(
                                'Validation of the provided aliquot_id "{}" is not required based on the '
                                'value of the config parameter "Validate/aliquot_id_vs_manifest": "{}".'
                                .format(sub_al, valid_aliquot_flag))
                    else:
                        if col_category == 'assay':
                            assay = row[i].strip().lower()  # save assay value to a dedicated variable
                        if valid_inquiry_values_flag:
                            # if validation of the inquiry values vs dictionary is required
                            validate_values = []
                            validate_categories = []
                            if col_category == 'bulk_location':
                                # get inquiry_file_structure_bulk_location value
                                bulk_value_delim = conf_dict.get_dict_value(
                                    'inquiry_file_structure_bulk_location_delim', '')
                                validate_values = str(row[i]).split(bulk_value_delim)
                                validate_categories = conf_dict.get_dict_object(
                                    'inquiry_file_structure_bulk_location', '')
                            else:
                                validate_values.append(str(row[i]).lower())
                                validate_categories.append(col_category)

                            for vv, vc in zip(validate_values, validate_categories):
                                if not conf_dict.key_exists_in_dict(vv.lower(), vc):
                                    if col_category == 'bulk_location':
                                        _str = 'Unexpected value "{}" was provided for "{}" as a part of ' \
                                               'the "bulk_location" value (line #{}, column #{})' \
                                            .format(vv, vc, row_count, i + 1)
                                    else:
                                        _str = 'Unexpected value "{}" was provided for "{}" (line #{}, column #{})'\
                                            .format(vv, vc, row_count, i + 1)
                                    self.logger.critical(_str)
                                    # disqualify an inquiry file row, if unexpected value was provided
                                    self.disqualify_inquiry_item(sub_al, _str, row)
                                    failed_cnt += 1
                                    skip_final_check = True
                                    break
                    if skip_final_check:
                        break

            # check that, if aliquot validation is required, it was actually performed
            if not skip_final_check:
                if valid_aliquot_flag and not valid_aliquot_performed:
                    _str = 'Required aliquot validation vs. database manifest was not performed for the current row ' \
                           '(#{}) and it is considered a disqualification reason (most likely the db_center_id column ' \
                           'was not provided). ' \
                        .format(row_count)
                    self.logger.critical(_str)
                    # disqualify an inquiry file row, if unexpected value was provided
                    self.disqualify_inquiry_item(sub_al, _str, row)
                    failed_cnt += 1

            row_count += 1

        self.logger.info('Finish validating the inquiry file with{}.'.format(
            ' no errors' if failed_cnt == 0
            else ' errors; {} records were disqualified - see earlier log entries for details'.format(failed_cnt)))

    def check_validation_dataset_outcome(self, dataset, validation_id_column, validation_id_name):
        _str_err = ''
        validation_id_out = None
        # defaults in case the dataset row is missing any of the expected keys
        status = None
        description = ''
        validation_id = None
        if dataset:
            for row in dataset:
                if 'status' in row:
                    status = row['status']
                if 'description' in row:
                    description = row['description']
                if validation_id_column in row:  # center_id
                    validation_id = row[validation_id_column]
                break  # read only first row of the dataset

            if status == 'OK':
                # validation was successful
                validation_id_out = validation_id
            elif status == 'Failed':
                # validation has failed
                _str_err = 'Validation of the provided {} value vs DB has Failed, description: {}'\
                    .format(validation_id_name, description)
            else:
                # unexpected status value was returned
                _str_err = 'Validation of the provided {} value vs DB returned unexpected status {}'\
                    .format(validation_id_name, status)
        else:
            _str_err = 'Unexpected error was reported during validating {} in the DB. ' \
                       'Check earlier entries in the log file.'\
                .format(validation_id_name)

        return _str_err, validation_id_out

    def setup_logger(self, wrkdir, filename):
        # m_cfg = ConfigData(gc.CONFIG_FILE_MAIN)
        log_folder_name = gc.INQUIRY_LOG_DIR  # gc.LOG_FOLDER_NAME
        # m_logger_name = gc.MAIN_LOG_NAME
        # m_logger = logging.getLogger(m_logger_name)
        logger_name = gc.INQUIRY_LOG_NAME
        logging_level = self.conf_main.get_value('Logging/inquiry_log_level')

        # if a relative path provided, convert it to the absolute address based on the application working dir
        if not os.path.isabs(log_folder_name):
            log_folder_path = Path(wrkdir) / log_folder_name
        else:
            log_folder_path = Path(log_folder_name)

        lg = setup_logger_common(
            logger_name, logging_level,
            log_folder_path,  # Path(wrkdir) / log_folder_name,
            str(filename) + '_' + time.strftime("%Y%m%d_%H%M%S", time.localtime()) + '.log')

        self.log_handler = lg['handler']
        return lg['logger']

    # function will combine the datasource_id for the current inquiry line
    # it is possible that different lines will have the same datasource_id and thus can share the datasource
    def get_inquiry_line_datasource_id(self, inq_line):
        datasource_id = ''
        for col in self.file_structure_by_col_name:
            if col in ['program_code', 'assay', 'source_id']:
                datasource_id += '|' + self.get_inquiry_value_by_field_name(col, inq_line)
            elif 'source_' in col:
                datasource_id += '|' + self.get_inquiry_value_by_field_name(col, inq_line, False)
        return datasource_id

    def get_inquiry_value_by_field_name(self, field_name, inq_line, validate_by_dictionary=None):
        if validate_by_dictionary is None:
            validate_by_dictionary = True  # set default value to True

        if field_name in self.file_structure_by_col_name:
            col_num = self.file_structure_by_col_name[field_name]
            value = inq_line[col_num - 1].strip()
        else:
            value = ''

        # validate the provided value through the dictionary
        if validate_by_dictionary:
            value = self.conf_dict.get_dict_value(str(value).lower(), field_name)

        return value

    def process_inquiry_sources(self):
        cur_row = 0
        for inq_line in self.lines_arr:
            if cur_row == self.header_row_num - 1:
                # skip the header row
                cur_row += 1
                continue

            # get program code assigned to the current row
            program_code = self.get_inquiry_value_by_field_name('program_code', inq_line)
            # get assay assigned to the current row
            assay = self.get_inquiry_value_by_field_name('assay', inq_line)
            # get source id assigned to the current row
            source_id = self.get_inquiry_value_by_field_name('source_id', inq_line)

            # get source config file
            # 2 values are saved in a tuple: the program specific path and the default one;
            # if the program specific path does not exist, the default will be used
            cfg_source_path = (
                # configuration path for the current program by name
                gc.CONFIG_FILE_SOURCE_PATH
                .replace('{program}', program_code)
                .replace('{assay}', assay)
                .replace('{source_id}', source_id),
                # configuration path for the default program (used if no program specific path is present)
                gc.CONFIG_FILE_SOURCE_PATH
                .replace('{program}', 'default')
                .replace('{assay}', assay)
                .replace('{source_id}', source_id)
            )
            # get the source location config file path
            cfg_source_location_path = gc.CONFIG_FILE_SOURCE_LOCATION_PATH.replace('{source_id}', source_id)

            # attempt to load configuration for the program specific path
            cfg_source = ConfigData(Path(cfg_source_path[0]))
            if not cfg_source.loaded:
                # if config was not loaded from the program specific path, load the default one
                cfg_source = ConfigData(Path(cfg_source_path[1]))
            if cfg_source.loaded:
                # proceed here if the source config was loaded
                # load source location config with location specific settings for the current source
                cfg_source_location = ConfigData(Path(cfg_source_location_path))
                if cfg_source_location.loaded:
                    # if the source location config was loaded, update cfg_source config with the source location config
                    cfg_source.update(cfg_source_location.get_whole_dictionary())

                # get unique id of the datasource and check if the same id was used already, reuse that in such case
                inq_line_datasource_id = self.get_inquiry_line_datasource_id(inq_line)
                self.logger.info('Current inquiry row #{} was identified with the following data source id: {}'
                                 .format(cur_row, inq_line_datasource_id))
                # assign source id (inq_line_datasource_id) to the current inquiry line
                self.inq_line_sources[cur_row] = inq_line_datasource_id
                if inq_line_datasource_id in self.inq_sources:
                    # reuse existing datasource
                    self.logger.info('The data source id identified for the current inquiry row #{} matches one '
                                     'retrieved earlier (for this or another row) and will be re-used for '
                                     'the current row.'.format(cur_row))
                else:
                    # create a new datasource object
                    inq_line_datasource = DataSource(self, cfg_source, inq_line, inq_line_datasource_id)
                    self.inq_sources[inq_line_datasource_id] = inq_line_datasource
            else:
                sub_al = self.get_inquiry_value_by_field_name('sub-aliquot', inq_line, False)
                _str = 'Datasource config file for the row #{} (sub_aliquot: {}) cannot be loaded. ' \
                       'None of the expected config files is accessible: {}'\
                    .format(cur_row, sub_al, ' | '.join(cfg_source_path))
                self.logger.warning(_str)
                self.disqualify_inquiry_item(sub_al, _str, cur_row)  # TODO: verify if inq_line should be used instead of cur_row

            cur_row += 1

    def process_inquiry(self):
        self.process_inquiry_sources()
        self.match_inquiry_items_to_sources()
        self.create_download_request_file()
        self.create_inquiry_file_for_disqualified_entries()

        # check for errors and put final log entry for the inquiry.
        if self.error.exist():
            _str = 'Processing of the current inquiry was finished with the following errors: {}\n'.format(
                self.error.get_errors_to_str())
            self.logger.error(_str)
        else:
            _str = 'Processing of the current inquiry was finished successfully.\n'
            self.logger.info(_str)

    def match_inquiry_items_to_sources(self):
        cur_row = -1
        for inq_line in self.lines_arr:
            cur_row += 1  # increase row counter
            if cur_row == self.header_row_num - 1:
                continue

            # program_code = str(inq_line[0])  # get program code that must be the first column
            program_code = self.get_inquiry_value_by_field_name('program_code', inq_line)

            # create a local DictConfigData object and copy there a dictionary object
            conf_dict = DictConfigData(None, self.conf_dict.get_dictionary_copy())
            # update dictionary config (conf_dict) with the program specific settings loaded into conf_dict_program
            conf_dict_program_path = gc.CONFIG_FILE_DICTIONARY_PROGRAM.replace('{program}', program_code)
            conf_dict_program = ConfigData(conf_dict_program_path)
            conf_dict.update(conf_dict_program.get_whole_dictionary())

            # print (inq_study_path)
            bulk_location = self.get_inquiry_value_by_field_name('bulk_location', inq_line, False)
            assay = self.get_inquiry_value_by_field_name('assay', inq_line)
            sub_al = self.get_inquiry_value_by_field_name('sub-aliquot', inq_line, False)

            # inq_study_path = '/'.join([program_code, bulk_location, assay])
            inq_study_path = self.conf_main.get_value('Destination/study_path_template')
            inq_study_path = inq_study_path.replace('{program_code}', program_code)
            inq_study_path = inq_study_path.replace('{bulk_location}', bulk_location)
            inq_study_path = inq_study_path.replace('{assay}', assay)

            # check if current sub-aliquot is not part of disqualified items array
            if self.disqualified_items and sub_al in self.disqualified_items.keys():
                # if sub-aliquot was disqualified already, skip this line
                continue

            # identify aliquot for the given sub-aliquot
            al = conf_dict.convert_sub_aliq_to_aliquot(sub_al, assay)  # identify aliquot for the current inquiry line

            match = False

            # get reference to the Datasource object assigned to the current row
            if cur_row in self.inq_line_sources:
                cur_source = self.inq_sources[self.inq_line_sources[cur_row]]
            else:
                # if no data source was assigned to the current row, skip the row
                cur_source = None
                continue

            # check if any source types were disqualified during loading the datasource
            if cur_source.disqualified_data_sources:
                # if at least one source of the datasource was disqualified, skip the row using this datasource
                # and disqualify the current sub-aliquot as well
                self.disqualify_inquiry_item(
                    sub_al,
                    'Datasource associated with this aliquot_id was marked as disqualified.',
                    inq_line)
                continue

            # get a copy of the source type ids of the current datasource;
            # it will track number of items found for each source type
            cur_source_types = copy.deepcopy(cur_source.source_types)

            # loop through items of the source
            for src_item in cur_source.source_content_arr:
                match_out = False
                # attempt match by the sub-aliquot
                match_out, match_details = \
                    self.is_item_found_soft_match(sub_al, src_item['name'], src_item['soft_comparisions'], sub_al)
                if match_out:
                    match = True
                # if the sub-aliquot match was not a success, attempt to match by the aliquot
                elif src_item['aliquot_match']:
                    match_out, match_details = \
                        self.is_item_found_soft_match(al, src_item['name'], src_item['soft_comparisions'], sub_al)
                    if match_out:
                        match = True
                # if a match was found using one of the above methods, record the item to inq_match_arr
                if match_out:
                    # since a match was found, verify that the source path is accessible (except for web locations)
                    web_loc = src_item['web_location']
                    # real_path = os.path.realpath(src_item['path'])  # real path of the current item
                    if web_loc or not web_loc and os.path.exists(src_item['path']):
                        item_details = {
                            'sub-aliquot': sub_al,
                            'study': inq_study_path,
                            # 'source': src_item,
                            'source_item_name': src_item['name'],
                            'target_subfolder': src_item['target_subfolder'],
                            'real_path': src_item['path'],
                            'target_copied_item_name': src_item['target_copied_item_name'],
                            'match_details': match_details,
                            'source_type_id': src_item['source_type_id'],
                            'obj_type': src_item['obj_type'],
                            'source_name_generic': cur_source.source_name_generic
                        }
                        self.inq_match_arr.append(item_details)
                        # record the source type id of an item to track quantity of found matches for each source type
                        cur_source_types[src_item['source_type_id']]['items_count'] += 1
                    else:
                        self.disqualify_inquiry_item(
                            sub_al,
                            'A match was found, but the identified source path is not accessible. Match details: {}. '
                            'Source path: "{}". Real source path: "{}".'
                            .format(match_details, src_item['path'], src_item['path']),
                            inq_line)

            # report if no match was found and
            # verify that a match was found for each of the source types of the current datasource
            if not match:
                # no matches were found for the current datasource
                self.disqualify_inquiry_item(
                    sub_al,
                    'No matching items (files/folders) were found in the current data source.',
                    inq_line)
            else:
                if not cur_source.allow_nomatch_per_sourcetype:
                    # some matches were found; verify that a match was found for each of the source types
                    for src_type in cur_source_types:
                        if cur_source_types[src_type]['items_count'] == 0:
                            # no matches were found for this source type
                            self.disqualify_inquiry_item(
                                sub_al,
                                'No matches were found for the "{}" source type id in the datasource.'.format(src_type),
                                inq_line)

    def is_item_found_soft_match(self, srch_item, srch_in_str, soft_match_arr, item_to_be_reported):
        out = False
        _str = ''
        # identify if the search is performed for sub_aliquot (full value) or aliquot (partial value)
        if srch_item == item_to_be_reported:
            entity = 'sub-aliquot'
        else:
            entity = 'aliquot'
        soft_match = False

        self.logger.debug("srch_item = {} | srch_in_str = {}".format(srch_item, srch_in_str))
        if srch_item in srch_in_str:
            out = True
            self.logger.debug("Exact match found between: {} | {}".format(srch_item, srch_in_str))
        else:
            if soft_match_arr:
                self.logger.debug("Starting soft match for: {} | {}".format(srch_item, srch_in_str))
                for item in soft_match_arr:
                    srch_in_str = srch_in_str.replace(item['find'], item['replace'])
                    srch_item = srch_item.replace(item['find'], item['replace'])
                self.logger.debug("Updated for soft match: srch_item = {} | srch_in_str = {}".format(
                    srch_item, srch_in_str))
                if srch_item in srch_in_str:
                    out = True
                    soft_match = True
                    self.logger.debug("Soft match found between: {} | {}".format(srch_item, srch_in_str))

        # prepare log entry
        if out:
            _str = str('Loose' if soft_match else 'Exact') + \
                ' match was found for {} item "{}". Match values are as following: "{}" and "{}".'\
                .format(entity, item_to_be_reported, srch_item, srch_in_str)

        # log outcome of the match process; a "soft" match will be logged as a warning
        if out:
            if entity == 'aliquot':
                # if match was found by aliquot (partial id value), always report it as "warning"
                self.logger.warning(_str)
            else:
                # proceed here if match was found by sub-aliquot (full id value)
                if soft_match:
                    self.logger.warning(_str)
                else:
                    self.logger.info(_str)

        # prepare match details to output from this function
        match_type = ''
        if soft_match:
            # this was a soft match
            if entity == 'aliquot':
                match_type = 'loose/aliquot'
            else:
                match_type = 'loose'
        else:
            # this was an exact match
            if entity == 'aliquot':
                match_type = 'exact/aliquot'
            else:
                match_type = 'exact'

        out_details = {'match_type': match_type, 'details': _str}

        return out, out_details

    def create_download_request_file(self):
        self.logger.info("Start preparing download_request file.")
        # path for the request file being created
        rf_path = Path(gc.OUTPUT_REQUESTS_DIR + "/" +
                       time.strftime("%Y%m%d_%H%M%S", time.localtime()) + '_' +
                       self.filename.replace(' ', '') + '.tsv')
        self.download_request_path = rf_path

        if not self.inq_match_arr:
            self.logger.warning('No inquiries with matched datasources exist for the current inquiry file. '
                                'Skipping creating a download request file.')
            return

        with open(rf_path, "w") as rf:
            # write headers to the file
            headers = '\t'.join(['Source', 'Destination', 'Aliquot_id', 'Obj_Type', 'Target_Item_Name'])
            rf.write(headers + '\n')

            for item in self.inq_match_arr:
                src_path = item['real_path']  # item['source']['path']
                # prepare values for the current inquiry row to put into the outcome file
                # project_path = self.conf_process_entity.get_value('Destination/location/project_path')
                bulk_data_path = self.conf_main.get_value('Destination/bulk_data_path')
                study_path = item['study']
                target_subfolder = item['target_subfolder']  # item['source']['target_subfolder']
                sub_aliquot = item['sub-aliquot']
                obj_type = item['obj_type']
                target_copied_item_name = item['target_copied_item_name']

                # check if current sub-aliquot is not part of disqualified items array
                if self.disqualified_items and sub_aliquot in self.disqualified_items.keys():
                    # if sub-aliquot was disqualified already, skip this line
                    continue

                # get template for the destination path and replace placeholders with values
                # "{project_path}/{study_path}/{target_subfolder}"
                dest_path = self.conf_main.get_value('Destination/path_template')
                dest_path = dest_path.replace('{bulk_data_path}', bulk_data_path)
                dest_path = dest_path.replace('{study_path}', study_path)
                dest_path = dest_path.replace('{target_subfolder}', target_subfolder)

                line = '\t'.join([str(src_path), str(Path(dest_path)), str(sub_aliquot),
                                  str(obj_type), target_copied_item_name])
                rf.write(line + '\n')

        self.logger.info("Finish preparing download_request file '{}'.".format(rf_path))

    def disqualify_inquiry_item(self, sa, disqualify_status, inquiry_item):
        # adds a sub-aliquot to the dictionary of disqualified items
        # key = sub-aliquot, value = dictionary with 2 entries:
        #   'status' - reason for disqualification
        #   'inquiry_item' - array of values for the inquiry row from an inquiry file
        details = {'status': disqualify_status, 'inquiry_item': inquiry_item}
        if sa not in self.disqualified_items:
            self.disqualified_items[sa] = details
            self.logger.warning('Sub-aliquot "{}" was disqualified with the following status: "{}"'
                                .format(sa, disqualify_status))
        else:
            self.logger.warning('Sub-aliquot "{}" was already disqualified earlier. '
                                'The following disqualification call will be ignored: "{}"'
                                .format(sa, disqualify_status))

    def create_inquiry_file_for_disqualified_entries(self):
        if self.disqualified_items:
            self.logger.info("Start preparing inquiry file for disqualified sub-aliquots.")
            # path for the file being created
            wb = xlwt.Workbook()  # create empty workbook object
            sh = wb.add_sheet('Re-process_inquiry')  # sheet name cannot be longer than 32 characters

            cur_row = 0  # first row for 0-based array
            cur_col = 0  # first col for 0-based array

            # write headers to the file
            headers = self.lines_arr[0]
            for val in headers:
                sh.write(cur_row, cur_col, val)
                cur_col += 1

            cur_row += 1
            for di in self.disqualified_items:
                fields = self.disqualified_items[di]['inquiry_item']
                cur_col = 0
                for val in fields:
                    sh.write(cur_row, cur_col, val)
                    cur_col += 1
                cur_row += 1

            if not os.path.isabs(gc.DISQUALIFIED_INQUIRIES):
                disq_dir = Path(self.wrkdir) / gc.DISQUALIFIED_INQUIRIES
            else:
                disq_dir = Path(gc.DISQUALIFIED_INQUIRIES)
            # if DISQUALIFIED_INQUIRIES folder does not exist, it will be created
            os.makedirs(disq_dir, exist_ok=True)

            # identify path for the disqualified inquiry file
            self.disqualified_inquiry_path = Path(
                str(disq_dir) + '/' +
                time.strftime("%Y%m%d_%H%M%S", time.localtime()) + '_reprocess_disqualified_' +
                # .stem method is used to get file name without an extension
                Path(self.filename).stem.replace(' ', '') + '.xls')

            wb.save(str(self.disqualified_inquiry_path))
            self.logger.info("Successfully prepared the inquiry file for disqualified sub-aliquots and saved it in '{}'."
                             .format(str(self.disqualified_inquiry_path)))
import os
import sys
from os import walk
import getpass
from pathlib import Path
import traceback

from utils import Monitor
from utils import ConfigData, common as cm, common2 as cm2, global_const as gc, send_yagmail  # , send_email as email

# if executed by itself, do the following
if __name__ == '__main__':
    gc.CURRENT_PROCCESS_LOG_ID = 'monitor_file'
    # load main config file and get required values
    m_cfg = ConfigData(gc.MAIN_CONFIG_FILE)

    # setup application level logger
    cur_dir = Path(os.path.dirname(os.path.abspath(__file__)))
    mlog, log_handler = cm.setup_logger(m_cfg, cur_dir, gc.CURRENT_PROCCESS_LOG_ID)

    monitor_path = m_cfg.get_value('Location/monitor_configs')

    # Verify that the target directory (monitor_path) is accessible for the current user (under which the app is running).
    # Identify the user under which the app is running if monitor_path is not accessible.
    if not os.path.exists(monitor_path):
        _str = 'Directory "{}" does not exist or is not accessible for the current user. Aborting execution. ' \
               'Expected user login: "{}", Effective user: "{}"'.format(monitor_path, os.getlogin(), getpass.getuser())
        mlog.error(_str)
        # send notification email alerting about the error case
        email_subject = 'Error occurred during running file_monitoring tool.'
class Request(File):
    def __init__(self, filepath, main_cfg, file_type=2, sheet_name=''):
        # load_configuration (main_cfg_obj) # load global and local configurations
        File.__init__(self, filepath, file_type)

        if main_cfg:
            self.conf_main = main_cfg
        else:
            self.conf_main = ConfigData(gc.CONFIG_FILE_MAIN)
        # if cfg_path == '':
        #     self.conf_main = ConfigData(gc.CONFIG_FILE_MAIN)
        # else:
        #     self.conf_main = ConfigData(cfg_path)

        self.error = RequestError(self)
        self.log_handler = None
        self.logger = self.setup_logger(self.wrkdir, self.filename)
        self.logger.info('Start working with Submission request file {}'.format(filepath))

        # self.file_dict = OrderedDict()
        # self.rows = OrderedDict()
        self.columnlist = []
        self.samples = []
        self.sub_aliquots = []
        self.disqualified_sub_aliquots = {}
        self.aliquots_to_subaliquots_map = {}  # holds the map of aliquots to sub-aliquots for interpreting DB responses
        self.disqualified_request_path = ''  # will store path to a request file with disqualified sub-aliquots
        self.project = ''
        self.bulk_location = ''
        self.assay = ''
        self.center = ''
        self.center_id = None
        self.center_code = None
        self.experiment_id = ''
        self.data_source_names = ''
        self.data_source_objects = {}  # dictionary to store all collected data sources for the request
        self.aliquots = None
        self.qualified_aliquots = None
        self.raw_data = None
        self.assay_data = None
        self.attachments = None
        self.submission_forms = None
        self.submission_package = None
        self.data_source_names = None  # will hold value corresponding to the type of data source being used (attachments are not ignored)
        # possible values are 'db' and 'file'; the value is set based on the first data source being used
        self.data_source_forms_assignment = None

        # self.sheet_name = ''
        self.sheet_name = sheet_name.strip()
        if not self.sheet_name or len(self.sheet_name) == 0:
            # if sheet name was not passed as a parameter, try to get it from config file
            self.sheet_name = gc.REQUEST_EXCEL_WK_SHEET_NAME  # 'wk_sheet_name'
        # print (self.sheet_name)
        self.logger.info('Data will be loaded from worksheet: "{}"'.format(self.sheet_name))

        self.conf_assay = None

        self.get_file_content()

    def get_file_content(self):
        if not self.columnlist:
            if cm.file_exists(self.filepath):
                self.logger.debug('Loading file content of "{}"'.format(self.filepath))
                with xlrd.open_workbook(self.filepath) as wb:
                    if not self.sheet_name or len(self.sheet_name) == 0:
                        # by default retrieve the first sheet in the excel file
                        sheet = wb.sheet_by_index(0)
                    else:
                        # if sheet name was provided
                        sheets = wb.sheet_names()  # get list of all sheets
                        if self.sheet_name in sheets:
                            # if given sheet name in the list of available sheets, load the sheet
                            sheet = wb.sheet_by_name(self.sheet_name)
                        else:
                            # report an error if given sheet name not in the list of available sheets
                            _str = ('Given worksheet name "{}" was not found in the file "{}". '
                                    'Verify that the worksheet name exists in the file.'
                                    ).format(self.sheet_name, self.filepath)
                            self.error.add_error(_str)
                            self.logger.error(_str)

                            self.lineList = None
                            self.loaded = False
                            return self.lineList

                    sheet.cell_value(0, 0)

                    lines = []  # will hold content of the request file as an array of arrays (rows)
                    for i in range(sheet.ncols):
                        column = []
                        for j in range(sheet.nrows):
                            if i == 0:
                                lines.append([])  # adds an array for each new row in the request file
                            # print(sheet.cell_value(i, j))
                            cell = sheet.cell(j, i)
                            cell_value = cell.value
                            # take care of numbers and dates received from Excel and converted to float by default
                            if cell.ctype == 2 and int(cell_value) == cell_value:
                                # the value is an integer
                                cell_value = str(int(cell_value))
                            elif cell.ctype == 2:
                                # the value is a float
                                cell_value = str(cell_value)
                            # convert date back to human readable date format
                            # print ('cell_value = {}'.format(cell_value))
                            if cell.ctype == 3:
                                cell_value_date = xlrd.xldate_as_datetime(cell_value, wb.datemode)
                                cell_value = cell_value_date.strftime("%Y-%m-%d")
                            column.append(cell_value)  # adds value to the current column array
                            lines[j].append('"' + str(cell_value) + '"')  # adds value in "csv" format for a current row
                        # self.columnlist.append(','.join(column))
                        self.columnlist.append(column)  # adds a column to a list of columns

                    # populate lineList property
                    self.lineList = []
                    for ln in lines:
                        self.lineList.append(','.join(ln))

                    wb.unload_sheet(sheet.name)

                # load passed request parameters (by columns)
                self.get_request_parameters()

                # validate provided information
                self.logger.info('Validating provided request parameters. project: "{}", bulk location: "{}", '
                                 'assay: "{}", db_center_code_or_id: "{}", '
                                 'Sub-Aliquots: "{}"'.format(self.project, self.bulk_location, self.assay,
                                                             self.center, self.sub_aliquots))
                self.validate_request_params()

                if self.error.exist():
                    # report that errors exist
                    self.loaded = False
                    # print(self.error.count)
                    # print(self.error.get_errors_to_str())
                    _str = 'Errors ({}) were identified during validation of the request. \nError(s): {}'.format(
                        self.error.count, self.error.get_errors_to_str())
                else:
                    self.loaded = True
                    _str = 'Request parameters were successfully validated - no errors found.'
                self.logger.info(_str)

                # combine Experiment_id out of request parameters
                if self.center_code and len(self.center_code.strip()) > 0:
                    # use center code if available
                    self.experiment_id = "_".join([self.project, self.center_code, self.assay])
                else:
                    # use provided value for the center column from request, if center_code is not available
                    self.experiment_id = "_".join([self.project, self.center, self.assay])
            else:
                _str = 'Loading content of the file "{}" failed since the file does not appear to exist.'.format(
                    self.filepath)
                self.error.add_error(_str)
                self.logger.error(_str)

                self.columnlist = None
                self.lineList = None
                self.loaded = False
        return self.lineList

    # get all values provided in the request file
    def get_request_parameters(self):
        self.project = self.columnlist[0][1]
        self.bulk_location = self.columnlist[1][1]
        self.assay = self.columnlist[2][1].lower()
        self.center = self.columnlist[3][1]  # center code (if alpha numeric) or center id (if numeric)
        self.sub_aliquots = self.columnlist[4]
        if self.sub_aliquots and len(self.sub_aliquots) > 0:
            self.sub_aliquots.pop(0)  # get rid of the column header
        # self.samples = self.columnlist[5]
        # if self.samples and len(self.samples) > 0:
        #     self.samples.pop(0)  # get rid of the column header

    # validates provided parameters (loaded from the submission request file)
    def validate_request_params(self):
        _str_err = ''
        _str_warn = ''
        if len(self.sub_aliquots) == 0:
            _str_err = '\n'.join([
                _str_err,
                'List of provided sub-samples is empty. Aborting processing of the submission request.'
            ])

        # Check if empty sub-samples were provided
        if '' in self.sub_aliquots:
            i = 0
            cleaned_cnt = 0
            for s in self.sub_aliquots:
                # check for any empty sub-aliquot values and remove them
                if len(s.strip()) == 0:
                    self.sub_aliquots.pop(i)
                    cleaned_cnt += 1
                else:
                    i += 1
            if cleaned_cnt > 0:
                _str_warn = '\n'.join([
                    _str_warn,
                    'Empty sub-aliquots (count = {}) were removed from the list. '
                    'Here is the list of sub-aliquots after cleaning (count = {}): "{}" '
                    .format(cleaned_cnt, len(self.sub_aliquots), self.sub_aliquots)
                ])

        # check for empty values
        if len(self.project) == 0:
            _str_err = '\n'.join([
                _str_err,
                'No Program name was provided. Aborting processing of the submission request.'
            ])
        if len(self.bulk_location) == 0:
            _str_err = '\n'.join([
                _str_err,
                'No Bulk Location was provided. Aborting processing of the submission request.'
            ])
        if len(self.assay) == 0:
            _str_err = '\n'.join([
                _str_err,
                'No Assay was provided. Aborting processing of the submission request.'
            ])
        if len(self.center) == 0:
            _str_err = '\n'.join([
                _str_err,
                'No DB Center information was provided. Aborting processing of the submission request.'
            ])

        # check for values that should match some predefined values from a dictionary
        # check assay value
        if not cm2.key_exists_in_dict(self.assay, 'assay'):
            _str_err = '\n'.join([
                _str_err,
                'Provided Assay name "{}" is not matching a list of expected assay names '
                '(as stored in "{}" dictionary file). '
                'Aborting processing of the submission request.'.format(self.assay, gc.CONFIG_FILE_DICTIONARY)
            ])
        else:
            # if provided assay name is expected, convert it to the name expected by the Submission logic
            self.assay = cm2.get_dict_value(self.assay, 'assay')

        # check project value
        if not cm2.key_exists_in_dict(self.project.lower(), 'project'):
            _str_err = '\n'.join([
                _str_err,
                'Provided Program name "{}" is not matching a list of expected names '
                '(as stored in "{}" dictionary file). '
                'Aborting processing of the submission request.'.format(self.project, gc.CONFIG_FILE_DICTIONARY)
            ])
        else:
            # if provided project name is expected, convert it to the name expected by the Submission logic
            self.project = cm2.get_dict_value(self.project.lower(), 'project')

        # validate center_code or center_id value
        self.logger.info('Start validation of center value "{}" provided in the request'.format(self.center))
        db = DBAccess(self.logger, self.error, self.conf_main)  # create DBAccess object
        db.open_connection()
        # test center value assuming center code was provided
        dataset = db.validate_center_code(self.center, self.project, 'code', 'code')
        _str_err_out1, center_id_out1 = self.check_validation_dataset_outcome(dataset, 'center_id', 'center_code')
        if center_id_out1:
            # center id was returned, meaning center was validated fine
            self.center_id = center_id_out1
            # get center code value from the current DB dataset
            _str_err_out3, center_code = self.get_field_value_from_dataset(dataset, 'center_code')
            if center_code:
                # center code retrieved OK
                self.center_code = center_code
            else:
                # report an error during retrieving center_code
                _str_err = '\n'.join([_str_err, _str_err_out3])
        else:
            # if center code was not validated at first attempt, validate it assuming the center id was given
            dataset = db.validate_center_code(self.center, self.project, 'id', 'code')
            _str_err_out2, center_id_out2 = self.check_validation_dataset_outcome(dataset, 'center_id', 'center_id')
            if center_id_out2:
                # center id was validated at the 2nd attempt, ignore the 1st failed center code validation
                self.center_id = center_id_out2
                # get center code value from the current DB dataset
                _str_err_out3, center_code = self.get_field_value_from_dataset(dataset, 'center_code')
                if center_code:
                    # center code retrieved OK
                    self.center_code = center_code
                else:
                    # report an error during retrieving center_code
                    _str_err = '\n'.join([_str_err, _str_err_out3])
            else:
                # center validation attempts failed, report both failures
                _str_err = '\n'.join([_str_err, _str_err_out1, _str_err_out2])

        # get list of aliquots from list of sub-aliquots
        self.aliquots = [cm2.convert_sub_aliq_to_aliquot(al, self.assay) for al in self.sub_aliquots]

        # create a map to convert aliquot value to sub_aliquot value (for processing DB responses given for aliquots)
        for sa, a in zip(self.sub_aliquots, self.aliquots):
            self.aliquots_to_subaliquots_map[a] = sa

        if self.center_id:
            self.logger.info('Start validation of aliquot ids vs DB')
            # if center id was validated in the above code, validate received aliquots vs manifest dataset in DB
            dataset = db.validate_aliquot_ids(self.center_id, self.aliquots)
            if dataset:
                # create dictionary of received aliquots/sample ids
                aliquots_to_samples_map = {}
                for row in dataset:
                    if '_aliquot_id' in row and '_sample_id' in row:
                        aliquots_to_samples_map[row['_aliquot_id']] = row['_sample_id']

                # check if each aliquot id was returned from the database and get the sample id from the dataset
                for sa, a in zip(self.sub_aliquots, self.aliquots):
                    if a in aliquots_to_samples_map:
                        if len(str(aliquots_to_samples_map[a]).strip()) > 0:
                            self.samples.append(aliquots_to_samples_map[a])
                        else:
                            _str = 'Blank Sample Id value was returned from DB for the sub-aliquot id "{}". ' \
                                   'The sub-aliquot was disqualified'.format(sa)
                            self.disqualify_sub_aliquot(sa, _str)
                            _str_warn = '\n'.join([_str_warn, _str])
                    else:
                        _str = 'Sub-aliquot id "{}" was not found in the database and was disqualified'.format(sa)
                        self.disqualify_sub_aliquot(sa, _str)
                        _str_warn = '\n'.join([_str_warn, _str])
            else:
                _str_err = '\n'.join([
                    _str_err,
                    'Aliquot ids cannot be validated since no data was returned from DB for '
                    'center_id = "{}" and aliquot ids as following: {} '.format(self.center_id, self.aliquots)
                ])

        db = None

        # report any collected errors
        if len(_str_err) > 0:
            _str_err = 'Validation of request parameters:' + _str_err
            self.error.add_error(_str_err)
            self.logger.error(_str_err)

        # report any collected warnings
        if len(_str_warn) > 0:
            _str_warn = 'Validation of request parameters:' + _str_warn
            self.logger.warning(_str_warn)

    def check_validation_dataset_outcome(self, dataset, validation_id_column, validation_id_name):
        _str_err = ''
        row_num = 1
        validation_id_out = None
        # defaults in case the dataset row is missing any of the expected keys
        status = None
        description = ''
        validation_id = None
        if dataset:
            if len(dataset) >= row_num:
                row = dataset[row_num - 1]  # get the first row of the dataset
                if 'status' in row:
                    status = row['status']
                if 'description' in row:
                    description = row['description']
                if validation_id_column in row:  # center_id
                    validation_id = row[validation_id_column]

                if status == 'OK':
                    # validation was successful
                    validation_id_out = validation_id
                elif status == 'Failed':
                    # validation has failed
                    _str_err = '\n'.join([
                        _str_err,
                        'Validation of the provided {} value vs DB has Failed, description: {}'
                        .format(validation_id_name, description)
                    ])
                else:
                    # unexpected status value was returned
                    _str_err = '\n'.join([
                        _str_err,
                        'Validation of the provided {} value vs DB returned unexpected status {}'
                        .format(validation_id_name, status)
                    ])
        else:
            _str_err = '\n'.join([
                _str_err,
                'Unexpected error was reported during validating {} in the DB. '
                'Check earlier entries in the log file.'.format(validation_id_name)
            ])

        return _str_err, validation_id_out

    def get_field_value_from_dataset(self, dataset, field_name, row_num=None):
        # set default values
        if row_num is None:
            row_num = 1  # default row is #1
        _str_err = ''
        value_out = None
        if dataset:
            if len(dataset) >= row_num:
                row = dataset[row_num - 1]
                if field_name in row:
                    value_out = row[field_name]
                else:
                    _str_err = '\n'.join([
                        _str_err,
                        'Unexpected error was reported during retrieving value of "{}" (row #{}) from the dataset.
' .format(field_name, row_num) ]) return _str_err, value_out def setup_logger(self, wrkdir, filename): # m_cfg = ConfigData(gc.CONFIG_FILE_MAIN) log_folder_name = gc.REQ_LOG_DIR # gc.LOG_FOLDER_NAME # m_logger_name = gc.MAIN_LOG_NAME # m_logger = logging.getLogger(m_logger_name) logger_name = gc.REQUEST_LOG_NAME logging_level = self.conf_main.get_value('Logging/request_log_level') # if a relative path provided, convert it to the absolute address based on the application working dir if not os.path.isabs(log_folder_name): log_folder_path = Path(wrkdir) / log_folder_name else: log_folder_path = Path(log_folder_name) lg = setup_logger_common( logger_name, logging_level, log_folder_path, # Path(wrkdir) / log_folder_name, str(filename) + '_' + time.strftime("%Y%m%d_%H%M%S", time.localtime()) + '.log') self.log_handler = lg['handler'] return lg['logger'] def load_request_configuration(self): # update main config file with the project/environmetn specific details from additional config files self.load_project_config_into_main( self.project ) # loads project specific config and merges it into main config # load project specific assay config file self.conf_assay = self.load_assay_conf(self.assay, self.project) if self.conf_assay: # update loaded assay config file with project/environment specific config assay_locatoin_config.yaml self.conf_assay = self.update_cfg_dictionary_with_location_details( gc.CONFIG_FILE_ASSAY_LOCATION, self.project, self.conf_assay) def process_request(self): self.data_source_names = cm.get_value_from_dictionary( 'data_sources', self.conf_assay) # self.conf_assay['data_sources'] # path to the folder where created submission packages will be located. # since this location can be provided in the project config file, this assignment is happening # after loading the project config gc.OUTPUT_PACKAGES_DIR = self.conf_main.get_value( 'Submission_location/output_packages') for data_source_name in self.data_source_names: # if isinstance(data_source_name, tuple) if isinstance(data_source_name, str): if data_source_name == 'attachment': self.attachments = Attachment(self) elif data_source_name[-3:] == "_db": self.data_source_objects[data_source_name] = DataSourceDB( self, data_source_name, data_source_name) if not self.data_source_forms_assignment: self.data_source_forms_assignment = 'db' else: self.data_source_objects[data_source_name] = DataSource( self, data_source_name, data_source_name) if not self.data_source_forms_assignment: self.data_source_forms_assignment = 'file' elif isinstance(data_source_name, tuple): if data_source_name[0][-3:] == "_db": self.data_source_objects[ data_source_name[0]] = DataSourceDB( self, data_source_name[0], data_source_name[1]) else: self.data_source_objects[data_source_name[0]] = DataSource( self, data_source_name[0], data_source_name[1]) else: self.logger.error( 'Provided data source name ({}) is of unexpected format and cannot be processed.' .format(data_source_name)) # if data_source_forms_assignment was not assigned with any value in code before, assign a default to it # this a case when an assay submits only attachments and do not use any assay or QC data if not self.data_source_forms_assignment: self.data_source_forms_assignment = gc.DEFAULT_DATA_SOURCE_FORMS_ASSIGNMENT self.submission_package = SubmissionPackage(self) self.create_request_for_disqualified_sub_aliquots() self.create_trasfer_script_file() # check for errors and put final log entry for the request. 
if self.error.exist(): _str = 'Processing of the current request was finished with the following errors: {}\n'.format( self.error.get_errors_to_str()) self.logger.error(_str) else: _str = 'Processing of the current request was finished successfully.\n' self.logger.info(_str) def load_assay_conf(self, assay, project): assay_cfg_path = gc.CONFIG_FILE_ASSAY.replace('{project}', project) cfg_assay = ConfigData(assay_cfg_path) assay_config = cfg_assay.get_value(assay.upper()) if assay_config: self.logger.info( "Configuration for the {} assay was loaded from the assay config file: {}. " .format(assay.upper(), assay_cfg_path)) else: _str = "Configuration for the {} assay CANNOT be loaded from the assay config file: {}. " \ "Aborting execution.".format(assay.upper(), assay_cfg_path) self.logger.error(_str) self.error.add_error(_str) return assay_config # def update_cfg_assay_with_location_details(self, project, cfg_assay): # cfg_assay_location = ConfigData(gc.CONFIG_FILE_ASSAY_LOCATION.replace('{project}', project)) # if cfg_assay_location.loaded: # self.logger.info('Local config file "{}" was loaded and being used.'.format(cfg_assay_location.cfg_path)) # cfg_assay = cm.update_dictionary_matching_keys(cfg_assay, cfg_assay_location.get_whole_dictionary()) # else: # _str = 'Local config file "{}" was NOT loaded. Aborting processing of the current request file.'\ # .format(cfg_assay_location.cfg_path) # self.logger.error(_str) # self.error.add_error(_str) # return cfg_assay def update_cfg_dictionary_with_location_details(self, location_path, project, cfg_to_update): cfg_location = ConfigData(location_path.replace('{project}', project)) if cfg_location.loaded: self.logger.info( 'Local config file "{}" was loaded and being used.'.format( cfg_location.cfg_path)) cfg_to_update = cm.update_dictionary_matching_keys( cfg_to_update, cfg_location.get_whole_dictionary()) else: _str = 'Local config file "{}" was NOT loaded. 
Aborting processing of the current request file.'\ .format(cfg_location.cfg_path) self.logger.error(_str) self.error.add_error(_str) return cfg_to_update def load_project_config_into_main(self, project): # load project specific "project_config" config file cfg_project = ConfigData( gc.CONFIG_FILE_PROJECT.replace('{project}', project)) if cfg_project.loaded: # if cfg_project was loaded, update it with the environment specific settings (from project_location config) cfg_project_updated = self.update_cfg_dictionary_with_location_details( gc.CONFIG_FILE_PROJECT_LOCATION, self.project, cfg_project.get_whole_dictionary()) # update main config with the outcome of the previous updates self.conf_main.update(cfg_project_updated) def create_trasfer_script_file(self): self.logger.info("Start preparing transfer_script.sh file.") # path for the script file being created sf_path = Path(self.submission_package.submission_dir + "/transfer_script.sh") # get script file template with open('scripts/' + self.project + '/transfer_script.sh', 'r') as ft: scr_tmpl = ft.read() # update placeholders in the script with the actual values smtp_server = cm.get_environment_variable( self.conf_main.get_item_by_key('Email/smtp_server_env_name')) smtp_port = cm.get_environment_variable( self.conf_main.get_item_by_key('Email/smtp_server_port_env_name')) scr_tmpl = cm.replace_value_in_string( scr_tmpl, "{!smtp!}", smtp_server + ":" + str(smtp_port)) scr_tmpl = cm.replace_value_in_string( scr_tmpl, "{!to_email!}", ','.join(self.conf_main.get_value("Email/sent_to_emails"))) scr_tmpl = cm.replace_value_in_string( scr_tmpl, "{!from_email!}", self.conf_main.get_value("Email/default_from_email")) scr_tmpl = cm.replace_value_in_string( scr_tmpl, "{!send_email_flag!}", str(self.conf_main.get_value("Email/send_emails"))) scr_tmpl = cm.replace_value_in_string( scr_tmpl, "{!cmd!}", self.conf_main.get_value("DataTransfer/transfer_command")) # the following will be utilized if mount point is being used by the transfer script (i.e. for Peerless) scr_tmpl = cm.replace_value_in_string( scr_tmpl, "{!mp_cmd!}", self.conf_main.get_value("DataTransfer/mount_point_command")) scr_tmpl = cm.replace_value_in_string( scr_tmpl, "{!mount_local_dir!}", self.conf_main.get_value("DataTransfer/mount_local_dir")) scr_tmpl = cm.replace_value_in_string( scr_tmpl, "{!mount_remote_dir!}", self.conf_main.get_value("DataTransfer/mount_remote_dir")) scr_tmpl = cm.replace_value_in_string( scr_tmpl, "{!source_dir!}", self.submission_package.submission_dir) scr_tmpl = cm.replace_value_in_string( scr_tmpl, "{!target_dir!}", self.conf_main.get_value("DataTransfer/remote_target_dir")) ssh_server = cm.get_environment_variable( self.conf_main.get_item_by_key('DataTransfer/ssh_server_env_name')) scr_tmpl = cm.replace_value_in_string(scr_tmpl, "{!ssh_server!}", str(ssh_server)) # apply user name as the very last replacement statement, since it can be used as part of previous replacements ssh_user = cm.get_environment_variable( self.conf_main.get_item_by_key('DataTransfer/ssh_user_env_name')) scr_tmpl = cm.replace_value_in_string(scr_tmpl, "{!ssh_user!}", str(ssh_user)) set_permissions = False set_perm_value = self.conf_main.get_value("DataTransfer/exec_permis") if set_perm_value: try: exec_permission = eval(set_perm_value.strip()) set_permissions = True except Exception as ex: _str = 'Unexpected error Error "{}" occurred during evaluating of "DataTransfer/exec_permis" value ' \ '"{}" retrieved from the main config file. Permission setup operation will be skipped. 
\n{} '\ .format(ex, set_perm_value, traceback.format_exc()) self.logger.warning(_str) # self.error.add_error(_str) set_permissions = False with open(sf_path, "w") as sf: sf.write(scr_tmpl) if set_permissions: try: # if permissions to be set were retrieved from config file, set them here st = os.stat(sf_path) os.chmod(sf_path, st.st_mode | exec_permission) #stat.S_IXUSR except Exception as ex: _str = 'Unexpected error Error "{}" occurred during setting up permissions "{}" for the script file ' \ '"{}". \n{} '\ .format(ex, set_perm_value, sf_path, traceback.format_exc()) self.logger.warning(_str) self.error.add_error(_str) else: _str = 'Permission setup was skipped for the transfer script file. ' \ 'Note: value of "DataTransfer/exec_permis" from main config was set to "{}".'\ .format(set_perm_value) self.logger.warning(_str) self.logger.info("Finish preparing '{}' file.".format(sf_path)) def disqualify_sub_aliquot(self, sa, details): # adds a sub aliquots to the disctionary of disqualified sub_aliquots # key = sub-aliquot, value = array of details for disqualification; 1 entry can have multiple detail reasons if sa in self.disqualified_sub_aliquots.keys(): self.disqualified_sub_aliquots[sa].append(details) else: arr_details = [details] self.disqualified_sub_aliquots[sa] = arr_details self.logger.warning( 'Sub-aliquot "{}" was disqualified with the following details: "{}"' .format(sa, details)) def populate_qualified_aliquots(self): # reset self.qualified_aliquots array self.qualified_aliquots = [] #select only aliquots that were not disqualified for sa, a in zip(self.sub_aliquots, self.aliquots): if not sa in self.disqualified_sub_aliquots.keys(): self.qualified_aliquots.append(a) def create_request_for_disqualified_sub_aliquots(self): # proceed only if some disqualified sub-aliquots are present if self.disqualified_sub_aliquots: self.logger.info( "Start preparing a request file for disqualified sub-aliquots '{}'." .format([val for val in self.disqualified_sub_aliquots.keys()])) wb = xlwt.Workbook() # create empty workbook object sh = wb.add_sheet( 'Submission_Request' ) # sheet name can not be longer than 32 characters cur_row = 0 # first row for 0-based array cur_col = 0 # first col for 0-based array #write headers to the file headers = self.get_headers() for val in headers: sh.write(cur_row, cur_col, val) cur_col += 1 cur_row += 1 for sa in self.sub_aliquots: if sa in self.disqualified_sub_aliquots.keys(): sh.write(cur_row, 0, self.project) sh.write(cur_row, 1, self.bulk_location) sh.write(cur_row, 2, self.assay) sh.write(cur_row, 3, self.center) sh.write(cur_row, 4, sa) cur_row += 1 self.disqualified_request_path = Path( gc.DISQUALIFIED_REQUESTS + '/' + time.strftime("%Y%m%d_%H%M%S", time.localtime()) + '_reprocess_disqualified _' + Path(self.filename).stem + '.xls') # if DISQUALIFIED_REQUESTS folder does not exist, it will be created os.makedirs(gc.DISQUALIFIED_REQUESTS, exist_ok=True) wb.save(str(self.disqualified_request_path)) self.logger.info( "Successfully prepared the request file for disqualified sub-aliquots and saved in '{}'." .format(str(self.disqualified_request_path)))
class Monitor(): def __init__(self, cfg_monitor_path, log_obj): self.action_completed = False self.status = [] self.mtr_cfg_path = cfg_monitor_path self.log = log_obj self.error = MonitorError(self) self.mtr_cfg = ConfigData(cfg_monitor_path) if self.validate_config_file(): self.loaded = True else: self.loaded = False cur_cfg_dir = os.path.dirname(cfg_monitor_path) cur_cfg_file_name = Path(os.path.abspath(cfg_monitor_path)).name stamp_dir = Path(str(cur_cfg_dir) + '/' + gc.STAMPS_FILES_FOLDER_NAME) if not os.path.exists(stamp_dir): os.mkdir(stamp_dir) stamp_file = Path( str(stamp_dir) + '/' + cur_cfg_file_name.replace('.yaml', '_stamp.yaml')) self.verify_config_stamp_file(stamp_file) self.mtr_cfg_stamp = ConfigData(stamp_file) self.mtr_source = None self.mtr_source_path = None if self.loaded: # get config file values self.mtr_source_dir = Path( cm.eval_cfg_value( self.mtr_cfg.get_value('Location/source_dir'), self.log, None)) self.mtr_source_file = Path( self.mtr_cfg.get_value('Location/source_file')) found_files = cm.find_file_in_dir(self.mtr_source_dir, self.mtr_source_file, False) if found_files: ff_stamp = None for file_match in found_files: if not ff_stamp or ff_stamp < os.stat( Path(self.mtr_source_dir) / file_match).st_mtime: ff_stamp = os.stat( Path(self.mtr_source_dir) / file_match).st_mtime self.mtr_source = file_match # self.mtr_source = found_files[0] self.mtr_source_path = Path( self.mtr_source_dir) / self.mtr_source # else: # self.mtr_source = None # self.mtr_source_path = None self.mtr_destin = self.mtr_cfg.get_value('Location/destination') self.mtr_item = self.mtr_cfg.get_value('Monitoring/item') self.mtr_type = self.mtr_cfg.get_value('Monitoring/type') self.mtr_action = self.mtr_cfg.get_value('Monitoring/action') self.mtr_frequency = self.mtr_cfg.get_value('Monitoring/frequency') # self.mtr_email = self.mtr_cfg.get_value('Monitoring/email_notification') # self.mtr_email_cc = self.mtr_cfg.get_value('Monitoring/email_cc') # load stamp info from stamp config file self.mtr_sync_date = self.mtr_cfg_stamp.get_value( 'Last_sync/date_time') self.mtr_watch_value = self.mtr_cfg_stamp.get_value( 'Last_sync/watch_value') def verify_config_stamp_file(self, file_path): if not cm.file_exists(file_path): # if file is not present, create it f = open(file_path, "w+") f.close def validate_config_file(self): # TODO: add some rules to validate the current monitoring config file return True def start_monitor(self): if self.mtr_source_path: next_sync_datetime = None # default value # check if delay between monitoring events was fulfilled if self.mtr_sync_date and str(self.mtr_frequency).isnumeric(): try: next_sync_datetime = datetime.strptime(self.mtr_sync_date, gc.STAMP_DATETIME_FORMAT) + \ timedelta(seconds=self.mtr_frequency) except Exception as ex: # report unexpected error to log file _str = 'Unexpected Error "{}" occurred during calculating next sync datetime. ' \ 'Saved sync date: "{}", sync frequency: "{}"' \ .format(ex, self.mtr_sync_date, self.mtr_frequency) self.status.append(_str) _str = _str + '\n{} '.format(traceback.format_exc()) self.log.error(_str) self.error.add_error(_str) if not next_sync_datetime or next_sync_datetime < datetime.now(): self.log.info( 'Monitoring delay of "{}" seconds has expired since the last syncronization event on {}. 
' 'Proceeding to monitor "{}" file.'.format( self.mtr_frequency if self.mtr_frequency else 'N/A', self.mtr_sync_date if self.mtr_sync_date else 'N/A', self.mtr_source)) custom_action = self.action_copy # set default value if self.mtr_action == 'copy': custom_action = self.action_copy watcher = Watcher( self.mtr_source_path, custom_action, self, self.mtr_watch_value) # self.mtr_item, self.mtr_type) watcher.watch() # start the watch going # update stats in the config file datetime_stamp = time.strftime(gc.STAMP_DATETIME_FORMAT, time.localtime()) self.mtr_cfg_stamp.set_value(datetime_stamp, 'Last_sync/date_time') self.log.info( 'Datetime information for monitored file was recorded: Last_sync/date_time: {}' .format(datetime_stamp)) else: _str = 'Monitoring delay of "{}" seconds has not expired since the last syncronization event on {}. '\ .format(self.mtr_frequency if self.mtr_frequency else 'N/A', self.mtr_sync_date if self.mtr_sync_date else 'N/A') self.log.info(_str) self.status.append(_str) else: _str = 'Source file "{}" was not found in the source directory "{}". '\ .format(self.mtr_source_file, self.mtr_source_dir) self.log.warning(_str) self.status.append(_str) def action_copy(self, file_time_stamp): self.log.info('Start copying "{}" to "{}"'.format( self.mtr_source, self.mtr_destin)) self.new_file_time_stamp = file_time_stamp try: shutil.copy(self.mtr_source_path, self.mtr_destin) _str = 'Copying of "{}" to "{}" completed successfuly.'.format( self.mtr_source_path, self.mtr_destin) self.log.info(_str) self.action_completed = True self.status.append(_str) # update stats in the config file self.mtr_cfg_stamp.set_value(file_time_stamp, 'Last_sync/watch_value') self.log.info( 'Stamp information for just copied file was recorded: ' 'Last_sync/watch_value: {}'.format(file_time_stamp)) except Exception as ex: # report unexpected error to log file _str = 'Unexpected Error "{}" occurred during copying file "{}" to "{}"\n{} ' \ .format(ex, self.mtr_source, self.mtr_destin, traceback.format_exc()) self.log.error(_str) self.error.add_error(_str) self.status.append(_str)
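# A minimal usage sketch for the Monitor class above, not taken from the original code: the config
# path, logger name and the reporting loop below are assumptions made purely for illustration.
import logging

mtr = Monitor('configs/monitors/manifest_copy.yaml', logging.getLogger('main'))  # hypothetical path
if mtr.loaded:
    mtr.start_monitor()  # copies the newest matching source file if the configured delay has expired
    for status_line in mtr.status:  # status collects human-readable outcomes of the run
        logging.getLogger('main').info(status_line)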
"""
Created on Oct. 12 2011

@author: Jason MacWilliams
"""
import sys, os, time, signal, subprocess, atexit
import zipfile
from optparse import OptionParser
#import pdb
from utils.commonFedora import connectToFedora
from utils.ConfigData import *
import Navigator

config = ConfigData()

# try to handle an abrupt shutdown more cleanly
# we also hit the shutdown handler after this, so don't bother sending it now
def shutdown_handler(signum, frame):
    # is there enough time to save the script state, do we even have to?
    print("Script terminating with signal %d" % signum)
    config.message.addLine("Script was terminated with signal %d" % signum)
    # we might also have to remove the last object as it may be corrupt
    # need to look into how an interrupt can interfere with shutil.copy, os.chown, and ffmpeg
    sys.exit(1)

def sendReport():
    config.message.send()

""" ====== M A I N ====== """
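# Hedged sketch only: the excerpt above defines the handlers but the actual wiring is not shown
# here, so the registration below is an assumption about how they are typically hooked up.
signal.signal(signal.SIGINT, shutdown_handler)
signal.signal(signal.SIGTERM, shutdown_handler)
atexit.register(sendReport)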
def get_main_config():
    if not gc.main_cfg:
        gc.main_cfg = ConfigData(gc.MAIN_CONFIG_FILE)
    return gc.main_cfg
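# Illustration only: repeated calls reuse the ConfigData instance cached on the global_const
# module (gc), so the main YAML file is parsed once per process.
cfg = get_main_config()
assert cfg is get_main_config()                   # the same cached object is returned
send_to = cfg.get_value('Email/send_to_emails')   # a config key used elsewhere in this codebase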
def validate_inquiry_file(self): self.logger.info( 'Start validating the current inquiry file "{}".'.format( self.filepath)) row_count = 1 failed_cnt = 0 valid_aliquot_flag = self.conf_main.get_value( 'Validate/aliquot_id_vs_manifest') valid_inquiry_values_flag = self.conf_main.get_value( 'Validate/inquiry_values_vs_dictionary') inquiry_min_number_columns = self.conf_main.get_value( 'Validate/inquiry_min_number_columns') inquiry_validate_number_columns = self.conf_main.get_value( 'Validate/inquiry_validate_number_columns') if not inquiry_min_number_columns or not isinstance( inquiry_min_number_columns, int): inquiry_min_number_columns = 6 # set a default value if it is not provided in the config file if not inquiry_validate_number_columns or not isinstance( inquiry_validate_number_columns, int): inquiry_validate_number_columns = 6 # set a default value if it is not provided in the config file for row in self.lines_arr: if row_count == self.header_row_num: # 1 # skip the first column as it is a header row_count += 1 continue sub_al = 'ND' # set blank value as default assay = '' # set blank value as default valid_aliquot_performed = False skip_final_check = False # check if inquiry file contain min number of columns if len(row) < inquiry_min_number_columns: # disqualify the current inquiry file _str = 'The current inquiry file has {} columns while {} are expected and will be disqualified.' \ .format(len(row), inquiry_min_number_columns) self.error.add_error(_str) self.logger.error(_str) return # create a local DictConfigData object and copy there a dictionary object conf_dict = DictConfigData(None, self.conf_dict.get_dictionary_copy()) # get sub-aliquot value before looping through all fields, so it can be used for reporting errors # also get program_code assigned to the row program_code = self.get_inquiry_value_by_field_name( 'program_code', row) sub_al = self.get_inquiry_value_by_field_name( 'sub-aliquot', row, False) # validate program_code value if conf_dict.key_exists_in_dict( str(program_code).lower(), 'program_code'): # update dictionary config (conf_dict) with the program specific settings loaded into conf_dict_program conf_dict_program_path = gc.CONFIG_FILE_DICTIONARY_PROGRAM\ .replace('{program}', conf_dict.get_dict_value(str(program_code).lower(), 'program_code')) conf_dict_program = ConfigData(conf_dict_program_path) conf_dict.update(conf_dict_program.get_whole_dictionary()) else: _str = 'Unexpected value "{}" was provided for "program_code" (line #{})' \ .format(program_code, row_count) self.logger.critical(_str) # disqualify an inquiry file row, if unexpected value was provided self.disqualify_inquiry_item(sub_al, _str, row) failed_cnt += 1 skip_final_check = True if not skip_final_check: # go through fields and validate the provided values for i in range(len(row)): if i + 1 > inquiry_validate_number_columns: # if number of columns in the inquiry file > expected maximum, exit the loop break col_category = conf_dict.get_dict_value( str(i + 1), 'inquiry_file_structure') if col_category in ('program_code', 'sub-aliquot'): # no checking is needed for the listed field, proceed further continue elif col_category == 'db_center_id': # get center id value and validate it db_center_id = row[i] # validate center_code or center_id value self.logger.info( 'Start validation of center value "{}" provided for the current row' .format(db_center_id)) db = DBAccess(self.logger, self.conf_main, self.error) # create DBAccess object db.open_connection() # test center value assuming center code was 
provided dataset = db.validate_center_code( db_center_id, program_code, 'code', 'code') _str_err_out1, center_id_out1 = self.check_validation_dataset_outcome( dataset, 'center_id', 'center_code') if center_id_out1: # center id was returned, meaning center was validated fine db_center_id = center_id_out1 else: # if center code was not validated at first attempt, validate it assuming the center id was given dataset = db.validate_center_code( db_center_id, program_code, 'id', 'code') _str_err_out2, center_id_out2 = self.check_validation_dataset_outcome( dataset, 'center_id', 'center_id') if center_id_out2: # center id was validated at the 2nd attempt, ignore the 1st validation attempt db_center_id = center_id_out2 else: # center validation attempts failed, report both failures _str = 'Provided center value cannot be interpreted neither as code nor id; ' \ 'here are both validation outcomes: ' + \ ' | '.join([_str_err_out1, _str_err_out2]) self.logger.warning(_str) self.disqualify_inquiry_item(sub_al, _str, row) failed_cnt += 1 skip_final_check = True break # if the aliquot validation is required, validate the sub-aliquot value using the db_center_id value if valid_aliquot_flag: # aliquot id validation is required valid_aliquot_performed = True # flag that aliquot validation was done if isinstance(db_center_id, int): # db_center_id.isnumeric(): # since center is numeric, proceed here # get aliquot id based on the verified earlier assay value and given sub_aliquot id aliquot = conf_dict.convert_sub_aliq_to_aliquot( sub_al, assay) valid_status, valid_desc = self.db_access.validate_aliquot_id( aliquot, db_center_id) if valid_status != 'OK': # disqualify an inquiry file row, if returned status is not OK _str = 'No match was found for the aliquot id "{}" (row #{}) in the manifest dataset ' \ 'of the database. DB response => Status: "{}"; Description: "{}".'\ .format(aliquot, row_count, valid_status, valid_desc) self.logger.warning(_str) self.disqualify_inquiry_item( sub_al, _str, row) failed_cnt += 1 skip_final_check = True break else: # report unexpected center id value _str = 'Unexpected value "{}" was provided for "db_center_id" (line #{}, column #{}). This is a ' \ 'critical error because this value is required (based on the configuration setting ' \ '"Validate/aliquot_id_vs_manifest") to validate the provided aliquot id "{}"' \ .format(db_center_id, row_count, i + 1, sub_al) self.logger.warning(_str) # disqualify an inquiry file row, if unexpected value was provided self.disqualify_inquiry_item(sub_al, _str, row) failed_cnt += 1 skip_final_check = True # break else: self.logger.info( 'Validating of the provided aliquot_id "{}" is not required based on the ' 'value of the config parameter "Validate/aliquot_id_vs_manifest": "{}".' 
.format(sub_al, valid_aliquot_flag)) else: if col_category == 'assay': assay = row[i].strip().lower( ) # save assay value to a dedicated variable if valid_inquiry_values_flag: # if validation of the inquiry values vs dictionary is required validate_values = [] validate_categories = [] if col_category == 'bulk_location': # get inquiry_file_structure_bulk_location value bulk_value_delim = conf_dict.get_dict_value( 'inquiry_file_structure_bulk_location_delim', '') validate_values = str( row[i]).split(bulk_value_delim) validate_categories = conf_dict.get_dict_object( 'inquiry_file_structure_bulk_location', '') else: validate_values.append(str(row[i]).lower()) validate_categories.append(col_category) for vv, vc in zip(validate_values, validate_categories): if not conf_dict.key_exists_in_dict( vv.lower(), vc): if col_category == 'bulk_location': _str = 'Unexpected value "{}" was provided for "{}" as a part of ' \ 'the "bulk_location" value (line #{}, column #{})' \ .format(vv, vc, row_count, i + 1) else: _str = 'Unexpected value "{}" was provided for "{}" (line #{}, column #{})'\ .format(vv, vc, row_count, i+1) self.logger.critical(_str) # disqualify an inquiry file row, if unexpected value was provided self.disqualify_inquiry_item( sub_al, _str, row) failed_cnt += 1 skip_final_check = True break if skip_final_check: break # check that if aliquot validation is required it was actually performed if not skip_final_check: if valid_aliquot_flag and not valid_aliquot_performed: _str = 'Required aliquot validation vs. database manifest was not performed for the current row ' \ '(#{}) and it is considered a disqualification reason (most likely the db_center_id column ' \ 'was not provided). ' \ .format(row_count) self.logger.critical(_str) # disqualify an inquiry file row, if unexpected value was provided self.disqualify_inquiry_item(sub_al, _str, row) failed_cnt += 1 row_count += 1 self.logger.info('Finish validating the inquiry file with{}.'.format( ' no errors' if failed_cnt == 0 else ' errors; {} records were disqualified - see earlier log entries for details' .format(failed_cnt)))
class MappingFileText(File): def __init__(self, filepath, conf_source, log_obj, file_type=None, file_delim=None): # setup default parameters if file_type is None: file_type = 1 if file_delim is None: file_delim = ',' #'\t' File.__init__(self, filepath, file_type, file_delim) self.conf_src = ConfigData('', conf_source) self.logger = log_obj self.map = { } # it will hold a dict where key is an aliquot id and value is a list of matching file paths # set file properties before loading it self.file_delim = self.conf_src.get_value('file_delim') \ if self.conf_src.get_value('file_delim') else self.file_delim self.header_row_num = self.conf_src.get_value('header_row_num') \ if self.conf_src.get_value('header_row_num') else self.header_row_num # load the file self.get_file_content() def load_map(self, data_loc): disqualify = None aliquot_id_col_num = self.conf_src.get_value('aliquot_id_column_num') template_fields_col_num = self.conf_src.get_value( 'template_fields_col_num') file_path = self.conf_src.get_value('file_path_template') # raw_file_name = self.conf_src.get_value('file_name_template') if aliquot_id_col_num is None: disqualify = ('' if disqualify is None else disqualify + '| ') disqualify = disqualify + 'Expected map file\'s configuration parameter "aliquot_id_column_num" was not provided.' if template_fields_col_num is None: disqualify = ('' if disqualify is None else disqualify + '| ') disqualify = disqualify + 'Expected map file\'s configuration parameter "template_fields_col_num" was not provided.' if file_path is None: disqualify = ('' if disqualify is None else disqualify + '| ') disqualify = disqualify + 'Expected map file\'s configuration parameter "file_path_template" was not provided.' if not isinstance(aliquot_id_col_num, int): disqualify = ('' if disqualify is None else disqualify + '| ') disqualify = disqualify + 'Non-integer value was provided for the map file\'s "aliquot_id_column_num" parameter.' for entry in template_fields_col_num: if not isinstance(template_fields_col_num[entry], int): disqualify = ('' if disqualify is None else disqualify + '| ') disqualify = disqualify + 'Non-integer value was provided for the map file\'s {} parameter.'.format( entry) if disqualify is None: row_num = 0 for row in self.lineList: row_num += 1 if row_num <= self.header_row_num: continue cur_aliquot_id = row[aliquot_id_col_num - 1] cur_fields = copy.deepcopy(template_fields_col_num) cur_raw_file_path = file_path # cur_raw_file_name = raw_file_name # combine path of the data file for the current row of mapping file for fld_name in cur_fields: fld_val = row[cur_fields[fld_name] - 1] cur_raw_file_path = cur_raw_file_path.replace( '{' + fld_name + '}', fld_val) # print (str(Path(data_loc) / cur_raw_file_path)) files = glob.glob(str(Path(data_loc) / cur_raw_file_path)) if files: for file in files: if not cur_aliquot_id in self.map: self.map[cur_aliquot_id] = [] self.map[cur_aliquot_id].append(file) return disqualify
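# Hedged example of the mapping-file configuration that load_map() reads and of the structure it
# builds in self.map. The key names mirror the get_value() calls above; every concrete value here
# (column numbers, path template, aliquot ids) is invented for illustration only.
example_map_conf = {
    'aliquot_id_column_num': 1,                       # 1-based column holding the aliquot id
    'template_fields_col_num': {'aliquot_id': 1,      # placeholders used in file_path_template,
                                'lane': 2},           # each pointing at a 1-based column number
    'file_path_template': '{aliquot_id}/raw/{aliquot_id}_L{lane}_*.fastq.gz',
}
# After load_map(data_loc), self.map would look roughly like:
# {'AL0001': ['/data/bulk/AL0001/raw/AL0001_L1_R1.fastq.gz', ...]}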
def process_download_inquiries(): # load main config file and get required values m_cfg = ConfigData(gc.CONFIG_FILE_MAIN) if not m_cfg.loaded: print( 'Specified main config file ({}) was not loaded. Aborting execution.' .format(gc.CONFIG_FILE_MAIN)) return 1 # load location config file (with local value specific for the location) cfg_location = ConfigData(gc.CONFIG_FILE_LOCATION) if not cfg_location.loaded: print( 'Specified location config file ({}) was not loaded. Aborting execution.' .format(gc.CONFIG_FILE_LOCATION)) return 1 # if both configs were loaded, update the main config with the location config m_cfg.update(cfg_location.get_whole_dictionary()) # print ('m_cfg = {}'.format(m_cfg.cfg)) # assign values common_logger_name = gc.MAIN_LOG_NAME # m_cfg.get_value('Logging/main_log_name') # get path configuration values logging_level = m_cfg.get_value('Logging/main_log_level') # path to the folder where all new inquiry files will be posted inquiries_loc = m_cfg.get_value('Location/inquiries') gc.DISQUALIFIED_INQUIRIES = m_cfg.get_value( 'Location/inquiries_disqualified') # get path configuration values and save them to global_const module # path to the folder where all application level log files will be stored (one file per run) gc.APP_LOG_DIR = m_cfg.get_value('Location/app_logs') # path to the folder where all log files for processing inquiry files will be stored # (one file per inquiry) gc.INQUIRY_LOG_DIR = m_cfg.get_value('Location/inquiry_logs_relative_path') # path to the folder where all processed (and renamed) inquiries will be stored gc.INQUIRY_PROCESSED_DIR = m_cfg.get_value( 'Location/inquiries_processed_relative_path') # get config setting for the processed_add_datestamp and save it to global const module processed_add_datestamp = m_cfg.get_value( 'Location/processed_add_datestamp') if processed_add_datestamp: gc.PROCESSED_ADD_DATESTAMP = processed_add_datestamp # path to the folder where created submission packages will be located. One package sub_folder per inquiry. 
gc.OUTPUT_REQUESTS_DIR = m_cfg.get_value('Location/output_requests') # path to dir with dynamically created inquiry files for disqualified aliquots gc.DISQUALIFIED_INQUIRIES = m_cfg.get_value( 'Location/inquiries_disqualified_path') log_folder_name = gc.APP_LOG_DIR # gc.LOG_FOLDER_NAME # this variable define if Data Downloader app will be executed at the end of processing inquiries run_data_download = m_cfg.get_value('Execute/run_data_downloader') # path to the Data Downloader tool gc.DATA_DOWNLOADER_PATH = m_cfg.get_value('Location/data_downloader_path') prj_wrkdir = os.path.dirname(os.path.abspath(__file__)) email_msgs = [] # email_attchms = [] inquiries_path = Path(inquiries_loc) # get current location of the script and create Log folder # if a relative path provided, convert it to the absolute address based on the application working dir if not os.path.isabs(log_folder_name): logdir = Path(prj_wrkdir) / log_folder_name else: logdir = Path(log_folder_name) # logdir = Path(prj_wrkdir) / log_folder_name # 'logs' lg_filename = time.strftime("%Y%m%d_%H%M%S", time.localtime()) + '.log' lg = setup_logger_common(common_logger_name, logging_level, logdir, lg_filename) # logging_level mlog = lg['logger'] mlog.info( 'Start processing download inquiries in "{}"'.format(inquiries_path)) try: (root, source_inq_dirs, _) = next(walk(inquiries_path)) inq_proc_cnt = 0 errors_present = 'OK' for inq_dir in source_inq_dirs: source_inquiry_path = Path(root) / inq_dir mlog.info( 'Selected for processing inquiry source: "{}", full path: {}'. format(inq_dir, source_inquiry_path)) (_, _, inq_files) = next(walk(source_inquiry_path)) # filter only excel files for processing as inquiries inquiries = [ fl for fl in inq_files if fl.endswith(('xlsx', 'xls')) ] # filter out temp files (starting with '~$') created when an excel file is open inquiries = [fl for fl in inquiries if not fl.startswith('~$')] mlog.info('Inquiry files presented (count = {}): "{}"'.format( len(inquiries), inquiries)) for inq_file in inquiries: inq_path = Path(source_inquiry_path) / inq_file # email_msgs = [] # email_attchms = [] try: # print('--------->Process file {}'.format(inq_path)) mlog.info('The following Inquiry file was selected: "{}".'. format(inq_path)) # save timestamp of beginning of the file processing ts = time.strftime("%Y%m%d_%H%M%S", time.localtime()) inq_obj = Inquiry(inq_path, m_cfg) if inq_obj and inq_obj.loaded: # proceed processing inquiry mlog.info('Inquiry file was successfully loaded.') mlog.info( 'Starting processing Download Inquiry file: "{}".'. format(inq_path)) inq_obj.process_inquiry() mlog.info( 'Processing of Download Inquiry was finished for {}' .format(inq_path)) inq_proc_cnt += 1 # identify if any errors were identified and set status variable accordingly if not inq_obj.error.exist(): if not inq_obj.disqualified_items: # no disqualified sub-aliquots present fl_status = 'OK' _str = 'Processing status: "{}". Download Inquiry: {}'.format( fl_status, inq_path) # errors_present = 'OK' # this variable is set to OK by default, no update needed else: # some disqualified sub-aliquots are presetn fl_status = 'OK_with_Disqualifications' _str = 'Processing status: "{}". Download Inquiry: {}'.format( fl_status, inq_path) if not errors_present == 'ERROR': errors_present = 'DISQUALIFY' else: fl_status = 'ERROR' _str = 'Processing status: "{}". 
Check processing log file for this inquiry: {}' \ .format(fl_status, inq_obj.logger.handlers[0]) errors_present = 'ERROR' if fl_status == "OK": mlog.info(_str) else: mlog.warning(_str) processed_dir = inq_obj.processed_folder # 'Processed' # combine the name of the processed file inq_processed_name = fl_status + '_' + str( inq_file).replace(' ', '_').replace('__', '_') if gc.PROCESSED_ADD_DATESTAMP: inq_processed_name = ts + '_' + inq_processed_name # move processed files to Processed folder fl_processed_name = cm.move_file_to_processed( inq_path, inq_processed_name, processed_dir, inq_obj.logger, inq_obj.error) if fl_processed_name: mlog.info( 'Processed file "{}" was moved(renamed) to: "{}"'. format(inq_path, processed_dir / fl_processed_name)) else: errors_present = errors_present + '|MoveProcessedError' mlog.warning( 'Moving the processed file "{}" was not successful due to some errors ' 'reported in the request\'s log file {}.'.format( inq_path, inq_obj.log_handler.baseFilename)) # preps for email notification # create a dictionary to feed into template for preparing an email body template_feeder = { 'file_num': inq_proc_cnt, 'file_path': str(inq_path), 'file_path_new': (str(processed_dir / fl_processed_name) if processed_dir and fl_processed_name else None), 'inq_obj_errors_cnt': inq_obj.error.count, 'log_file_path': inq_obj.log_handler.baseFilename, 'dld_request_file_path': str(inq_obj.download_request_path), 'inq_sources': inq_obj.inq_sources, 'inq_match_aliquots': inq_obj.inq_match_arr, 'inq_disqul_aliquots': inq_obj.disqualified_items, 'inq_disqul_reprocess_path': str(inq_obj.disqualified_inquiry_path) } email_body_part = cm.populate_email_template( 'processed_inquiry.html', template_feeder) email_msgs.append(email_body_part) # deactivate the current Inquiry logger deactivate_logger_common(inq_obj.logger, inq_obj.log_handler) inq_obj = None except Exception as ex: # report an error to log file and proceed to next file. mlog.error( 'Error "{}" occurred during processing file: {}\n{} '. format(ex, inq_path, traceback.format_exc())) raise mlog.info('Number of successfully processed Inquiries = {}'.format( inq_proc_cnt)) # start Data Download request if proper config setting was provided dd_status = {'status': '', 'message': ''} if run_data_download: # start process mlog.info( 'Starting asynchronously Data Downloader app: "{}".'.format( gc.DATA_DOWNLOADER_PATH)) try: dd_process = cm.start_external_process_async( gc.DATA_DOWNLOADER_PATH) # check if it is running dd_status = cm.check_external_process(dd_process) mlog.info( 'Status of running Data Downloader app: "{}".'.format( dd_status)) except Exception as ex: # report unexpected error during starting Data Downloader _str = 'Unexpected Error "{}" occurred during an attempt to start Data Downloader app ({})\n{} ' \ .format(ex, gc.DATA_DOWNLOADER_PATH, traceback.format_exc()) mlog.critical(_str) dd_status = {'status': 'Error', 'message': _str} mlog.info('Preparing to send notificatoin email.') email_to = m_cfg.get_value('Email/send_to_emails') email_subject = 'processing of download inquiry. 
' if inq_proc_cnt > 0: # inquiries and len(inquiries) > 0: # collect final details and send email about this study results err_present = errors_present.split( '|' ) # get all statuses into an array; 1st element is the main status if err_present: # set email subject based on the main status err_present[0] if err_present[0] == 'OK': email_subject = 'SUCCESSFUL ' + email_subject elif err_present[0] == 'DISQUALIFY': email_subject = 'SUCCESSFUL (with disqualifications) ' + email_subject else: email_subject = 'ERROR(s) present during ' + email_subject if len(err_present) > 1: if err_present[1] == 'MoveProcessedError': email_subject = email_subject + ' Error moving inquiry to processed.' if dd_status and 'status' in dd_status.keys( ) and dd_status['status'].lower() == 'error': email_subject = email_subject + ' Errors starting Data Downloader.' # create a dictionary to feed into template for preparing an email body template_feeder = { 'inq_cnt': inq_proc_cnt, 'run_data_download': run_data_download, 'downloader_path': gc.DATA_DOWNLOADER_PATH, 'downloader_start_status': dd_status['status'].lower(), 'processed_details': '<br/>'.join(email_msgs) } email_body = cm.populate_email_template('processed_inquiries.html', template_feeder) # remove return characters from the body of the email, to keep just clean html code email_body = email_body.replace("\r", "") email_body = email_body.replace("\n", "") # print ('email_subject = {}'.format(email_subject)) # print('email_body = {}'.format(email_body)) mlog.info( 'Sending a status email with subject "{}" to "{}".'.format( email_subject, email_to)) try: if m_cfg.get_value('Email/send_emails'): email.send_yagmail( emails_to=email_to, subject=email_subject, message=email_body # commented adding attachements, since some log files go over 25GB limit and fail email sending # ,attachment_path=email_attchms ) except Exception as ex: # report unexpected error during sending emails to a log file and continue _str = 'Unexpected Error "{}" occurred during an attempt to send email upon ' \ 'finishing processing "{}" study: {}\n{} ' \ .format(ex, inq_path, os.path.abspath(__file__), traceback.format_exc()) mlog.critical(_str) mlog.info( 'End of processing of download inquiries in "{}".'.format( inquiries_path)) except Exception as ex: # report unexpected error to log file _str = 'Unexpected Error "{}" occurred during processing file: {}\n{} ' \ .format(ex, os.path.abspath(__file__), traceback.format_exc()) mlog.critical(_str) raise sys.exit()
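# Hedged sketch of a likely entry point; the real invocation is not part of this excerpt.
if __name__ == '__main__':
    process_download_inquiries()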
def __init__(self, study_cfg):
    self.cfg = ConfigData(gc.CONFIG_FILE_MAIN)  # obj_cfg
    self.s_conn = self.cfg.get_item_by_key(gc.CFG_DB_CONN).strip()
    self.study_cfg = study_cfg
class SubmissionForm: def __init__(self, form_name, request, sub_aliquot, aliquot, sample, form_file_name_id=None): self.form_name = form_name if not form_file_name_id: form_file_name_id = form_name self.form_file_name_id = form_file_name_id self.req_obj = request # reference to the current request object self.sub_aliquot = sub_aliquot self.aliquot = aliquot self.sample = sample self.error = self.req_obj.error self.logger = self.req_obj.logger self.conf_assay = request.conf_assay self.fl_json = None self.fl_json_schema = None self.fl_cfg_common = None self.fl_cfg_assay = None # self.fl_cfg_dict = None self.prepare_form(form_name) def prepare_form(self, form_name): forms_location = Path(gc.SUBMISSION_FORMS_DIR + '/' + form_name + '/' + self.req_obj.project) # identify paths for json and config (yaml) files fl_path_json_common = forms_location / (form_name + '.json') fl_path_json_assay = forms_location / ( form_name + '_' + str(self.req_obj.assay).lower() + '.json') fl_path_json_schema = forms_location / (form_name + '_schema.json') fl_path_cfg_common = forms_location / (form_name + '.yaml') # fl_path_json_common = Path(gc.SUBMISSION_FORMS_DIR + '/' + form_name + '/' + form_name + '.json') # fl_path_json_assay = Path(gc.SUBMISSION_FORMS_DIR + '/' + form_name + '/' + form_name + '_' + # str(self.req_obj.assay).lower() + '.json') # fl_path_json_schema = Path(gc.SUBMISSION_FORMS_DIR + '/' + form_name + '/' + form_name + '_schema.json') # fl_path_cfg_common = Path(gc.SUBMISSION_FORMS_DIR + '/' + form_name + '/' + form_name + '.yaml') # check the value assigned to the current request's data_source_forms_assignment # and select assay config file accordingly if self.req_obj.data_source_forms_assignment == 'file': fl_path_cfg_assay = forms_location / ( form_name + '_' + str(self.req_obj.assay).lower() + '.yaml') elif self.req_obj.data_source_forms_assignment == 'db': fl_path_cfg_assay = forms_location / ( form_name + '_' + str(self.req_obj.assay).lower() + '_db.yaml') else: # data_source_forms_assignment = 'db' will be treated as a default assignment fl_path_cfg_assay = forms_location / ( form_name + '_' + str(self.req_obj.assay).lower() + '.yaml') # check if assay specific json exists; if yes - use it, if not - use common one if cm.file_exists(fl_path_json_assay): fl_path_json = fl_path_json_assay else: fl_path_json = fl_path_json_common # load json and config files self.fl_json = FileJson(fl_path_json, self.req_obj.error, self.req_obj.logger) self.fl_json_schema = FileJson(fl_path_json_schema, self.req_obj.error, self.req_obj.logger) self.fl_cfg_common = ConfigData(fl_path_cfg_common) self.fl_cfg_assay = ConfigData(fl_path_cfg_assay) # self.fl_cfg_dict = ConfigData(gc.CONFIG_FILE_DICTIONARY) # print(self.fl_json.json_data) # loop through all json keys and fill those with associated data self.get_json_keys(self.fl_json.json_data) # print(self.fl_json.json_data) # validate final json file against json schema (if present) self.validate_json(self.fl_json, self.fl_json_schema) def get_json_keys(self, json_node, parent_keys=''): for key, val in json_node.items(): # TODO: add functionality to handle JSON arrays (if those are needed) if isinstance(val, dict): if parent_keys: cur_parents = '/'.join([parent_keys, key]) else: cur_parents = key self.get_json_keys(val, cur_parents) else: if parent_keys: full_key_name = '/'.join([parent_keys, key]) else: full_key_name = key # json_node[key] = 'print("{}")'.format(full_key_name) # json_node[key] = eval(json_node[key]) # print("JSON file - {} : 
{}".format(full_key_name, val)) # val # json_node[key] # print("Config Common - {} = {}".format(key, self.fl_cfg_common.get_value(key))) # print("Config Assay - {} = {}".format(key, self.fl_cfg_assay.get_value(key))) val = self.eval_cfg_value( full_key_name, self.fl_cfg_assay.get_value(full_key_name), self.fl_cfg_common.get_value(full_key_name)) if str(val).strip() == '': # if returned value is blank, create a warning in the log file self.logger.warning( 'Blank value was reported for field "{}" '.format( full_key_name)) # check if the assigned value is a special expected blank value that don't need to be reported in log if str(val).strip( ) == gc.SUBMISSION_FORM_EXPECTED_BLANK_VALUE: # '!!blank!!' json_node[key] = '' self.logger.info( 'Field "{}" was assigned with the expected blank ("") value' .format(key)) else: # assign retrieved key back to associated json key json_node[key] = val self.logger.info( 'Field "{}" was assigned with "{}" value'.format( key, val)) # print(key, '==>', json_node[key]) pass def eval_cfg_value(self, key, assay_cfg_val, common_cfg_val): # if assay config key is not provided, use common assay val if assay_cfg_val: cfg_val = assay_cfg_val else: cfg_val = common_cfg_val eval_flag = gc.SUBMISSION_YAML_EVAL_FLAG # 'eval!' # check if some configuration instruction/key was retrieved for the given "key" if cfg_val: if eval_flag in str(cfg_val): cfg_val = cfg_val.replace(eval_flag, '') # replace 'eval!' flag key try: out_val = eval(cfg_val) except Exception as ex: _str = 'Error "{}" occurred during preparing submission form "{}" for sub-aliquot "{}" ' \ 'while attempting to interpret configuration key "{}" provided for the form\'s key ' \ '"{}". \n{} ' \ .format(ex, self.form_name, self.sub_aliquot, cfg_val, key, traceback.format_exc()) self.logger.error(_str) self.error.add_error(_str) out_val = '' else: out_val = cfg_val else: # requested "key" does not exist neither in assay or common config files _str = 'No value was assigned to "{}" key during preparing submission form "{}" for sub-aliquot "{}".' 
\ .format(key, self.form_name, self.sub_aliquot) self.logger.warning(_str) out_val = '' return out_val def get_tarball_property(self, sa, val_type): value = '' if self.req_obj.attachments: tar_obj = self.req_obj.attachments.aliquots_tarball_dict[sa] if tar_obj: if val_type == 'name': value = os.path.basename(tar_obj['path']) elif val_type == 'md5': value = tar_obj['md5'] return value # it will retrieve any existing property_val from the request object def get_request_value(self, property_name, check_dict=False): return self.get_property_value_from_object(self.req_obj, property_name, check_dict) # it will retrieve any existing property_val from the submission_form object def get_submission_form_value(self, property_name, check_dict=False): return self.get_property_value_from_object(self, property_name, check_dict) # it will retrieve any existing property_val from rawdata object def get_rawdata_value(self, property_name, check_dict=False): # return self.get_property_value_from_object(self.req_obj.raw_data.aliquots_data_dict[self.sub_aliquot], # property_name, check_dict, 'dict') return self.get_sourcedata_value('rawdata', property_name, check_dict) # it will retrieve any existing property_val from assay data object def get_assaydata_value_by_col_number(self, col_num, check_dict=False): # obj = list(self.req_obj.assay_data.aliquots_data_dict[self.sub_aliquot].items()) # val = self.get_property_value_from_object(obj, col_num - 1, check_dict, 'dict', 'number') # if isinstance(val, tuple): # return val[1] # else: # return val return self.get_sourcedata_value_by_col_number('assaydata', col_num, check_dict) # it will retrieve any existing property_val from assay data object def get_assaydata_value(self, property_name, check_dict=False): # return self.get_property_value_from_object(self.req_obj.assay_data.aliquots_data_dict[self.sub_aliquot], # property_name, check_dict, 'dict') return self.get_sourcedata_value('assaydata', property_name, check_dict) # it will retrieve any existing property_val (specified by the name) from the data source object # specified by the data_source_name def get_sourcedata_value(self, data_source_name, property_name, check_dict=False): if data_source_name in self.req_obj.data_source_names: return self.get_property_value_from_object( self.req_obj.data_source_objects[data_source_name]. aliquots_data_dict[self.sub_aliquot], property_name, check_dict, 'dict') else: _str = 'Data source name ({}) requested during populating json submission form "{}" for aliquot id ' \ '"{}" does not exists for the current assay.'.format(data_source_name, self.form_name, self.aliquot) self.logger.error(_str) self.error.add_error(_str) return '#ERROR#' # it will retrieve any existing property_val (specified by the column number) from the data source object # specified by the data_source_name def get_sourcedata_value_by_col_number(self, data_source_name, col_num, check_dict=False): if data_source_name in self.req_obj.data_source_names: obj = list(self.req_obj.data_source_objects[data_source_name]. 
aliquots_data_dict[self.sub_aliquot].items()) val = self.get_property_value_from_object(obj, col_num - 1, check_dict, 'dict', 'number') if isinstance(val, tuple): return val[1] else: return val else: _str = 'Data source name ({}) requested during populating json submission form "{}" for aliquot id ' \ '"{}" does not exists for the current assay.'.format(data_source_name, self.form_name, self.aliquot) self.logger.error(_str) self.error.add_error(_str) return '#ERROR#' # it will retrieve a key of a property_val named in "property_val" parameter # from the object passed as a reference in "obj" parameter # obj_type possible values: "class" (type of "obj" is class), # "dict" (type of "obj" is dictionary) # property_type possible values: "name" ("property_val" is name of property_val), # "number" ("property_val" is number of items in dictionary) # noinspection PyUnusedLocal def get_property_value_from_object(self, obj, property_val, check_dict=False, obj_type='class', property_type='name'): property_val = str(property_val) if property_type == 'name': # if property_val name is given, proceed here if obj_type == 'class': get_item = 'obj.' + property_val + ' if hasattr(obj, "' + property_val + '") else ""' elif obj_type == 'dict': get_item = 'obj["' + property_val + '"] if "' + property_val + '" in obj else ""' else: get_item = None else: # if column number is given, proceed here get_item = 'obj[' + property_val + ']' try: out = eval(get_item) if check_dict: out = cm2.get_dict_value(out, property_val) except Exception as ex: _str = 'Error "{}" occurred during preparing submission form "{}" for sub-aliquot "{}" ' \ 'while attempting to evaluate property_val: "{}". \n{} ' \ .format(ex, self.form_name, self.sub_aliquot, get_item, traceback.format_exc()) self.logger.error(_str) self.error.add_error(_str) out = '' return out # converts an array of values (i.e. list of aliquots) in to list of dictionaries with a given key name # For example: [1, 2, 3] => [{name: 1}, {name: 2}, {name: 3}] @staticmethod def convert_simple_list_to_list_of_dict(sm_arr, key_name): out = [] for a in sm_arr: dict_ob = {key_name: a} out.append(dict_ob) return out def validate_json(self, json_file, schema_file): try: validate(json_file.json_data, schema_file.json_data) _str = 'Validation of "{}" against "{}" was successful.'.format( json_file.filepath, schema_file.filepath) self.logger.info(_str) except jsonschema.exceptions.ValidationError as ve: _str = 'Validation of "{}" file against schema "{}" failed with the following error: \n{}' \ .format(json_file.filepath, schema_file.filepath, ve) self.logger.error(_str) self.error.add_error(_str)
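# Hedged illustration (key names invented) of the kind of YAML-driven values eval_cfg_value() handles:
# values starting with the eval flag (shown as 'eval!' in the comment above) are evaluated against this
# SubmissionForm instance, the '!!blank!!' marker produces an intentional empty string, and anything
# else is passed through verbatim.
example_form_values = {
    'sample/aliquot_id': 'eval!self.aliquot',                     # evaluated -> current aliquot id
    'sample/program': "eval!self.get_request_value('project')",   # evaluated -> request attribute
    'sample/comment': '!!blank!!',                                # expected blank, logged as such
    'sample/assay_name': 'proteomics',                            # literal value used as-is
}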
def match_inquiry_items_to_sources(self): cur_row = -1 for inq_line in self.lines_arr: cur_row += 1 # increase row counter if cur_row == self.header_row_num - 1: continue # program_code = str(inq_line[0]) # get program code that must be a first column program_code = self.get_inquiry_value_by_field_name( 'program_code', inq_line) # create a local DictConfigData object and copy there a dictionary object conf_dict = DictConfigData(None, self.conf_dict.get_dictionary_copy()) # update dictionary config (conf_dict) with the program specific settings loaded into conf_dict_program conf_dict_program_path = gc.CONFIG_FILE_DICTIONARY_PROGRAM.replace( '{program}', program_code) conf_dict_program = ConfigData(conf_dict_program_path) conf_dict.update(conf_dict_program.get_whole_dictionary()) # print (inq_study_path) bulk_location = self.get_inquiry_value_by_field_name( 'bulk_location', inq_line, False) assay = self.get_inquiry_value_by_field_name('assay', inq_line) sub_al = self.get_inquiry_value_by_field_name( 'sub-aliquot', inq_line, False) # inq_study_path = '/'.join([program_code, bulk_location, assay]) inq_study_path = self.conf_main.get_value( 'Destination/study_path_template') inq_study_path = inq_study_path.replace('{program_code}', program_code) inq_study_path = inq_study_path.replace('{bulk_location}', bulk_location) inq_study_path = inq_study_path.replace('{assay}', assay) # check if current sub-aliquot is not part of disqualified items array if self.disqualified_items and sub_al in self.disqualified_items.keys( ): # if sub-aliquot was disqualifed already, skip this line continue # identify aliquot for the given sub-aliquot al = conf_dict.convert_sub_aliq_to_aliquot( sub_al, assay) # identify aliquot for the current inquiry line match = False # get reference to the Datasource object assigned to the current row if cur_row in self.inq_line_sources: cur_source = self.inq_sources[self.inq_line_sources[cur_row]] else: # if the data source was not assigned to the current row, skip the row using this datasource cur_source = None continue # check if any source types were disqualified during loading the datasource if cur_source.disqualified_data_sources: # if at least one source of the datasource was disqualified, skip the row using this datasource # and disqualify the current sub-aliquot as well self.disqualify_inquiry_item( sub_al, 'Datasource associated with this aliquot_id was marked as disqualified.', inq_line) continue # get a copy of the source type ids of the current datasource; # it will track number of items found for each source type cur_source_types = copy.deepcopy(cur_source.source_types) # loop through items of the source for src_item in cur_source.source_content_arr: match_out = False # attempt match by the sub-aliquot match_out, match_details = \ self.is_item_found_soft_match(sub_al, src_item['name'], src_item['soft_comparisions'], sub_al) if match_out: match = True # if sub-aliquot match was not success, attempt to match by the aliquot elif src_item['aliquot_match']: match_out, match_details = \ self.is_item_found_soft_match(al, src_item['name'], src_item['soft_comparisions'], sub_al) if match_out: match = True # if a match was found using one of the above methods, record the item to inq_match_arr if match_out: # since a match was found, verify that the source path is accessible (except for web locations) web_loc = src_item['web_location'] # real_path = os.path.realpath(src_item['path']) # real path of the current item if web_loc or not web_loc and os.path.exists( src_item['path']): 
item_details = { 'sub-aliquot': sub_al, 'study': inq_study_path, # 'source': src_item, 'source_item_name': src_item['name'], 'target_subfolder': src_item['target_subfolder'], 'real_path': src_item['path'], 'target_copied_item_name': src_item['target_copied_item_name'], 'match_details': match_details, 'source_type_id': src_item['source_type_id'], 'obj_type': src_item['obj_type'], 'source_name_generic': cur_source.source_name_generic } self.inq_match_arr.append(item_details) # record the source type id of an item to track quantity of found matches for each source type cur_source_types[ src_item['source_type_id']]['items_count'] += 1 else: self.disqualify_inquiry_item( sub_al, 'A match was found, but the identified source path is not accessible. Match details: {}. ' 'Source path: "{}". Real source path: "{}".'. format(match_details, src_item['path'], src_item['path']), inq_line) # report if no match was found and # verify that a match was found for each of the source types of the current datasource if not match: # no matches were found for the current datasource self.disqualify_inquiry_item( sub_al, 'No matching items (files/folders) were found in the current data source.', inq_line) else: if not cur_source.allow_nomatch_per_sourcetype: # some matches were found; verify that a match was found for each of the source types for src_type in cur_source_types: if cur_source_types[src_type]['items_count'] == 0: # no matches were found for this source type self.disqualify_inquiry_item( sub_al, 'No matches were found for the "{}" source type id in the datasource.' .format(src_type), inq_line)
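
# A small, self-contained sketch of the study-path template substitution performed in
# match_inquiry_items_to_sources above. The template string below is only an example;
# the real value comes from the 'Destination/study_path_template' config entry, and the
# function name is illustrative.
def build_study_path(template, program_code, bulk_location, assay):
    # replace each placeholder with the value taken from the inquiry line
    return (template
            .replace('{program_code}', program_code)
            .replace('{bulk_location}', bulk_location)
            .replace('{assay}', assay))

# illustrative usage with made-up values
print(build_study_path('{program_code}/{bulk_location}/{assay}',
                       'prog01', 'bulk_rnaseq', 'rnaseq'))
# -> prog01/bulk_rnaseq/rnaseq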
def __init__(self, cfg_monitor_path, log_obj): self.action_completed = False self.status = [] self.mtr_cfg_path = cfg_monitor_path self.log = log_obj self.error = MonitorError(self) self.mtr_cfg = ConfigData(cfg_monitor_path) if self.validate_config_file(): self.loaded = True else: self.loaded = False cur_cfg_dir = os.path.dirname(cfg_monitor_path) cur_cfg_file_name = Path(os.path.abspath(cfg_monitor_path)).name stamp_dir = Path(str(cur_cfg_dir) + '/' + gc.STAMPS_FILES_FOLDER_NAME) if not os.path.exists(stamp_dir): os.mkdir(stamp_dir) stamp_file = Path( str(stamp_dir) + '/' + cur_cfg_file_name.replace('.yaml', '_stamp.yaml')) self.verify_config_stamp_file(stamp_file) self.mtr_cfg_stamp = ConfigData(stamp_file) self.mtr_source = None self.mtr_source_path = None if self.loaded: # get config file values self.mtr_source_dir = Path( cm.eval_cfg_value( self.mtr_cfg.get_value('Location/source_dir'), self.log, None)) self.mtr_source_file = Path( self.mtr_cfg.get_value('Location/source_file')) found_files = cm.find_file_in_dir(self.mtr_source_dir, self.mtr_source_file, False) if found_files: ff_stamp = None for file_match in found_files: if not ff_stamp or ff_stamp < os.stat( Path(self.mtr_source_dir) / file_match).st_mtime: ff_stamp = os.stat( Path(self.mtr_source_dir) / file_match).st_mtime self.mtr_source = file_match # self.mtr_source = found_files[0] self.mtr_source_path = Path( self.mtr_source_dir) / self.mtr_source # else: # self.mtr_source = None # self.mtr_source_path = None self.mtr_destin = self.mtr_cfg.get_value('Location/destination') self.mtr_item = self.mtr_cfg.get_value('Monitoring/item') self.mtr_type = self.mtr_cfg.get_value('Monitoring/type') self.mtr_action = self.mtr_cfg.get_value('Monitoring/action') self.mtr_frequency = self.mtr_cfg.get_value('Monitoring/frequency') # self.mtr_email = self.mtr_cfg.get_value('Monitoring/email_notification') # self.mtr_email_cc = self.mtr_cfg.get_value('Monitoring/email_cc') # load stamp info from stamp config file self.mtr_sync_date = self.mtr_cfg_stamp.get_value( 'Last_sync/date_time') self.mtr_watch_value = self.mtr_cfg_stamp.get_value( 'Last_sync/watch_value')
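
# A minimal sketch of the "pick the newest matching file" logic used in the monitor
# initialization above, written with pathlib from the standard library. The function and
# argument names are illustrative and not part of this codebase.
from pathlib import Path

def newest_match(source_dir, pattern):
    candidates = list(Path(source_dir).glob(pattern))
    if not candidates:
        return None
    # compare candidate files by modification time and keep the latest one
    return max(candidates, key=lambda p: p.stat().st_mtime)

# illustrative usage; the directory and pattern are placeholders
# print(newest_match('/tmp', '*.yaml'))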
def __init__(self, filepath, conf_main=None, file_type=2, sheet_name=''):
    # load_configuration (main_cfg_obj)  # load global and local configurations
    File.__init__(self, filepath, file_type)
    self.sheet_name = sheet_name  # .strip()
    if conf_main:
        self.conf_main = conf_main
    else:
        self.conf_main = ConfigData(gc.CONFIG_FILE_MAIN)
    self.error = InquiryError(self)

    self.log_handler = None
    self.logger = self.setup_logger(self.wrkdir, self.filename)
    self.logger.info(
        'Start working with Download Inquiry file {}'.format(filepath))

    self.inq_match_arr = []
    self.columns_arr = []
    self.inq_sources = {}
    self.inq_line_sources = {}

    # load the dictionary config that is common for all programs
    self.conf_dict = DictConfigData(gc.CONFIG_FILE_DICTIONARY)
    if not self.conf_dict.loaded:
        # disqualify the current inquiry file
        _str = 'Aborting processing of the inquiry file - the following common dictionary config file cannot ' \
               'be loaded: {}.'.format(gc.CONFIG_FILE_DICTIONARY)
        self.error.add_error(_str)
        self.logger.error(_str)
        return

    # save the inquiry file structure into dedicated variables
    self.file_structure_by_col_num = self.conf_dict.get_inqury_file_structure(
        'by_col_num')
    self.file_structure_by_col_name = self.conf_dict.get_inqury_file_structure(
        'by_col_name')

    self.processed_folder = gc.INQUIRY_PROCESSED_DIR
    # if a relative path is provided, convert it to an absolute path based on the application working dir
    if not os.path.isabs(self.processed_folder):
        self.processed_folder = Path(self.wrkdir) / self.processed_folder
    else:
        self.processed_folder = Path(self.processed_folder)

    self.download_request_path = None
    self.disqualified_items = {}
    self.disqualified_inquiry_path = ''  # will store the path to an inquiry file with disqualified sub-aliquots

    if not self.sheet_name or len(self.sheet_name) == 0:
        # if the sheet name was not passed as a parameter, get it from the config file
        self.sheet_name = gc.INQUIRY_EXCEL_WK_SHEET_NAME  # 'wk_sheet_name'
    # print (self.sheet_name)
    self.logger.info('Data will be loaded from worksheet: "{}"'.format(
        self.sheet_name))

    self.conf_process_entity = None

    self.db_access = DBAccess(self.logger, self.conf_main, self.error)

    self.get_file_content()
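
# A short sketch of the relative-vs-absolute path handling used above for the processed
# folder: relative values are resolved against the working directory, absolute values are
# used as-is. The function name and sample paths are illustrative only.
import os
from pathlib import Path

def resolve_processed_folder(processed_folder, work_dir):
    if not os.path.isabs(processed_folder):
        # relative path: anchor it to the application working directory
        return Path(work_dir) / processed_folder
    return Path(processed_folder)

# illustrative usage
assert resolve_processed_folder('processed', '/data/app') == Path('/data/app/processed')
assert resolve_processed_folder('/archive/processed', '/data/app') == Path('/archive/processed')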
class MetadataDB: # CFG_DB_CONN = 'DB/mdb_conn_str' # name of the config parameter storing DB connection string # CFG_DB_SQL_PROC = 'DB/mdb_sql_proc_load_sample' # name of the config parameter storing DB name of the stored proc # CFG_DB_STUDY_ID = 'DB/mdb_study_id' # name of the config parameter storing key of the MDB study id # CFG_DICT_PATH = 'DB/dict_tmpl_fields_node' # name of the config parameter storing key of dictionary path # to list of fields # CFG_DB_ALLOW_DICT_UPDATE = 'DB/mdb_allow_dict_update' # name of the config parameter storing values # for "allow dict updates" # CFG_DB_ALLOW_SAMPLE_UPDATE = 'DB/mdb_allow_sample_update' # name of the config parameter storing values # for "allow sample updates" s_conn = '' conn = None def __init__(self, study_cfg): self.cfg = ConfigData(gc.CONFIG_FILE_MAIN) # obj_cfg self.s_conn = self.cfg.get_item_by_key(gc.CFG_DB_CONN).strip() self.study_cfg = study_cfg def open_connection(self): self.conn = pyodbc.connect(self.s_conn, autocommit=True) def submit_row(self, row, file): # sample_id, row_json, dict_json, filepath): dict_json = file.get_file_dictionary_json(True) filepath = str(file.filepath) sample_id = row.sample_id row_json = row.to_json() if not self.conn: self.open_connection() str_proc = self.cfg.get_item_by_key(gc.CFG_DB_SQL_PROC).strip() study_id = self.study_cfg.get_item_by_key(gc.CFG_DB_STUDY_ID).strip() dict_path = '$.' + self.study_cfg.get_item_by_key( gc.CFG_DICT_PATH).strip() dict_upd = self.study_cfg.get_item_by_key( gc.CFG_DB_ALLOW_DICT_UPDATE).strip() sample_upd = self.study_cfg.get_item_by_key( gc.CFG_DB_ALLOW_SAMPLE_UPDATE).strip() # prepare stored proc string to be executed str_proc = str_proc.replace( self.cfg.get_item_by_key(gc.CFG_FLD_TMPL_STUDY_ID), study_id) # '{study_id}' str_proc = str_proc.replace( self.cfg.get_item_by_key(gc.CFG_FLD_TMPL_SAMPLE_ID), sample_id) # '{sample_id}' str_proc = str_proc.replace( self.cfg.get_item_by_key(gc.CFG_FLD_TMPL_ROW_JSON), row_json) # '{smpl_json}' str_proc = str_proc.replace( self.cfg.get_item_by_key(gc.CFG_FLD_TMPL_DICT_JSON), dict_json) # '{dict_json}' str_proc = str_proc.replace( self.cfg.get_item_by_key(gc.CFG_FLD_TMPL_DICT_PATH), dict_path) # '{dict_path}' str_proc = str_proc.replace( self.cfg.get_item_by_key(gc.CFG_FLD_TMPL_FILEPATH), filepath) # '{filepath}' str_proc = str_proc.replace( self.cfg.get_item_by_key(gc.CFG_FLD_TMPL_DICT_UPD), dict_upd) # '{dict_update}' str_proc = str_proc.replace( self.cfg.get_item_by_key(gc.CFG_FLD_TMPL_SAMPLE_UPD), sample_upd) # '{samlpe_update}' # get currrent file_processing_log file.logger.debug('SQL Procedure call = {}'.format(str_proc)) # print ('procedure (str_proc) = {}'.format(str_proc)) try: cursor = self.conn.cursor() cursor.execute(str_proc) # returned recordsets rs_out = [] rows = cursor.fetchall() columns = [column[0] for column in cursor.description] results = [] for row in rows: results.append(dict(zip(columns, row))) rs_out.append(results) return rs_out except Exception as ex: # report an error if DB call has failed. _str = 'Error "{}" occurred during submitting a row (sample_id = "{}") to database; ' \ 'used SQL script "{}". Here is the traceback: \n{} '.format( ex, sample_id, str_proc, traceback.format_exc()) row.error.add_error(_str) file.logger.error(_str)
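
# A hedged sketch of an equivalent database call that uses pyodbc query parameters instead
# of building the stored-procedure string by replacement, as submit_row above does. The
# connection string, procedure name, and parameter list below are placeholders for
# illustration; the real values come from the config keys referenced in MetadataDB.
import pyodbc

def call_load_sample_proc(conn_str, study_id, sample_id, row_json, dict_json):
    conn = pyodbc.connect(conn_str, autocommit=True)
    cursor = conn.cursor()
    # ODBC call syntax with bound parameters; the procedure name is assumed
    cursor.execute('{CALL dbo.usp_load_sample (?, ?, ?, ?)}',
                   (study_id, sample_id, row_json, dict_json))
    # convert the returned recordset into a list of dictionaries keyed by column name
    columns = [column[0] for column in cursor.description]
    return [dict(zip(columns, row)) for row in cursor.fetchall()]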
def process_submission(): # load main config file m_cfg = ConfigData(gc.CONFIG_FILE_MAIN) if not m_cfg.loaded: print( 'Specified main config file ({}) was not loaded. Aborting execution.' .format(gc.CONFIG_FILE_MAIN)) return 1 # load location config file (with local value specific for the location) cfg_location = ConfigData(gc.CONFIG_FILE_LOCATION) if not cfg_location.loaded: print( 'Specified location config file ({}) was not loaded. Aborting execution.' .format(gc.CONFIG_FILE_LOCATION)) return 1 # if both config were loaded, load update the main config with the location config m_cfg.update(cfg_location.get_whole_dictionary()) # assign values common_logger_name = gc.MAIN_LOG_NAME # m_cfg.get_value('Logging/main_log_name') # get path configuration values logging_level = m_cfg.get_value('Logging/main_log_level') # path to the folder where all new request files will be posted requests_loc = m_cfg.get_value('Location/requests') gc.DISQUALIFIED_REQUESTS = m_cfg.get_value( 'Location/requests_disqualified') # get path configuration values and save them to global_const module # path to the folder where all application level log files will be stored (one file per run) gc.APP_LOG_DIR = m_cfg.get_value('Location/app_logs') # path to the folder where all log files for processing request files will be stored # (one file per request) gc.REQ_LOG_DIR = m_cfg.get_value('Location/request_logs') # path to the folder where all processed (and renamed) requests will be stored gc.REQ_PROCESSED_DIR = m_cfg.get_value('Location/requests_processed') # path to the folder where created submission packages will be located. One package sub_folder per request. # gc.OUTPUT_PACKAGES_DIR = m_cfg.get_value('Location/output_packages') # tarball approach to be used for the current deployment gc.TARBALL_APPROACH = m_cfg.get_value('Tar_ball/approach') # flag to save calculated md5sum to a physical file gc.TARBALL_SAVE_MD5SUM_FILE = m_cfg.get_value('Tar_ball/save_md5sum_file') # tarball ignore directories ignore_dirs = m_cfg.get_value('Tar_ball/ignore_dirs') if ignore_dirs: # update default ignore_dirs value with the value from a config file gc.TARBALL_IGNORE_DIRS = ignore_dirs log_folder_name = gc.APP_LOG_DIR # gc.LOG_FOLDER_NAME processed_folder_name = gc.REQ_PROCESSED_DIR # gc.PROCESSED_FOLDER_NAME prj_wrkdir = os.path.dirname(os.path.abspath(__file__)) email_msgs = [] email_attchms = [] transfers = [] # requests_loc = 'E:/MounSinai/MoTrPac_API/ProgrammaticConnectivity/MountSinai_metadata_file_loader/DataFiles' requests_path = Path(requests_loc) # get current location of the script and create Log folder # if a relative path provided, convert it to the absolute address based on the application working dir if not os.path.isabs(log_folder_name): logdir = Path(prj_wrkdir) / log_folder_name else: logdir = Path(log_folder_name) # logdir = Path(prj_wrkdir) / log_folder_name # 'logs' lg_filename = time.strftime("%Y%m%d_%H%M%S", time.localtime()) + '.log' lg = setup_logger_common(common_logger_name, logging_level, logdir, lg_filename) # logging_level mlog = lg['logger'] log_warnings = False mlog.info( 'Start processing submission requests in "{}"'.format(requests_path)) try: (_, _, requests) = next(walk(requests_path)) # print('Study requests: {}'.format(requests)) mlog.info( 'Submission requests to be processed (count = {}): {}'.format( len(requests), requests)) req_proc_cnt = 0 errors_present = 'OK' req_path = '' # '~$' should filter out temp file created when excel is open requests = [file for file in requests if not 
file.startswith('~$')] for req_file in requests: if req_file.endswith(('xlsx', 'xls')): req_path = Path(requests_path) / req_file # transfer_path = '' # set a default transfer path transfer_details = { 'transfer_path': '', 'request_file': req_file, 'process_handler': None, 'return_code': None, 'return_status': None } # email_msgs = [] # email_attchms = [] try: # print('--------->Process file {}'.format(req_path)) mlog.info( 'Request file {} was selected for processing.'.format( req_path)) # save timestamp of beginning of the file processing ts = time.strftime("%Y%m%d_%H%M%S", time.localtime()) req_obj = Request(req_path, m_cfg) if req_obj and req_obj.loaded: # proceed processing request mlog.info( 'Submission request loading status: Success. Submission request file: "{}".' .format(req_path)) mlog.info( 'Loading local and project related configs for processing the request.' ) req_obj.load_request_configuration() if not req_obj.error.exist(): mlog.info( 'Local config files were loaded with no errors, proceeding to process ' 'the request file.') req_obj.process_request() else: mlog.info( 'Errors were reported during loading local config files. Aborting processing ' 'this request.') mlog.info( 'Processing of Submission request was finished for {}' .format(req_path)) req_proc_cnt += 1 # print (req_obj.logger._cache) if hasattr(req_obj.logger, '_cache' ): #verify that _cache attribute is present # check if any warning were recorded to the log file and set a flag log_warnings if 30 in req_obj.logger._cache and req_obj.logger._cache[ 30]: log_warnings = True # else: # log_warnings = False else: mlog.warning( 'The current logger object has no "_cache" attribute - thus cannot determine ' 'if any Warnings were reported during the process.' ) # identify if any errors were identified and set status variable accordingly if not req_obj.error.exist(): if not req_obj.disqualified_sub_aliquots: # no disqualified sub-aliquots present if not log_warnings: fl_status = 'OK' _str = 'Processing status: "{}". Submission Request: {}'.format( fl_status, req_path) # errors_present = 'OK' else: fl_status = 'OK with Warnings' _str = 'Processing status: "{}". Submission Request: {}'.format( fl_status, req_path) else: # some disqualified sub-aliquots are presetn fl_status = 'OK with Disqualifications' _str = 'Processing status: "{}". Submission Request: {}'.format( fl_status, req_path) if not errors_present == 'ERROR': errors_present = 'DISQUALIFY' else: fl_status = 'ERROR' _str = 'Processing status: "{}". 
Check processing log file for this request: {}' \ .format(fl_status, req_obj.logger.handlers[0]) errors_present = 'ERROR' if fl_status == "OK": mlog.info(_str) # if transfer on completion was requested through the command line argument if gc.TRANSFER_ON_COMPLETION: # update transfer details dictionary with the path to the transfer file transfer_details['transfer_path'] = \ Path(req_obj.submission_package.submission_dir) / 'transfer_script.sh' transfers.append( transfer_details ) # add transfer details to transfers list mlog.info( 'Since the last request was processed with "{}" status and transfer on ' 'completion was requested ("--execute_transfer" argument was set to "yes"), ' 'the following path was put in queue for execution: ' '{}'.format(fl_status, transfer_details['transfer_path'])) else: mlog.warning(_str) # if transfer on completion was requested through the command line argument if gc.TRANSFER_ON_COMPLETION: mlog.info( 'The transfer on completion request ("--execute_transfer" argument was set to ' '"yes") will be ignored since the last request was processed with "{}" status.' .format(fl_status)) processed_dir = Path(processed_folder_name) req_processed_name = ts + '_' + fl_status + '_' + req_file file_name_new_path = cm.move_file_to_processed( req_path, req_processed_name, processed_dir, req_obj.logger, req_obj.error) if file_name_new_path: mlog.info( 'Processed Submission request "{}" was moved and renamed as: "{}"' .format(req_path, processed_dir / req_processed_name)) else: mlog.warning( 'Moving the processed request "{}" was not successful due to some errors ' 'reported in the request\'s log file {}.'.format( req_path, req_obj.log_handler.baseFilename)) # deactivate the current Request logger deactivate_logger_common(req_obj.logger, req_obj.log_handler) if req_obj.submission_package and req_obj.submission_package.submission_dir: # save transfer path to a local variable transfer_path = Path( req_obj.submission_package.submission_dir ) / 'transfer_script.sh' else: transfer_path = None # preps for email notification email_msgs.append(( '-------------------------------------<br/>' 'Requested project: {}'.format(req_obj.project) + '<br/>Requested Experiment: {}.'.format( req_obj.experiment_id) + ('<br/>Request file <br/>{} <br/> was processed and moved/renamed to <br/> {}.' .format(req_path, processed_dir / req_processed_name) if file_name_new_path else '<br/> Request file <br/>{} <br/> was processed but <font color="red">NOT moved due ' 'to some errors</font> reported in the request\'s log file.' 
.format(req_path)) +
                            '<br/> <b>Errors summary:</b> {}'
                            '<br/> <b>Warning(s) reported:</b> {}'
                            '<br/> <i>Log file location: <br/>{}</i>'
                            '<br/> Submission package location:<br/>{}'
                            '<br/> Data source location:<br/>{}'
                            '<br/> Processed Aliquots:<br/>{}'
                            '<br/> Disqualified Aliquots (if present, see the log file for more details):<br/>{}'
                            '<br/> A request file for re-processing Disqualified Aliquots was prepared in:<br/>{}'
                            '<br/> Automatic data transferring: {}'
                            '<br/> Command line to run data transferring manually: <br/> {}'
                            ''.format(
                                '<font color="red">Check Errors in the log file.</font>'
                                if req_obj.error.exist() else
                                '<font color="green">No Errors</font> ',
                                '<font color="red">Yes - check the log file.</font>'
                                if log_warnings else 'No',
                                req_obj.log_handler.baseFilename,
                                req_obj.submission_package.submission_dir
                                if req_obj.submission_package else 'N/A',
                                req_obj.attachments.data_loc
                                if req_obj.attachments else 'N/A',
                                req_obj.qualified_aliquots
                                if req_obj.qualified_aliquots else 'None',
                                [
                                    val for val in
                                    req_obj.disqualified_sub_aliquots.keys()
                                ] if req_obj.disqualified_sub_aliquots else 'None',
                                req_obj.disqualified_request_path,
                                '<font color="green">Performed.</font> '
                                'Additional email should be sent upon data transfer completion.'
                                if len(str(transfer_details['transfer_path']).strip()) > 0
                                else 'Not performed.',
                                str(
                                    Path(req_obj.submission_package.submission_dir) /
                                    'transfer_script.sh')
                                if req_obj.submission_package else 'N/A')))
                        email_attchms.append(req_obj.log_handler.baseFilename)

                        # print ('email_msgs = {}'.format(email_msgs))

                        req_obj = None

                except Exception as ex:
                    # report an error to the log file and proceed to the next file.
                    mlog.error(
                        'Error "{}" occurred during processing file: {}\n{} '.
                        format(ex, req_path, traceback.format_exc()))
                    raise

        mlog.info('Number of processed Submission requests = {}'.format(
            req_proc_cnt))

        if req_proc_cnt > 0:
            # collect final details and send an email summarizing the processing results
            email_subject = 'processing of Submission Requests '
            if errors_present == 'OK':
                if not log_warnings:
                    email_subject = 'SUCCESSFUL ' + email_subject
                else:
                    email_subject = 'SUCCESSFUL (with Warnings) ' + email_subject
            elif errors_present == 'DISQUALIFY':
                email_subject = 'SUCCESSFUL (with disqualifications) ' + email_subject
            else:
                email_subject = 'ERROR(s) present during ' + email_subject

            email_body = (
                'Number of requests processed: {}.'.format(req_proc_cnt) +
                '<br/><br/>' + '<br/><br/>'.join(email_msgs))

            # print ('email_subject = {}'.format(email_subject))
            # print('email_body = {}'.format(email_body))

            try:
                if m_cfg.get_value('Email/send_emails'):
                    email.send_yagmail(
                        emails_to=m_cfg.get_value('Email/sent_to_emails'),
                        subject=email_subject,
                        message=email_body,
                        main_conf=m_cfg
                        # attachments are intentionally not added, since some log files go over the 25GB limit
                        # and fail email sending
                        # ,attachment_path=email_attchms
                    )
            except Exception as ex:
                # report unexpected error during sending emails to a log file and continue
                _str = 'Unexpected Error "{}" occurred during an attempt to send email upon ' \
                       'finishing processing "{}" study: {}\n{} ' \
                    .format(ex, req_path, os.path.abspath(__file__), traceback.format_exc())
                mlog.critical(_str)

        # perform transfers, if anything qualifies for it
        if transfers and len(transfers) > 0:
            transfer_status_checking_delay = m_cfg.get_value(
                'General/transfer_status_checking_delay')
            if transfer_status_checking_delay and str(
                    transfer_status_checking_delay).isnumeric():
                if transfer_status_checking_delay > 0:
                    pass
                else:
                    transfer_status_checking_delay = None
            else:
transfer_status_checking_delay = None mlog.info( 'Starting processing requested transfers. Total count: {} transfers.' .format(len(transfers))) # process all collected transfer requests cm.process_transfers(transfers, mlog, transfer_status_checking_delay) # assess results of the transfer processing transfer_ok = 0 transfer_err = 0 transfer_nd = 0 for transfer in transfers: if transfer['return_status']: if transfer['return_status'][:2] == 'OK': transfer_ok += 1 elif transfer['return_status'][:5] == 'ERROR': transfer_err += 1 else: transfer_nd += 1 else: transfer_nd += 1 _str = 'Finish processing transfers with the following statuses: "OK" - {} transfer(s), "ERROR" - {} ' \ 'transfer(s)'.format(transfer_ok, transfer_err) if transfer_nd > 0: _str = _str + ', "ND" - {}'.format(transfer_nd) mlog.info(_str) # send email with the status of the transfers if transfers and len(transfers) > 0: if transfer_err > 0: email_subject = 'Errors produced during automated transfer(s) of prepared Submission Request(s)' else: email_subject = 'Completion of automated transfer(s) of prepared Submission Request(s)' email_transfer_msgs = [] for transfer in transfers: email_transfer_msgs.append( ('Transfer process for the request file: "{}" ' '<br/>Transfer script file:<br/>{}' '<br/>Completion status:<br/>{}'.format( transfer['request_file'], transfer['transfer_path'], transfer['return_status']))) email_body = ( 'Summary of transfer of prepared submissions:' '<br/>Total count of completed transfers: {}. ' '<br/>Status "OK": {} transfer(s)' '<br/>Status "ERROR": {} transfer(s)' '<br/>Status "Not Defined": {} transfer(s)' '<br/><br/>The following are details for each performed transfer:' '<br/><br/>'.format( len(transfers), '<font color="green">' + str(transfer_ok) + '</font>' if transfer_ok > 0 else transfer_ok, '<font color="red">' + str(transfer_err) + '</font>' if transfer_err > 0 else transfer_err, transfer_nd) + '<br/><br/>'.join(email_transfer_msgs)) try: if m_cfg.get_value('Email/send_emails'): email.send_yagmail(emails_to=m_cfg.get_value( 'Email/sent_to_emails'), subject=email_subject, message=email_body, main_conf=m_cfg) except Exception as ex: # report unexpected error during sending emails to a log file and continue _str = 'Unexpected Error "{}" occurred during an attempt to send email upon ' \ 'finishing automated transfers. \n{} '\ .format(ex, traceback.format_exc()) mlog.critical(_str) except Exception as ex: # report unexpected error to log file _str = 'Unexpected Error "{}" occurred during processing file: {}\n{} ' \ .format(ex, os.path.abspath(__file__), traceback.format_exc()) mlog.critical(_str) raise sys.exit()
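
# A compact sketch of the transfer-status tallying performed in process_submission above:
# statuses are grouped by their "OK"/"ERROR" prefix, with everything else counted as not
# defined ("ND"). The function name and the sample data are illustrative only.
def tally_transfer_statuses(transfers):
    counts = {'OK': 0, 'ERROR': 0, 'ND': 0}
    for transfer in transfers:
        status = transfer.get('return_status') or ''
        if status.startswith('OK'):
            counts['OK'] += 1
        elif status.startswith('ERROR'):
            counts['ERROR'] += 1
        else:
            counts['ND'] += 1
    return counts

# illustrative usage
print(tally_transfer_statuses([{'return_status': 'OK: completed'},
                               {'return_status': 'ERROR: timeout'},
                               {'return_status': None}]))
# -> {'OK': 1, 'ERROR': 1, 'ND': 1}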