def load_project_config_into_main(self, project):
    # load the project specific "project_config" config file
    cfg_project = ConfigData(
        gc.CONFIG_FILE_PROJECT.replace('{project}', project))
    if cfg_project.loaded:
        # if cfg_project was loaded, update it with the environment specific
        # settings (from the project_location config)
        cfg_project_updated = self.update_cfg_dictionary_with_location_details(
            gc.CONFIG_FILE_PROJECT_LOCATION, self.project,
            cfg_project.get_whole_dictionary())
        # update the main config with the outcome of the previous updates
        self.conf_main.update(cfg_project_updated)
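
# --- Illustrative sketch (not part of the pipeline) ---------------------------
# The layering above merges configs in this order: the project config is first
# overridden by the project-location config, and the merged result then
# overrides the main config. A minimal demo with plain dicts (assuming the
# ConfigData update semantics mirror dict.update for matching keys):
def _demo_config_layering():
    conf_main = {'log_level': 'INFO', 'requests': '/data/requests'}
    cfg_project = {'log_level': 'DEBUG', 'tar_approach': 'gnu'}
    cfg_project_location = {'tar_approach': 'bsd'}  # environment specific override

    cfg_project.update(cfg_project_location)  # location settings override project settings
    conf_main.update(cfg_project)             # merged project result overrides main
    # conf_main is now:
    # {'log_level': 'DEBUG', 'requests': '/data/requests', 'tar_approach': 'bsd'}
    return conf_main
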
def update_cfg_dictionary_with_location_details(self, location_path, project, cfg_to_update):
    cfg_location = ConfigData(location_path.replace('{project}', project))
    if cfg_location.loaded:
        self.logger.info(
            'Local config file "{}" was loaded and is being used.'.format(
                cfg_location.cfg_path))
        cfg_to_update = cm.update_dictionary_matching_keys(
            cfg_to_update, cfg_location.get_whole_dictionary())
    else:
        _str = 'Local config file "{}" was NOT loaded. Aborting processing of the current request file.' \
            .format(cfg_location.cfg_path)
        self.logger.error(_str)
        self.error.add_error(_str)
    return cfg_to_update
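
# --- Illustrative sketch (assumption) ------------------------------------------
# cm.update_dictionary_matching_keys() lives in the cm helper module and is not
# shown here. Based on its name and how it is used above, it plausibly copies
# values from the source dict only for keys that already exist in the target,
# recursing into nested sections. A minimal stand-alone version of that assumed
# behavior:
def _update_dictionary_matching_keys_sketch(target, source):
    for key, value in source.items():
        if key not in target:
            continue  # unlike dict.update(), never introduce new keys
        if isinstance(target[key], dict) and isinstance(value, dict):
            # merge nested sections key by key
            _update_dictionary_matching_keys_sketch(target[key], value)
        else:
            target[key] = value
    return target
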
def process_submission():
    # load the main config file
    m_cfg = ConfigData(gc.CONFIG_FILE_MAIN)
    if not m_cfg.loaded:
        print('Specified main config file ({}) was not loaded. Aborting execution.'
              .format(gc.CONFIG_FILE_MAIN))
        return 1
    # load the location config file (with values specific to the current location)
    cfg_location = ConfigData(gc.CONFIG_FILE_LOCATION)
    if not cfg_location.loaded:
        print('Specified location config file ({}) was not loaded. Aborting execution.'
              .format(gc.CONFIG_FILE_LOCATION))
        return 1
    # if both configs were loaded, update the main config with the location config
    m_cfg.update(cfg_location.get_whole_dictionary())

    # assign values
    common_logger_name = gc.MAIN_LOG_NAME  # m_cfg.get_value('Logging/main_log_name')
    # get path configuration values
    logging_level = m_cfg.get_value('Logging/main_log_level')
    # path to the folder where all new request files will be posted
    requests_loc = m_cfg.get_value('Location/requests')
    gc.DISQUALIFIED_REQUESTS = m_cfg.get_value('Location/requests_disqualified')

    # get path configuration values and save them to the global_const module
    # path to the folder where all application level log files will be stored (one file per run)
    gc.APP_LOG_DIR = m_cfg.get_value('Location/app_logs')
    # path to the folder where all log files for processing request files will be stored
    # (one file per request)
    gc.REQ_LOG_DIR = m_cfg.get_value('Location/request_logs')
    # path to the folder where all processed (and renamed) requests will be stored
    gc.REQ_PROCESSED_DIR = m_cfg.get_value('Location/requests_processed')
    # path to the folder where created submission packages will be located; one package sub-folder per request
    # gc.OUTPUT_PACKAGES_DIR = m_cfg.get_value('Location/output_packages')
    # tarball approach to be used for the current deployment
    gc.TARBALL_APPROACH = m_cfg.get_value('Tar_ball/approach')
    # flag to save the calculated md5sum to a physical file
    gc.TARBALL_SAVE_MD5SUM_FILE = m_cfg.get_value('Tar_ball/save_md5sum_file')
    # tarball ignore directories
    ignore_dirs = m_cfg.get_value('Tar_ball/ignore_dirs')
    if ignore_dirs:
        # update the default ignore_dirs value with the value from the config file
        gc.TARBALL_IGNORE_DIRS = ignore_dirs

    log_folder_name = gc.APP_LOG_DIR  # gc.LOG_FOLDER_NAME
    processed_folder_name = gc.REQ_PROCESSED_DIR  # gc.PROCESSED_FOLDER_NAME

    prj_wrkdir = os.path.dirname(os.path.abspath(__file__))

    email_msgs = []
    email_attchms = []
    transfers = []

    # requests_loc = 'E:/MounSinai/MoTrPac_API/ProgrammaticConnectivity/MountSinai_metadata_file_loader/DataFiles'
    requests_path = Path(requests_loc)

    # get the current location of the script and create the Log folder
    # if a relative path was provided, convert it to an absolute path based on the application working dir
    if not os.path.isabs(log_folder_name):
        logdir = Path(prj_wrkdir) / log_folder_name
    else:
        logdir = Path(log_folder_name)
    # logdir = Path(prj_wrkdir) / log_folder_name  # 'logs'
    lg_filename = time.strftime("%Y%m%d_%H%M%S", time.localtime()) + '.log'

    lg = setup_logger_common(common_logger_name, logging_level,
                             logdir, lg_filename)  # logging_level
    mlog = lg['logger']
    log_warnings = False

    mlog.info('Start processing submission requests in "{}"'.format(requests_path))

    try:
        (_, _, requests) = next(walk(requests_path))
        # print('Study requests: {}'.format(requests))

        mlog.info('Submission requests to be processed (count = {}): {}'.format(
            len(requests), requests))

        req_proc_cnt = 0
        errors_present = 'OK'
        req_path = ''
        # '~$' filters out the temp files created when an excel file is open
        requests = [file for file in requests if not file.startswith('~$')]
        for req_file in requests:
            if req_file.endswith(('xlsx', 'xls')):
                req_path = Path(requests_path) / req_file
                # transfer_path = ''
                # set the default transfer details
                transfer_details = {
                    'transfer_path': '',
                    'request_file': req_file,
                    'process_handler': None,
                    'return_code': None,
                    'return_status': None
                }
                # email_msgs = []
                # email_attchms = []
                try:
                    # print('--------->Process file {}'.format(req_path))
                    mlog.info('Request file {} was selected for processing.'.format(req_path))
                    # save a timestamp of the beginning of the file processing
                    ts = time.strftime("%Y%m%d_%H%M%S", time.localtime())

                    req_obj = Request(req_path, m_cfg)
                    if req_obj and req_obj.loaded:
                        # proceed with processing the request
                        mlog.info('Submission request loading status: Success. Submission request file: "{}".'
                                  .format(req_path))
                        mlog.info('Loading local and project related configs for processing the request.')
                        req_obj.load_request_configuration()
                        if not req_obj.error.exist():
                            mlog.info('Local config files were loaded with no errors, proceeding to process '
                                      'the request file.')
                            req_obj.process_request()
                        else:
                            mlog.info('Errors were reported during loading local config files. Aborting processing '
                                      'of this request.')
                        mlog.info('Processing of the Submission request was finished for {}'.format(req_path))

                        req_proc_cnt += 1

                        # print(req_obj.logger._cache)
                        if hasattr(req_obj.logger, '_cache'):  # verify that the _cache attribute is present
                            # check if any warnings were recorded to the log file and set the log_warnings flag
                            if 30 in req_obj.logger._cache and req_obj.logger._cache[30]:
                                log_warnings = True
                            # else:
                            #     log_warnings = False
                        else:
                            mlog.warning('The current logger object has no "_cache" attribute - thus cannot determine '
                                         'if any Warnings were reported during the process.')

                        # identify if any errors were reported and set the status variable accordingly
                        if not req_obj.error.exist():
                            if not req_obj.disqualified_sub_aliquots:
                                # no disqualified sub-aliquots present
                                if not log_warnings:
                                    fl_status = 'OK'
                                    _str = 'Processing status: "{}". Submission Request: {}'.format(fl_status, req_path)
                                    # errors_present = 'OK'
                                else:
                                    fl_status = 'OK with Warnings'
                                    _str = 'Processing status: "{}". Submission Request: {}'.format(fl_status, req_path)
                            else:
                                # some disqualified sub-aliquots are present
                                fl_status = 'OK with Disqualifications'
                                _str = 'Processing status: "{}". Submission Request: {}'.format(fl_status, req_path)
                                if not errors_present == 'ERROR':
                                    errors_present = 'DISQUALIFY'
                        else:
                            fl_status = 'ERROR'
                            _str = 'Processing status: "{}". Check processing log file for this request: {}' \
                                .format(fl_status, req_obj.logger.handlers[0])
                            errors_present = 'ERROR'

                        if fl_status == "OK":
                            mlog.info(_str)
                            # if transfer on completion was requested through the command line argument
                            if gc.TRANSFER_ON_COMPLETION:
                                # update the transfer details dictionary with the path to the transfer file
                                transfer_details['transfer_path'] = \
                                    Path(req_obj.submission_package.submission_dir) / 'transfer_script.sh'
                                transfers.append(transfer_details)  # add transfer details to the transfers list
                                mlog.info('Since the last request was processed with "{}" status and transfer on '
                                          'completion was requested ("--execute_transfer" argument was set to "yes"), '
                                          'the following path was put in queue for execution: '
                                          '{}'.format(fl_status, transfer_details['transfer_path']))
                        else:
                            mlog.warning(_str)
                            # if transfer on completion was requested through the command line argument
                            if gc.TRANSFER_ON_COMPLETION:
                                mlog.info('The transfer on completion request ("--execute_transfer" argument was set '
                                          'to "yes") will be ignored since the last request was processed with "{}" '
                                          'status.'.format(fl_status))

                        processed_dir = Path(processed_folder_name)
                        req_processed_name = ts + '_' + fl_status + '_' + req_file
                        file_name_new_path = cm.move_file_to_processed(
                            req_path, req_processed_name, processed_dir,
                            req_obj.logger, req_obj.error)
                        if file_name_new_path:
                            mlog.info('Processed Submission request "{}" was moved and renamed as: "{}"'
                                      .format(req_path, processed_dir / req_processed_name))
                        else:
                            mlog.warning('Moving the processed request "{}" was not successful due to some errors '
                                         'reported in the request\'s log file {}.'
                                         .format(req_path, req_obj.log_handler.baseFilename))

                        # deactivate the current Request logger
                        deactivate_logger_common(req_obj.logger, req_obj.log_handler)

                        if req_obj.submission_package and req_obj.submission_package.submission_dir:
                            # save the transfer path to a local variable
                            transfer_path = Path(req_obj.submission_package.submission_dir) / 'transfer_script.sh'
                        else:
                            transfer_path = None

                        # preps for the email notification
                        email_msgs.append((
                            '-------------------------------------<br/>'
                            'Requested project: {}'.format(req_obj.project)
                            + '<br/>Requested Experiment: {}.'.format(req_obj.experiment_id)
                            + ('<br/>Request file <br/>{} <br/> was processed and moved/renamed to <br/> {}.'
                               .format(req_path, processed_dir / req_processed_name)
                               if file_name_new_path
                               else '<br/> Request file <br/>{} <br/> was processed but <font color="red">NOT moved due '
                                    'to some errors</font> reported in the request\'s log file.'
                                    .format(req_path))
                            + '<br/> <b>Errors summary:</b> {}'
                              '<br/> <b>Warning(s) reported:</b> {}'
                              '<br/> <i>Log file location: <br/>{}</i>'
                              '<br/> Submission package location:<br/>{}'
                              '<br/> Data source location:<br/>{}'
                              '<br/> Processed Aliquots:<br/>{}'
                              '<br/> Disqualified Aliquots (if present, see the log file for more details):<br/>{}'
                              '<br/> A request file for re-processing Disqualified Aliquots was prepared in:<br/>{}'
                              '<br/> Automatic data transferring: {}'
                              '<br/> Command line to run data transferring manually: <br/> {}'
                              ''.format(
                                  '<font color="red">Check Errors in the log file.</font>'
                                  if req_obj.error.exist() else '<font color="green">No Errors</font> ',
                                  '<font color="red">Yes - check the log file.</font>'
                                  if log_warnings else 'No',
                                  req_obj.log_handler.baseFilename,
                                  req_obj.submission_package.submission_dir
                                  if req_obj.submission_package else 'N/A',
                                  req_obj.attachments.data_loc
                                  if req_obj.attachments else 'N/A',
                                  req_obj.qualified_aliquots
                                  if req_obj.qualified_aliquots else 'None',
                                  [val for val in req_obj.disqualified_sub_aliquots.keys()]
                                  if req_obj.disqualified_sub_aliquots else 'None',
                                  req_obj.disqualified_request_path,
                                  '<font color="green">Performed.</font> '
                                  'Additional email should be sent upon data transfer completion.'
                                  if len(str(transfer_details['transfer_path']).strip()) > 0
                                  else 'Not performed.',
                                  str(Path(req_obj.submission_package.submission_dir) / 'transfer_script.sh')
                                  if req_obj.submission_package else 'N/A')))
                        email_attchms.append(req_obj.log_handler.baseFilename)
                        # print('email_msgs = {}'.format(email_msgs))

                    req_obj = None

                except Exception as ex:
                    # report an error to the log file and proceed to the next file
                    mlog.error('Error "{}" occurred during processing file: {}\n{} '
                               .format(ex, req_path, traceback.format_exc()))
                    raise

        mlog.info('Number of processed Submission requests = {}'.format(req_proc_cnt))

        if req_proc_cnt > 0:
            # collect final details and send an email about the results
            email_subject = 'processing of Submission Requests '
            if errors_present == 'OK':
                if not log_warnings:
                    email_subject = 'SUCCESSFUL ' + email_subject
                else:
                    email_subject = 'SUCCESSFUL (with Warnings) ' + email_subject
            elif errors_present == 'DISQUALIFY':
                email_subject = 'SUCCESSFUL (with disqualifications) ' + email_subject
            else:
                email_subject = 'ERROR(s) present during ' + email_subject

            email_body = ('Number of requests processed: {}.'.format(req_proc_cnt)
                          + '<br/><br/>'
                          + '<br/><br/>'.join(email_msgs))

            # print('email_subject = {}'.format(email_subject))
            # print('email_body = {}'.format(email_body))

            try:
                if m_cfg.get_value('Email/send_emails'):
                    email.send_yagmail(
                        emails_to=m_cfg.get_value('Email/sent_to_emails'),
                        subject=email_subject,
                        message=email_body,
                        main_conf=m_cfg
                        # attachments are not added, since some log files go over the 25MB limit and fail email sending
                        # , attachment_path=email_attchms
                    )
            except Exception as ex:
                # report an unexpected error during sending emails to the log file and continue
                _str = 'Unexpected Error "{}" occurred during an attempt to send email upon ' \
                       'finishing processing "{}" study: {}\n{} ' \
                    .format(ex, req_path, os.path.abspath(__file__), traceback.format_exc())
                mlog.critical(_str)

        # perform transfers, if anything qualifies for it
        if transfers and len(transfers) > 0:
            transfer_status_checking_delay = m_cfg.get_value('General/transfer_status_checking_delay')
            if transfer_status_checking_delay and str(transfer_status_checking_delay).isnumeric():
                # coerce to int so the comparison below is type-safe even if the config returns a string
                transfer_status_checking_delay = int(transfer_status_checking_delay)
                if transfer_status_checking_delay <= 0:
                    transfer_status_checking_delay = None
            else:
                transfer_status_checking_delay = None
            mlog.info('Starting processing requested transfers. Total count: {} transfers.'.format(len(transfers)))
            # process all collected transfer requests
            cm.process_transfers(transfers, mlog, transfer_status_checking_delay)

            # assess the results of the transfer processing
            transfer_ok = 0
            transfer_err = 0
            transfer_nd = 0
            for transfer in transfers:
                if transfer['return_status']:
                    if transfer['return_status'][:2] == 'OK':
                        transfer_ok += 1
                    elif transfer['return_status'][:5] == 'ERROR':
                        transfer_err += 1
                    else:
                        transfer_nd += 1
                else:
                    transfer_nd += 1
            _str = 'Finish processing transfers with the following statuses: "OK" - {} transfer(s), "ERROR" - {} ' \
                   'transfer(s)'.format(transfer_ok, transfer_err)
            if transfer_nd > 0:
                _str = _str + ', "ND" - {}'.format(transfer_nd)
            mlog.info(_str)

            # send an email with the status of the transfers
            if transfers and len(transfers) > 0:
                if transfer_err > 0:
                    email_subject = 'Errors produced during automated transfer(s) of prepared Submission Request(s)'
                else:
                    email_subject = 'Completion of automated transfer(s) of prepared Submission Request(s)'
                email_transfer_msgs = []
                for transfer in transfers:
                    email_transfer_msgs.append(
                        ('Transfer process for the request file: "{}" '
                         '<br/>Transfer script file:<br/>{}'
                         '<br/>Completion status:<br/>{}'.format(
                             transfer['request_file'],
                             transfer['transfer_path'],
                             transfer['return_status'])))

                email_body = ('Summary of transfer of prepared submissions:'
                              '<br/>Total count of completed transfers: {}. '
                              '<br/>Status "OK": {} transfer(s)'
                              '<br/>Status "ERROR": {} transfer(s)'
                              '<br/>Status "Not Defined": {} transfer(s)'
                              '<br/><br/>The following are details for each performed transfer:'
                              '<br/><br/>'.format(
                                  len(transfers),
                                  '<font color="green">' + str(transfer_ok) + '</font>'
                                  if transfer_ok > 0 else transfer_ok,
                                  '<font color="red">' + str(transfer_err) + '</font>'
                                  if transfer_err > 0 else transfer_err,
                                  transfer_nd)
                              + '<br/><br/>'.join(email_transfer_msgs))
                try:
                    if m_cfg.get_value('Email/send_emails'):
                        email.send_yagmail(
                            emails_to=m_cfg.get_value('Email/sent_to_emails'),
                            subject=email_subject,
                            message=email_body,
                            main_conf=m_cfg)
                except Exception as ex:
                    # report an unexpected error during sending emails to the log file and continue
                    _str = 'Unexpected Error "{}" occurred during an attempt to send email upon ' \
                           'finishing automated transfers. \n{} ' \
                        .format(ex, traceback.format_exc())
                    mlog.critical(_str)

    except Exception as ex:
        # report an unexpected error to the log file
        _str = 'Unexpected Error "{}" occurred during processing file: {}\n{} ' \
            .format(ex, os.path.abspath(__file__), traceback.format_exc())
        mlog.critical(_str)
        raise

    sys.exit()
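
# --- Illustrative sketch (assumption) ------------------------------------------
# cm.process_transfers() is implemented in the cm helper module and is not shown
# here. Based on the transfer_details dictionaries prepared in
# process_submission(), it plausibly launches each queued transfer_script.sh,
# waits for completion (checking at the configured delay), and fills in
# 'return_code'/'return_status' so the summary loop above can classify results
# as OK/ERROR/ND. A minimal stand-alone version of that assumed behavior:
import subprocess
import time as _time

def _process_transfers_sketch(transfers, logger, status_checking_delay=None):
    delay = status_checking_delay or 5  # seconds between status checks (assumed default)
    for transfer in transfers:
        logger.info('Launching transfer script: {}'.format(transfer['transfer_path']))
        transfer['process_handler'] = subprocess.Popen(['bash', str(transfer['transfer_path'])])
    for transfer in transfers:
        proc = transfer['process_handler']
        while proc.poll() is None:  # wait for completion, checking periodically
            _time.sleep(delay)
        transfer['return_code'] = proc.returncode
        transfer['return_status'] = 'OK' if proc.returncode == 0 \
            else 'ERROR (rc={})'.format(proc.returncode)
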
def match_inquiry_items_to_sources(self):
    cur_row = -1
    for inq_line in self.lines_arr:
        cur_row += 1  # increase the row counter
        if cur_row == self.header_row_num - 1:
            continue

        # program_code = str(inq_line[0])  # get the program code that must be the first column
        program_code = self.get_inquiry_value_by_field_name('program_code', inq_line)

        # create a local DictConfigData object and copy a dictionary object into it
        conf_dict = DictConfigData(None, self.conf_dict.get_dictionary_copy())
        # update the dictionary config (conf_dict) with the program specific settings loaded into conf_dict_program
        conf_dict_program_path = gc.CONFIG_FILE_DICTIONARY_PROGRAM.replace('{program}', program_code)
        conf_dict_program = ConfigData(conf_dict_program_path)
        conf_dict.update(conf_dict_program.get_whole_dictionary())

        # print(inq_study_path)
        bulk_location = self.get_inquiry_value_by_field_name('bulk_location', inq_line, False)
        assay = self.get_inquiry_value_by_field_name('assay', inq_line)
        sub_al = self.get_inquiry_value_by_field_name('sub-aliquot', inq_line, False)

        # inq_study_path = '/'.join([program_code, bulk_location, assay])
        inq_study_path = self.conf_main.get_value('Destination/study_path_template')
        inq_study_path = inq_study_path.replace('{program_code}', program_code)
        inq_study_path = inq_study_path.replace('{bulk_location}', bulk_location)
        inq_study_path = inq_study_path.replace('{assay}', assay)

        # check if the current sub-aliquot is part of the disqualified items array
        if self.disqualified_items and sub_al in self.disqualified_items.keys():
            # if the sub-aliquot was disqualified already, skip this line
            continue

        # identify the aliquot for the given sub-aliquot
        al = conf_dict.convert_sub_aliq_to_aliquot(sub_al, assay)  # aliquot for the current inquiry line

        match = False

        # get a reference to the DataSource object assigned to the current row
        if cur_row in self.inq_line_sources:
            cur_source = self.inq_sources[self.inq_line_sources[cur_row]]
        else:
            # if no data source was assigned to the current row, skip the row
            cur_source = None
            continue

        # check if any source types were disqualified during loading of the datasource
        if cur_source.disqualified_data_sources:
            # if at least one source of the datasource was disqualified, skip the row using this datasource
            # and disqualify the current sub-aliquot as well
            self.disqualify_inquiry_item(
                sub_al,
                'Datasource associated with this aliquot_id was marked as disqualified.',
                inq_line)
            continue

        # get a copy of the source type ids of the current datasource;
        # it will track the number of items found for each source type
        cur_source_types = copy.deepcopy(cur_source.source_types)

        # loop through the items of the source
        for src_item in cur_source.source_content_arr:
            match_out = False
            # attempt to match by the sub-aliquot
            match_out, match_details = \
                self.is_item_found_soft_match(sub_al, src_item['name'], src_item['soft_comparisions'], sub_al)
            if match_out:
                match = True
            # if the sub-aliquot match was not a success, attempt to match by the aliquot
            elif src_item['aliquot_match']:
                match_out, match_details = \
                    self.is_item_found_soft_match(al, src_item['name'], src_item['soft_comparisions'], sub_al)
                if match_out:
                    match = True

            # if a match was found using one of the above methods, record the item to inq_match_arr
            if match_out:
                # since a match was found, verify that the source path is accessible (except for web locations)
                web_loc = src_item['web_location']
                # real_path = os.path.realpath(src_item['path'])  # real path of the current item
                if web_loc or (not web_loc and os.path.exists(src_item['path'])):
                    item_details = {
                        'sub-aliquot': sub_al,
                        'study': inq_study_path,
                        # 'source': src_item,
                        'source_item_name': src_item['name'],
                        'target_subfolder': src_item['target_subfolder'],
                        'real_path': src_item['path'],
                        'target_copied_item_name': src_item['target_copied_item_name'],
                        'match_details': match_details,
                        'source_type_id': src_item['source_type_id'],
                        'obj_type': src_item['obj_type'],
                        'source_name_generic': cur_source.source_name_generic
                    }
                    self.inq_match_arr.append(item_details)
                    # record the source type id of the item to track the quantity of found matches per source type
                    cur_source_types[src_item['source_type_id']]['items_count'] += 1
                else:
                    self.disqualify_inquiry_item(
                        sub_al,
                        'A match was found, but the identified source path is not accessible. Match details: {}. '
                        'Source path: "{}". Real source path: "{}".'
                        .format(match_details, src_item['path'], src_item['path']),
                        inq_line)

        # report if no match was found and
        # verify that a match was found for each of the source types of the current datasource
        if not match:
            # no matches were found for the current datasource
            self.disqualify_inquiry_item(
                sub_al,
                'No matching items (files/folders) were found in the current data source.',
                inq_line)
        else:
            if not cur_source.allow_nomatch_per_sourcetype:
                # some matches were found; verify that a match was found for each of the source types
                for src_type in cur_source_types:
                    if cur_source_types[src_type]['items_count'] == 0:
                        # no matches were found for this source type
                        self.disqualify_inquiry_item(
                            sub_al,
                            'No matches were found for the "{}" source type id in the datasource.'
                            .format(src_type),
                            inq_line)
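
# --- Illustrative sketch (assumption) ------------------------------------------
# is_item_found_soft_match() is defined elsewhere in this class; judging by its
# arguments (a value to find, the source item name, and a 'soft_comparisions'
# setting), a "soft" match plausibly normalizes case and separators before
# comparing and returns a (found, details) pair like the calls above expect.
# A minimal stand-alone illustration of that idea (hypothetical, the real
# comparison rules may differ):
def _soft_match_sketch(needle, item_name, ignore_separators=True):
    def normalize(s):
        s = s.lower()
        if ignore_separators:
            for ch in ('-', '_', ' '):
                s = s.replace(ch, '')  # treat dashes/underscores/spaces as equivalent
        return s
    found = normalize(needle) in normalize(item_name)
    details = 'soft match of "{}" against "{}": {}'.format(needle, item_name, found)
    return found, details
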
def process_inquiry_sources(self):
    cur_row = 0
    for inq_line in self.lines_arr:
        if cur_row == self.header_row_num - 1:
            # skip the header row
            cur_row += 1
            continue

        # get the program code assigned to the current row
        program_code = self.get_inquiry_value_by_field_name('program_code', inq_line)
        # get the assay assigned to the current row
        assay = self.get_inquiry_value_by_field_name('assay', inq_line)
        # get the source id assigned to the current row
        source_id = self.get_inquiry_value_by_field_name('source_id', inq_line)

        # get the source config file paths
        # 2 values are saved in a tuple: the program specific path and the default one;
        # if the program specific path does not exist, the default will be used
        cfg_source_path = (
            # configuration path for the current program by name
            gc.CONFIG_FILE_SOURCE_PATH
            .replace('{program}', program_code)
            .replace('{assay}', assay)
            .replace('{source_id}', source_id),
            # configuration path for the default program (used if no program specific path is present)
            gc.CONFIG_FILE_SOURCE_PATH
            .replace('{program}', 'default')
            .replace('{assay}', assay)
            .replace('{source_id}', source_id)
        )
        # get the source location config file path
        cfg_source_location_path = gc.CONFIG_FILE_SOURCE_LOCATION_PATH.replace('{source_id}', source_id)

        # attempt to load the configuration from the program specific path
        cfg_source = ConfigData(Path(cfg_source_path[0]))
        if not cfg_source.loaded:
            # if the config was not loaded from the program specific path, load the default one
            cfg_source = ConfigData(Path(cfg_source_path[1]))

        if cfg_source.loaded:
            # proceed here if the source config was loaded
            # load the source location config with location specific settings for the current source
            cfg_source_location = ConfigData(Path(cfg_source_location_path))
            if cfg_source_location.loaded:
                # if the source location config was loaded, update the cfg_source config with it
                cfg_source.update(cfg_source_location.get_whole_dictionary())

            # get the unique id of the datasource and check if the same id was used already; reuse it in that case
            inq_line_datasource_id = self.get_inquiry_line_datasource_id(inq_line)
            self.logger.info('Current inquiry row #{} was identified with the following data source id: {}'
                             .format(cur_row, inq_line_datasource_id))
            # assign the datasource id (inq_line_datasource_id) to the current inquiry line
            self.inq_line_sources[cur_row] = inq_line_datasource_id
            if inq_line_datasource_id in self.inq_sources:
                # reuse the existing datasource
                self.logger.info('The data source id for the current inquiry row #{} was identified as an '
                                 'earlier retrieved one (for this or another row) and will be re-used for '
                                 'the current row.'.format(cur_row))
            else:
                # create a new datasource object
                inq_line_datasource = DataSource(self, cfg_source, inq_line, inq_line_datasource_id)
                self.inq_sources[inq_line_datasource_id] = inq_line_datasource
        else:
            sub_al = self.get_inquiry_value_by_field_name('sub-aliquot', inq_line, False)
            _str = 'Datasource config file for the row #{} (sub_aliquot: {}) cannot be loaded. ' \
                   'None of the files expected to exist is accessible: {}' \
                .format(cur_row, sub_al, ' | '.join(cfg_source_path))
            self.logger.warning(_str)
            self.disqualify_inquiry_item(sub_al, _str, cur_row)  # TODO: verify if inq_line should be used instead of cur_row

        cur_row += 1
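
# --- Illustrative sketch (assumption) ------------------------------------------
# get_inquiry_line_datasource_id() is defined elsewhere; the reuse logic above
# only requires it to return the same id for rows that resolve to the same
# source definition, so a DataSource is built once and shared. One plausible
# scheme is a key built from the fields that select the source config
# (hypothetical; the real derivation may differ):
def _inquiry_line_datasource_id_sketch(program_code, assay, source_id):
    # rows with identical (program, assay, source) triples share one DataSource
    return '{}|{}|{}'.format(program_code.lower(), assay.lower(), source_id.lower())
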
def validate_inquiry_file(self):
    self.logger.info('Start validating the current inquiry file "{}".'.format(self.filepath))
    row_count = 1
    failed_cnt = 0
    valid_aliquot_flag = self.conf_main.get_value('Validate/aliquot_id_vs_manifest')
    valid_inquiry_values_flag = self.conf_main.get_value('Validate/inquiry_values_vs_dictionary')
    inquiry_min_number_columns = self.conf_main.get_value('Validate/inquiry_min_number_columns')
    inquiry_validate_number_columns = self.conf_main.get_value('Validate/inquiry_validate_number_columns')
    if not inquiry_min_number_columns or not isinstance(inquiry_min_number_columns, int):
        inquiry_min_number_columns = 6  # set a default value if it is not provided in the config file
    if not inquiry_validate_number_columns or not isinstance(inquiry_validate_number_columns, int):
        inquiry_validate_number_columns = 6  # set a default value if it is not provided in the config file

    for row in self.lines_arr:
        if row_count == self.header_row_num:  # 1
            # skip the header row
            row_count += 1
            continue

        sub_al = 'ND'  # set a default value
        assay = ''  # set a default value
        valid_aliquot_performed = False
        skip_final_check = False

        # check that the inquiry file contains the min number of columns
        if len(row) < inquiry_min_number_columns:
            # disqualify the current inquiry file
            _str = 'The current inquiry file has {} columns while {} are expected and will be disqualified.' \
                .format(len(row), inquiry_min_number_columns)
            self.error.add_error(_str)
            self.logger.error(_str)
            return

        # create a local DictConfigData object and copy a dictionary object into it
        conf_dict = DictConfigData(None, self.conf_dict.get_dictionary_copy())

        # get the sub-aliquot value before looping through all fields, so it can be used for reporting errors;
        # also get the program_code assigned to the row
        program_code = self.get_inquiry_value_by_field_name('program_code', row)
        sub_al = self.get_inquiry_value_by_field_name('sub-aliquot', row, False)

        # validate the program_code value
        if conf_dict.key_exists_in_dict(str(program_code).lower(), 'program_code'):
            # update the dictionary config (conf_dict) with the program specific settings loaded into conf_dict_program
            conf_dict_program_path = gc.CONFIG_FILE_DICTIONARY_PROGRAM \
                .replace('{program}', conf_dict.get_dict_value(str(program_code).lower(), 'program_code'))
            conf_dict_program = ConfigData(conf_dict_program_path)
            conf_dict.update(conf_dict_program.get_whole_dictionary())
        else:
            _str = 'Unexpected value "{}" was provided for "program_code" (line #{})' \
                .format(program_code, row_count)
            self.logger.critical(_str)
            # disqualify the inquiry file row, since an unexpected value was provided
            self.disqualify_inquiry_item(sub_al, _str, row)
            failed_cnt += 1
            skip_final_check = True

        if not skip_final_check:
            # go through the fields and validate the provided values
            for i in range(len(row)):
                if i + 1 > inquiry_validate_number_columns:
                    # if the number of columns in the inquiry file > expected maximum, exit the loop
                    break
                col_category = conf_dict.get_dict_value(str(i + 1), 'inquiry_file_structure')
                if col_category in ('program_code', 'sub-aliquot'):
                    # no checking is needed for the listed fields, proceed further
                    continue
                elif col_category == 'db_center_id':
                    # get the center id value and validate it
                    db_center_id = row[i]
                    # validate the center_code or center_id value
                    self.logger.info('Start validation of center value "{}" provided for the current row'
                                     .format(db_center_id))
                    db = DBAccess(self.logger, self.conf_main, self.error)  # create a DBAccess object
                    db.open_connection()
                    # test the center value assuming a center code was provided
                    dataset = db.validate_center_code(db_center_id, program_code, 'code', 'code')
                    _str_err_out1, center_id_out1 = self.check_validation_dataset_outcome(
                        dataset, 'center_id', 'center_code')
                    if center_id_out1:
                        # a center id was returned, meaning the center was validated fine
                        db_center_id = center_id_out1
                    else:
                        # if the center code was not validated at the first attempt,
                        # validate it assuming a center id was given
                        dataset = db.validate_center_code(db_center_id, program_code, 'id', 'code')
                        _str_err_out2, center_id_out2 = self.check_validation_dataset_outcome(
                            dataset, 'center_id', 'center_id')
                        if center_id_out2:
                            # the center id was validated at the 2nd attempt, ignore the 1st validation attempt
                            db_center_id = center_id_out2
                        else:
                            # both center validation attempts failed, report both failures
                            _str = 'Provided center value cannot be interpreted as either a code or an id; ' \
                                   'here are both validation outcomes: ' + \
                                   ' | '.join([_str_err_out1, _str_err_out2])
                            self.logger.warning(_str)
                            self.disqualify_inquiry_item(sub_al, _str, row)
                            failed_cnt += 1
                            skip_final_check = True
                            break

                    # if aliquot validation is required, validate the sub-aliquot value using the db_center_id value
                    if valid_aliquot_flag:
                        # aliquot id validation is required
                        valid_aliquot_performed = True  # flag that the aliquot validation was done
                        if isinstance(db_center_id, int):  # db_center_id.isnumeric():
                            # since the center is numeric, proceed here
                            # get the aliquot id based on the earlier verified assay value and the given sub_aliquot id
                            aliquot = conf_dict.convert_sub_aliq_to_aliquot(sub_al, assay)
                            valid_status, valid_desc = self.db_access.validate_aliquot_id(aliquot, db_center_id)
                            if valid_status != 'OK':
                                # disqualify the inquiry file row, since the returned status is not OK
                                _str = 'No match was found for the aliquot id "{}" (row #{}) in the manifest dataset ' \
                                       'of the database. DB response => Status: "{}"; Description: "{}".' \
                                    .format(aliquot, row_count, valid_status, valid_desc)
                                self.logger.warning(_str)
                                self.disqualify_inquiry_item(sub_al, _str, row)
                                failed_cnt += 1
                                skip_final_check = True
                                break
                        else:
                            # report an unexpected center id value
                            _str = 'Unexpected value "{}" was provided for "db_center_id" (line #{}, column #{}). This is a ' \
                                   'critical error because this value is required (based on the configuration setting ' \
                                   '"Validate/aliquot_id_vs_manifest") to validate the provided aliquot id "{}"' \
                                .format(db_center_id, row_count, i + 1, sub_al)
                            self.logger.warning(_str)
                            # disqualify the inquiry file row, since an unexpected value was provided
                            self.disqualify_inquiry_item(sub_al, _str, row)
                            failed_cnt += 1
                            skip_final_check = True
                            # break
                    else:
                        self.logger.info('Validating of the provided aliquot_id "{}" is not required based on the '
                                         'value of the config parameter "Validate/aliquot_id_vs_manifest": "{}".'
                                         .format(sub_al, valid_aliquot_flag))
                else:
                    if col_category == 'assay':
                        assay = row[i].strip().lower()  # save the assay value to a dedicated variable
                    if valid_inquiry_values_flag:
                        # validation of the inquiry values vs the dictionary is required
                        validate_values = []
                        validate_categories = []
                        if col_category == 'bulk_location':
                            # get the inquiry_file_structure_bulk_location value
                            bulk_value_delim = conf_dict.get_dict_value(
                                'inquiry_file_structure_bulk_location_delim', '')
                            validate_values = str(row[i]).split(bulk_value_delim)
                            validate_categories = conf_dict.get_dict_object(
                                'inquiry_file_structure_bulk_location', '')
                        else:
                            validate_values.append(str(row[i]).lower())
                            validate_categories.append(col_category)

                        for vv, vc in zip(validate_values, validate_categories):
                            if not conf_dict.key_exists_in_dict(vv.lower(), vc):
                                if col_category == 'bulk_location':
                                    _str = 'Unexpected value "{}" was provided for "{}" as a part of ' \
                                           'the "bulk_location" value (line #{}, column #{})' \
                                        .format(vv, vc, row_count, i + 1)
                                else:
                                    _str = 'Unexpected value "{}" was provided for "{}" (line #{}, column #{})' \
                                        .format(vv, vc, row_count, i + 1)
                                self.logger.critical(_str)
                                # disqualify the inquiry file row, since an unexpected value was provided
                                self.disqualify_inquiry_item(sub_al, _str, row)
                                failed_cnt += 1
                                skip_final_check = True
                                break
                if skip_final_check:
                    break

        # check that, if aliquot validation is required, it was actually performed
        if not skip_final_check:
            if valid_aliquot_flag and not valid_aliquot_performed:
                _str = 'Required aliquot validation vs. the database manifest was not performed for the current row ' \
                       '(#{}) and it is considered a disqualification reason (most likely the db_center_id column ' \
                       'was not provided). ' \
                    .format(row_count)
                self.logger.critical(_str)
                # disqualify the inquiry file row
                self.disqualify_inquiry_item(sub_al, _str, row)
                failed_cnt += 1

        row_count += 1

    self.logger.info('Finish validating the inquiry file with{}.'.format(
        ' no errors' if failed_cnt == 0
        else ' errors; {} records were disqualified - see earlier log entries for details'.format(failed_cnt)))
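
# --- Illustrative sketch (assumption) ------------------------------------------
# validate_inquiry_file() reads several settings from the main config via
# 'Section/key' paths. The actual config format is defined elsewhere; the
# following shows a plausible shape of the 'Validate' section together with a
# hypothetical helper mirroring the path-style lookup (conf_main.get_value)
# used throughout this module:
_EXAMPLE_MAIN_CONFIG = {
    'Validate': {
        'aliquot_id_vs_manifest': True,        # require DB manifest validation of aliquot ids
        'inquiry_values_vs_dictionary': True,  # require dictionary validation of field values
        'inquiry_min_number_columns': 6,
        'inquiry_validate_number_columns': 6,
    },
}

def _get_value_sketch(cfg, path):
    node = cfg
    for part in path.split('/'):  # walk nested sections, e.g. 'Validate/inquiry_min_number_columns'
        if not isinstance(node, dict) or part not in node:
            return None
        node = node[part]
    return node
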
def process_download_inquiries():
    # load the main config file and get the required values
    m_cfg = ConfigData(gc.CONFIG_FILE_MAIN)
    if not m_cfg.loaded:
        print('Specified main config file ({}) was not loaded. Aborting execution.'
              .format(gc.CONFIG_FILE_MAIN))
        return 1
    # load the location config file (with values specific to the current location)
    cfg_location = ConfigData(gc.CONFIG_FILE_LOCATION)
    if not cfg_location.loaded:
        print('Specified location config file ({}) was not loaded. Aborting execution.'
              .format(gc.CONFIG_FILE_LOCATION))
        return 1
    # if both configs were loaded, update the main config with the location config
    m_cfg.update(cfg_location.get_whole_dictionary())
    # print('m_cfg = {}'.format(m_cfg.cfg))

    # assign values
    common_logger_name = gc.MAIN_LOG_NAME  # m_cfg.get_value('Logging/main_log_name')
    # get path configuration values
    logging_level = m_cfg.get_value('Logging/main_log_level')
    # path to the folder where all new inquiry files will be posted
    inquiries_loc = m_cfg.get_value('Location/inquiries')
    gc.DISQUALIFIED_INQUIRIES = m_cfg.get_value('Location/inquiries_disqualified')

    # get path configuration values and save them to the global_const module
    # path to the folder where all application level log files will be stored (one file per run)
    gc.APP_LOG_DIR = m_cfg.get_value('Location/app_logs')
    # path to the folder where all log files for processing inquiry files will be stored
    # (one file per inquiry)
    gc.INQUIRY_LOG_DIR = m_cfg.get_value('Location/inquiry_logs_relative_path')
    # path to the folder where all processed (and renamed) inquiries will be stored
    gc.INQUIRY_PROCESSED_DIR = m_cfg.get_value('Location/inquiries_processed_relative_path')
    # get the config setting for processed_add_datestamp and save it to the global_const module
    processed_add_datestamp = m_cfg.get_value('Location/processed_add_datestamp')
    if processed_add_datestamp:
        gc.PROCESSED_ADD_DATESTAMP = processed_add_datestamp
    # path to the folder where created submission packages will be located; one package sub-folder per inquiry
    gc.OUTPUT_REQUESTS_DIR = m_cfg.get_value('Location/output_requests')
    # path to the dir with dynamically created inquiry files for disqualified aliquots
    gc.DISQUALIFIED_INQUIRIES = m_cfg.get_value('Location/inquiries_disqualified_path')

    log_folder_name = gc.APP_LOG_DIR  # gc.LOG_FOLDER_NAME

    # this variable defines if the Data Downloader app will be executed at the end of processing inquiries
    run_data_download = m_cfg.get_value('Execute/run_data_downloader')
    # path to the Data Downloader tool
    gc.DATA_DOWNLOADER_PATH = m_cfg.get_value('Location/data_downloader_path')

    prj_wrkdir = os.path.dirname(os.path.abspath(__file__))

    email_msgs = []
    # email_attchms = []

    inquiries_path = Path(inquiries_loc)

    # get the current location of the script and create the Log folder
    # if a relative path was provided, convert it to an absolute path based on the application working dir
    if not os.path.isabs(log_folder_name):
        logdir = Path(prj_wrkdir) / log_folder_name
    else:
        logdir = Path(log_folder_name)
    # logdir = Path(prj_wrkdir) / log_folder_name  # 'logs'
    lg_filename = time.strftime("%Y%m%d_%H%M%S", time.localtime()) + '.log'

    lg = setup_logger_common(common_logger_name, logging_level,
                             logdir, lg_filename)  # logging_level
    mlog = lg['logger']

    mlog.info('Start processing download inquiries in "{}"'.format(inquiries_path))

    try:
        (root, source_inq_dirs, _) = next(walk(inquiries_path))

        inq_proc_cnt = 0
        errors_present = 'OK'

        for inq_dir in source_inq_dirs:
            source_inquiry_path = Path(root) / inq_dir
            mlog.info('Selected for processing inquiry source: "{}", full path: {}'
                      .format(inq_dir, source_inquiry_path))

            (_, _, inq_files) = next(walk(source_inquiry_path))
            # filter only excel files for processing as inquiries
            inquiries = [fl for fl in inq_files if fl.endswith(('xlsx', 'xls'))]
            # filter out temp files (starting with '~$') created when an excel file is open
            inquiries = [fl for fl in inquiries if not fl.startswith('~$')]
            mlog.info('Inquiry files present (count = {}): "{}"'.format(len(inquiries), inquiries))

            for inq_file in inquiries:
                inq_path = Path(source_inquiry_path) / inq_file
                # email_msgs = []
                # email_attchms = []
                try:
                    # print('--------->Process file {}'.format(inq_path))
                    mlog.info('The following Inquiry file was selected: "{}".'.format(inq_path))
                    # save a timestamp of the beginning of the file processing
                    ts = time.strftime("%Y%m%d_%H%M%S", time.localtime())

                    inq_obj = Inquiry(inq_path, m_cfg)
                    if inq_obj and inq_obj.loaded:
                        # proceed with processing the inquiry
                        mlog.info('Inquiry file was successfully loaded.')
                        mlog.info('Starting processing Download Inquiry file: "{}".'.format(inq_path))
                        inq_obj.process_inquiry()
                        mlog.info('Processing of Download Inquiry was finished for {}'.format(inq_path))

                        inq_proc_cnt += 1

                        # identify if any errors were reported and set the status variable accordingly
                        if not inq_obj.error.exist():
                            if not inq_obj.disqualified_items:
                                # no disqualified sub-aliquots present
                                fl_status = 'OK'
                                _str = 'Processing status: "{}". Download Inquiry: {}'.format(fl_status, inq_path)
                                # errors_present = 'OK'  # this variable is set to OK by default, no update needed
                            else:
                                # some disqualified sub-aliquots are present
                                fl_status = 'OK_with_Disqualifications'
                                _str = 'Processing status: "{}". Download Inquiry: {}'.format(fl_status, inq_path)
                                if not errors_present == 'ERROR':
                                    errors_present = 'DISQUALIFY'
                        else:
                            fl_status = 'ERROR'
                            _str = 'Processing status: "{}". Check processing log file for this inquiry: {}' \
                                .format(fl_status, inq_obj.logger.handlers[0])
                            errors_present = 'ERROR'

                        if fl_status == "OK":
                            mlog.info(_str)
                        else:
                            mlog.warning(_str)

                        processed_dir = inq_obj.processed_folder  # 'Processed'
                        # combine the name of the processed file
                        inq_processed_name = fl_status + '_' + str(inq_file).replace(' ', '_').replace('__', '_')
                        if gc.PROCESSED_ADD_DATESTAMP:
                            inq_processed_name = ts + '_' + inq_processed_name
                        # move the processed file to the Processed folder
                        fl_processed_name = cm.move_file_to_processed(
                            inq_path, inq_processed_name, processed_dir,
                            inq_obj.logger, inq_obj.error)
                        if fl_processed_name:
                            mlog.info('Processed file "{}" was moved(renamed) to: "{}"'
                                      .format(inq_path, processed_dir / fl_processed_name))
                        else:
                            errors_present = errors_present + '|MoveProcessedError'
                            mlog.warning('Moving the processed file "{}" was not successful due to some errors '
                                         'reported in the request\'s log file {}.'
                                         .format(inq_path, inq_obj.log_handler.baseFilename))

                        # preps for the email notification
                        # create a dictionary to feed into the template for preparing an email body
                        template_feeder = {
                            'file_num': inq_proc_cnt,
                            'file_path': str(inq_path),
                            'file_path_new': (str(processed_dir / fl_processed_name)
                                              if processed_dir and fl_processed_name else None),
                            'inq_obj_errors_cnt': inq_obj.error.count,
                            'log_file_path': inq_obj.log_handler.baseFilename,
                            'dld_request_file_path': str(inq_obj.download_request_path),
                            'inq_sources': inq_obj.inq_sources,
                            'inq_match_aliquots': inq_obj.inq_match_arr,
                            'inq_disqul_aliquots': inq_obj.disqualified_items,
                            'inq_disqul_reprocess_path': str(inq_obj.disqualified_inquiry_path)
                        }
                        email_body_part = cm.populate_email_template('processed_inquiry.html', template_feeder)
                        email_msgs.append(email_body_part)

                        # deactivate the current Inquiry logger
                        deactivate_logger_common(inq_obj.logger, inq_obj.log_handler)
                        inq_obj = None

                except Exception as ex:
                    # report an error to the log file and proceed to the next file
                    mlog.error('Error "{}" occurred during processing file: {}\n{} '
                               .format(ex, inq_path, traceback.format_exc()))
                    raise

        mlog.info('Number of successfully processed Inquiries = {}'.format(inq_proc_cnt))

        # start a Data Download request if the proper config setting was provided
        dd_status = {'status': '', 'message': ''}
        if run_data_download:
            # start the process
            mlog.info('Starting asynchronously Data Downloader app: "{}".'.format(gc.DATA_DOWNLOADER_PATH))
            try:
                dd_process = cm.start_external_process_async(gc.DATA_DOWNLOADER_PATH)
                # check if it is running
                dd_status = cm.check_external_process(dd_process)
                mlog.info('Status of running Data Downloader app: "{}".'.format(dd_status))
            except Exception as ex:
                # report an unexpected error during starting the Data Downloader
                _str = 'Unexpected Error "{}" occurred during an attempt to start Data Downloader app ({})\n{} ' \
                    .format(ex, gc.DATA_DOWNLOADER_PATH, traceback.format_exc())
                mlog.critical(_str)
                dd_status = {'status': 'Error', 'message': _str}

        mlog.info('Preparing to send notification email.')
        email_to = m_cfg.get_value('Email/send_to_emails')
        email_subject = 'processing of download inquiry. '
        if inq_proc_cnt > 0:  # inquiries and len(inquiries) > 0:
            # collect final details and send an email about the results
            err_present = errors_present.split('|')  # get all statuses into an array; 1st element is the main status
            if err_present:
                # set the email subject based on the main status err_present[0]
                if err_present[0] == 'OK':
                    email_subject = 'SUCCESSFUL ' + email_subject
                elif err_present[0] == 'DISQUALIFY':
                    email_subject = 'SUCCESSFUL (with disqualifications) ' + email_subject
                else:
                    email_subject = 'ERROR(s) present during ' + email_subject
            if len(err_present) > 1:
                if err_present[1] == 'MoveProcessedError':
                    email_subject = email_subject + ' Error moving inquiry to processed.'
            if dd_status and 'status' in dd_status.keys() and dd_status['status'].lower() == 'error':
                email_subject = email_subject + ' Errors starting Data Downloader.'

            # create a dictionary to feed into the template for preparing an email body
            template_feeder = {
                'inq_cnt': inq_proc_cnt,
                'run_data_download': run_data_download,
                'downloader_path': gc.DATA_DOWNLOADER_PATH,
                'downloader_start_status': dd_status['status'].lower(),
                'processed_details': '<br/>'.join(email_msgs)
            }
            email_body = cm.populate_email_template('processed_inquiries.html', template_feeder)
            # remove return characters from the body of the email, to keep just clean html code
            email_body = email_body.replace("\r", "")
            email_body = email_body.replace("\n", "")

            # print('email_subject = {}'.format(email_subject))
            # print('email_body = {}'.format(email_body))

            mlog.info('Sending a status email with subject "{}" to "{}".'.format(email_subject, email_to))
            try:
                if m_cfg.get_value('Email/send_emails'):
                    email.send_yagmail(
                        emails_to=email_to,
                        subject=email_subject,
                        message=email_body
                        # attachments are not added, since some log files go over the 25MB limit and fail email sending
                        # , attachment_path=email_attchms
                    )
            except Exception as ex:
                # report an unexpected error during sending emails to the log file and continue
                _str = 'Unexpected Error "{}" occurred during an attempt to send email upon ' \
                       'finishing processing "{}" study: {}\n{} ' \
                    .format(ex, inq_path, os.path.abspath(__file__), traceback.format_exc())
                mlog.critical(_str)

        mlog.info('End of processing of download inquiries in "{}".'.format(inquiries_path))

    except Exception as ex:
        # report an unexpected error to the log file
        _str = 'Unexpected Error "{}" occurred during processing file: {}\n{} ' \
            .format(ex, os.path.abspath(__file__), traceback.format_exc())
        mlog.critical(_str)
        raise

    sys.exit()
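
# --- Usage sketch (assumption) --------------------------------------------------
# process_download_inquiries() (like process_submission() above) ends with
# sys.exit(), which suggests these functions are run as script entry points.
# A minimal guard, assuming this module is invoked directly:
if __name__ == '__main__':
    process_download_inquiries()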