class Request(File):
    def __init__(self, filepath, main_cfg, file_type=2, sheet_name=''):
        # load global and local configurations
        File.__init__(self, filepath, file_type)

        if main_cfg:
            self.conf_main = main_cfg
        else:
            self.conf_main = ConfigData(gc.CONFIG_FILE_MAIN)

        self.error = RequestError(self)

        self.log_handler = None
        self.logger = self.setup_logger(self.wrkdir, self.filename)
        self.logger.info('Start working with Submission request file {}'.format(filepath))

        self.columnlist = []
        self.samples = []
        self.sub_aliquots = []
        self.disqualified_sub_aliquots = {}
        # holds the map of aliquots to sub-aliquots for interpreting DB responses
        self.aliquots_to_subaliquots_map = {}
        # will store the path to a request file created for disqualified sub-aliquots
        self.disqualified_request_path = ''
        self.project = ''
        self.bulk_location = ''
        self.assay = ''
        self.center = ''
        self.center_id = None
        self.center_code = None
        self.experiment_id = ''
        self.data_source_names = ''
        self.data_source_objects = {}  # dictionary to store all collected data sources for the request
        self.aliquots = None
        self.qualified_aliquots = None
        self.raw_data = None
        self.assay_data = None
        self.attachments = None
        self.submission_forms = None
        self.submission_package = None
        self.data_source_names = None
        # will hold a value corresponding to the type of data source being used; possible values are
        # 'db' and 'file'. It is set based on the first data source being used (the attachment data
        # source does not affect this value).
        self.data_source_forms_assignment = None

        self.sheet_name = sheet_name.strip()
        if not self.sheet_name or len(self.sheet_name) == 0:
            # if the sheet name was not passed as a parameter, try to get it from the config file
            self.sheet_name = gc.REQUEST_EXCEL_WK_SHEET_NAME  # 'wk_sheet_name'
        self.logger.info('Data will be loaded from worksheet: "{}"'.format(self.sheet_name))

        self.conf_assay = None

        self.get_file_content()

    def get_file_content(self):
        if not self.columnlist:
            if cm.file_exists(self.filepath):
                self.logger.debug('Loading file content of "{}"'.format(self.filepath))

                with xlrd.open_workbook(self.filepath) as wb:
                    if not self.sheet_name or len(self.sheet_name) == 0:
                        # by default retrieve the first sheet in the excel file
                        sheet = wb.sheet_by_index(0)
                    else:
                        # if a sheet name was provided
                        sheets = wb.sheet_names()  # get list of all sheets
                        if self.sheet_name in sheets:
                            # if the given sheet name is in the list of available sheets, load that sheet
                            sheet = wb.sheet_by_name(self.sheet_name)
                        else:
                            # report an error if the given sheet name is not in the list of available sheets
                            _str = ('Given worksheet name "{}" was not found in the file "{}". '
                                    'Verify that the worksheet name exists in the file.'
                                    ).format(self.sheet_name, self.filepath)
                            self.error.add_error(_str)
                            self.logger.error(_str)

                            self.lineList = None
                            self.loaded = False
                            return self.lineList

                    sheet.cell_value(0, 0)

                    lines = []  # will hold content of the request file as an array of arrays (rows)
                    for i in range(sheet.ncols):
                        column = []
                        for j in range(sheet.nrows):
                            if i == 0:
                                lines.append([])  # adds an array for each new row in the request file
                            cell = sheet.cell(j, i)
                            cell_value = cell.value
                            # take care of numbers and dates received from Excel and converted to float by default
                            if cell.ctype == 2 and int(cell_value) == cell_value:
                                # the value is an integer
                                cell_value = str(int(cell_value))
                            elif cell.ctype == 2:
                                # the value is a float
                                cell_value = str(cell_value)
                            # convert a date value back to a human readable date format
                            if cell.ctype == 3:
                                cell_value_date = xlrd.xldate_as_datetime(cell_value, wb.datemode)
                                cell_value = cell_value_date.strftime("%Y-%m-%d")
                            column.append(cell_value)  # adds value to the current column array
                            lines[j].append('"' + str(cell_value) + '"')  # adds value in "csv" format for the current row
                        self.columnlist.append(column)  # adds a column to the list of columns

                    # populate lineList property
                    self.lineList = []
                    for ln in lines:
                        self.lineList.append(','.join(ln))

                    wb.unload_sheet(sheet.name)

                # load passed request parameters (by columns)
                self.get_request_parameters()

                # validate provided information
                self.logger.info(
                    'Validating provided request parameters. project: "{}", bulk location: "{}", '
                    'assay: "{}", db_center_code_or_id: "{}", '
                    'Sub-Aliquots: "{}"'.format(self.project, self.bulk_location, self.assay,
                                                self.center, self.sub_aliquots))
                self.validate_request_params()

                if self.error.exist():
                    # report that errors exist
                    self.loaded = False
                    _str = 'Errors ({}) were identified during validation of the request. \nError(s): {}'.format(
                        self.error.count, self.error.get_errors_to_str())
                else:
                    self.loaded = True
                    _str = 'Request parameters were successfully validated - no errors found.'
self.logger.info(_str) # combine Experiment_id out of request parameters if self.center_code and len(self.center_code.strip()) > 0: # use center code if available self.experiment_id = "_".join( [self.project, self.center_code, self.assay]) else: # use provided value for the center column from request, if center_code is not available self.experiment_id = "_".join( [self.project, self.center, self.assay]) else: _str = 'Loading content of the file "{}" failed since the file does not appear to exist".'.format( self.filepath) self.error.add_error(_str) self.logger.error(_str) self.columnlist = None self.lineList = None self.loaded = False return self.lineList # get all values provided in the request file def get_request_parameters(self): self.project = self.columnlist[0][1] self.bulk_location = self.columnlist[1][1] self.assay = self.columnlist[2][1].lower() self.center = self.columnlist[3][ 1] # center code (if alpha numeric) or center id (if numeric) self.sub_aliquots = self.columnlist[4] if self.sub_aliquots and len(self.sub_aliquots) > 0: self.sub_aliquots.pop(0) # get rid of the column header # self.samples = self.columnlist[5] # if self.samples and len(self.samples) > 0: # self.samples.pop(0) # get rid of the column header # validates provided parameters (loaded from the submission request file) def validate_request_params(self): _str_err = '' _str_warn = '' if len(self.sub_aliquots) == 0: _str_err = '\n'.join([ _str_err, 'List of provided sub-samples is empty. ' 'Aborting processing of the submission request.' ]) # Check if empty sub-samples were provided if '' in self.sub_aliquots: i = 0 cleaned_cnt = 0 for s in self.sub_aliquots: # check for any empty sub-aliquot values and remove them if len(s.strip()) == 0: self.sub_aliquots.pop(i) cleaned_cnt += 1 else: i += 1 if cleaned_cnt > 0: _str_warn = '\n'.join([ _str_warn, 'Empty sub-aliqouts (count = {}) were removed from the list. ' 'Here is the list of sub-aliqouts after cleaning (count = {}): "{}" ' .format(cleaned_cnt, len(self.sub_aliquots), self.sub_aliquots) ]) # check for empty values if len(self.project) == 0: _str_err = '\n'.join([ _str_err, 'No Program name was provided. Aborting processing of the submission request.' ]) if len(self.bulk_location) == 0: _str_err = '\n'.join([ _str_err, 'No Bulk Location was provided. Aborting processing of the submission request.' ]) if len(self.assay) == 0: _str_err = '\n'.join([ _str_err, 'No Assay was provided. Aborting processing of the submission request.' ]) if len(self.center) == 0: _str_err = '\n'.join([ _str_err, 'No DB Center information was provided. Aborting processing of the submission request.' ]) # check for values that should match some predefined values from a dictionary # check assay value if not cm2.key_exists_in_dict(self.assay, 'assay'): _str_err = '\n'.join([ _str_err, 'Provided Assay name "{}" is not matching a list of expected assay names ' '(as stored in "{}" dictionary file). ' 'Aborting processing of the submission request.'.format( self.assay, gc.CONFIG_FILE_DICTIONARY) ]) else: # if provided assay name is expected, convert it to the name expected by the Submission logic self.assay = cm2.get_dict_value(self.assay, 'assay') # check project value if not cm2.key_exists_in_dict(self.project.lower(), 'project'): _str_err = '\n'.join([ _str_err, 'Provided Program name "{}" is not matching a list of expected names ' '(as stored in "{}" dictionary file). 
' 'Aborting processing of the submission request.'.format( self.project, gc.CONFIG_FILE_DICTIONARY) ]) else: # if provided assay name is expected, convert it to the name expected by the Submission logic self.project = cm2.get_dict_value(self.project.lower(), 'project') # validate center_code or center_id value self.logger.info( 'Start validation of center value "{}" provided in the request'. format(self.center)) db = DBAccess(self.logger, self.error, self.conf_main) # create DBAccess object db.open_connection() # test center value assuming center code was provided dataset = db.validate_center_code(self.center, self.project, 'code', 'code') _str_err_out1, center_id_out1 = self.check_validation_dataset_outcome( dataset, 'center_id', 'center_code') if center_id_out1: # center id was returned, meaning center was validated fine self.center_id = center_id_out1 # get center code value from the current DB dataset _str_err_out3, center_code = self.get_field_value_from_dataset( dataset, 'center_code') if center_code: # center code retrieved OK self.center_code = center_code else: # report an error during retrieving center_code _str_err = '\n'.join([_str_err, _str_err_out3]) else: # if center code was not validated at first attempt, validate it assuming the center id was given dataset = db.validate_center_code(self.center, self.project, 'id', 'code') _str_err_out2, center_id_out2 = self.check_validation_dataset_outcome( dataset, 'center_id', 'center_id') if center_id_out2: # center id was validated at the 2nd attempt, ignore the 1st failed center code validation self.center_id = center_id_out2 # get center code value from the current DB dataset _str_err_out3, center_code = self.get_field_value_from_dataset( dataset, 'center_code') if center_code: # center code retrieved OK self.center_code = center_code else: # report an error during retrieving center_code _str_err = '\n'.join([_str_err, _str_err_out3]) else: # center validation attempts failed, report both failures _str_err = '\n'.join([_str_err, _str_err_out1, _str_err_out2]) # get list of aliquots from list of sub-aliquots self.aliquots = [ cm2.convert_sub_aliq_to_aliquot(al, self.assay) for al in self.sub_aliquots ] # create a map to convert aliquot value to sub_aliquot value (for processing DB responses given for aliquots) for sa, a in zip(self.sub_aliquots, self.aliquots): self.aliquots_to_subaliquots_map[a] = sa if self.center_id: self.logger.info('Start validation of aliquot ids vs DB') # if center id was validated in the above code, validate received aliquots vs manifest dataset in DB dataset = db.validate_aliquot_ids(self.center_id, self.aliquots) if dataset: # create dictionary of received aliquots/sample ids aliquots_to_samples_map = {} for row in dataset: if '_aliquot_id' in row and '_sample_id' in row: aliquots_to_samples_map[ row['_aliquot_id']] = row['_sample_id'] # check if each aliquot id was returned from a database and get the sample id from the dataset for sa, a in zip(self.sub_aliquots, self.aliquots): if a in aliquots_to_samples_map: if len(str(aliquots_to_samples_map[a]).strip()) > 0: self.samples.append(aliquots_to_samples_map[a]) else: _str = 'Blank Sample Id value was returned from DB for the sub-aliquot id "{}". 
' \ 'The sub-aliquot was disqualified'.format(sa) self.disqualify_sub_aliquot(sa, _str) _str_warn = '\n'.join([_str_warn, _str]) else: _str = 'Sub-aliquot id "{}" was not found in the database and was disqualified'.format( sa) self.disqualify_sub_aliquot(sa, _str) _str_warn = '\n'.join([_str_warn, _str]) else: _str_err = '\n'.join([ _str_err, 'Aliquot ids cannot be validated since no data was returned from DB for ' 'center_id = "{}" and aliquot ids as following: {} '. format(self.center_id, self.aliquots) ]) db = None # report any collected errors if len(_str_err) > 0: _str_err = 'Validation of request parameters:' + _str_err self.error.add_error(_str_err) self.logger.error(_str_err) # report any collected warnings if len(_str_warn) > 0: _str_warn = 'Validation of request parameters:' + _str_warn self.logger.warning(_str_warn) def check_validation_dataset_outcome(self, dataset, validation_id_column, validation_id_name): _str_err = '' row_num = 1 validation_id_out = None if dataset: if len(dataset) >= row_num: row = dataset[row_num - 1] # get the first row of the dataset if 'status' in row: status = row['status'] if 'description' in row: description = row['description'] if validation_id_column in row: # center_id validation_id = row[validation_id_column] if status == 'OK': # validation was successful validation_id_out = validation_id elif status == 'Failed': # validation has failed _str_err = '\n'.join([ _str_err, 'Validation of the provided {} value vs DB has Failed, description: {}' .format(validation_id_name, description) ]) else: # unexpected status value was returned _str_err = '\n'.join([ _str_err, 'Validation of the provided {} value vs DB returned unexpected status {}' .format(validation_id_name, status) ]) else: _str_err = '\n'.join([ _str_err, 'Unexpected error was reported during validating {} in the DB. ' 'Check earlier entries in the log file.'.format( validation_id_name) ]) return _str_err, validation_id_out def get_field_value_from_dataset(self, dataset, field_name, row_num=None): # set default values if row_num is None: row_num = 1 # default row is #1 _str_err = '' value_out = None if dataset: if len(dataset) >= row_num: row = dataset[row_num - 1] if field_name in row: value_out = row[field_name] else: _str_err = '\n'.join([ _str_err, 'Unexpected error was reported during retrieving value of "{}" (row #{})from the dataset. 
' .format(field_name, row_num) ]) return _str_err, value_out def setup_logger(self, wrkdir, filename): # m_cfg = ConfigData(gc.CONFIG_FILE_MAIN) log_folder_name = gc.REQ_LOG_DIR # gc.LOG_FOLDER_NAME # m_logger_name = gc.MAIN_LOG_NAME # m_logger = logging.getLogger(m_logger_name) logger_name = gc.REQUEST_LOG_NAME logging_level = self.conf_main.get_value('Logging/request_log_level') # if a relative path provided, convert it to the absolute address based on the application working dir if not os.path.isabs(log_folder_name): log_folder_path = Path(wrkdir) / log_folder_name else: log_folder_path = Path(log_folder_name) lg = setup_logger_common( logger_name, logging_level, log_folder_path, # Path(wrkdir) / log_folder_name, str(filename) + '_' + time.strftime("%Y%m%d_%H%M%S", time.localtime()) + '.log') self.log_handler = lg['handler'] return lg['logger'] def load_request_configuration(self): # update main config file with the project/environmetn specific details from additional config files self.load_project_config_into_main( self.project ) # loads project specific config and merges it into main config # load project specific assay config file self.conf_assay = self.load_assay_conf(self.assay, self.project) if self.conf_assay: # update loaded assay config file with project/environment specific config assay_locatoin_config.yaml self.conf_assay = self.update_cfg_dictionary_with_location_details( gc.CONFIG_FILE_ASSAY_LOCATION, self.project, self.conf_assay) def process_request(self): self.data_source_names = cm.get_value_from_dictionary( 'data_sources', self.conf_assay) # self.conf_assay['data_sources'] # path to the folder where created submission packages will be located. # since this location can be provided in the project config file, this assignment is happening # after loading the project config gc.OUTPUT_PACKAGES_DIR = self.conf_main.get_value( 'Submission_location/output_packages') for data_source_name in self.data_source_names: # if isinstance(data_source_name, tuple) if isinstance(data_source_name, str): if data_source_name == 'attachment': self.attachments = Attachment(self) elif data_source_name[-3:] == "_db": self.data_source_objects[data_source_name] = DataSourceDB( self, data_source_name, data_source_name) if not self.data_source_forms_assignment: self.data_source_forms_assignment = 'db' else: self.data_source_objects[data_source_name] = DataSource( self, data_source_name, data_source_name) if not self.data_source_forms_assignment: self.data_source_forms_assignment = 'file' elif isinstance(data_source_name, tuple): if data_source_name[0][-3:] == "_db": self.data_source_objects[ data_source_name[0]] = DataSourceDB( self, data_source_name[0], data_source_name[1]) else: self.data_source_objects[data_source_name[0]] = DataSource( self, data_source_name[0], data_source_name[1]) else: self.logger.error( 'Provided data source name ({}) is of unexpected format and cannot be processed.' .format(data_source_name)) # if data_source_forms_assignment was not assigned with any value in code before, assign a default to it # this a case when an assay submits only attachments and do not use any assay or QC data if not self.data_source_forms_assignment: self.data_source_forms_assignment = gc.DEFAULT_DATA_SOURCE_FORMS_ASSIGNMENT self.submission_package = SubmissionPackage(self) self.create_request_for_disqualified_sub_aliquots() self.create_trasfer_script_file() # check for errors and put final log entry for the request. 
if self.error.exist(): _str = 'Processing of the current request was finished with the following errors: {}\n'.format( self.error.get_errors_to_str()) self.logger.error(_str) else: _str = 'Processing of the current request was finished successfully.\n' self.logger.info(_str) def load_assay_conf(self, assay, project): assay_cfg_path = gc.CONFIG_FILE_ASSAY.replace('{project}', project) cfg_assay = ConfigData(assay_cfg_path) assay_config = cfg_assay.get_value(assay.upper()) if assay_config: self.logger.info( "Configuration for the {} assay was loaded from the assay config file: {}. " .format(assay.upper(), assay_cfg_path)) else: _str = "Configuration for the {} assay CANNOT be loaded from the assay config file: {}. " \ "Aborting execution.".format(assay.upper(), assay_cfg_path) self.logger.error(_str) self.error.add_error(_str) return assay_config # def update_cfg_assay_with_location_details(self, project, cfg_assay): # cfg_assay_location = ConfigData(gc.CONFIG_FILE_ASSAY_LOCATION.replace('{project}', project)) # if cfg_assay_location.loaded: # self.logger.info('Local config file "{}" was loaded and being used.'.format(cfg_assay_location.cfg_path)) # cfg_assay = cm.update_dictionary_matching_keys(cfg_assay, cfg_assay_location.get_whole_dictionary()) # else: # _str = 'Local config file "{}" was NOT loaded. Aborting processing of the current request file.'\ # .format(cfg_assay_location.cfg_path) # self.logger.error(_str) # self.error.add_error(_str) # return cfg_assay def update_cfg_dictionary_with_location_details(self, location_path, project, cfg_to_update): cfg_location = ConfigData(location_path.replace('{project}', project)) if cfg_location.loaded: self.logger.info( 'Local config file "{}" was loaded and being used.'.format( cfg_location.cfg_path)) cfg_to_update = cm.update_dictionary_matching_keys( cfg_to_update, cfg_location.get_whole_dictionary()) else: _str = 'Local config file "{}" was NOT loaded. 
Aborting processing of the current request file.'\ .format(cfg_location.cfg_path) self.logger.error(_str) self.error.add_error(_str) return cfg_to_update def load_project_config_into_main(self, project): # load project specific "project_config" config file cfg_project = ConfigData( gc.CONFIG_FILE_PROJECT.replace('{project}', project)) if cfg_project.loaded: # if cfg_project was loaded, update it with the environment specific settings (from project_location config) cfg_project_updated = self.update_cfg_dictionary_with_location_details( gc.CONFIG_FILE_PROJECT_LOCATION, self.project, cfg_project.get_whole_dictionary()) # update main config with the outcome of the previous updates self.conf_main.update(cfg_project_updated) def create_trasfer_script_file(self): self.logger.info("Start preparing transfer_script.sh file.") # path for the script file being created sf_path = Path(self.submission_package.submission_dir + "/transfer_script.sh") # get script file template with open('scripts/' + self.project + '/transfer_script.sh', 'r') as ft: scr_tmpl = ft.read() # update placeholders in the script with the actual values smtp_server = cm.get_environment_variable( self.conf_main.get_item_by_key('Email/smtp_server_env_name')) smtp_port = cm.get_environment_variable( self.conf_main.get_item_by_key('Email/smtp_server_port_env_name')) scr_tmpl = cm.replace_value_in_string( scr_tmpl, "{!smtp!}", smtp_server + ":" + str(smtp_port)) scr_tmpl = cm.replace_value_in_string( scr_tmpl, "{!to_email!}", ','.join(self.conf_main.get_value("Email/sent_to_emails"))) scr_tmpl = cm.replace_value_in_string( scr_tmpl, "{!from_email!}", self.conf_main.get_value("Email/default_from_email")) scr_tmpl = cm.replace_value_in_string( scr_tmpl, "{!send_email_flag!}", str(self.conf_main.get_value("Email/send_emails"))) scr_tmpl = cm.replace_value_in_string( scr_tmpl, "{!cmd!}", self.conf_main.get_value("DataTransfer/transfer_command")) # the following will be utilized if mount point is being used by the transfer script (i.e. for Peerless) scr_tmpl = cm.replace_value_in_string( scr_tmpl, "{!mp_cmd!}", self.conf_main.get_value("DataTransfer/mount_point_command")) scr_tmpl = cm.replace_value_in_string( scr_tmpl, "{!mount_local_dir!}", self.conf_main.get_value("DataTransfer/mount_local_dir")) scr_tmpl = cm.replace_value_in_string( scr_tmpl, "{!mount_remote_dir!}", self.conf_main.get_value("DataTransfer/mount_remote_dir")) scr_tmpl = cm.replace_value_in_string( scr_tmpl, "{!source_dir!}", self.submission_package.submission_dir) scr_tmpl = cm.replace_value_in_string( scr_tmpl, "{!target_dir!}", self.conf_main.get_value("DataTransfer/remote_target_dir")) ssh_server = cm.get_environment_variable( self.conf_main.get_item_by_key('DataTransfer/ssh_server_env_name')) scr_tmpl = cm.replace_value_in_string(scr_tmpl, "{!ssh_server!}", str(ssh_server)) # apply user name as the very last replacement statement, since it can be used as part of previous replacements ssh_user = cm.get_environment_variable( self.conf_main.get_item_by_key('DataTransfer/ssh_user_env_name')) scr_tmpl = cm.replace_value_in_string(scr_tmpl, "{!ssh_user!}", str(ssh_user)) set_permissions = False set_perm_value = self.conf_main.get_value("DataTransfer/exec_permis") if set_perm_value: try: exec_permission = eval(set_perm_value.strip()) set_permissions = True except Exception as ex: _str = 'Unexpected error Error "{}" occurred during evaluating of "DataTransfer/exec_permis" value ' \ '"{}" retrieved from the main config file. Permission setup operation will be skipped. 
\n{} '\ .format(ex, set_perm_value, traceback.format_exc()) self.logger.warning(_str) # self.error.add_error(_str) set_permissions = False with open(sf_path, "w") as sf: sf.write(scr_tmpl) if set_permissions: try: # if permissions to be set were retrieved from config file, set them here st = os.stat(sf_path) os.chmod(sf_path, st.st_mode | exec_permission) #stat.S_IXUSR except Exception as ex: _str = 'Unexpected error Error "{}" occurred during setting up permissions "{}" for the script file ' \ '"{}". \n{} '\ .format(ex, set_perm_value, sf_path, traceback.format_exc()) self.logger.warning(_str) self.error.add_error(_str) else: _str = 'Permission setup was skipped for the transfer script file. ' \ 'Note: value of "DataTransfer/exec_permis" from main config was set to "{}".'\ .format(set_perm_value) self.logger.warning(_str) self.logger.info("Finish preparing '{}' file.".format(sf_path)) def disqualify_sub_aliquot(self, sa, details): # adds a sub aliquots to the disctionary of disqualified sub_aliquots # key = sub-aliquot, value = array of details for disqualification; 1 entry can have multiple detail reasons if sa in self.disqualified_sub_aliquots.keys(): self.disqualified_sub_aliquots[sa].append(details) else: arr_details = [details] self.disqualified_sub_aliquots[sa] = arr_details self.logger.warning( 'Sub-aliquot "{}" was disqualified with the following details: "{}"' .format(sa, details)) def populate_qualified_aliquots(self): # reset self.qualified_aliquots array self.qualified_aliquots = [] #select only aliquots that were not disqualified for sa, a in zip(self.sub_aliquots, self.aliquots): if not sa in self.disqualified_sub_aliquots.keys(): self.qualified_aliquots.append(a) def create_request_for_disqualified_sub_aliquots(self): # proceed only if some disqualified sub-aliquots are present if self.disqualified_sub_aliquots: self.logger.info( "Start preparing a request file for disqualified sub-aliquots '{}'." .format([val for val in self.disqualified_sub_aliquots.keys()])) wb = xlwt.Workbook() # create empty workbook object sh = wb.add_sheet( 'Submission_Request' ) # sheet name can not be longer than 32 characters cur_row = 0 # first row for 0-based array cur_col = 0 # first col for 0-based array #write headers to the file headers = self.get_headers() for val in headers: sh.write(cur_row, cur_col, val) cur_col += 1 cur_row += 1 for sa in self.sub_aliquots: if sa in self.disqualified_sub_aliquots.keys(): sh.write(cur_row, 0, self.project) sh.write(cur_row, 1, self.bulk_location) sh.write(cur_row, 2, self.assay) sh.write(cur_row, 3, self.center) sh.write(cur_row, 4, sa) cur_row += 1 self.disqualified_request_path = Path( gc.DISQUALIFIED_REQUESTS + '/' + time.strftime("%Y%m%d_%H%M%S", time.localtime()) + '_reprocess_disqualified _' + Path(self.filename).stem + '.xls') # if DISQUALIFIED_REQUESTS folder does not exist, it will be created os.makedirs(gc.DISQUALIFIED_REQUESTS, exist_ok=True) wb.save(str(self.disqualified_request_path)) self.logger.info( "Successfully prepared the request file for disqualified sub-aliquots and saved in '{}'." .format(str(self.disqualified_request_path)))
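# --- Illustrative sketch (not part of the original module) -------------------------------------
# create_request_for_disqualified_sub_aliquots() above rebuilds a minimal submission request
# workbook containing only the disqualified sub-aliquots so they can be re-processed later. The
# helper below is a hedged, self-contained sketch of that same xlwt pattern with plain arguments;
# the header names, folder and file name used here are hypothetical examples, not the project's
# actual configuration.
import time
from pathlib import Path

import xlwt


def write_reprocess_request(out_dir, headers, rows):
    """Write a one-sheet .xls file: a header row followed by one row per disqualified item."""
    wb = xlwt.Workbook()
    sh = wb.add_sheet('Submission_Request')  # Excel worksheet names are limited to 31 characters

    for col, name in enumerate(headers):
        sh.write(0, col, name)
    for row_num, row in enumerate(rows, start=1):
        for col, value in enumerate(row):
            sh.write(row_num, col, value)

    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)  # create the target folder if it does not exist
    out_path = out_dir / (time.strftime('%Y%m%d_%H%M%S') + '_reprocess_disqualified.xls')
    wb.save(str(out_path))
    return out_path
# ------------------------------------------------------------------------------------------------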
def process_inquiry_sources(self): cur_row = 0 for inq_line in self.lines_arr: if cur_row == self.header_row_num - 1: # skip the header row cur_row += 1 continue # get program code assigned to the current row program_code = self.get_inquiry_value_by_field_name( 'program_code', inq_line) # get assay assigned to the current row assay = self.get_inquiry_value_by_field_name('assay', inq_line) # get source id assigned to the current row source_id = self.get_inquiry_value_by_field_name( 'source_id', inq_line) # get source config file # 2 values are saved in tuple: program name specific path and default one. # if program name specific path does not exist, the default will be used cfg_source_path = ( # configuration path for the current program by name gc.CONFIG_FILE_SOURCE_PATH\ .replace('{program}', program_code)\ .replace('{assay}', assay)\ .replace('{source_id}', source_id), # configuration path for the default program (used if no program specific path is present) gc.CONFIG_FILE_SOURCE_PATH \ .replace('{program}', 'default') \ .replace('{assay}', assay) \ .replace('{source_id}', source_id) ) # get the source location config file path cfg_source_location_path = gc.CONFIG_FILE_SOURCE_LOCATION_PATH.replace( '{source_id}', source_id) # attempt to load configuration for the program specific path cfg_source = ConfigData(Path(cfg_source_path[0])) if not cfg_source.loaded: # if config was not loaded from the program specific path, load the default one cfg_source = ConfigData(Path(cfg_source_path[1])) if cfg_source.loaded: # proceed here if the source config was loaded # load source location config with location specific settings for the current source cfg_source_location = ConfigData( Path(cfg_source_location_path)) if cfg_source_location.loaded: # if the source location config was loaded, update cfg_source config with the source location config cfg_source.update( cfg_source_location.get_whole_dictionary()) # get unique id of the datasource and check if the same id was used already, reuse that in such case inq_line_datasource_id = self.get_inquiry_line_datasource_id( inq_line) self.logger.info( 'Current inquiry row #{} was identified with the following data source id: {}' .format(cur_row, inq_line_datasource_id)) # assign source id (inq_line_datasource_id) to the current inquiry line self.inq_line_sources[cur_row] = inq_line_datasource_id if inq_line_datasource_id in self.inq_sources: # reuse existing datasource self.logger.info( 'Identified data source id for the current inquiry row #{} was identified as ' 'earlier retrieved one (for this or another row) and will be re-used for ' 'the current row.'.format(cur_row)) else: # create a new datasource object inq_line_datasource = DataSource(self, cfg_source, inq_line, inq_line_datasource_id) self.inq_sources[ inq_line_datasource_id] = inq_line_datasource else: sub_al = self.get_inquiry_value_by_field_name( 'sub-aliquot', inq_line, False) _str = 'Datasource config file for the row #{} (sub_aliquot: {}) cannot be loaded. ' \ 'None of the expected to exist files is accessible: {}'\ .format(cur_row, sub_al, ' | '.join(cfg_source_path)) self.logger.warning(_str) self.disqualify_inquiry_item( sub_al, _str, cur_row ) # TODO: verify if inq_line should be used instead of curr_row cur_row += 1 pass
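# --- Illustrative sketch (not part of the original module) -------------------------------------
# process_inquiry_sources() resolves a data-source config by trying a program-specific path first
# and falling back to the "default" program path, then overlays location-specific settings on top.
# Below is a hedged, dependency-free sketch of that resolution order using plain dictionaries in
# place of the project's ConfigData class; the paths and keys are hypothetical examples.
def resolve_source_config(candidate_paths, load_config, location_overlay=None):
    """Return the first config that loads successfully, updated with an optional overlay."""
    for path in candidate_paths:
        cfg = load_config(path)  # expected to return a dict, or None if the file cannot be loaded
        if cfg:
            if location_overlay:
                cfg.update(location_overlay)  # location-specific values win over the base config
            return cfg
    return None  # none of the candidate files could be loaded; the caller disqualifies the inquiry row


# Example usage with an in-memory stand-in for the config loader:
_fake_files = {'configs/default/assay1/source7.yaml': {'delim': ',', 'header_row': 1}}
_cfg = resolve_source_config(
    ['configs/programA/assay1/source7.yaml', 'configs/default/assay1/source7.yaml'],
    _fake_files.get,
    location_overlay={'header_row': 2},
)
assert _cfg == {'delim': ',', 'header_row': 2}
# ------------------------------------------------------------------------------------------------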
def process_submission(): # load main config file m_cfg = ConfigData(gc.CONFIG_FILE_MAIN) if not m_cfg.loaded: print( 'Specified main config file ({}) was not loaded. Aborting execution.' .format(gc.CONFIG_FILE_MAIN)) return 1 # load location config file (with local value specific for the location) cfg_location = ConfigData(gc.CONFIG_FILE_LOCATION) if not cfg_location.loaded: print( 'Specified location config file ({}) was not loaded. Aborting execution.' .format(gc.CONFIG_FILE_LOCATION)) return 1 # if both config were loaded, load update the main config with the location config m_cfg.update(cfg_location.get_whole_dictionary()) # assign values common_logger_name = gc.MAIN_LOG_NAME # m_cfg.get_value('Logging/main_log_name') # get path configuration values logging_level = m_cfg.get_value('Logging/main_log_level') # path to the folder where all new request files will be posted requests_loc = m_cfg.get_value('Location/requests') gc.DISQUALIFIED_REQUESTS = m_cfg.get_value( 'Location/requests_disqualified') # get path configuration values and save them to global_const module # path to the folder where all application level log files will be stored (one file per run) gc.APP_LOG_DIR = m_cfg.get_value('Location/app_logs') # path to the folder where all log files for processing request files will be stored # (one file per request) gc.REQ_LOG_DIR = m_cfg.get_value('Location/request_logs') # path to the folder where all processed (and renamed) requests will be stored gc.REQ_PROCESSED_DIR = m_cfg.get_value('Location/requests_processed') # path to the folder where created submission packages will be located. One package sub_folder per request. # gc.OUTPUT_PACKAGES_DIR = m_cfg.get_value('Location/output_packages') # tarball approach to be used for the current deployment gc.TARBALL_APPROACH = m_cfg.get_value('Tar_ball/approach') # flag to save calculated md5sum to a physical file gc.TARBALL_SAVE_MD5SUM_FILE = m_cfg.get_value('Tar_ball/save_md5sum_file') # tarball ignore directories ignore_dirs = m_cfg.get_value('Tar_ball/ignore_dirs') if ignore_dirs: # update default ignore_dirs value with the value from a config file gc.TARBALL_IGNORE_DIRS = ignore_dirs log_folder_name = gc.APP_LOG_DIR # gc.LOG_FOLDER_NAME processed_folder_name = gc.REQ_PROCESSED_DIR # gc.PROCESSED_FOLDER_NAME prj_wrkdir = os.path.dirname(os.path.abspath(__file__)) email_msgs = [] email_attchms = [] transfers = [] # requests_loc = 'E:/MounSinai/MoTrPac_API/ProgrammaticConnectivity/MountSinai_metadata_file_loader/DataFiles' requests_path = Path(requests_loc) # get current location of the script and create Log folder # if a relative path provided, convert it to the absolute address based on the application working dir if not os.path.isabs(log_folder_name): logdir = Path(prj_wrkdir) / log_folder_name else: logdir = Path(log_folder_name) # logdir = Path(prj_wrkdir) / log_folder_name # 'logs' lg_filename = time.strftime("%Y%m%d_%H%M%S", time.localtime()) + '.log' lg = setup_logger_common(common_logger_name, logging_level, logdir, lg_filename) # logging_level mlog = lg['logger'] log_warnings = False mlog.info( 'Start processing submission requests in "{}"'.format(requests_path)) try: (_, _, requests) = next(walk(requests_path)) # print('Study requests: {}'.format(requests)) mlog.info( 'Submission requests to be processed (count = {}): {}'.format( len(requests), requests)) req_proc_cnt = 0 errors_present = 'OK' req_path = '' # '~$' should filter out temp file created when excel is open requests = [file for file in requests if not 
file.startswith('~$')] for req_file in requests: if req_file.endswith(('xlsx', 'xls')): req_path = Path(requests_path) / req_file # transfer_path = '' # set a default transfer path transfer_details = { 'transfer_path': '', 'request_file': req_file, 'process_handler': None, 'return_code': None, 'return_status': None } # email_msgs = [] # email_attchms = [] try: # print('--------->Process file {}'.format(req_path)) mlog.info( 'Request file {} was selected for processing.'.format( req_path)) # save timestamp of beginning of the file processing ts = time.strftime("%Y%m%d_%H%M%S", time.localtime()) req_obj = Request(req_path, m_cfg) if req_obj and req_obj.loaded: # proceed processing request mlog.info( 'Submission request loading status: Success. Submission request file: "{}".' .format(req_path)) mlog.info( 'Loading local and project related configs for processing the request.' ) req_obj.load_request_configuration() if not req_obj.error.exist(): mlog.info( 'Local config files were loaded with no errors, proceeding to process ' 'the request file.') req_obj.process_request() else: mlog.info( 'Errors were reported during loading local config files. Aborting processing ' 'this request.') mlog.info( 'Processing of Submission request was finished for {}' .format(req_path)) req_proc_cnt += 1 # print (req_obj.logger._cache) if hasattr(req_obj.logger, '_cache' ): #verify that _cache attribute is present # check if any warning were recorded to the log file and set a flag log_warnings if 30 in req_obj.logger._cache and req_obj.logger._cache[ 30]: log_warnings = True # else: # log_warnings = False else: mlog.warning( 'The current logger object has no "_cache" attribute - thus cannot determine ' 'if any Warnings were reported during the process.' ) # identify if any errors were identified and set status variable accordingly if not req_obj.error.exist(): if not req_obj.disqualified_sub_aliquots: # no disqualified sub-aliquots present if not log_warnings: fl_status = 'OK' _str = 'Processing status: "{}". Submission Request: {}'.format( fl_status, req_path) # errors_present = 'OK' else: fl_status = 'OK with Warnings' _str = 'Processing status: "{}". Submission Request: {}'.format( fl_status, req_path) else: # some disqualified sub-aliquots are presetn fl_status = 'OK with Disqualifications' _str = 'Processing status: "{}". Submission Request: {}'.format( fl_status, req_path) if not errors_present == 'ERROR': errors_present = 'DISQUALIFY' else: fl_status = 'ERROR' _str = 'Processing status: "{}". 
Check processing log file for this request: {}' \ .format(fl_status, req_obj.logger.handlers[0]) errors_present = 'ERROR' if fl_status == "OK": mlog.info(_str) # if transfer on completion was requested through the command line argument if gc.TRANSFER_ON_COMPLETION: # update transfer details dictionary with the path to the transfer file transfer_details['transfer_path'] = \ Path(req_obj.submission_package.submission_dir) / 'transfer_script.sh' transfers.append( transfer_details ) # add transfer details to transfers list mlog.info( 'Since the last request was processed with "{}" status and transfer on ' 'completion was requested ("--execute_transfer" argument was set to "yes"), ' 'the following path was put in queue for execution: ' '{}'.format(fl_status, transfer_details['transfer_path'])) else: mlog.warning(_str) # if transfer on completion was requested through the command line argument if gc.TRANSFER_ON_COMPLETION: mlog.info( 'The transfer on completion request ("--execute_transfer" argument was set to ' '"yes") will be ignored since the last request was processed with "{}" status.' .format(fl_status)) processed_dir = Path(processed_folder_name) req_processed_name = ts + '_' + fl_status + '_' + req_file file_name_new_path = cm.move_file_to_processed( req_path, req_processed_name, processed_dir, req_obj.logger, req_obj.error) if file_name_new_path: mlog.info( 'Processed Submission request "{}" was moved and renamed as: "{}"' .format(req_path, processed_dir / req_processed_name)) else: mlog.warning( 'Moving the processed request "{}" was not successful due to some errors ' 'reported in the request\'s log file {}.'.format( req_path, req_obj.log_handler.baseFilename)) # deactivate the current Request logger deactivate_logger_common(req_obj.logger, req_obj.log_handler) if req_obj.submission_package and req_obj.submission_package.submission_dir: # save transfer path to a local variable transfer_path = Path( req_obj.submission_package.submission_dir ) / 'transfer_script.sh' else: transfer_path = None # preps for email notification email_msgs.append(( '-------------------------------------<br/>' 'Requested project: {}'.format(req_obj.project) + '<br/>Requested Experiment: {}.'.format( req_obj.experiment_id) + ('<br/>Request file <br/>{} <br/> was processed and moved/renamed to <br/> {}.' .format(req_path, processed_dir / req_processed_name) if file_name_new_path else '<br/> Request file <br/>{} <br/> was processed but <font color="red">NOT moved due ' 'to some errors</font> reported in the request\'s log file.' 
.format(req_path)) + '<br/> <b>Errors summary:</b> {}' '<br/> <b>Warning(s) reported:</b> {}' '<br/> <i>Log file location: <br/>{}</i>' '<br/> Submission package locatoin:<br/>{}' '<br/> Data source locatoin:<br/>{}' '<br/> Processed Aliquots:<br/>{}' '<br/> Disqualified Aliquots (if present, see the log file for more details):<br/>{}' '<br/> A request file for re-processing Disqualified Aliquots was prepared in:<br/>{}' '<br/> Automatic data transferring: {}' '<br/> Command line to run data transferring manually: <br/> {}' ''.format( '<font color="red">Check Errors in the log file.</font>' if req_obj.error.exist() else '<font color="green">No Errors</font> ', '<font color="red">Yes - check the log file.</font>' if log_warnings else 'No', req_obj.log_handler.baseFilename, req_obj.submission_package.submission_dir if req_obj.submission_package else 'N/A', req_obj.attachments.data_loc if req_obj.attachments else 'N/A', req_obj.qualified_aliquots if req_obj.qualified_aliquots else 'None', [ val for val in req_obj.disqualified_sub_aliquots.keys() ] if req_obj.disqualified_sub_aliquots else 'None', req_obj.disqualified_request_path, '<font color="green">Performed.</font> ' 'Additional email should be sent upon data transfer completion.' if len( str(transfer_details['transfer_path']).strip()) > 0 else 'Not performed.', str( Path(req_obj.submission_package.submission_dir) / 'transfer_script.sh') if req_obj.submission_package else 'N/A'))) email_attchms.append(req_obj.log_handler.baseFilename) # print ('email_msgs = {}'.format(email_msgs)) req_obj = None except Exception as ex: # report an error to log file and proceed to next file. mlog.error( 'Error "{}" occurred during processing file: {}\n{} '. format(ex, req_path, traceback.format_exc())) raise mlog.info('Number of processed Submission requests = {}'.format( req_proc_cnt)) if req_proc_cnt > 0: # collect final details and send email about this study results email_subject = 'processing of Submission Requests ' if errors_present == 'OK': if not log_warnings: email_subject = 'SUCCESSFUL ' + email_subject else: email_subject = 'SUCCESSFUL (wit Warnings) ' + email_subject elif errors_present == 'DISQUALIFY': email_subject = 'SUCCESSFUL (with disqualifications) ' + email_subject else: email_subject = 'ERROR(s) present during ' + email_subject email_body = ( 'Number of requests processed: {}.'.format(req_proc_cnt) + '<br/><br/>' + '<br/><br/>'.join(email_msgs)) # print ('email_subject = {}'.format(email_subject)) # print('email_body = {}'.format(email_body)) try: if m_cfg.get_value('Email/send_emails'): email.send_yagmail( emails_to=m_cfg.get_value('Email/sent_to_emails'), subject=email_subject, message=email_body, main_conf=m_cfg # commented adding attachements, since some log files go over 25GB limit and fail email sending # ,attachment_path=email_attchms ) except Exception as ex: # report unexpected error during sending emails to a log file and continue _str = 'Unexpected Error "{}" occurred during an attempt to send email upon ' \ 'finishing processing "{}" study: {}\n{} ' \ .format(ex, req_path, os.path.abspath(__file__), traceback.format_exc()) mlog.critical(_str) # perform transfers, if anything qualifies for it if transfers and len(transfers) > 0: transfer_status_checking_delay = m_cfg.get_value( 'General/transfer_status_checking_delay') if transfer_status_checking_delay and str( transfer_status_checking_delay).isnumeric(): if transfer_status_checking_delay > 0: pass else: transfer_status_checking_delay = None else: 
transfer_status_checking_delay = None mlog.info( 'Starting processing requested transfers. Total count: {} transfers.' .format(len(transfers))) # process all collected transfer requests cm.process_transfers(transfers, mlog, transfer_status_checking_delay) # assess results of the transfer processing transfer_ok = 0 transfer_err = 0 transfer_nd = 0 for transfer in transfers: if transfer['return_status']: if transfer['return_status'][:2] == 'OK': transfer_ok += 1 elif transfer['return_status'][:5] == 'ERROR': transfer_err += 1 else: transfer_nd += 1 else: transfer_nd += 1 _str = 'Finish processing transfers with the following statuses: "OK" - {} transfer(s), "ERROR" - {} ' \ 'transfer(s)'.format(transfer_ok, transfer_err) if transfer_nd > 0: _str = _str + ', "ND" - {}'.format(transfer_nd) mlog.info(_str) # send email with the status of the transfers if transfers and len(transfers) > 0: if transfer_err > 0: email_subject = 'Errors produced during automated transfer(s) of prepared Submission Request(s)' else: email_subject = 'Completion of automated transfer(s) of prepared Submission Request(s)' email_transfer_msgs = [] for transfer in transfers: email_transfer_msgs.append( ('Transfer process for the request file: "{}" ' '<br/>Transfer script file:<br/>{}' '<br/>Completion status:<br/>{}'.format( transfer['request_file'], transfer['transfer_path'], transfer['return_status']))) email_body = ( 'Summary of transfer of prepared submissions:' '<br/>Total count of completed transfers: {}. ' '<br/>Status "OK": {} transfer(s)' '<br/>Status "ERROR": {} transfer(s)' '<br/>Status "Not Defined": {} transfer(s)' '<br/><br/>The following are details for each performed transfer:' '<br/><br/>'.format( len(transfers), '<font color="green">' + str(transfer_ok) + '</font>' if transfer_ok > 0 else transfer_ok, '<font color="red">' + str(transfer_err) + '</font>' if transfer_err > 0 else transfer_err, transfer_nd) + '<br/><br/>'.join(email_transfer_msgs)) try: if m_cfg.get_value('Email/send_emails'): email.send_yagmail(emails_to=m_cfg.get_value( 'Email/sent_to_emails'), subject=email_subject, message=email_body, main_conf=m_cfg) except Exception as ex: # report unexpected error during sending emails to a log file and continue _str = 'Unexpected Error "{}" occurred during an attempt to send email upon ' \ 'finishing automated transfers. \n{} '\ .format(ex, traceback.format_exc()) mlog.critical(_str) except Exception as ex: # report unexpected error to log file _str = 'Unexpected Error "{}" occurred during processing file: {}\n{} ' \ .format(ex, os.path.abspath(__file__), traceback.format_exc()) mlog.critical(_str) raise sys.exit()
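# --- Illustrative sketch (not part of the original module) -------------------------------------
# process_submission() relies on cm.move_file_to_processed() to archive each handled request file
# under a "processed" folder, renamed as <timestamp>_<status>_<original name>. The helper below is
# a hedged, stdlib-only approximation of that behaviour (the real helper also logs and records
# errors through the project's logger/error objects); the names used here are illustrative.
import shutil
import time
from pathlib import Path


def move_file_to_processed_sketch(src_path, processed_dir, status):
    """Move src_path into processed_dir, prefixing the file name with a timestamp and status."""
    src_path = Path(src_path)
    processed_dir = Path(processed_dir)
    processed_dir.mkdir(parents=True, exist_ok=True)

    ts = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    new_name = '{}_{}_{}'.format(ts, status, src_path.name)
    dest_path = processed_dir / new_name

    shutil.move(str(src_path), str(dest_path))  # move and rename in one step
    return dest_path
# ------------------------------------------------------------------------------------------------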
def process_download_inquiries(): # load main config file and get required values m_cfg = ConfigData(gc.CONFIG_FILE_MAIN) if not m_cfg.loaded: print( 'Specified main config file ({}) was not loaded. Aborting execution.' .format(gc.CONFIG_FILE_MAIN)) return 1 # load location config file (with local value specific for the location) cfg_location = ConfigData(gc.CONFIG_FILE_LOCATION) if not cfg_location.loaded: print( 'Specified location config file ({}) was not loaded. Aborting execution.' .format(gc.CONFIG_FILE_LOCATION)) return 1 # if both configs were loaded, update the main config with the location config m_cfg.update(cfg_location.get_whole_dictionary()) # print ('m_cfg = {}'.format(m_cfg.cfg)) # assign values common_logger_name = gc.MAIN_LOG_NAME # m_cfg.get_value('Logging/main_log_name') # get path configuration values logging_level = m_cfg.get_value('Logging/main_log_level') # path to the folder where all new inquiry files will be posted inquiries_loc = m_cfg.get_value('Location/inquiries') gc.DISQUALIFIED_INQUIRIES = m_cfg.get_value( 'Location/inquiries_disqualified') # get path configuration values and save them to global_const module # path to the folder where all application level log files will be stored (one file per run) gc.APP_LOG_DIR = m_cfg.get_value('Location/app_logs') # path to the folder where all log files for processing inquiry files will be stored # (one file per inquiry) gc.INQUIRY_LOG_DIR = m_cfg.get_value('Location/inquiry_logs_relative_path') # path to the folder where all processed (and renamed) inquiries will be stored gc.INQUIRY_PROCESSED_DIR = m_cfg.get_value( 'Location/inquiries_processed_relative_path') # get config setting for the processed_add_datestamp and save it to global const module processed_add_datestamp = m_cfg.get_value( 'Location/processed_add_datestamp') if processed_add_datestamp: gc.PROCESSED_ADD_DATESTAMP = processed_add_datestamp # path to the folder where created submission packages will be located. One package sub_folder per inquiry. 
gc.OUTPUT_REQUESTS_DIR = m_cfg.get_value('Location/output_requests') # path to dir with dynamically created inquiry files for disqualified aliquots gc.DISQUALIFIED_INQUIRIES = m_cfg.get_value( 'Location/inquiries_disqualified_path') log_folder_name = gc.APP_LOG_DIR # gc.LOG_FOLDER_NAME # this variable define if Data Downloader app will be executed at the end of processing inquiries run_data_download = m_cfg.get_value('Execute/run_data_downloader') # path to the Data Downloader tool gc.DATA_DOWNLOADER_PATH = m_cfg.get_value('Location/data_downloader_path') prj_wrkdir = os.path.dirname(os.path.abspath(__file__)) email_msgs = [] # email_attchms = [] inquiries_path = Path(inquiries_loc) # get current location of the script and create Log folder # if a relative path provided, convert it to the absolute address based on the application working dir if not os.path.isabs(log_folder_name): logdir = Path(prj_wrkdir) / log_folder_name else: logdir = Path(log_folder_name) # logdir = Path(prj_wrkdir) / log_folder_name # 'logs' lg_filename = time.strftime("%Y%m%d_%H%M%S", time.localtime()) + '.log' lg = setup_logger_common(common_logger_name, logging_level, logdir, lg_filename) # logging_level mlog = lg['logger'] mlog.info( 'Start processing download inquiries in "{}"'.format(inquiries_path)) try: (root, source_inq_dirs, _) = next(walk(inquiries_path)) inq_proc_cnt = 0 errors_present = 'OK' for inq_dir in source_inq_dirs: source_inquiry_path = Path(root) / inq_dir mlog.info( 'Selected for processing inquiry source: "{}", full path: {}'. format(inq_dir, source_inquiry_path)) (_, _, inq_files) = next(walk(source_inquiry_path)) # filter only excel files for processing as inquiries inquiries = [ fl for fl in inq_files if fl.endswith(('xlsx', 'xls')) ] # filter out temp files (starting with '~$') created when an excel file is open inquiries = [fl for fl in inquiries if not fl.startswith('~$')] mlog.info('Inquiry files presented (count = {}): "{}"'.format( len(inquiries), inquiries)) for inq_file in inquiries: inq_path = Path(source_inquiry_path) / inq_file # email_msgs = [] # email_attchms = [] try: # print('--------->Process file {}'.format(inq_path)) mlog.info('The following Inquiry file was selected: "{}".'. format(inq_path)) # save timestamp of beginning of the file processing ts = time.strftime("%Y%m%d_%H%M%S", time.localtime()) inq_obj = Inquiry(inq_path, m_cfg) if inq_obj and inq_obj.loaded: # proceed processing inquiry mlog.info('Inquiry file was successfully loaded.') mlog.info( 'Starting processing Download Inquiry file: "{}".'. format(inq_path)) inq_obj.process_inquiry() mlog.info( 'Processing of Download Inquiry was finished for {}' .format(inq_path)) inq_proc_cnt += 1 # identify if any errors were identified and set status variable accordingly if not inq_obj.error.exist(): if not inq_obj.disqualified_items: # no disqualified sub-aliquots present fl_status = 'OK' _str = 'Processing status: "{}". Download Inquiry: {}'.format( fl_status, inq_path) # errors_present = 'OK' # this variable is set to OK by default, no update needed else: # some disqualified sub-aliquots are presetn fl_status = 'OK_with_Disqualifications' _str = 'Processing status: "{}". Download Inquiry: {}'.format( fl_status, inq_path) if not errors_present == 'ERROR': errors_present = 'DISQUALIFY' else: fl_status = 'ERROR' _str = 'Processing status: "{}". 
Check processing log file for this inquiry: {}' \ .format(fl_status, inq_obj.logger.handlers[0]) errors_present = 'ERROR' if fl_status == "OK": mlog.info(_str) else: mlog.warning(_str) processed_dir = inq_obj.processed_folder # 'Processed' # combine the name of the processed file inq_processed_name = fl_status + '_' + str( inq_file).replace(' ', '_').replace('__', '_') if gc.PROCESSED_ADD_DATESTAMP: inq_processed_name = ts + '_' + inq_processed_name # move processed files to Processed folder fl_processed_name = cm.move_file_to_processed( inq_path, inq_processed_name, processed_dir, inq_obj.logger, inq_obj.error) if fl_processed_name: mlog.info( 'Processed file "{}" was moved(renamed) to: "{}"'. format(inq_path, processed_dir / fl_processed_name)) else: errors_present = errors_present + '|MoveProcessedError' mlog.warning( 'Moving the processed file "{}" was not successful due to some errors ' 'reported in the request\'s log file {}.'.format( inq_path, inq_obj.log_handler.baseFilename)) # preps for email notification # create a dictionary to feed into template for preparing an email body template_feeder = { 'file_num': inq_proc_cnt, 'file_path': str(inq_path), 'file_path_new': (str(processed_dir / fl_processed_name) if processed_dir and fl_processed_name else None), 'inq_obj_errors_cnt': inq_obj.error.count, 'log_file_path': inq_obj.log_handler.baseFilename, 'dld_request_file_path': str(inq_obj.download_request_path), 'inq_sources': inq_obj.inq_sources, 'inq_match_aliquots': inq_obj.inq_match_arr, 'inq_disqul_aliquots': inq_obj.disqualified_items, 'inq_disqul_reprocess_path': str(inq_obj.disqualified_inquiry_path) } email_body_part = cm.populate_email_template( 'processed_inquiry.html', template_feeder) email_msgs.append(email_body_part) # deactivate the current Inquiry logger deactivate_logger_common(inq_obj.logger, inq_obj.log_handler) inq_obj = None except Exception as ex: # report an error to log file and proceed to next file. mlog.error( 'Error "{}" occurred during processing file: {}\n{} '. format(ex, inq_path, traceback.format_exc())) raise mlog.info('Number of successfully processed Inquiries = {}'.format( inq_proc_cnt)) # start Data Download request if proper config setting was provided dd_status = {'status': '', 'message': ''} if run_data_download: # start process mlog.info( 'Starting asynchronously Data Downloader app: "{}".'.format( gc.DATA_DOWNLOADER_PATH)) try: dd_process = cm.start_external_process_async( gc.DATA_DOWNLOADER_PATH) # check if it is running dd_status = cm.check_external_process(dd_process) mlog.info( 'Status of running Data Downloader app: "{}".'.format( dd_status)) except Exception as ex: # report unexpected error during starting Data Downloader _str = 'Unexpected Error "{}" occurred during an attempt to start Data Downloader app ({})\n{} ' \ .format(ex, gc.DATA_DOWNLOADER_PATH, traceback.format_exc()) mlog.critical(_str) dd_status = {'status': 'Error', 'message': _str} mlog.info('Preparing to send notificatoin email.') email_to = m_cfg.get_value('Email/send_to_emails') email_subject = 'processing of download inquiry. 
' if inq_proc_cnt > 0: # inquiries and len(inquiries) > 0: # collect final details and send email about this study results err_present = errors_present.split( '|' ) # get all statuses into an array; 1st element is the main status if err_present: # set email subject based on the main status err_present[0] if err_present[0] == 'OK': email_subject = 'SUCCESSFUL ' + email_subject elif err_present[0] == 'DISQUALIFY': email_subject = 'SUCCESSFUL (with disqualifications) ' + email_subject else: email_subject = 'ERROR(s) present during ' + email_subject if len(err_present) > 1: if err_present[1] == 'MoveProcessedError': email_subject = email_subject + ' Error moving inquiry to processed.' if dd_status and 'status' in dd_status.keys( ) and dd_status['status'].lower() == 'error': email_subject = email_subject + ' Errors starting Data Downloader.' # create a dictionary to feed into template for preparing an email body template_feeder = { 'inq_cnt': inq_proc_cnt, 'run_data_download': run_data_download, 'downloader_path': gc.DATA_DOWNLOADER_PATH, 'downloader_start_status': dd_status['status'].lower(), 'processed_details': '<br/>'.join(email_msgs) } email_body = cm.populate_email_template('processed_inquiries.html', template_feeder) # remove return characters from the body of the email, to keep just clean html code email_body = email_body.replace("\r", "") email_body = email_body.replace("\n", "") # print ('email_subject = {}'.format(email_subject)) # print('email_body = {}'.format(email_body)) mlog.info( 'Sending a status email with subject "{}" to "{}".'.format( email_subject, email_to)) try: if m_cfg.get_value('Email/send_emails'): email.send_yagmail( emails_to=email_to, subject=email_subject, message=email_body # commented adding attachements, since some log files go over 25GB limit and fail email sending # ,attachment_path=email_attchms ) except Exception as ex: # report unexpected error during sending emails to a log file and continue _str = 'Unexpected Error "{}" occurred during an attempt to send email upon ' \ 'finishing processing "{}" study: {}\n{} ' \ .format(ex, inq_path, os.path.abspath(__file__), traceback.format_exc()) mlog.critical(_str) mlog.info( 'End of processing of download inquiries in "{}".'.format( inquiries_path)) except Exception as ex: # report unexpected error to log file _str = 'Unexpected Error "{}" occurred during processing file: {}\n{} ' \ .format(ex, os.path.abspath(__file__), traceback.format_exc()) mlog.critical(_str) raise sys.exit()
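# --- Illustrative sketch (not part of the original module) -------------------------------------
# process_download_inquiries() can launch the external Data Downloader tool asynchronously through
# cm.start_external_process_async() and then report its state via cm.check_external_process().
# The snippet below is a hedged, stdlib-only sketch of that pattern using subprocess; the command
# line and the shape of the returned status dictionary are assumptions for illustration, not the
# project's actual API.
import shlex
import subprocess


def start_external_process_async_sketch(command_line):
    """Start a command without waiting for it to finish and return the Popen handle."""
    return subprocess.Popen(shlex.split(command_line))


def check_external_process_sketch(process):
    """Return a small status dict: still running, finished OK, or finished with an error code."""
    return_code = process.poll()  # None while the process is still running
    if return_code is None:
        return {'status': 'Running', 'message': 'pid {}'.format(process.pid)}
    if return_code == 0:
        return {'status': 'OK', 'message': 'finished with return code 0'}
    return {'status': 'Error', 'message': 'finished with return code {}'.format(return_code)}
# ------------------------------------------------------------------------------------------------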