Example #1
import click
import requests


def process(data_type, study_id, center_id, center_ids, dataset_type_id,
            out_file, output_format, server_url):
    # print ("data_type = {}, study_id = {}, out_file = {}".format(data_type, study_id, out_file))
    # get URL of the API server
    from utils import ConfigData
    main_cfg = ConfigData('configs/main_config.yaml')
    api_server_url = main_cfg.get_value('SAMPLEINFO_CLI_URL')
    if server_url:
        # print('server_url: {}'.format(api_server_url))
        click.echo('server_url: {}'.format(api_server_url))

    if check_data_type_value(data_type):
        api_url, err_msg = identify_api_url(api_server_url, data_type,
                                            study_id, center_id, center_ids,
                                            dataset_type_id)
    else:
        api_url = ''
        err_msg = 'Unexpected data_type value ({}) was provided. Run --help for the list of expected values.'\
            .format(data_type)
    if len(err_msg) == 0:
        if len(api_url) > 0:
            # access api and retrieve the data
            response = requests.get(api_url)
            # print ("data_type = {}, study_id = {}, out_file = {}".format(data_type, stu)
            # print(response.status_code)
            # json_parsed =

            output_data(response.json(), out_file, output_format)
        else:
            print(
                'Error: Cannot identify the database call for the given parameters.'
            )
    else:
        # report an error
        print('Error: {}'.format(err_msg))
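A hypothetical direct invocation of the command above (check_data_type_value, identify_api_url and output_data are assumed to be defined in the same module; all argument values are placeholders):

# Sketch only: fetch data of one type from the API and write it to a file.
process(data_type='samples', study_id='123', center_id=None, center_ids=None,
        dataset_type_id=None, out_file='samples.json', output_format='json',
        server_url=True)  # server_url=True echoes the resolved API URL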
Example #2
    def __init__(self,
                 filepath,
                 conf_source,
                 log_obj,
                 file_type=None,
                 file_delim=None):
        # setup default parameters
        if file_type is None:
            file_type = 1
        if file_delim is None:
            file_delim = ','  #'\t'

        File.__init__(self, filepath, file_type, file_delim)

        self.conf_src = ConfigData('', conf_source)
        self.logger = log_obj
        self.map = {}  # will hold a dict where key is an aliquot id and value is the relative path to the file

        # set file properties before loading it
        self.file_delim = self.conf_src.get_value('file_delim') \
            if self.conf_src.get_value('file_delim') else self.file_delim
        self.header_row_num = self.conf_src.get_value('header_row_num') \
            if self.conf_src.get_value('header_row_num') else self.header_row_num

        # load the file
        self.get_file_content()
Example #3
import yagmail


def send_yagmail(emails_to, subject, message, email_from=None, attachment_path=None, smtp_server=None, smtp_server_port=None):
    root_dir = cm.get_project_root()
    cnf_path = str(root_dir.joinpath(gc.MAIN_CONFIG_FILE))
    m_cfg = ConfigData(cnf_path)
    if not email_from:
        email_from = m_cfg.get_value('Email/default_from_email')
    if not smtp_server:
        smtp_server = m_cfg.get_value('Email/smtp_server')
    if not smtp_server_port:
        smtp_server_port = m_cfg.get_value('Email/smtp_server_port')
    
    body = message
    filename = attachment_path  # 'test.png'
    
    yag = yagmail.SMTP(email_from,
                       host=smtp_server,
                       smtp_skip_login=True,
                       smtp_ssl=False,
                       soft_email_validation=False,
                       port=smtp_server_port)
    yag.send(
        to=emails_to,
        subject=subject,
        contents=body, 
        attachments=filename,
    )
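A minimal usage sketch: when the optional arguments are omitted, the sender address and SMTP settings fall back to the Email/* keys of the main config file (the address and path below are placeholders):

# Sketch only: send a plain-text report with one attachment.
send_yagmail(emails_to='recipient@example.org',
             subject='Processing report',
             message='The inquiry file was processed.',
             attachment_path='logs/run.log')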
Example #4
import logging
import traceback


def load_configuration(fl_class, loc_cfg_path):
    # load global configuration

    # m_cfg = ConfigData(gc.MAIN_CONFIG_FILE)
    m_logger_name = gc.MAIN_LOG_NAME  # m_cfg.get_value('Logging/main_log_name')
    m_logger = logging.getLogger(m_logger_name)

    m_logger.debug('Loading Global config file {} for file: {}'.format(
        gc.MAIN_CONFIG_FILE, fl_class.filepath))
    StudyConfig.config_glb = ConfigData(gc.MAIN_CONFIG_FILE)

    m_logger.info('Loading Study config file {} for file: {}'.format(
        loc_cfg_path, fl_class.filepath))
    # load local configuration
    try:
        StudyConfig.config_loc = ConfigData(loc_cfg_path)
    except Exception as ex:
        m_logger.error(
            'Error "{}" occurred during loading study config file "{}"\n{}'.
            format(ex, loc_cfg_path, traceback.format_exc()))
        # raise
        return False

    # load global logging setting
    StudyConfig.study_logger_name = gc.FILE_LOG_NAME  # StudyConfig.config_glb.get_value(gc.STUDY_LOGGER_NAME_CFG_PATH)
    StudyConfig.study_logging_level = StudyConfig.config_glb.get_value(
        gc.STUDY_LOGGING_LEVEL_CFG_PATH)

    return True
Example #5
    def load_project_config_into_main(self, project):
        # load project specific "project_config" config file
        cfg_project = ConfigData(
            gc.CONFIG_FILE_PROJECT.replace('{project}', project))
        if cfg_project.loaded:
            # if cfg_project was loaded, update it with the environment specific settings (from project_location config)
            cfg_project_updated = self.update_cfg_dictionary_with_location_details(
                gc.CONFIG_FILE_PROJECT_LOCATION, self.project,
                cfg_project.get_whole_dictionary())
            # update main config with the outcome of the previous updates
            self.conf_main.update(cfg_project_updated)
Example #6
def convert_sub_aliq_to_aliquot(sa, assay):
    aliquot = sa
    fl_cfg_dict = ConfigData(gc.CONFIG_FILE_DICTIONARY)
    assay_postfixes = fl_cfg_dict.get_value('assay_sub_aliquot_postfix/' + assay)  # get_item_by_key
    if assay_postfixes is not None:
        for assay_postfix in assay_postfixes:
            apf_len = len(assay_postfix)
            if sa[-apf_len:] == assay_postfix:
                aliquot = sa[:len(sa) - apf_len]
                break  # exit loop if a match was found
    return aliquot
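To make the stripping rule concrete, here is a standalone sketch of the same logic with a hard-coded postfix list standing in for the assay_sub_aliquot_postfix/<assay> dictionary entry (the postfix values are assumptions):

def strip_sub_aliquot_postfix(sa, postfixes):
    # return the aliquot id by removing the first matching postfix, if any
    for postfix in postfixes:
        if sa.endswith(postfix):
            return sa[:-len(postfix)]
    return sa  # no postfix matched: the id is returned unchanged

assert strip_sub_aliquot_postfix('AB1234_R1', ['_R1', '_R2']) == 'AB1234'
assert strip_sub_aliquot_postfix('AB1234', ['_R1', '_R2']) == 'AB1234'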
Example #7
def key_exists_in_dict(key, section):
    fl_cfg_dict = ConfigData(gc.CONFIG_FILE_DICTIONARY)
    key = replace_unacceptable_chars(key, gc.ASSAY_CHARS_TO_REPLACE)
    try:
        v = fl_cfg_dict.get_item_by_key(section + "/" + key)
        if v is not None:
            return True
        else:
            return False
    except Exception:
        return False
Example #8
def get_dict_value(key, section):
    fl_cfg_dict = ConfigData(gc.CONFIG_FILE_DICTIONARY)
    # replace spaces and slashes with "_"
    key = replace_unacceptable_chars(key, gc.ASSAY_CHARS_TO_REPLACE)
    try:
        v = fl_cfg_dict.get_item_by_key(section + "/" + key)
        if v is not None:
            return v
        else:
            return key
    except Exception:
        return key
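Note the fallback contract shared by the two helpers above: if the key has no dictionary mapping (or the lookup raises), get_dict_value returns the sanitized key itself, so callers can use the result unconditionally. A tiny illustration with a plain dict standing in for the ConfigData lookup (the mapping below is an assumption):

def get_dict_value_sketch(dictionary, section, key):
    # mirrors get_dict_value: mapped value if present, otherwise the key itself
    v = dictionary.get(section + '/' + key)
    return v if v is not None else key

mapping = {'assay/rna_seq': 'RNASEQ'}
print(get_dict_value_sketch(mapping, 'assay', 'rna_seq'))   # -> RNASEQ
print(get_dict_value_sketch(mapping, 'assay', 'atac_seq'))  # -> atac_seq (pass-through)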
Example #9
    def prepare_form(self, form_name):
        forms_location = Path(gc.SUBMISSION_FORMS_DIR + '/' + form_name + '/' +
                              self.req_obj.project)
        # identify paths for json and config (yaml) files
        fl_path_json_common = forms_location / (form_name + '.json')
        fl_path_json_assay = forms_location / (
            form_name + '_' + str(self.req_obj.assay).lower() + '.json')
        fl_path_json_schema = forms_location / (form_name + '_schema.json')
        fl_path_cfg_common = forms_location / (form_name + '.yaml')

        # fl_path_json_common = Path(gc.SUBMISSION_FORMS_DIR + '/' + form_name + '/' + form_name + '.json')
        # fl_path_json_assay = Path(gc.SUBMISSION_FORMS_DIR + '/' + form_name + '/' + form_name + '_' +
        #                           str(self.req_obj.assay).lower() + '.json')
        # fl_path_json_schema = Path(gc.SUBMISSION_FORMS_DIR + '/' + form_name + '/' + form_name + '_schema.json')
        # fl_path_cfg_common = Path(gc.SUBMISSION_FORMS_DIR + '/' + form_name + '/' + form_name + '.yaml')

        # check the value assigned to the current request's data_source_forms_assignment
        # and select assay config file accordingly
        if self.req_obj.data_source_forms_assignment == 'file':
            fl_path_cfg_assay = forms_location / (
                form_name + '_' + str(self.req_obj.assay).lower() + '.yaml')
        elif self.req_obj.data_source_forms_assignment == 'db':
            fl_path_cfg_assay = forms_location / (
                form_name + '_' + str(self.req_obj.assay).lower() + '_db.yaml')
        else:  # any other value falls back to the same config as the 'file' assignment
            fl_path_cfg_assay = forms_location / (
                form_name + '_' + str(self.req_obj.assay).lower() + '.yaml')

        # check if assay specific json exists; if yes - use it, if not - use common one
        if cm.file_exists(fl_path_json_assay):
            fl_path_json = fl_path_json_assay
        else:
            fl_path_json = fl_path_json_common

        # load json and config files
        self.fl_json = FileJson(fl_path_json, self.req_obj.error,
                                self.req_obj.logger)
        self.fl_json_schema = FileJson(fl_path_json_schema, self.req_obj.error,
                                       self.req_obj.logger)
        self.fl_cfg_common = ConfigData(fl_path_cfg_common)
        self.fl_cfg_assay = ConfigData(fl_path_cfg_assay)
        # self.fl_cfg_dict = ConfigData(gc.CONFIG_FILE_DICTIONARY)

        # print(self.fl_json.json_data)
        # loop through all json keys and fill those with associated data
        self.get_json_keys(self.fl_json.json_data)
        # print(self.fl_json.json_data)

        # validate final json file against json schema (if present)
        self.validate_json(self.fl_json, self.fl_json_schema)
Example #10
    def load_assay_conf(self, assay, project):
        assay_cfg_path = gc.CONFIG_FILE_ASSAY.replace('{project}', project)
        cfg_assay = ConfigData(assay_cfg_path)
        assay_config = cfg_assay.get_value(assay.upper())
        if assay_config:
            self.logger.info(
                "Configuration for the {} assay was loaded from the assay config file: {}. "
                .format(assay.upper(), assay_cfg_path))
        else:
            _str = "Configuration for the {} assay CANNOT be loaded from the assay config file: {}. " \
                   "Aborting execution.".format(assay.upper(), assay_cfg_path)
            self.logger.error(_str)
            self.error.add_error(_str)

        return assay_config
Example #11
    def update_cfg_dictionary_with_location_details(self, location_path,
                                                    project, cfg_to_update):
        cfg_location = ConfigData(location_path.replace('{project}', project))
        if cfg_location.loaded:
            self.logger.info(
                'Local config file "{}" was loaded and is being used.'.format(
                    cfg_location.cfg_path))
            cfg_to_update = cm.update_dictionary_matching_keys(
                cfg_to_update, cfg_location.get_whole_dictionary())
        else:
            _str = 'Local config file "{}" was NOT loaded. Aborting processing of the current request file.'\
                .format(cfg_location.cfg_path)
            self.logger.error(_str)
            self.error.add_error(_str)
        return cfg_to_update
Example #12
    def __init__(self, api_cfg_file, log_obj):
        self.loaded = False
        # set logger object
        self.logger = log_obj
        self.dataset = None

        # set error object
        self.error = ApiError(self)

        self.logger.info(
            'Start processing API call for the following config file: {}'.
            format(api_cfg_file))

        # load config file for the current api process
        cfg_file_path = gc.CONFIGS_DIR + api_cfg_file
        self.api_cfg = ConfigData(cfg_file_path)

        if not self.api_cfg.loaded:
            _str = 'Cannot load the config file: "{}"'.format(cfg_file_path)
            self.logger.error(_str)
            self.error.add_error(_str)
            return

        # get values from the config file
        self.api_name = self.api_cfg.get_value('API/name')
        self.api_url = self.api_cfg.get_value('API/url')
        self.post_fields = self.api_cfg.get_value('API/post_fields')

        # verify if "eval" is present in any of the post fields and perform the evaluation, if needed
        if self.post_fields:
            for pf in self.post_fields:
                self.post_fields[pf] = cm.eval_cfg_value(
                    self.post_fields[pf], self.logger, self.error)

        # if no errors were generated during init, set loaded = True
        if not self.error.errors_exist():
            self.loaded = True
class Inquiry(File):
    def __init__(self, filepath, conf_main=None, file_type=2, sheet_name=''):

        # load_configuration (main_cfg_obj) # load global and local configurations

        File.__init__(self, filepath, file_type)

        self.sheet_name = sheet_name  # .strip()

        if conf_main:
            self.conf_main = conf_main
        else:
            self.conf_main = ConfigData(gc.CONFIG_FILE_MAIN)

        self.error = InquiryError(self)

        self.log_handler = None
        self.logger = self.setup_logger(self.wrkdir, self.filename)
        self.logger.info(
            'Start working with Download Inquiry file {}'.format(filepath))
        self.inq_match_arr = []
        self.columns_arr = []
        self.inq_sources = {}
        self.inq_line_sources = {}

        # load common for all programs dictionary config
        self.conf_dict = DictConfigData(gc.CONFIG_FILE_DICTIONARY)
        if not self.conf_dict.loaded:
            # disqualify the current inquiry file
            _str = 'Aborting processing of the inquiry file - the following common dictionary config file cannot ' \
                   'be loaded: {}.'.format(gc.CONFIG_FILE_DICTIONARY)
            self.error.add_error(_str)
            self.logger.error(_str)
            return

        # save inquiry file structure into dedicated variables
        self.file_structure_by_col_num = self.conf_dict.get_inqury_file_structure(
            'by_col_num')
        self.file_structure_by_col_name = self.conf_dict.get_inqury_file_structure(
            'by_col_name')

        self.processed_folder = gc.INQUIRY_PROCESSED_DIR
        # if a relative path is provided, convert it to the absolute address based on the application working dir
        if not os.path.isabs(self.processed_folder):
            self.processed_folder = Path(self.wrkdir) / self.processed_folder
        else:
            self.processed_folder = Path(self.processed_folder)

        self.download_request_path = None

        self.disqualified_items = {}
        self.disqualified_inquiry_path = ''  # will store path to an inquiry file with disqualified sub-aliquots

        if not self.sheet_name or len(self.sheet_name) == 0:
            # if sheet name was not passed as a parameter, try to get it from config file
            self.sheet_name = gc.INQUIRY_EXCEL_WK_SHEET_NAME  # 'wk_sheet_name'
        # print (self.sheet_name)
        self.logger.info('Data will be loaded from worksheet: "{}"'.format(
            self.sheet_name))

        self.conf_process_entity = None

        self.db_access = DBAccess(self.logger, self.conf_main, self.error)

        self.get_file_content()

    def get_file_content(self):
        if not self.columns_arr or not self.lines_arr:
            self.columns_arr = []
            self.lines_arr = []
            if cm.file_exists(self.filepath):
                self.logger.debug('Loading file content of "{}"'.format(
                    self.filepath))

                with xlrd.open_workbook(self.filepath) as wb:
                    if not self.sheet_name or len(self.sheet_name) == 0:
                        # by default retrieve the first sheet in the excel file
                        sheet = wb.sheet_by_index(0)
                    else:
                        # if sheet name was provided
                        sheets = wb.sheet_names()  # get list of all sheets
                        if self.sheet_name in sheets:
                            # if given sheet name in the list of available sheets, load the sheet
                            sheet = wb.sheet_by_name(self.sheet_name)
                        else:
                            # report an error if given sheet name not in the list of available sheets
                            _str = (
                                'Given worksheet name "{}" was not found in the file "{}". '
                                'Verify that the worksheet name exists in the file.'
                            ).format(self.sheet_name, self.filepath)
                            self.error.add_error(_str)
                            self.logger.error(_str)

                            self.lines_arr = None
                            self.loaded = False
                            return self.lines_arr


                lines = []  # will hold content of the inquiry file as an array of arrays (rows)
                columns = []
                for i in range(sheet.ncols):
                    column = []
                    for j in range(sheet.nrows):
                        if i == 0:
                            lines.append([])  # adds an array for each new row in the inquiry file

                        # print(sheet.cell_value(i, j))
                        cell = sheet.cell(j, i)
                        cell_value = cell.value
                        # take care of number and dates received from Excel and converted to float by default
                        if cell.ctype == 2 and int(cell_value) == cell_value:
                            # the numeric value is an integer
                            cell_value = str(int(cell_value))
                        elif cell.ctype == 2:
                            # the numeric value is a float
                            cell_value = str(cell_value)
                        # convert date back to human readable date format
                        # print ('cell_value = {}'.format(cell_value))
                        if cell.ctype == 3:
                            cell_value_date = xlrd.xldate_as_datetime(
                                cell_value, wb.datemode)
                            cell_value = cell_value_date.strftime("%Y-%m-%d")
                        column.append(cell_value)  # adds value to the current column array
                        # lines[j].append('"' + cell_value + '"')  # adds value in "csv" format for a current row
                        lines[j].append(cell_value)

                    # self.columns_arr.append(','.join(column))
                    columns.append(column)  # adds a column to a list of columns

                # populate lines_arr and columns_arr properties
                self.lines_arr = lines
                self.columns_arr = columns

                # populate lineList value as required for the base class
                self.lineList = []
                for ln in lines:
                    self.lineList.append(','.join(map(str, ln)))

                wb.unload_sheet(sheet.name)

                # perform validation of the current inquiry file
                self.validate_inquiry_file()

                if self.error.exist():
                    # report that errors exist
                    self.loaded = False
                    # print(self.error.count)
                    # print(self.error.get_errors_to_str())
                    _str = 'Errors ({}) were identified during validation of the inquiry file. \nError(s): {}'.format(
                        self.error.count, self.error.get_errors_to_str())
                    self.logger.error(_str)
                else:
                    self.loaded = True

            else:
                _str = 'Loading content of the file "{}" failed since the file does not appear to exist.'.format(
                    self.filepath)
                self.error.add_error(_str)
                self.logger.error(_str)

                self.columns_arr = None
                self.lines_arr = None
                self.loaded = False
        return self.lineList

    def validate_inquiry_file(self):
        self.logger.info(
            'Start validating the current inquiry file "{}".'.format(
                self.filepath))
        row_count = 1
        failed_cnt = 0
        valid_aliquot_flag = self.conf_main.get_value(
            'Validate/aliquot_id_vs_manifest')
        valid_inquiry_values_flag = self.conf_main.get_value(
            'Validate/inquiry_values_vs_dictionary')
        inquiry_min_number_columns = self.conf_main.get_value(
            'Validate/inquiry_min_number_columns')
        inquiry_validate_number_columns = self.conf_main.get_value(
            'Validate/inquiry_validate_number_columns')
        if not inquiry_min_number_columns or not isinstance(
                inquiry_min_number_columns, int):
            inquiry_min_number_columns = 6  # set a default value if it is not provided in the config file
        if not inquiry_validate_number_columns or not isinstance(
                inquiry_validate_number_columns, int):
            inquiry_validate_number_columns = 6  # set a default value if it is not provided in the config file

        for row in self.lines_arr:
            if row_count == self.header_row_num:  # 1
                # skip this row as it is the header
                row_count += 1
                continue

            sub_al = 'ND'  # default placeholder value
            assay = ''  # default blank value
            valid_aliquot_performed = False
            skip_final_check = False

            # check if the inquiry file contains the minimum number of columns
            if len(row) < inquiry_min_number_columns:
                # disqualify the current inquiry file
                _str = 'The current inquiry file has {} columns while {} are expected and will be disqualified.' \
                    .format(len(row), inquiry_min_number_columns)
                self.error.add_error(_str)
                self.logger.error(_str)
                return
            # create a local DictConfigData object and copy there a dictionary object
            conf_dict = DictConfigData(None,
                                       self.conf_dict.get_dictionary_copy())
            # get sub-aliquot value before looping through all fields, so it can be used for reporting errors
            # also get program_code assigned to the row
            program_code = self.get_inquiry_value_by_field_name(
                'program_code', row)
            sub_al = self.get_inquiry_value_by_field_name(
                'sub-aliquot', row, False)

            # validate program_code value
            if conf_dict.key_exists_in_dict(
                    str(program_code).lower(), 'program_code'):
                # update dictionary config (conf_dict) with the program specific settings loaded into conf_dict_program
                conf_dict_program_path = gc.CONFIG_FILE_DICTIONARY_PROGRAM\
                    .replace('{program}', conf_dict.get_dict_value(str(program_code).lower(), 'program_code'))
                conf_dict_program = ConfigData(conf_dict_program_path)
                conf_dict.update(conf_dict_program.get_whole_dictionary())
            else:
                _str = 'Unexpected value "{}" was provided for "program_code" (line #{})' \
                    .format(program_code, row_count)
                self.logger.critical(_str)
                # disqualify an inquiry file row, if unexpected value was provided
                self.disqualify_inquiry_item(sub_al, _str, row)
                failed_cnt += 1
                skip_final_check = True

            if not skip_final_check:
                # go through fields and validate the provided values
                for i in range(len(row)):
                    if i + 1 > inquiry_validate_number_columns:
                        # if number of columns in the inquiry file > expected maximum, exit the loop
                        break
                    col_category = conf_dict.get_dict_value(
                        str(i + 1), 'inquiry_file_structure')
                    if col_category in ('program_code', 'sub-aliquot'):
                        # no checking is needed for the listed field, proceed further
                        continue
                    elif col_category == 'db_center_id':
                        # get center id value and validate it
                        db_center_id = row[i]
                        # validate center_code or center_id value
                        self.logger.info(
                            'Start validation of center value "{}" provided for the current row'
                            .format(db_center_id))
                        db = DBAccess(self.logger, self.conf_main,
                                      self.error)  # create DBAccess object
                        db.open_connection()
                        # test center value assuming center code was provided
                        dataset = db.validate_center_code(
                            db_center_id, program_code, 'code', 'code')
                        _str_err_out1, center_id_out1 = self.check_validation_dataset_outcome(
                            dataset, 'center_id', 'center_code')
                        if center_id_out1:
                            # center id was returned, meaning center was validated fine
                            db_center_id = center_id_out1
                        else:
                            # if center code was not validated at first attempt, validate it assuming the center id was given
                            dataset = db.validate_center_code(
                                db_center_id, program_code, 'id', 'code')
                            _str_err_out2, center_id_out2 = self.check_validation_dataset_outcome(
                                dataset, 'center_id', 'center_id')
                            if center_id_out2:
                                # center id was validated at the 2nd attempt, ignore the 1st validation attempt
                                db_center_id = center_id_out2
                            else:
                                # center validation attempts failed, report both failures
                                _str = 'Provided center value cannot be interpreted as either a code or an id; ' \
                                       'here are both validation outcomes: ' + \
                                       ' | '.join([_str_err_out1, _str_err_out2])
                                self.logger.warning(_str)
                                self.disqualify_inquiry_item(sub_al, _str, row)
                                failed_cnt += 1
                                skip_final_check = True
                                break

                        # if the aliquot validation is required, validate the sub-aliquot value using the db_center_id value
                        if valid_aliquot_flag:
                            # aliquot id validation is required
                            valid_aliquot_performed = True  # flag that aliquot validation was done
                            if isinstance(db_center_id,
                                          int):  # db_center_id.isnumeric():
                                # since center is numeric, proceed here
                                # get aliquot id based on the verified earlier assay value and given sub_aliquot id
                                aliquot = conf_dict.convert_sub_aliq_to_aliquot(
                                    sub_al, assay)
                                valid_status, valid_desc = self.db_access.validate_aliquot_id(
                                    aliquot, db_center_id)
                                if valid_status != 'OK':
                                    # disqualify an inquiry file row, if returned status is not OK
                                    _str = 'No match was found for the aliquot id "{}" (row #{}) in the manifest dataset ' \
                                           'of the database. DB response => Status: "{}"; Description: "{}".'\
                                        .format(aliquot, row_count, valid_status, valid_desc)
                                    self.logger.warning(_str)
                                    self.disqualify_inquiry_item(
                                        sub_al, _str, row)
                                    failed_cnt += 1
                                    skip_final_check = True
                                    break
                            else:
                                # report unexpected center id value
                                _str = 'Unexpected value "{}" was provided for "db_center_id" (line #{}, column #{}). This is a ' \
                                       'critical error because this value is required (based on the configuration setting ' \
                                       '"Validate/aliquot_id_vs_manifest") to validate the provided aliquot id "{}"' \
                                    .format(db_center_id, row_count, i + 1, sub_al)
                                self.logger.warning(_str)
                                # disqualify an inquiry file row, if unexpected value was provided
                                self.disqualify_inquiry_item(sub_al, _str, row)
                                failed_cnt += 1
                                skip_final_check = True
                                # break
                        else:
                            self.logger.info(
                                'Validating of the provided aliquot_id "{}" is not required based on the '
                                'value of the config parameter "Validate/aliquot_id_vs_manifest": "{}".'
                                .format(sub_al, valid_aliquot_flag))
                    else:
                        if col_category == 'assay':
                            assay = row[i].strip().lower()  # save assay value to a dedicated variable
                        if valid_inquiry_values_flag:
                            # if validation of the inquiry values vs dictionary is required
                            validate_values = []
                            validate_categories = []
                            if col_category == 'bulk_location':
                                # get inquiry_file_structure_bulk_location value
                                bulk_value_delim = conf_dict.get_dict_value(
                                    'inquiry_file_structure_bulk_location_delim',
                                    '')
                                validate_values = str(
                                    row[i]).split(bulk_value_delim)
                                validate_categories = conf_dict.get_dict_object(
                                    'inquiry_file_structure_bulk_location', '')
                            else:
                                validate_values.append(str(row[i]).lower())
                                validate_categories.append(col_category)
                            for vv, vc in zip(validate_values,
                                              validate_categories):
                                if not conf_dict.key_exists_in_dict(
                                        vv.lower(), vc):
                                    if col_category == 'bulk_location':
                                        _str = 'Unexpected value "{}" was provided for "{}" as a part of ' \
                                               'the "bulk_location" value (line #{}, column #{})' \
                                            .format(vv, vc, row_count, i + 1)
                                    else:
                                        _str = 'Unexpected value "{}" was provided for "{}" (line #{}, column #{})'\
                                            .format(vv, vc, row_count, i+1)
                                    self.logger.critical(_str)
                                    # disqualify an inquiry file row, if unexpected value was provided
                                    self.disqualify_inquiry_item(
                                        sub_al, _str, row)
                                    failed_cnt += 1
                                    skip_final_check = True
                                    break
                    if skip_final_check:
                        break

            # check that if aliquot validation is required it was actually performed
            if not skip_final_check:
                if valid_aliquot_flag and not valid_aliquot_performed:
                    _str = 'Required aliquot validation vs. database manifest was not performed for the current row ' \
                           '(#{}) and it is considered a disqualification reason (most likely the db_center_id column ' \
                           'was not provided). ' \
                        .format(row_count)
                    self.logger.critical(_str)
                    # disqualify an inquiry file row, if unexpected value was provided
                    self.disqualify_inquiry_item(sub_al, _str, row)
                    failed_cnt += 1

            row_count += 1

        self.logger.info('Finish validating the inquiry file with{}.'.format(
            ' no errors' if failed_cnt == 0 else
            ' errors; {} records were disqualified - see earlier log entries for details'
            .format(failed_cnt)))

    def check_validation_dataset_outcome(self, dataset, validation_id_column,
                                         validation_id_name):
        _str_err = ''
        validation_id_out = None
        # initialize to avoid unbound references if the dataset row lacks these keys
        status = None
        description = ''
        validation_id = None
        if dataset:
            for row in dataset:
                if 'status' in row:
                    status = row['status']
                if 'description' in row:
                    description = row['description']
                if validation_id_column in row:  # center_id
                    validation_id = row[validation_id_column]
                break  # read only first row of the dataset
            if status == 'OK':  # validation was successful
                validation_id_out = validation_id
            elif status == 'Failed':  # validation has failed
                _str_err = 'Validation of the provided {} value vs DB has Failed, description: {}'\
                    .format(validation_id_name, description)
            else:  # unexpected status value was returned
                _str_err = 'Validation of the provided {} value vs DB returned unexpected status {}'\
                    .format(validation_id_name, status)
        else:
            _str_err = 'Unexpected error was reported during validating {} in the DB. ' \
                       'Check earlier entries in the log file.'\
                .format(validation_id_name)

        return _str_err, validation_id_out

    def setup_logger(self, wrkdir, filename):

        # m_cfg = ConfigData(gc.CONFIG_FILE_MAIN)

        log_folder_name = gc.INQUIRY_LOG_DIR  # gc.LOG_FOLDER_NAME

        # m_logger_name = gc.MAIN_LOG_NAME
        # m_logger = logging.getLogger(m_logger_name)

        logger_name = gc.INQUIRY_LOG_NAME
        logging_level = self.conf_main.get_value('Logging/inquiry_log_level')

        # if a relative path is provided, convert it to the absolute address based on the application working dir
        if not os.path.isabs(log_folder_name):
            log_folder_path = Path(wrkdir) / log_folder_name
        else:
            log_folder_path = Path(log_folder_name)

        lg = setup_logger_common(
            logger_name,
            logging_level,
            log_folder_path,  # Path(wrkdir) / log_folder_name,
            str(filename) + '_' +
            time.strftime("%Y%m%d_%H%M%S", time.localtime()) + '.log')

        self.log_handler = lg['handler']
        return lg['logger']

    # combines the datasource_id for the current inquiry line;
    # different lines may have the same datasource_id and thus can share the datasource
    def get_inquiry_line_datasource_id(self, inq_line):
        datasource_id = ''

        for col in self.file_structure_by_col_name:
            if col in ['program_code', 'assay', 'source_id']:
                datasource_id += '|' + self.get_inquiry_value_by_field_name(
                    col, inq_line)
            elif 'source_' in col:
                datasource_id += '|' + self.get_inquiry_value_by_field_name(
                    col, inq_line, False)

        return datasource_id

    def get_inquiry_value_by_field_name(self,
                                        field_name,
                                        inq_line,
                                        validate_by_dictionary=None):
        if validate_by_dictionary is None:
            validate_by_dictionary = True  # set default value to True

        if field_name in self.file_structure_by_col_name:
            col_num = self.file_structure_by_col_name[field_name]
            value = inq_line[col_num - 1].strip()
        else:
            value = ''
        # validate the provided program code through the dictionary
        if validate_by_dictionary:
            value = self.conf_dict.get_dict_value(
                str(value).lower(), field_name)
        return value

    def process_inquiry_sources(self):
        cur_row = 0
        for inq_line in self.lines_arr:
            if cur_row == self.header_row_num - 1:
                # skip the header row
                cur_row += 1
                continue

            # get program code assigned to the current row
            program_code = self.get_inquiry_value_by_field_name(
                'program_code', inq_line)
            # get assay assigned to the current row
            assay = self.get_inquiry_value_by_field_name('assay', inq_line)
            # get source id assigned to the current row
            source_id = self.get_inquiry_value_by_field_name(
                'source_id', inq_line)

            # get source config file
            # 2 values are saved in tuple: program name specific path and default one.
            # if program name specific path does not exist, the default will be used
            cfg_source_path = (
                # configuration path for the current program by name
                gc.CONFIG_FILE_SOURCE_PATH\
                    .replace('{program}', program_code)\
                    .replace('{assay}', assay)\
                    .replace('{source_id}', source_id),
                # configuration path for the default program (used if no program specific path is present)
                gc.CONFIG_FILE_SOURCE_PATH \
                    .replace('{program}', 'default') \
                    .replace('{assay}', assay) \
                    .replace('{source_id}', source_id)
            )
            # get the source location config file path
            cfg_source_location_path = gc.CONFIG_FILE_SOURCE_LOCATION_PATH.replace(
                '{source_id}', source_id)

            # attempt to load configuration for the program specific path
            cfg_source = ConfigData(Path(cfg_source_path[0]))
            if not cfg_source.loaded:
                # if config was not loaded from the program specific path, load the default one
                cfg_source = ConfigData(Path(cfg_source_path[1]))

            if cfg_source.loaded:
                # proceed here if the source config was loaded
                # load source location config with location specific settings for the current source
                cfg_source_location = ConfigData(
                    Path(cfg_source_location_path))
                if cfg_source_location.loaded:
                    # if the source location config was loaded, update cfg_source config with the source location config
                    cfg_source.update(
                        cfg_source_location.get_whole_dictionary())

                # get unique id of the datasource and check if the same id was used already, reuse that in such case
                inq_line_datasource_id = self.get_inquiry_line_datasource_id(
                    inq_line)
                self.logger.info(
                    'Current inquiry row #{} was identified with the following data source id: {}'
                    .format(cur_row, inq_line_datasource_id))
                # assign source id (inq_line_datasource_id) to the current inquiry line
                self.inq_line_sources[cur_row] = inq_line_datasource_id
                if inq_line_datasource_id in self.inq_sources:
                    # reuse existing datasource
                    self.logger.info(
                        'The data source id identified for the current inquiry row #{} matches an '
                        'earlier retrieved one (for this or another row); that data source will be '
                        're-used for the current row.'.format(cur_row))
                else:
                    # create a new datasource object
                    inq_line_datasource = DataSource(self, cfg_source,
                                                     inq_line,
                                                     inq_line_datasource_id)
                    self.inq_sources[
                        inq_line_datasource_id] = inq_line_datasource
            else:
                sub_al = self.get_inquiry_value_by_field_name(
                    'sub-aliquot', inq_line, False)
                _str = 'Datasource config file for the row #{} (sub_aliquot: {}) cannot be loaded. ' \
                       'None of the expected to exist files is accessible: {}'\
                    .format(cur_row, sub_al, ' | '.join(cfg_source_path))
                self.logger.warning(_str)
                self.disqualify_inquiry_item(
                    sub_al, _str, cur_row
                )  # TODO: verify if inq_line should be used instead of curr_row
            cur_row += 1

    def process_inquiry(self):
        self.process_inquiry_sources()
        self.match_inquiry_items_to_sources()
        self.create_download_request_file()
        self.create_inquiry_file_for_disqualified_entries()

        # check for errors and put final log entry for the inquiry.
        if self.error.exist():
            _str = 'Processing of the current inquiry was finished with the following errors: {}\n'.format(
                self.error.get_errors_to_str())
            self.logger.error(_str)
        else:
            _str = 'Processing of the current inquiry was finished successfully.\n'
            self.logger.info(_str)

    def match_inquiry_items_to_sources(self):
        cur_row = -1
        for inq_line in self.lines_arr:
            cur_row += 1  # increase row counter
            if cur_row == self.header_row_num - 1:
                continue

            # program_code = str(inq_line[0]) # get program code that must be a first column
            program_code = self.get_inquiry_value_by_field_name(
                'program_code', inq_line)

            # create a local DictConfigData object and copy there a dictionary object
            conf_dict = DictConfigData(None,
                                       self.conf_dict.get_dictionary_copy())
            # update dictionary config (conf_dict) with the program specific settings loaded into conf_dict_program
            conf_dict_program_path = gc.CONFIG_FILE_DICTIONARY_PROGRAM.replace(
                '{program}', program_code)
            conf_dict_program = ConfigData(conf_dict_program_path)
            conf_dict.update(conf_dict_program.get_whole_dictionary())

            # print (inq_study_path)
            bulk_location = self.get_inquiry_value_by_field_name(
                'bulk_location', inq_line, False)
            assay = self.get_inquiry_value_by_field_name('assay', inq_line)
            sub_al = self.get_inquiry_value_by_field_name(
                'sub-aliquot', inq_line, False)

            # inq_study_path = '/'.join([program_code, bulk_location, assay])
            inq_study_path = self.conf_main.get_value(
                'Destination/study_path_template')
            inq_study_path = inq_study_path.replace('{program_code}',
                                                    program_code)
            inq_study_path = inq_study_path.replace('{bulk_location}',
                                                    bulk_location)
            inq_study_path = inq_study_path.replace('{assay}', assay)

            # check if the current sub-aliquot is part of the disqualified items dictionary
            if self.disqualified_items and sub_al in self.disqualified_items:
                # if the sub-aliquot was disqualified already, skip this line
                continue

            # identify aliquot for the given sub-aliquot
            al = conf_dict.convert_sub_aliq_to_aliquot(
                sub_al, assay)  # identify aliquot for the current inquiry line

            match = False

            # get reference to the Datasource object assigned to the current row
            if cur_row in self.inq_line_sources:
                cur_source = self.inq_sources[self.inq_line_sources[cur_row]]
            else:
                # if no data source was assigned to the current row, skip the row
                cur_source = None
                continue
            # check if any source types were disqualified during loading the datasource
            if cur_source.disqualified_data_sources:
                # if at least one source of the datasource was disqualified, skip the row using this datasource
                # and disqualify the current sub-aliquot as well
                self.disqualify_inquiry_item(
                    sub_al,
                    'Datasource associated with this aliquot_id was marked as disqualified.',
                    inq_line)
                continue

            # get a copy of the source type ids of the current datasource;
            # it will track number of items found for each source type
            cur_source_types = copy.deepcopy(cur_source.source_types)

            # loop through items of the source
            for src_item in cur_source.source_content_arr:
                match_out = False
                # attempt match by the sub-aliquot
                match_out, match_details = \
                    self.is_item_found_soft_match(sub_al, src_item['name'], src_item['soft_comparisions'], sub_al)
                if match_out:
                    match = True
                # if sub-aliquot match was not success, attempt to match by the aliquot
                elif src_item['aliquot_match']:
                    match_out, match_details = \
                        self.is_item_found_soft_match(al, src_item['name'], src_item['soft_comparisions'], sub_al)
                    if match_out:
                        match = True
                # if a match was found using one of the above methods, record the item to inq_match_arr
                if match_out:
                    # since a match was found, verify that the source path is accessible (except for web locations)
                    web_loc = src_item['web_location']
                    # real_path = os.path.realpath(src_item['path'])  # real path of the current item

                    if web_loc or os.path.exists(src_item['path']):
                        item_details = {
                            'sub-aliquot': sub_al,
                            'study': inq_study_path,
                            # 'source': src_item,
                            'source_item_name': src_item['name'],
                            'target_subfolder': src_item['target_subfolder'],
                            'real_path': src_item['path'],
                            'target_copied_item_name': src_item['target_copied_item_name'],
                            'match_details': match_details,
                            'source_type_id': src_item['source_type_id'],
                            'obj_type': src_item['obj_type'],
                            'source_name_generic': cur_source.source_name_generic
                        }
                        self.inq_match_arr.append(item_details)
                        # record the source type id of an item to track quantity of found matches for each source type
                        cur_source_types[src_item['source_type_id']]['items_count'] += 1
                    else:
                        self.disqualify_inquiry_item(
                            sub_al,
                            'A match was found, but the identified source path is not accessible. '
                            'Match details: {}. Source path: "{}".'.format(
                                match_details, src_item['path']), inq_line)

            # report if no match was found and
            # verify that a match was found for each of the source types of the current datasource
            if not match:
                # no matches were found for the current datasource
                self.disqualify_inquiry_item(
                    sub_al,
                    'No matching items (files/folders) were found in the current data source.',
                    inq_line)
            else:
                if not cur_source.allow_nomatch_per_sourcetype:
                    # some matches were found; verify that a match was found for each of the source types
                    for src_type in cur_source_types:
                        if cur_source_types[src_type]['items_count'] == 0:
                            # no matches were found for this source type
                            self.disqualify_inquiry_item(
                                sub_al,
                                'No matches were found for the "{}" source type id in the datasource.'
                                .format(src_type), inq_line)

    def is_item_found_soft_match(self, srch_item, srch_in_str, soft_match_arr,
                                 item_to_be_reported):
        out = False
        _str = ''
        # identify if the search is performed for sub_aliquot (full value) or aliquot (partial value)
        if srch_item == item_to_be_reported:
            entity = 'sub-aliquot'
        else:
            entity = 'aliquot'

        soft_match = False
        self.logger.debug("srch_item = {}| srch_in_str = {}".format(
            srch_item, srch_in_str))
        if srch_item in srch_in_str:
            out = True
            self.logger.debug("Exact match found between: {} | {}".format(
                srch_item, srch_in_str))
        else:
            if soft_match_arr:
                self.logger.debug("Starting soft match for: {} | {}".format(
                    srch_item, srch_in_str))
                for item in soft_match_arr:
                    srch_in_str = srch_in_str.replace(item['find'],
                                                      item['replace'])
                    srch_item = srch_item.replace(item['find'],
                                                  item['replace'])
                self.logger.debug(
                    "Updated for soft match: srch_item = {}| srch_in_str = {}".
                    format(srch_item, srch_in_str))
                if srch_item in srch_in_str:
                    out = True
                    soft_match = True
                    self.logger.debug(
                        "Soft match found between: {} | {}".format(
                            srch_item, srch_in_str))
        # prepare log entry
        if out:
            _str = ('Loose' if soft_match else 'Exact') + \
                   ' match was found for {} item "{}". Match values are as follows: "{}" and "{}".'\
                       .format(entity, item_to_be_reported, srch_item, srch_in_str)

        # log outcome of the match process; a "soft" match will be logged as a warning
        if out:
            if entity == 'aliquot':
                # if match was found by aliquot (partial id value), always report it as "warning"
                self.logger.warning(_str)
            else:
                # proceed here if match was found by sub-aliquot (full id value)
                if soft_match:
                    self.logger.warning(_str)
                else:
                    self.logger.info(_str)

        # prepare match details to output from this function
        match_type = ''
        if soft_match:
            # this was a soft match
            if entity == 'aliquot':
                match_type = 'loose/aliquot'
            else:
                match_type = 'loose'
        else:
            # this was an exact match
            if entity == 'aliquot':
                match_type = 'exact/aliquot'
            else:
                match_type = 'exact'

        out_details = {'match_type': match_type, 'details': _str}
        return out, out_details

    def create_download_request_file(self):
        self.logger.info("Start preparing download_request file.")
        # path for the script file being created
        rf_path = Path(gc.OUTPUT_REQUESTS_DIR + "/" +
                       time.strftime("%Y%m%d_%H%M%S", time.localtime()) + '_' +
                       self.filename.replace(' ', '') + '.tsv')

        self.download_request_path = rf_path

        if not self.inq_match_arr:
            self.logger.warning(
                'No inquiries with matched datasources exist for the current inquiry file. '
                'Skipping creating a download request file.')
            return

        with open(rf_path, "w") as rf:
            # write headers to the file
            headers = '\t'.join([
                'Source', 'Destination', 'Aliquot_id', 'Obj_Type',
                'Target_Item_Name'
            ])
            rf.write(headers + '\n')

            for item in self.inq_match_arr:
                src_path = item['real_path']  # item['source']['path']

                # prepare values for the current inquiry row to put into the outcome file
                # project_path = self.conf_process_entity.get_value('Destination/location/project_path')
                bulk_data_path = self.conf_main.get_value(
                    'Destination/bulk_data_path')
                study_path = item['study']
                target_subfolder = item['target_subfolder']  # item['source']['target_subfolder']
                sub_aliquot = item['sub-aliquot']
                obj_type = item['obj_type']
                target_copied_item_name = item['target_copied_item_name']

                # check if the current sub-aliquot is part of the disqualified items dictionary
                if self.disqualified_items and sub_aliquot in self.disqualified_items:
                    # if the sub-aliquot was disqualified already, skip this line
                    continue

                # get template for the destination path and replace placeholders with values
                # "{project_path}/{study_path}/{target_subfolder}"
                dest_path = self.conf_main.get_value(
                    'Destination/path_template')
                dest_path = dest_path.replace('{bulk_data_path}',
                                              bulk_data_path)
                dest_path = dest_path.replace('{study_path}', study_path)
                dest_path = dest_path.replace('{target_subfolder}',
                                              target_subfolder)

                line = '\t'.join([
                    str(src_path),
                    str(Path(dest_path)),
                    str(sub_aliquot),
                    str(obj_type), target_copied_item_name
                ])
                rf.write(line + '\n')

        self.logger.info(
            "Finish preparing download_request file '{}'.".format(rf_path))

    def disqualify_inquiry_item(self, sa, disqualify_status, inquiry_item):
        # adds a sub-aliquot to the dictionary of disqualified items
        # key = sub-aliquot; value: dictionary with 2 entries:
        #       'status' - reason for disqualification
        #       'inquiry_item' - array of values for the inquiry row from the inquiry file
        details = {'status': disqualify_status, 'inquiry_item': inquiry_item}
        if sa not in self.disqualified_items:
            self.disqualified_items[sa] = details
            self.logger.warning(
                'Sub-aliquot "{}" was disqualified with the following status: "{}"'
                .format(sa, disqualify_status))
        else:
            self.logger.warning(
                'Sub-aliquot "{}" was already disqualified earlier. '
                'The following disqualification call will be ignored: "{}"'.
                format(sa, disqualify_status))
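
    # Illustrative sketch (assumed values): after a call such as
    #   self.disqualify_inquiry_item('a1-001', 'source config not found', inq_row)
    # self.disqualified_items would hold:
    #   {'a1-001': {'status': 'source config not found', 'inquiry_item': inq_row}}
    # which create_inquiry_file_for_disqualified_entries() re-emits into a new inquiry file.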

    def create_inquiry_file_for_disqualified_entries(self):
        if self.disqualified_items:
            self.logger.info(
                "Start preparing inquiry file for disqualified sub-aliquots.")
            # path for the script file being created

            wb = xlwt.Workbook()  # create empty workbook object
            sh = wb.add_sheet(
                'Re-process_inquiry'
            )  # sheet name cannot be longer than 31 characters

            cur_row = 0  # first row for 0-based array
            cur_col = 0  # first col for 0-based array
            # write headers to the file
            headers = self.lines_arr[0]
            for val in headers:
                sh.write(cur_row, cur_col, val)
                cur_col += 1

            cur_row += 1

            for di in self.disqualified_items:
                fields = self.disqualified_items[di]['inquiry_item']
                cur_col = 0
                for val in fields:
                    sh.write(cur_row, cur_col, val)
                    cur_col += 1
                cur_row += 1

            if not os.path.isabs(gc.DISQUALIFIED_INQUIRIES):
                disq_dir = Path(self.wrkdir) / gc.DISQUALIFIED_INQUIRIES
            else:
                disq_dir = Path(gc.DISQUALIFIED_INQUIRIES)

            # if DISQUALIFIED_INQUIRIES folder does not exist, it will be created
            os.makedirs(disq_dir, exist_ok=True)

            # identify path for the disqualified inquiry file
            self.disqualified_inquiry_path = Path(
                str(disq_dir) + '/' +
                time.strftime("%Y%m%d_%H%M%S", time.localtime()) +
                '_reprocess_disqualified_' +
                # .stem method is used to get file name without an extension
                Path(self.filename).stem.replace(' ', '') + '.xls')

            wb.save(str(self.disqualified_inquiry_path))

            self.logger.info(
                "Successfully prepared the inquiry file for disqualified sub-aliquots and saved in '{}'."
                .format(str(self.disqualified_inquiry_path)))
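
    # Illustrative resulting path (filename assumed): an inquiry file named
    # "My Inquiry.xlsx" processed on 2024-01-15 09:30:45 would produce
    #   <wrkdir>/<gc.DISQUALIFIED_INQUIRIES>/20240115_093045_reprocess_disqualified_MyInquiry.xls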
Beispiel #14
0
    def __init__(self, filepath, main_cfg, file_type=2, sheet_name=''):

        # load_configuration (main_cfg_obj) # load global and local configurations

        File.__init__(self, filepath, file_type)

        if main_cfg:
            self.conf_main = main_cfg
        else:
            self.conf_main = ConfigData(gc.CONFIG_FILE_MAIN)
        # if cfg_path=='':
        #     self.conf_main = ConfigData(gc.CONFIG_FILE_MAIN)
        # else:
        #     self.conf_main = ConfigData(cfg_path)

        self.error = RequestError(self)

        self.log_handler = None
        self.logger = self.setup_logger(self.wrkdir, self.filename)
        self.logger.info(
            'Start working with Submission request file {}'.format(filepath))

        # self.file_dict = OrderedDict()
        # self.rows = OrderedDict()

        self.columnlist = []
        self.samples = []
        self.sub_aliquots = []
        self.disqualified_sub_aliquots = {}
        self.aliquots_to_subaliquots_map = {
        }  # holds the map of aliquots to sub-aliquots for interpreting DB responses
        self.disqualified_request_path = ''  # will store path to a request file with disqualified sub-aliquots
        self.project = ''
        self.bulk_location = ''
        self.assay = ''
        self.center = ''
        self.center_id = None
        self.center_code = None
        self.experiment_id = ''
        self.data_source_names = ''
        self.data_source_objects = {
        }  # dictionary to store all collected data sources for the request

        self.aliquots = None
        self.qualified_aliquots = None
        self.raw_data = None
        self.assay_data = None
        self.attachments = None
        self.submission_forms = None
        self.submission_package = None
        self.data_source_names = None
        # will hold a value corresponding to the type of data source being used (attachments do not affect it)
        # possible values are 'db' and 'file'. The value is set based on the first data source being used
        self.data_source_forms_assignment = None

        # self.sheet_name = ''
        self.sheet_name = sheet_name.strip()
        if not self.sheet_name or len(self.sheet_name) == 0:
            # if sheet name was not passed as a parameter, try to get it from config file
            self.sheet_name = gc.REQUEST_EXCEL_WK_SHEET_NAME  # 'wk_sheet_name'
        # print (self.sheet_name)
        self.logger.info('Data will be loaded from worksheet: "{}"'.format(
            self.sheet_name))

        self.conf_assay = None

        self.get_file_content()
    def process_inquiry_sources(self):
        cur_row = 0
        for inq_line in self.lines_arr:
            if cur_row == self.header_row_num - 1:
                # skip the header row
                cur_row += 1
                continue

            # get program code assigned to the current row
            program_code = self.get_inquiry_value_by_field_name(
                'program_code', inq_line)
            # get assay assigned to the current row
            assay = self.get_inquiry_value_by_field_name('assay', inq_line)
            # get source id assigned to the current row
            source_id = self.get_inquiry_value_by_field_name(
                'source_id', inq_line)

            # get source config file
            # 2 values are saved in a tuple: a program-specific path and a default one.
            # if the program-specific path does not exist, the default will be used
            cfg_source_path = (
                # configuration path for the current program by name
                gc.CONFIG_FILE_SOURCE_PATH\
                    .replace('{program}', program_code)\
                    .replace('{assay}', assay)\
                    .replace('{source_id}', source_id),
                # configuration path for the default program (used if no program specific path is present)
                gc.CONFIG_FILE_SOURCE_PATH \
                    .replace('{program}', 'default') \
                    .replace('{assay}', assay) \
                    .replace('{source_id}', source_id)
            )
            # get the source location config file path
            cfg_source_location_path = gc.CONFIG_FILE_SOURCE_LOCATION_PATH.replace(
                '{source_id}', source_id)

            # attempt to load configuration for the program specific path
            cfg_source = ConfigData(Path(cfg_source_path[0]))
            if not cfg_source.loaded:
                # if config was not loaded from the program specific path, load the default one
                cfg_source = ConfigData(Path(cfg_source_path[1]))

            if cfg_source.loaded:
                # proceed here if the source config was loaded
                # load source location config with location specific settings for the current source
                cfg_source_location = ConfigData(
                    Path(cfg_source_location_path))
                if cfg_source_location.loaded:
                    # if the source location config was loaded, update cfg_source config with the source location config
                    cfg_source.update(
                        cfg_source_location.get_whole_dictionary())

                # get the unique id of the datasource; if the same id was used already, the existing datasource is reused
                inq_line_datasource_id = self.get_inquiry_line_datasource_id(
                    inq_line)
                self.logger.info(
                    'Current inquiry row #{} was identified with the following data source id: {}'
                    .format(cur_row, inq_line_datasource_id))
                # assign source id (inq_line_datasource_id) to the current inquiry line
                self.inq_line_sources[cur_row] = inq_line_datasource_id
                if inq_line_datasource_id in self.inq_sources:
                    # reuse existing datasource
                    self.logger.info(
                        'The data source id identified for the current inquiry row #{} matches one '
                        'retrieved earlier (for this or another row); the existing data source will be '
                        're-used for the current row.'.format(cur_row))
                else:
                    # create a new datasource object
                    inq_line_datasource = DataSource(self, cfg_source,
                                                     inq_line,
                                                     inq_line_datasource_id)
                    self.inq_sources[
                        inq_line_datasource_id] = inq_line_datasource
            else:
                sub_al = self.get_inquiry_value_by_field_name(
                    'sub-aliquot', inq_line, False)
                _str = 'Datasource config file for the row #{} (sub_aliquot: {}) cannot be loaded. ' \
                       'None of the expected files is accessible: {}'\
                    .format(cur_row, sub_al, ' | '.join(cfg_source_path))
                self.logger.warning(_str)
                self.disqualify_inquiry_item(
                    sub_al, _str, cur_row
                )  # TODO: verify if inq_line should be used instead of cur_row
            cur_row += 1
        pass
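
    # Illustrative sketch of the datasource re-use above (the id format is assumed):
    # if rows #2 and #5 both resolve to the same inq_line_datasource_id, row #2 creates
    # the DataSource object and stores it in self.inq_sources, while row #5 only records
    # the id in self.inq_line_sources and re-uses the already created object.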
Beispiel #16
0
import os
import sys
from os import walk
import getpass
from pathlib import Path
import traceback
from utils import Monitor
from utils import ConfigData, common as cm, common2 as cm2, global_const as gc, send_yagmail  #, send_email as email

# if executed by itself, do the following
if __name__ == '__main__':

    gc.CURRENT_PROCCESS_LOG_ID = 'monitor_file'
    # load main config file and get required values
    m_cfg = ConfigData(gc.MAIN_CONFIG_FILE)

    # setup application level logger
    cur_dir = Path(os.path.dirname(os.path.abspath(__file__)))
    mlog, log_handler = cm.setup_logger(m_cfg, cur_dir,
                                        gc.CURRENT_PROCCESS_LOG_ID)
    monitor_path = m_cfg.get_value('Location/monitor_configs')

    # Verify that target directory (df_path) is accessible for the current user (under which the app is running)
    # Identify the user under which the app is running if the df_path is not accessible
    if not os.path.exists(monitor_path):
        _str = 'Directory "{}" does not exist or not accessible for the current user. Aborting execution. ' \
               'Expected user login: "{}", Effective user: "{}"'.format(monitor_path, os.getlogin(), getpass.getuser())
        mlog.error(_str)

        # send notification email alerting about the error case
        email_subject = 'Error occurred during running file_monitoring tool.'
Beispiel #17
0
class Request(File):
    def __init__(self, filepath, main_cfg, file_type=2, sheet_name=''):

        # load_configuration (main_cfg_obj) # load global and local configurations

        File.__init__(self, filepath, file_type)

        if main_cfg:
            self.conf_main = main_cfg
        else:
            self.conf_main = ConfigData(gc.CONFIG_FILE_MAIN)
        # if cfg_path=='':
        #     self.conf_main = ConfigData(gc.CONFIG_FILE_MAIN)
        # else:
        #     self.conf_main = ConfigData(cfg_path)

        self.error = RequestError(self)

        self.log_handler = None
        self.logger = self.setup_logger(self.wrkdir, self.filename)
        self.logger.info(
            'Start working with Submission request file {}'.format(filepath))

        # self.file_dict = OrderedDict()
        # self.rows = OrderedDict()

        self.columnlist = []
        self.samples = []
        self.sub_aliquots = []
        self.disqualified_sub_aliquots = {}
        self.aliquots_to_subaliquots_map = {
        }  # holds the map of aliquots to sub-aliquots for interpreting DB responses
        self.disqualified_request_path = ''  # will store path to a request file with disqualified sub-aliquots
        self.project = ''
        self.bulk_location = ''
        self.assay = ''
        self.center = ''
        self.center_id = None
        self.center_code = None
        self.experiment_id = ''
        self.data_source_names = ''
        self.data_source_objects = {
        }  # dictionary to store all collected data sources for the request

        self.aliquots = None
        self.qualified_aliquots = None
        self.raw_data = None
        self.assay_data = None
        self.attachments = None
        self.submission_forms = None
        self.submission_package = None
        self.data_source_names = None
        # will hold a value corresponding to the type of data source being used (attachments do not affect it)
        # possible values are 'db' and 'file'. The value is set based on the first data source being used
        self.data_source_forms_assignment = None

        # self.sheet_name = ''
        self.sheet_name = sheet_name.strip()
        if not self.sheet_name or len(self.sheet_name) == 0:
            # if sheet name was not passed as a parameter, try to get it from config file
            self.sheet_name = gc.REQUEST_EXCEL_WK_SHEET_NAME  # 'wk_sheet_name'
        # print (self.sheet_name)
        self.logger.info('Data will be loaded from worksheet: "{}"'.format(
            self.sheet_name))

        self.conf_assay = None

        self.get_file_content()

    def get_file_content(self):
        if not self.columnlist:
            if cm.file_exists(self.filepath):
                self.logger.debug('Loading file content of "{}"'.format(
                    self.filepath))

                with xlrd.open_workbook(self.filepath) as wb:
                    if not self.sheet_name or len(self.sheet_name) == 0:
                        # by default retrieve the first sheet in the excel file
                        sheet = wb.sheet_by_index(0)
                    else:
                        # if sheet name was provided
                        sheets = wb.sheet_names()  # get list of all sheets
                        if self.sheet_name in sheets:
                            # if given sheet name in the list of available sheets, load the sheet
                            sheet = wb.sheet_by_name(self.sheet_name)
                        else:
                            # report an error if given sheet name not in the list of available sheets
                            _str = (
                                'Given worksheet name "{}" was not found in the file "{}". '
                                'Verify that the worksheet name exists in the file.'
                            ).format(self.sheet_name, self.filepath)
                            self.error.add_error(_str)
                            self.logger.error(_str)

                            self.lineList = None
                            self.loaded = False
                            return self.lineList

                sheet.cell_value(0, 0)

                lines = [
                ]  # will hold content of the request file as an array of arrays (rows)
                for i in range(sheet.ncols):
                    column = []
                    for j in range(sheet.nrows):
                        if i == 0:
                            lines.append(
                                []
                            )  # adds an array for each new row in the request file

                        # print(sheet.cell_value(i, j))
                        cell = sheet.cell(j, i)
                        cell_value = cell.value
                        # take care of numbers and dates received from Excel and converted to float by default
                        if cell.ctype == 2 and int(cell_value) == cell_value:
                            # the value is an integer
                            cell_value = str(int(cell_value))
                        elif cell.ctype == 2:
                            # the value is a float
                            cell_value = str(cell_value)
                        # convert date back to human readable date format
                        # print ('cell_value = {}'.format(cell_value))
                        if cell.ctype == 3:
                            cell_value_date = xlrd.xldate_as_datetime(
                                cell_value, wb.datemode)
                            cell_value = cell_value_date.strftime(
                                "%Y-%m-%directory")
                        column.append(
                            cell_value
                        )  # adds value to the current column array
                        lines[j].append(
                            '"' + str(cell_value) + '"'
                        )  # adds value in "csv" format for a current row

                    # self.columnlist.append(','.join(column))
                    self.columnlist.append(
                        column)  # adds a column to a list of columns

                # populate lineList property
                self.lineList = []
                for ln in lines:
                    self.lineList.append(','.join(ln))

                wb.unload_sheet(sheet.name)

                # load passed request parameters (by columns)
                self.get_request_parameters()

                # validate provided information
                self.logger.info(
                    'Validating provided request parameters. project: "{}", bulk location: "{}", '
                    'assay: "{}", db_center_code_or_id: "{}",'
                    'Sub-Aliquots: "{}"'.format(self.project,
                                                self.bulk_location, self.assay,
                                                self.center,
                                                self.sub_aliquots))
                self.validate_request_params()

                if self.error.exist():
                    # report that errors exist
                    self.loaded = False
                    # print(self.error.count)
                    # print(self.error.get_errors_to_str())
                    _str = 'Errors ({}) were identified during validating of the request. \nError(s): {}'.format(
                        self.error.count, self.error.get_errors_to_str())
                else:
                    self.loaded = True
                    _str = 'Request parameters were successfully validated - no errors found.'
                self.logger.info(_str)

                # combine Experiment_id out of request parameters
                if self.center_code and len(self.center_code.strip()) > 0:
                    # use center code if available
                    self.experiment_id = "_".join(
                        [self.project, self.center_code, self.assay])
                else:
                    # use provided value for the center column from request, if center_code is not available
                    self.experiment_id = "_".join(
                        [self.project, self.center, self.assay])

            else:
                _str = 'Loading content of the file "{}" failed since the file does not appear to exist.'.format(
                    self.filepath)
                self.error.add_error(_str)
                self.logger.error(_str)

                self.columnlist = None
                self.lineList = None
                self.loaded = False
        return self.lineList
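
    # Note on the cell-type handling above: xlrd reports numeric cells with
    # ctype == 2 (always a float) and date cells with ctype == 3 (an Excel serial
    # number), which is why integers, floats and dates are normalized back to
    # strings while the workbook is loaded.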

    # get all values provided in the request file
    def get_request_parameters(self):
        self.project = self.columnlist[0][1]
        self.bulk_location = self.columnlist[1][1]
        self.assay = self.columnlist[2][1].lower()
        self.center = self.columnlist[3][
            1]  # center code (if alpha numeric) or center id (if numeric)
        self.sub_aliquots = self.columnlist[4]
        if self.sub_aliquots and len(self.sub_aliquots) > 0:
            self.sub_aliquots.pop(0)  # get rid of the column header
        # self.samples = self.columnlist[5]
        # if self.samples and len(self.samples) > 0:
        #     self.samples.pop(0) # get rid of the column header

    # validates provided parameters (loaded from the submission request file)
    def validate_request_params(self):
        _str_err = ''
        _str_warn = ''
        if len(self.sub_aliquots) == 0:
            _str_err = '\n'.join([
                _str_err, 'List of provided sub-samples is empty. '
                'Aborting processing of the submission request.'
            ])
        # Check if empty sub-samples were provided
        if '' in self.sub_aliquots:
            # remove empty sub-aliquot values; iterate over a copy of the list to avoid
            # skipping entries while removing items from the list being iterated
            cleaned_cnt = 0
            for s in list(self.sub_aliquots):
                if len(s.strip()) == 0:
                    self.sub_aliquots.remove(s)
                    cleaned_cnt += 1
            if cleaned_cnt > 0:
                _str_warn = '\n'.join([
                    _str_warn,
                    'Empty sub-aliquots (count = {}) were removed from the list. '
                    'Here is the list of sub-aliquots after cleaning (count = {}): "{}" '
                    .format(cleaned_cnt, len(self.sub_aliquots),
                            self.sub_aliquots)
                ])
        # check for empty values
        if len(self.project) == 0:
            _str_err = '\n'.join([
                _str_err,
                'No Program name was provided. Aborting processing of the submission request.'
            ])
        if len(self.bulk_location) == 0:
            _str_err = '\n'.join([
                _str_err,
                'No Bulk Location was provided. Aborting processing of the submission request.'
            ])
        if len(self.assay) == 0:
            _str_err = '\n'.join([
                _str_err,
                'No Assay was provided. Aborting processing of the submission request.'
            ])
        if len(self.center) == 0:
            _str_err = '\n'.join([
                _str_err,
                'No DB Center information was provided. Aborting processing of the submission request.'
            ])

        # check for values that should match some predefined values from a dictionary
        # check assay value
        if not cm2.key_exists_in_dict(self.assay, 'assay'):
            _str_err = '\n'.join([
                _str_err,
                'Provided Assay name "{}" is not matching a list of expected assay names '
                '(as stored in "{}" dictionary file). '
                'Aborting processing of the submission request.'.format(
                    self.assay, gc.CONFIG_FILE_DICTIONARY)
            ])
        else:
            # if provided assay name is expected, convert it to the name expected by the Submission logic
            self.assay = cm2.get_dict_value(self.assay, 'assay')

        # check project value
        if not cm2.key_exists_in_dict(self.project.lower(), 'project'):
            _str_err = '\n'.join([
                _str_err,
                'Provided Program name "{}" is not matching a list of expected names '
                '(as stored in "{}" dictionary file). '
                'Aborting processing of the submission request.'.format(
                    self.project, gc.CONFIG_FILE_DICTIONARY)
            ])
        else:
            # if the provided Program name is expected, convert it to the name expected by the Submission logic
            self.project = cm2.get_dict_value(self.project.lower(), 'project')

        # validate center_code or center_id value
        self.logger.info(
            'Start validation of center value "{}" provided in the request'.
            format(self.center))
        db = DBAccess(self.logger, self.error,
                      self.conf_main)  # create DBAccess object
        db.open_connection()
        # test center value assuming center code was provided
        dataset = db.validate_center_code(self.center, self.project, 'code',
                                          'code')
        _str_err_out1, center_id_out1 = self.check_validation_dataset_outcome(
            dataset, 'center_id', 'center_code')
        if center_id_out1:
            # center id was returned, meaning center was validated fine
            self.center_id = center_id_out1
            # get center code value from the current DB dataset
            _str_err_out3, center_code = self.get_field_value_from_dataset(
                dataset, 'center_code')
            if center_code:
                # center code retrieved OK
                self.center_code = center_code
            else:
                # report an error during retrieving center_code
                _str_err = '\n'.join([_str_err, _str_err_out3])
        else:
            # if center code was not validated at first attempt, validate it assuming the center id was given
            dataset = db.validate_center_code(self.center, self.project, 'id',
                                              'code')
            _str_err_out2, center_id_out2 = self.check_validation_dataset_outcome(
                dataset, 'center_id', 'center_id')
            if center_id_out2:
                # center id was validated at the 2nd attempt, ignore the 1st failed center code validation
                self.center_id = center_id_out2
                # get center code value from the current DB dataset
                _str_err_out3, center_code = self.get_field_value_from_dataset(
                    dataset, 'center_code')
                if center_code:
                    # center code retrieved OK
                    self.center_code = center_code
                else:
                    # report an error during retrieving center_code
                    _str_err = '\n'.join([_str_err, _str_err_out3])
            else:
                # center validation attempts failed, report both failures
                _str_err = '\n'.join([_str_err, _str_err_out1, _str_err_out2])

        # get list of aliquots from list of sub-aliquots
        self.aliquots = [
            cm2.convert_sub_aliq_to_aliquot(al, self.assay)
            for al in self.sub_aliquots
        ]

        # create a map to convert aliquot value to sub_aliquot value (for processing DB responses given for aliquots)
        for sa, a in zip(self.sub_aliquots, self.aliquots):
            self.aliquots_to_subaliquots_map[a] = sa

        if self.center_id:
            self.logger.info('Start validation of aliquot ids vs DB')
            # if center id was validated in the above code, validate received aliquots vs manifest dataset in DB
            dataset = db.validate_aliquot_ids(self.center_id, self.aliquots)
            if dataset:
                # create dictionary of received aliquots/sample ids
                aliquots_to_samples_map = {}
                for row in dataset:
                    if '_aliquot_id' in row and '_sample_id' in row:
                        aliquots_to_samples_map[
                            row['_aliquot_id']] = row['_sample_id']
                # check if each aliquot id was returned from a database and get the sample id from the dataset
                for sa, a in zip(self.sub_aliquots, self.aliquots):
                    if a in aliquots_to_samples_map:
                        if len(str(aliquots_to_samples_map[a]).strip()) > 0:
                            self.samples.append(aliquots_to_samples_map[a])
                        else:
                            _str = 'Blank Sample Id value was returned from DB for the sub-aliquot id "{}". ' \
                                   'The sub-aliquot was disqualified'.format(sa)
                            self.disqualify_sub_aliquot(sa, _str)
                            _str_warn = '\n'.join([_str_warn, _str])
                    else:
                        _str = 'Sub-aliquot id "{}" was not found in the database and was disqualified'.format(
                            sa)
                        self.disqualify_sub_aliquot(sa, _str)
                        _str_warn = '\n'.join([_str_warn, _str])
            else:
                _str_err = '\n'.join([
                    _str_err,
                    'Aliquot ids cannot be validated since no data was returned from DB for '
                    'center_id = "{}" and aliquot ids as following: {} '.
                    format(self.center_id, self.aliquots)
                ])
        db = None

        # report any collected errors
        if len(_str_err) > 0:
            _str_err = 'Validation of request parameters:' + _str_err
            self.error.add_error(_str_err)
            self.logger.error(_str_err)
        # report any collected warnings
        if len(_str_warn) > 0:
            _str_warn = 'Validation of request parameters:' + _str_warn
            self.logger.warning(_str_warn)

    def check_validation_dataset_outcome(self, dataset, validation_id_column,
                                         validation_id_name):
        _str_err = ''
        row_num = 1
        validation_id_out = None
        # set defaults so the status checks below cannot hit unbound local variables
        # if the dataset row is missing any of the expected fields
        status = None
        description = ''
        validation_id = None
        if dataset:
            if len(dataset) >= row_num:
                row = dataset[row_num - 1]  # get the first row of the dataset
                if 'status' in row:
                    status = row['status']
                if 'description' in row:
                    description = row['description']
                if validation_id_column in row:  # center_id
                    validation_id = row[validation_id_column]
            if status == 'OK':  # validation was successful
                validation_id_out = validation_id
            elif status == 'Failed':  # validation has failed
                _str_err = '\n'.join([
                    _str_err,
                    'Validation of the provided {} value vs DB has Failed, description: {}'
                    .format(validation_id_name, description)
                ])
            else:  # unexpected status value was returned
                _str_err = '\n'.join([
                    _str_err,
                    'Validation of the provided {} value vs DB returned unexpected status {}'
                    .format(validation_id_name, status)
                ])
        else:
            _str_err = '\n'.join([
                _str_err,
                'Unexpected error was reported during validating {} in the DB. '
                'Check earlier entries in the log file.'.format(
                    validation_id_name)
            ])

        return _str_err, validation_id_out

    def get_field_value_from_dataset(self, dataset, field_name, row_num=None):
        # set default values
        if row_num is None:
            row_num = 1  # default row is #1

        _str_err = ''
        value_out = None
        if dataset:
            if len(dataset) >= row_num:
                row = dataset[row_num - 1]
                if field_name in row:
                    value_out = row[field_name]
        else:
            _str_err = '\n'.join([
                _str_err,
                'Unexpected error was reported during retrieving value of "{}" (row #{}) from the dataset. '
                .format(field_name, row_num)
            ])

        return _str_err, value_out

    def setup_logger(self, wrkdir, filename):

        # m_cfg = ConfigData(gc.CONFIG_FILE_MAIN)

        log_folder_name = gc.REQ_LOG_DIR  # gc.LOG_FOLDER_NAME

        # m_logger_name = gc.MAIN_LOG_NAME
        # m_logger = logging.getLogger(m_logger_name)

        logger_name = gc.REQUEST_LOG_NAME
        logging_level = self.conf_main.get_value('Logging/request_log_level')

        # if a relative path is provided, convert it to an absolute path based on the application working dir
        if not os.path.isabs(log_folder_name):
            log_folder_path = Path(wrkdir) / log_folder_name
        else:
            log_folder_path = Path(log_folder_name)

        lg = setup_logger_common(
            logger_name,
            logging_level,
            log_folder_path,  # Path(wrkdir) / log_folder_name,
            str(filename) + '_' +
            time.strftime("%Y%m%d_%H%M%S", time.localtime()) + '.log')

        self.log_handler = lg['handler']
        return lg['logger']

    def load_request_configuration(self):
        # update main config file with the project/environment specific details from additional config files
        self.load_project_config_into_main(
            self.project
        )  # loads project specific config and merges it into main config
        # load project specific assay config file
        self.conf_assay = self.load_assay_conf(self.assay, self.project)
        if self.conf_assay:
            # update loaded assay config file with project/environment specific config assay_location_config.yaml
            self.conf_assay = self.update_cfg_dictionary_with_location_details(
                gc.CONFIG_FILE_ASSAY_LOCATION, self.project, self.conf_assay)

    def process_request(self):
        self.data_source_names = cm.get_value_from_dictionary(
            'data_sources', self.conf_assay)  # self.conf_assay['data_sources']

        # path to the folder where created submission packages will be located.
        # since this location can be provided in the project config file, this assignment is happening
        # after loading the project config
        gc.OUTPUT_PACKAGES_DIR = self.conf_main.get_value(
            'Submission_location/output_packages')

        for data_source_name in self.data_source_names:
            # if isinstance(data_source_name, tuple)
            if isinstance(data_source_name, str):
                if data_source_name == 'attachment':
                    self.attachments = Attachment(self)
                elif data_source_name[-3:] == "_db":
                    self.data_source_objects[data_source_name] = DataSourceDB(
                        self, data_source_name, data_source_name)
                    if not self.data_source_forms_assignment:
                        self.data_source_forms_assignment = 'db'
                else:
                    self.data_source_objects[data_source_name] = DataSource(
                        self, data_source_name, data_source_name)
                    if not self.data_source_forms_assignment:
                        self.data_source_forms_assignment = 'file'
            elif isinstance(data_source_name, tuple):
                if data_source_name[0][-3:] == "_db":
                    self.data_source_objects[
                        data_source_name[0]] = DataSourceDB(
                            self, data_source_name[0], data_source_name[1])
                else:
                    self.data_source_objects[data_source_name[0]] = DataSource(
                        self, data_source_name[0], data_source_name[1])
            else:
                self.logger.error(
                    'Provided data source name ({}) is of unexpected format and cannot be processed.'
                    .format(data_source_name))

        # if data_source_forms_assignment was not assigned any value in the code above, assign a default to it
        # this is the case when an assay submits only attachments and does not use any assay or QC data
        if not self.data_source_forms_assignment:
            self.data_source_forms_assignment = gc.DEFAULT_DATA_SOURCE_FORMS_ASSIGNMENT

        self.submission_package = SubmissionPackage(self)

        self.create_request_for_disqualified_sub_aliquots()

        self.create_trasfer_script_file()

        # check for errors and put final log entry for the request.
        if self.error.exist():
            _str = 'Processing of the current request was finished with the following errors: {}\n'.format(
                self.error.get_errors_to_str())
            self.logger.error(_str)
        else:
            _str = 'Processing of the current request was finished successfully.\n'
            self.logger.info(_str)

    def load_assay_conf(self, assay, project):
        assay_cfg_path = gc.CONFIG_FILE_ASSAY.replace('{project}', project)
        cfg_assay = ConfigData(assay_cfg_path)
        assay_config = cfg_assay.get_value(assay.upper())
        if assay_config:
            self.logger.info(
                "Configuration for the {} assay was loaded from the assay config file: {}. "
                .format(assay.upper(), assay_cfg_path))
        else:
            _str = "Configuration for the {} assay CANNOT be loaded from the assay config file: {}. " \
                   "Aborting execution.".format(assay.upper(), assay_cfg_path)
            self.logger.error(_str)
            self.error.add_error(_str)

        return assay_config

    # def update_cfg_assay_with_location_details(self, project, cfg_assay):
    #     cfg_assay_location = ConfigData(gc.CONFIG_FILE_ASSAY_LOCATION.replace('{project}', project))
    #     if cfg_assay_location.loaded:
    #         self.logger.info('Local config file "{}" was loaded and being used.'.format(cfg_assay_location.cfg_path))
    #         cfg_assay = cm.update_dictionary_matching_keys(cfg_assay, cfg_assay_location.get_whole_dictionary())
    #     else:
    #         _str = 'Local config file "{}" was NOT loaded. Aborting processing of the current request file.'\
    #             .format(cfg_assay_location.cfg_path)
    #         self.logger.error(_str)
    #         self.error.add_error(_str)
    #     return cfg_assay

    def update_cfg_dictionary_with_location_details(self, location_path,
                                                    project, cfg_to_update):
        cfg_location = ConfigData(location_path.replace('{project}', project))
        if cfg_location.loaded:
            self.logger.info(
                'Local config file "{}" was loaded and being used.'.format(
                    cfg_location.cfg_path))
            cfg_to_update = cm.update_dictionary_matching_keys(
                cfg_to_update, cfg_location.get_whole_dictionary())
        else:
            _str = 'Local config file "{}" was NOT loaded. Aborting processing of the current request file.'\
                .format(cfg_location.cfg_path)
            self.logger.error(_str)
            self.error.add_error(_str)
        return cfg_to_update

    def load_project_config_into_main(self, project):
        # load project specific "project_config" config file
        cfg_project = ConfigData(
            gc.CONFIG_FILE_PROJECT.replace('{project}', project))
        if cfg_project.loaded:
            # if cfg_project was loaded, update it with the environment specific settings (from project_location config)
            cfg_project_updated = self.update_cfg_dictionary_with_location_details(
                gc.CONFIG_FILE_PROJECT_LOCATION, self.project,
                cfg_project.get_whole_dictionary())
            # update main config with the outcome of the previous updates
            self.conf_main.update(cfg_project_updated)

    def create_trasfer_script_file(self):
        self.logger.info("Start preparing transfer_script.sh file.")
        # path for the script file being created
        sf_path = Path(self.submission_package.submission_dir +
                       "/transfer_script.sh")

        # get script file template
        with open('scripts/' + self.project + '/transfer_script.sh',
                  'r') as ft:
            scr_tmpl = ft.read()

        # update placeholders in the script with the actual values
        smtp_server = cm.get_environment_variable(
            self.conf_main.get_item_by_key('Email/smtp_server_env_name'))
        smtp_port = cm.get_environment_variable(
            self.conf_main.get_item_by_key('Email/smtp_server_port_env_name'))
        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!smtp!}", smtp_server + ":" + str(smtp_port))
        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!to_email!}",
            ','.join(self.conf_main.get_value("Email/sent_to_emails")))
        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!from_email!}",
            self.conf_main.get_value("Email/default_from_email"))
        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!send_email_flag!}",
            str(self.conf_main.get_value("Email/send_emails")))
        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!cmd!}",
            self.conf_main.get_value("DataTransfer/transfer_command"))

        # the following will be utilized if mount point is being used by the transfer script (i.e. for Peerless)
        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!mp_cmd!}",
            self.conf_main.get_value("DataTransfer/mount_point_command"))
        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!mount_local_dir!}",
            self.conf_main.get_value("DataTransfer/mount_local_dir"))
        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!mount_remote_dir!}",
            self.conf_main.get_value("DataTransfer/mount_remote_dir"))

        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!source_dir!}", self.submission_package.submission_dir)
        scr_tmpl = cm.replace_value_in_string(
            scr_tmpl, "{!target_dir!}",
            self.conf_main.get_value("DataTransfer/remote_target_dir"))

        ssh_server = cm.get_environment_variable(
            self.conf_main.get_item_by_key('DataTransfer/ssh_server_env_name'))
        scr_tmpl = cm.replace_value_in_string(scr_tmpl, "{!ssh_server!}",
                                              str(ssh_server))
        # apply user name as the very last replacement statement, since it can be used as part of previous replacements
        ssh_user = cm.get_environment_variable(
            self.conf_main.get_item_by_key('DataTransfer/ssh_user_env_name'))
        scr_tmpl = cm.replace_value_in_string(scr_tmpl, "{!ssh_user!}",
                                              str(ssh_user))

        set_permissions = False
        set_perm_value = self.conf_main.get_value("DataTransfer/exec_permis")
        if set_perm_value:
            try:
                exec_permission = eval(set_perm_value.strip())
                set_permissions = True
            except Exception as ex:
                _str = 'Unexpected error "{}" occurred during evaluating the "DataTransfer/exec_permis" value ' \
                       '"{}" retrieved from the main config file. Permission setup operation will be skipped. \n{} '\
                    .format(ex, set_perm_value, traceback.format_exc())
                self.logger.warning(_str)
                # self.error.add_error(_str)
                set_permissions = False

        with open(sf_path, "w") as sf:
            sf.write(scr_tmpl)

        if set_permissions:
            try:
                # if permissions to be set were retrieved from config file, set them here
                st = os.stat(sf_path)
                os.chmod(sf_path, st.st_mode | exec_permission)  #stat.S_IXUSR
            except Exception as ex:
                _str = 'Unexpected error "{}" occurred during setting up permissions "{}" for the script file ' \
                       '"{}". \n{} '\
                    .format(ex, set_perm_value, sf_path, traceback.format_exc())
                self.logger.warning(_str)
                self.error.add_error(_str)
        else:
            _str = 'Permission setup was skipped for the transfer script file. ' \
                   'Note: value of "DataTransfer/exec_permis" from main config was set to "{}".'\
                                    .format(set_perm_value)
            self.logger.warning(_str)

        self.logger.info("Finish preparing '{}' file.".format(sf_path))

    def disqualify_sub_aliquot(self, sa, details):
        # adds a sub-aliquot to the dictionary of disqualified sub_aliquots
        # key = sub-aliquot, value = array of details for disqualification; 1 entry can have multiple detail reasons
        if sa in self.disqualified_sub_aliquots.keys():
            self.disqualified_sub_aliquots[sa].append(details)
        else:
            arr_details = [details]
            self.disqualified_sub_aliquots[sa] = arr_details
        self.logger.warning(
            'Sub-aliquot "{}" was disqualified with the following details: "{}"'
            .format(sa, details))

    def populate_qualified_aliquots(self):
        # reset self.qualified_aliquots array
        self.qualified_aliquots = []
        # select only aliquots that were not disqualified
        for sa, a in zip(self.sub_aliquots, self.aliquots):
            if sa not in self.disqualified_sub_aliquots:
                self.qualified_aliquots.append(a)

    def create_request_for_disqualified_sub_aliquots(self):

        # proceed only if some disqualified sub-aliquots are present
        if self.disqualified_sub_aliquots:

            self.logger.info(
                "Start preparing a request file for disqualified sub-aliquots '{}'."
                .format([val
                         for val in self.disqualified_sub_aliquots.keys()]))

            wb = xlwt.Workbook()  # create empty workbook object
            sh = wb.add_sheet(
                'Submission_Request'
            )  # sheet name cannot be longer than 31 characters

            cur_row = 0  # first row for 0-based array
            cur_col = 0  # first col for 0-based array
            # write headers to the file
            headers = self.get_headers()
            for val in headers:
                sh.write(cur_row, cur_col, val)
                cur_col += 1

            cur_row += 1

            for sa in self.sub_aliquots:
                if sa in self.disqualified_sub_aliquots.keys():
                    sh.write(cur_row, 0, self.project)
                    sh.write(cur_row, 1, self.bulk_location)
                    sh.write(cur_row, 2, self.assay)
                    sh.write(cur_row, 3, self.center)
                    sh.write(cur_row, 4, sa)
                    cur_row += 1

            self.disqualified_request_path = Path(
                gc.DISQUALIFIED_REQUESTS + '/' +
                time.strftime("%Y%m%d_%H%M%S", time.localtime()) +
                '_reprocess_disqualified_' + Path(self.filename).stem +
                '.xls')

            # if DISQUALIFIED_REQUESTS folder does not exist, it will be created
            os.makedirs(gc.DISQUALIFIED_REQUESTS, exist_ok=True)

            wb.save(str(self.disqualified_request_path))

            self.logger.info(
                "Successfully prepared the request file for disqualified sub-aliquots and saved in '{}'."
                .format(str(self.disqualified_request_path)))
Beispiel #18
0
class Monitor():
    def __init__(self, cfg_monitor_path, log_obj):
        self.action_completed = False
        self.status = []

        self.mtr_cfg_path = cfg_monitor_path
        self.log = log_obj
        self.error = MonitorError(self)
        self.mtr_cfg = ConfigData(cfg_monitor_path)
        if self.validate_config_file():
            self.loaded = True
        else:
            self.loaded = False
        cur_cfg_dir = os.path.dirname(cfg_monitor_path)
        cur_cfg_file_name = Path(os.path.abspath(cfg_monitor_path)).name
        stamp_dir = Path(str(cur_cfg_dir) + '/' + gc.STAMPS_FILES_FOLDER_NAME)
        if not os.path.exists(stamp_dir):
            os.mkdir(stamp_dir)
        stamp_file = Path(
            str(stamp_dir) + '/' +
            cur_cfg_file_name.replace('.yaml', '_stamp.yaml'))
        self.verify_config_stamp_file(stamp_file)
        self.mtr_cfg_stamp = ConfigData(stamp_file)

        self.mtr_source = None
        self.mtr_source_path = None

        if self.loaded:
            # get config file values
            self.mtr_source_dir = Path(
                cm.eval_cfg_value(
                    self.mtr_cfg.get_value('Location/source_dir'), self.log,
                    None))
            self.mtr_source_file = Path(
                self.mtr_cfg.get_value('Location/source_file'))
            found_files = cm.find_file_in_dir(self.mtr_source_dir,
                                              self.mtr_source_file, False)
            if found_files:
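                # pick the most recently modified of the matching files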
                ff_stamp = None
                for file_match in found_files:
                    if not ff_stamp or ff_stamp < os.stat(
                            Path(self.mtr_source_dir) / file_match).st_mtime:
                        ff_stamp = os.stat(
                            Path(self.mtr_source_dir) / file_match).st_mtime
                        self.mtr_source = file_match
                # self.mtr_source = found_files[0]
                self.mtr_source_path = Path(
                    self.mtr_source_dir) / self.mtr_source
            # else:
            #    self.mtr_source = None
            #    self.mtr_source_path = None
            self.mtr_destin = self.mtr_cfg.get_value('Location/destination')
            self.mtr_item = self.mtr_cfg.get_value('Monitoring/item')
            self.mtr_type = self.mtr_cfg.get_value('Monitoring/type')
            self.mtr_action = self.mtr_cfg.get_value('Monitoring/action')
            self.mtr_frequency = self.mtr_cfg.get_value('Monitoring/frequency')
            # self.mtr_email = self.mtr_cfg.get_value('Monitoring/email_notification')
            # self.mtr_email_cc = self.mtr_cfg.get_value('Monitoring/email_cc')
            # load stamp info from stamp config file
            self.mtr_sync_date = self.mtr_cfg_stamp.get_value(
                'Last_sync/date_time')
            self.mtr_watch_value = self.mtr_cfg_stamp.get_value(
                'Last_sync/watch_value')

    def verify_config_stamp_file(self, file_path):
        if not cm.file_exists(file_path):
            # if the file is not present, create an empty one
            with open(file_path, "w+"):
                pass

    def validate_config_file(self):
        # TODO: add some rules to validate the current monitoring config file
        return True

    def start_monitor(self):

        if self.mtr_source_path:
            next_sync_datetime = None  # default value
            # check if delay between monitoring events was fulfilled
            if self.mtr_sync_date and str(self.mtr_frequency).isnumeric():
                try:
                    next_sync_datetime = datetime.strptime(self.mtr_sync_date, gc.STAMP_DATETIME_FORMAT) + \
                                         timedelta(seconds=int(self.mtr_frequency))
                except Exception as ex:
                    # report unexpected error to log file
                    _str = 'Unexpected Error "{}" occurred during calculating next sync datetime. ' \
                           'Saved sync date: "{}", sync frequency: "{}"' \
                        .format(ex, self.mtr_sync_date, self.mtr_frequency)
                    self.status.append(_str)
                    _str = _str + '\n{} '.format(traceback.format_exc())
                    self.log.error(_str)
                    self.error.add_error(_str)

            if not next_sync_datetime or next_sync_datetime < datetime.now():
                self.log.info(
                    'Monitoring delay of "{}" seconds has expired since the last syncronization event on {}. '
                    'Proceeding to monitor "{}" file.'.format(
                        self.mtr_frequency if self.mtr_frequency else 'N/A',
                        self.mtr_sync_date if self.mtr_sync_date else 'N/A',
                        self.mtr_source))
                custom_action = self.action_copy  # set default value
                if self.mtr_action == 'copy':
                    custom_action = self.action_copy
                watcher = Watcher(
                    self.mtr_source_path, custom_action, self,
                    self.mtr_watch_value)  # self.mtr_item, self.mtr_type)
                watcher.watch()  # start the watch going

                # update stats in the config file
                datetime_stamp = time.strftime(gc.STAMP_DATETIME_FORMAT,
                                               time.localtime())
                self.mtr_cfg_stamp.set_value(datetime_stamp,
                                             'Last_sync/date_time')
                self.log.info(
                    'Datetime information for monitored file was recorded: Last_sync/date_time: {}'
                    .format(datetime_stamp))

            else:
                _str = 'Monitoring delay of "{}" seconds has not expired since the last synchronization event on {}. '\
                        .format(self.mtr_frequency if self.mtr_frequency else 'N/A',
                                self.mtr_sync_date if self.mtr_sync_date else 'N/A')
                self.log.info(_str)
                self.status.append(_str)
        else:
            _str = 'Source file "{}" was not found in the source directory "{}". '\
                .format(self.mtr_source_file, self.mtr_source_dir)
            self.log.warning(_str)
            self.status.append(_str)

    def action_copy(self, file_time_stamp):
        self.log.info('Start copying "{}" to "{}"'.format(
            self.mtr_source, self.mtr_destin))
        self.new_file_time_stamp = file_time_stamp
        try:
            shutil.copy(self.mtr_source_path, self.mtr_destin)
            _str = 'Copying of "{}" to "{}" completed successfully.'.format(
                self.mtr_source_path, self.mtr_destin)
            self.log.info(_str)

            self.action_completed = True
            self.status.append(_str)

            # update stats in the config file
            self.mtr_cfg_stamp.set_value(file_time_stamp,
                                         'Last_sync/watch_value')
            self.log.info(
                'Stamp information for just copied file was recorded: '
                'Last_sync/watch_value: {}'.format(file_time_stamp))

        except Exception as ex:
            # report unexpected error to log file
            _str = 'Unexpected Error "{}" occurred during copying file "{}" to "{}"\n{} ' \
                .format(ex, self.mtr_source, self.mtr_destin, traceback.format_exc())
            self.log.error(_str)
            self.error.add_error(_str)
            self.status.append(_str)
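
    # Illustrative stamp file content after a successful copy (YAML layout assumed
    # from the keys used above; the values are hypothetical):
    #   Last_sync:
    #     date_time: '20240115_093045'
    #     watch_value: 1705311045.0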
Beispiel #19
0
"""
Created on Oct. 12 2011

@author: Jason MacWilliams
"""

import sys, os, time, signal, subprocess, atexit
import zipfile
from optparse import OptionParser
#import pdb

from utils.commonFedora import connectToFedora
from utils.ConfigData import *

import Navigator

config = ConfigData()

# try to handle an abrupt shutdown more cleanly
# we also hit the shutdown handler after this, so don't bother sending it now
def shutdown_handler(signum, frame):
    # is there enough time to save the script state, do we even have to?
    print("Script terminating with signal %d" % signum)
    config.message.addLine("Script was terminated with signal %d" % signum)
    # we might also have to remove the last object as it may be corrupt
    # need to look into how an interrupt can interfere with shutil.copy, os.chown, and ffmpeg
    sys.exit(1)

def sendReport():
    config.message.send()

""" ====== M A I N ====== """
Beispiel #20
0
def get_main_config():
    if not gc.main_cfg:
        gc.main_cfg = ConfigData(gc.MAIN_CONFIG_FILE)
    return gc.main_cfg
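
# Minimal usage sketch (assumed, not from the original source): repeated calls return
# the same cached ConfigData instance, so the main config file is parsed only once.
#   cfg = get_main_config()
#   monitor_path = cfg.get_value('Location/monitor_configs')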
    def validate_inquiry_file(self):
        self.logger.info(
            'Start validating the current inquiry file "{}".'.format(
                self.filepath))
        row_count = 1
        failed_cnt = 0
        valid_aliquot_flag = self.conf_main.get_value(
            'Validate/aliquot_id_vs_manifest')
        valid_inquiry_values_flag = self.conf_main.get_value(
            'Validate/inquiry_values_vs_dictionary')
        inquiry_min_number_columns = self.conf_main.get_value(
            'Validate/inquiry_min_number_columns')
        inquiry_validate_number_columns = self.conf_main.get_value(
            'Validate/inquiry_validate_number_columns')
        if not inquiry_min_number_columns or not isinstance(
                inquiry_min_number_columns, int):
            inquiry_min_number_columns = 6  # set a default value if it is not provided in the config file
        if not inquiry_validate_number_columns or not isinstance(
                inquiry_validate_number_columns, int):
            inquiry_validate_number_columns = 6  # set a default value if it is not provided in the config file

        for row in self.lines_arr:
            if row_count == self.header_row_num:  # 1
                # skip this row as it is the header row
                row_count += 1
                continue

            sub_al = 'ND'  # set placeholder value as default
            assay = ''  # set blank value as default
            valid_aliquot_performed = False
            skip_final_check = False

            # check if the inquiry file contains the min number of columns
            if len(row) < inquiry_min_number_columns:
                # disqualify the current inquiry file
                _str = 'The current inquiry file has {} columns while at least {} are expected; the file will be disqualified.' \
                    .format(len(row), inquiry_min_number_columns)
                self.error.add_error(_str)
                self.logger.error(_str)
                return
            # create a local DictConfigData object and copy there a dictionary object
            conf_dict = DictConfigData(None,
                                       self.conf_dict.get_dictionary_copy())
            # get sub-aliquot value before looping through all fields, so it can be used for reporting errors
            # also get program_code assigned to the row
            program_code = self.get_inquiry_value_by_field_name(
                'program_code', row)
            sub_al = self.get_inquiry_value_by_field_name(
                'sub-aliquot', row, False)

            # validate program_code value
            if conf_dict.key_exists_in_dict(
                    str(program_code).lower(), 'program_code'):
                # update dictionary config (conf_dict) with the program specific settings loaded into conf_dict_program
                conf_dict_program_path = gc.CONFIG_FILE_DICTIONARY_PROGRAM\
                    .replace('{program}', conf_dict.get_dict_value(str(program_code).lower(), 'program_code'))
                conf_dict_program = ConfigData(conf_dict_program_path)
                conf_dict.update(conf_dict_program.get_whole_dictionary())
            else:
                _str = 'Unexpected value "{}" was provided for "program_code" (line #{})' \
                    .format(program_code, row_count)
                self.logger.critical(_str)
                # disqualify an inquiry file row, if unexpected value was provided
                self.disqualify_inquiry_item(sub_al, _str, row)
                failed_cnt += 1
                skip_final_check = True

            if not skip_final_check:
                # go through fields and validate the provided values
                for i in range(len(row)):
                    if i + 1 > inquiry_validate_number_columns:
                        # if number of columns in the inquiry file > expected maximum, exit the loop
                        break
                    col_category = conf_dict.get_dict_value(
                        str(i + 1), 'inquiry_file_structure')
                    if col_category in ('program_code', 'sub-aliquot'):
                        # no checking is needed for the listed fields, proceed further
                        continue
                    elif col_category == 'db_center_id':
                        # get center id value and validate it
                        db_center_id = row[i]
                        # validate center_code or center_id value
                        self.logger.info(
                            'Start validation of center value "{}" provided for the current row'
                            .format(db_center_id))
                        db = DBAccess(self.logger, self.conf_main,
                                      self.error)  # create DBAccess object
                        db.open_connection()
                        # test center value assuming center code was provided
                        dataset = db.validate_center_code(
                            db_center_id, program_code, 'code', 'code')
                        _str_err_out1, center_id_out1 = self.check_validation_dataset_outcome(
                            dataset, 'center_id', 'center_code')
                        if center_id_out1:
                            # center id was returned, meaning center was validated fine
                            db_center_id = center_id_out1
                        else:
                            # if center code was not validated at first attempt, validate it assuming the center id was given
                            dataset = db.validate_center_code(
                                db_center_id, program_code, 'id', 'code')
                            _str_err_out2, center_id_out2 = self.check_validation_dataset_outcome(
                                dataset, 'center_id', 'center_id')
                            if center_id_out2:
                                # center id was validated at the 2nd attempt, ignore the 1st validation attempt
                                db_center_id = center_id_out2
                            else:
                                # center validation attempts failed, report both failures
                                _str = 'Provided center value can be interpreted neither as code nor id; ' \
                                       'here are both validation outcomes: ' + \
                                       ' | '.join([_str_err_out1, _str_err_out2])
                                self.logger.warning(_str)
                                self.disqualify_inquiry_item(sub_al, _str, row)
                                failed_cnt += 1
                                skip_final_check = True
                                break

                        # if the aliquot validation is required, validate the sub-aliquot value using the db_center_id value
                        if valid_aliquot_flag:
                            # aliquot id validation is required
                            valid_aliquot_performed = True  # flag that aliquot validation was done
                            if isinstance(db_center_id, int):  # db_center_id.isnumeric()
                                # since center is numeric, proceed here
                                # get aliquot id based on the verified earlier assay value and given sub_aliquot id
                                aliquot = conf_dict.convert_sub_aliq_to_aliquot(
                                    sub_al, assay)
                                valid_status, valid_desc = self.db_access.validate_aliquot_id(
                                    aliquot, db_center_id)
                                if valid_status != 'OK':
                                    # disqualify an inquiry file row, if returned status is not OK
                                    _str = 'No match was found for the aliquot id "{}" (row #{}) in the manifest dataset ' \
                                           'of the database. DB response => Status: "{}"; Description: "{}".'\
                                        .format(aliquot, row_count, valid_status, valid_desc)
                                    self.logger.warning(_str)
                                    self.disqualify_inquiry_item(
                                        sub_al, _str, row)
                                    failed_cnt += 1
                                    skip_final_check = True
                                    break
                            else:
                                # report unexpected center id value
                                _str = 'Unexpected value "{}" was provided for "db_center_id" (line #{}, column #{}). This is a ' \
                                       'critical error because this value is required (based on the configuration setting ' \
                                       '"Validate/aliquot_id_vs_manifest") to validate the provided aliquot id "{}"' \
                                    .format(db_center_id, row_count, i + 1, sub_al)
                                self.logger.warning(_str)
                                # disqualify an inquiry file row, if unexpected value was provided
                                self.disqualify_inquiry_item(sub_al, _str, row)
                                failed_cnt += 1
                                skip_final_check = True
                                # break
                        else:
                            self.logger.info(
                                'Validation of the provided aliquot_id "{}" is not required based on the '
                                'value of the config parameter "Validate/aliquot_id_vs_manifest": "{}".'
                                .format(sub_al, valid_aliquot_flag))
                    else:
                        if col_category == 'assay':
                            # save assay value to a dedicated variable
                            assay = row[i].strip().lower()
                        if valid_inquiry_values_flag:
                            # if validation of the inquiry values vs dictionary is required
                            validate_values = []
                            validate_categories = []
                            if col_category == 'bulk_location':
                                # get inquiry_file_structure_bulk_location value
                                bulk_value_delim = conf_dict.get_dict_value(
                                    'inquiry_file_structure_bulk_location_delim',
                                    '')
                                validate_values = str(
                                    row[i]).split(bulk_value_delim)
                                validate_categories = conf_dict.get_dict_object(
                                    'inquiry_file_structure_bulk_location', '')
                            else:
                                validate_values.append(str(row[i]).lower())
                                validate_categories.append(col_category)
                            for vv, vc in zip(validate_values,
                                              validate_categories):
                                if not conf_dict.key_exists_in_dict(
                                        vv.lower(), vc):
                                    if col_category == 'bulk_location':
                                        _str = 'Unexpected value "{}" was provided for "{}" as a part of ' \
                                               'the "bulk_location" value (line #{}, column #{})' \
                                            .format(vv, vc, row_count, i + 1)
                                    else:
                                        _str = 'Unexpected value "{}" was provided for "{}" (line #{}, column #{})'\
                                            .format(vv, vc, row_count, i+1)
                                    self.logger.critical(_str)
                                    # disqualify an inquiry file row, if unexpected value was provided
                                    self.disqualify_inquiry_item(
                                        sub_al, _str, row)
                                    failed_cnt += 1
                                    skip_final_check = True
                                    break
                    if skip_final_check:
                        break

            # check that if aliquot validation is required it was actually performed
            if not skip_final_check:
                if valid_aliquot_flag and not valid_aliquot_performed:
                    _str = 'Required aliquot validation vs. database manifest was not performed for the current row ' \
                           '(#{}) and it is considered a disqualification reason (most likely the db_center_id column ' \
                           'was not provided). ' \
                        .format(row_count)
                    self.logger.critical(_str)
                    # disqualify an inquiry file row, if unexpected value was provided
                    self.disqualify_inquiry_item(sub_al, _str, row)
                    failed_cnt += 1

            row_count += 1

        self.logger.info('Finish validating the inquiry file with{}.'.format(
            ' no errors' if failed_cnt == 0 else
            ' errors; {} records were disqualified - see earlier log entries for details'
            .format(failed_cnt)))
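The fallback pattern near the top of validate_inquiry_file (use the configured column counts only when they are integers, otherwise a default of 6) could be factored into a small helper; a sketch under that assumption, not part of the original code:

def int_cfg_value(conf, key, default):
    # return the configured value only if it is an int; otherwise fall back to the default
    val = conf.get_value(key)
    return val if isinstance(val, int) else default

# e.g. int_cfg_value(self.conf_main, 'Validate/inquiry_min_number_columns', 6)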
class MappingFileText(File):
    def __init__(self,
                 filepath,
                 conf_source,
                 log_obj,
                 file_type=None,
                 file_delim=None):
        # setup default parameters
        if file_type is None:
            file_type = 1
        if file_delim is None:
            file_delim = ','  #'\t'

        File.__init__(self, filepath, file_type, file_delim)

        self.conf_src = ConfigData('', conf_source)
        self.logger = log_obj
        self.map = {}  # will hold a dict where key is an aliquot id and value is a list of matched file paths

        # set file properties before loading it
        self.file_delim = self.conf_src.get_value('file_delim') \
            if self.conf_src.get_value('file_delim') else self.file_delim
        self.header_row_num = self.conf_src.get_value('header_row_num') \
            if self.conf_src.get_value('header_row_num') else self.header_row_num

        # load the file
        self.get_file_content()

    def load_map(self, data_loc):
        disqualify = None
        aliquot_id_col_num = self.conf_src.get_value('aliquot_id_column_num')
        template_fields_col_num = self.conf_src.get_value(
            'template_fields_col_num')
        file_path = self.conf_src.get_value('file_path_template')
        # raw_file_name = self.conf_src.get_value('file_name_template')

        if aliquot_id_col_num is None:
            disqualify = ('' if disqualify is None else disqualify + '| ')
            disqualify = disqualify + 'Expected map file\'s configuration parameter "aliquot_id_column_num" was not provided.'
        if template_fields_col_num is None:
            disqualify = ('' if disqualify is None else disqualify + '| ')
            disqualify = disqualify + 'Expected map file\'s configuration parameter "template_fields_col_num" was not provided.'
        if file_path is None:
            disqualify = ('' if disqualify is None else disqualify + '| ')
            disqualify = disqualify + 'Expected map file\'s configuration parameter "file_path_template" was not provided.'
        if aliquot_id_col_num is not None and not isinstance(aliquot_id_col_num, int):
            disqualify = ('' if disqualify is None else disqualify + '| ')
            disqualify = disqualify + 'Non-integer value was provided for the map file\'s "aliquot_id_column_num" parameter.'
        if template_fields_col_num:
            for entry in template_fields_col_num:
                if not isinstance(template_fields_col_num[entry], int):
                    disqualify = ('' if disqualify is None else disqualify + '| ')
                    disqualify = disqualify + 'Non-integer value was provided for the map file\'s "{}" parameter.'.format(
                        entry)

        if disqualify is None:
            row_num = 0
            for row in self.lineList:
                row_num += 1
                if row_num <= self.header_row_num:
                    continue

                cur_aliquot_id = row[aliquot_id_col_num - 1]
                cur_fields = copy.deepcopy(template_fields_col_num)
                cur_raw_file_path = file_path
                # cur_raw_file_name = raw_file_name

                # combine path of the data file for the current row of mapping file
                for fld_name in cur_fields:
                    fld_val = row[cur_fields[fld_name] - 1]
                    cur_raw_file_path = cur_raw_file_path.replace(
                        '{' + fld_name + '}', fld_val)

                # print (str(Path(data_loc) / cur_raw_file_path))
                files = glob.glob(str(Path(data_loc) / cur_raw_file_path))

                if files:
                    for file in files:
                        if cur_aliquot_id not in self.map:
                            self.map[cur_aliquot_id] = []
                        self.map[cur_aliquot_id].append(file)

        return disqualify
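The {field} substitution in load_map can be illustrated in isolation; the template, column numbers, and row below are hypothetical, while the mechanics mirror the loop above:

template = '{plate}/{well}/*.raw'      # file_path_template from the map config
fields = {'plate': 2, 'well': 3}       # template_fields_col_num: field name -> column number
row = ['AL-0001', 'P01', 'A05']        # one row of the mapping file

path = template
for fld_name, col_num in fields.items():
    path = path.replace('{' + fld_name + '}', row[col_num - 1])
# path == 'P01/A05/*.raw'; glob.glob(str(Path(data_loc) / path)) then finds the data files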
def process_download_inquiries():

    # load main config file and get required values
    m_cfg = ConfigData(gc.CONFIG_FILE_MAIN)
    if not m_cfg.loaded:
        print(
            'Specified main config file ({}) was not loaded. Aborting execution.'
            .format(gc.CONFIG_FILE_MAIN))
        return 1

    # load location config file (with local value specific for the location)
    cfg_location = ConfigData(gc.CONFIG_FILE_LOCATION)
    if not cfg_location.loaded:
        print(
            'Specified location config file ({}) was not loaded. Aborting execution.'
            .format(gc.CONFIG_FILE_LOCATION))
        return 1
    # if both configs were loaded, update the main config with the location config
    m_cfg.update(cfg_location.get_whole_dictionary())
    # print ('m_cfg = {}'.format(m_cfg.cfg))
    # assign values
    common_logger_name = gc.MAIN_LOG_NAME  # m_cfg.get_value('Logging/main_log_name')

    # get path configuration values
    logging_level = m_cfg.get_value('Logging/main_log_level')
    # path to the folder where all new inquiry files will be posted
    inquiries_loc = m_cfg.get_value('Location/inquiries')

    gc.DISQUALIFIED_INQUIRIES = m_cfg.get_value(
        'Location/inquiries_disqualified')
    # get path configuration values and save them to global_const module
    # path to the folder where all application level log files will be stored (one file per run)
    gc.APP_LOG_DIR = m_cfg.get_value('Location/app_logs')
    # path to the folder where all log files for processing inquiry files will be stored
    # (one file per inquiry)
    gc.INQUIRY_LOG_DIR = m_cfg.get_value('Location/inquiry_logs_relative_path')
    # path to the folder where all processed (and renamed) inquiries will be stored
    gc.INQUIRY_PROCESSED_DIR = m_cfg.get_value(
        'Location/inquiries_processed_relative_path')
    # get config setting for the processed_add_datestamp and save it to global const module
    processed_add_datestamp = m_cfg.get_value(
        'Location/processed_add_datestamp')
    if processed_add_datestamp:
        gc.PROCESSED_ADD_DATESTAMP = processed_add_datestamp
    # path to the folder where created submission packages will be located. One package sub_folder per inquiry.
    gc.OUTPUT_REQUESTS_DIR = m_cfg.get_value('Location/output_requests')
    # path to dir with dynamically created inquiry files for disqualified aliquots
    gc.DISQUALIFIED_INQUIRIES = m_cfg.get_value(
        'Location/inquiries_disqualified_path')

    log_folder_name = gc.APP_LOG_DIR  # gc.LOG_FOLDER_NAME

    # this variable defines whether the Data Downloader app will be executed at the end of processing inquiries
    run_data_download = m_cfg.get_value('Execute/run_data_downloader')
    # path to the Data Downloader tool
    gc.DATA_DOWNLOADER_PATH = m_cfg.get_value('Location/data_downloader_path')

    prj_wrkdir = os.path.dirname(os.path.abspath(__file__))

    email_msgs = []
    # email_attchms = []

    inquiries_path = Path(inquiries_loc)

    # get current location of the script and create Log folder
    # if a relative path provided, convert it to the absolute address based on the application working dir
    if not os.path.isabs(log_folder_name):
        logdir = Path(prj_wrkdir) / log_folder_name
    else:
        logdir = Path(log_folder_name)
    # logdir = Path(prj_wrkdir) / log_folder_name  # 'logs'
    lg_filename = time.strftime("%Y%m%d_%H%M%S", time.localtime()) + '.log'

    lg = setup_logger_common(common_logger_name, logging_level, logdir,
                             lg_filename)  # logging_level
    mlog = lg['logger']

    mlog.info(
        'Start processing download inquiries in "{}"'.format(inquiries_path))

    try:

        (root, source_inq_dirs, _) = next(walk(inquiries_path))

        inq_proc_cnt = 0
        errors_present = 'OK'

        for inq_dir in source_inq_dirs:
            source_inquiry_path = Path(root) / inq_dir
            mlog.info(
                'Selected for processing inquiry source: "{}", full path: {}'.
                format(inq_dir, source_inquiry_path))

            (_, _, inq_files) = next(walk(source_inquiry_path))

            # filter only excel files for processing as inquiries
            inquiries = [
                fl for fl in inq_files if fl.endswith(('xlsx', 'xls'))
            ]
            # filter out temp files (starting with '~$') created when an excel file is open
            inquiries = [fl for fl in inquiries if not fl.startswith('~$')]

            mlog.info('Inquiry files presented (count = {}): "{}"'.format(
                len(inquiries), inquiries))

            for inq_file in inquiries:
                inq_path = Path(source_inquiry_path) / inq_file

                # email_msgs = []
                # email_attchms = []

                try:
                    # print('--------->Process file {}'.format(inq_path))
                    mlog.info('The following Inquiry file was selected: "{}".'.
                              format(inq_path))

                    # save timestamp of beginning of the file processing
                    ts = time.strftime("%Y%m%d_%H%M%S", time.localtime())

                    inq_obj = Inquiry(inq_path, m_cfg)

                    if inq_obj and inq_obj.loaded:
                        # proceed processing inquiry
                        mlog.info('Inquiry file was successfully loaded.')
                        mlog.info(
                            'Starting processing Download Inquiry file: "{}".'.
                            format(inq_path))

                        inq_obj.process_inquiry()

                        mlog.info(
                            'Processing of Download Inquiry was finished for {}'
                            .format(inq_path))

                    inq_proc_cnt += 1

                    # check whether any errors were identified and set the status variable accordingly
                    if not inq_obj.error.exist():
                        if not inq_obj.disqualified_items:
                            # no disqualified sub-aliquots present
                            fl_status = 'OK'
                            _str = 'Processing status: "{}". Download Inquiry: {}'.format(
                                fl_status, inq_path)
                            # errors_present = 'OK'  # this variable is set to OK by default, no update needed
                        else:
                            # some disqualified sub-aliquots are present
                            fl_status = 'OK_with_Disqualifications'
                            _str = 'Processing status: "{}". Download Inquiry: {}'.format(
                                fl_status, inq_path)
                            if not errors_present == 'ERROR':
                                errors_present = 'DISQUALIFY'
                    else:
                        fl_status = 'ERROR'
                        _str = 'Processing status: "{}". Check processing log file for this inquiry: {}' \
                            .format(fl_status, inq_obj.logger.handlers[0])
                        errors_present = 'ERROR'

                    if fl_status == "OK":
                        mlog.info(_str)
                    else:
                        mlog.warning(_str)

                    processed_dir = inq_obj.processed_folder  # 'Processed'
                    # combine the name of the processed file
                    inq_processed_name = fl_status + '_' + str(
                        inq_file).replace(' ', '_').replace('__', '_')
                    if gc.PROCESSED_ADD_DATESTAMP:
                        inq_processed_name = ts + '_' + inq_processed_name
                    # move processed files to Processed folder
                    fl_processed_name = cm.move_file_to_processed(
                        inq_path, inq_processed_name, processed_dir,
                        inq_obj.logger, inq_obj.error)
                    if fl_processed_name:
                        mlog.info(
                            'Processed file "{}" was moved (renamed) to: "{}"'.format(
                                inq_path, processed_dir / fl_processed_name))
                    else:
                        errors_present = errors_present + '|MoveProcessedError'
                        mlog.warning(
                            'Moving the processed file "{}" was not successful due to some errors '
                            'reported in the request\'s log file {}.'.format(
                                inq_path, inq_obj.log_handler.baseFilename))

                    # preps for email notification
                    # create a dictionary to feed into template for preparing an email body
                    template_feeder = {
                        'file_num': inq_proc_cnt,
                        'file_path': str(inq_path),
                        'file_path_new': (str(processed_dir / fl_processed_name)
                                          if processed_dir and fl_processed_name else None),
                        'inq_obj_errors_cnt': inq_obj.error.count,
                        'log_file_path': inq_obj.log_handler.baseFilename,
                        'dld_request_file_path': str(inq_obj.download_request_path),
                        'inq_sources': inq_obj.inq_sources,
                        'inq_match_aliquots': inq_obj.inq_match_arr,
                        'inq_disqul_aliquots': inq_obj.disqualified_items,
                        'inq_disqul_reprocess_path': str(inq_obj.disqualified_inquiry_path)
                    }
                    email_body_part = cm.populate_email_template(
                        'processed_inquiry.html', template_feeder)
                    email_msgs.append(email_body_part)

                    # deactivate the current Inquiry logger
                    deactivate_logger_common(inq_obj.logger,
                                             inq_obj.log_handler)
                    inq_obj = None

                except Exception as ex:
                    # report an error to log file and proceed to next file.
                    mlog.error(
                        'Error "{}" occurred during processing file: {}\n{} '.
                        format(ex, inq_path, traceback.format_exc()))
                    raise

        mlog.info('Number of successfully processed Inquiries = {}'.format(
            inq_proc_cnt))

        # start Data Download request if proper config setting was provided
        dd_status = {'status': '', 'message': ''}
        if run_data_download:
            # start process
            mlog.info(
                'Starting Data Downloader app asynchronously: "{}".'.format(
                    gc.DATA_DOWNLOADER_PATH))
            try:
                dd_process = cm.start_external_process_async(
                    gc.DATA_DOWNLOADER_PATH)
                # check if it is running
                dd_status = cm.check_external_process(dd_process)
                mlog.info(
                    'Status of running Data Downloader app: "{}".'.format(
                        dd_status))
            except Exception as ex:
                # report unexpected error during starting Data Downloader
                _str = 'Unexpected Error "{}" occurred during an attempt to start Data Downloader app ({})\n{} ' \
                    .format(ex, gc.DATA_DOWNLOADER_PATH, traceback.format_exc())
                mlog.critical(_str)
                dd_status = {'status': 'Error', 'message': _str}

        mlog.info('Preparing to send notification email.')

        email_to = m_cfg.get_value('Email/send_to_emails')
        email_subject = 'processing of download inquiry. '

        if inq_proc_cnt > 0:  # inquiries and len(inquiries) > 0:
            # collect final details and send email about this study results

            err_present = errors_present.split('|')  # get all statuses into an array; the 1st element is the main status
            if err_present:
                # set email subject based on the main status err_present[0]
                if err_present[0] == 'OK':
                    email_subject = 'SUCCESSFUL ' + email_subject
                elif err_present[0] == 'DISQUALIFY':
                    email_subject = 'SUCCESSFUL (with disqualifications) ' + email_subject
                else:
                    email_subject = 'ERROR(s) present during ' + email_subject
            if len(err_present) > 1:
                if err_present[1] == 'MoveProcessedError':
                    email_subject = email_subject + ' Error moving inquiry to processed.'

            if dd_status and 'status' in dd_status and dd_status['status'].lower() == 'error':
                email_subject = email_subject + ' Errors starting Data Downloader.'

            # create a dictionary to feed into template for preparing an email body
            template_feeder = {
                'inq_cnt': inq_proc_cnt,
                'run_data_download': run_data_download,
                'downloader_path': gc.DATA_DOWNLOADER_PATH,
                'downloader_start_status': dd_status['status'].lower(),
                'processed_details': '<br/>'.join(email_msgs)
            }
            email_body = cm.populate_email_template('processed_inquiries.html',
                                                    template_feeder)

            # remove return characters from the body of the email, to keep just clean html code
            email_body = email_body.replace("\r", "")
            email_body = email_body.replace("\n", "")

            # print ('email_subject = {}'.format(email_subject))
            # print('email_body = {}'.format(email_body))

            mlog.info(
                'Sending a status email with subject "{}" to "{}".'.format(
                    email_subject, email_to))

            try:
                if m_cfg.get_value('Email/send_emails'):
                    email.send_yagmail(
                        emails_to=email_to,
                        subject=email_subject,
                        message=email_body
                        # commented out adding attachments, since some log files go over the 25GB limit and fail email sending
                        # ,attachment_path=email_attchms
                    )
            except Exception as ex:
                # report unexpected error during sending emails to a log file and continue
                _str = 'Unexpected Error "{}" occurred during an attempt to send email upon ' \
                       'finishing processing "{}" study: {}\n{} ' \
                    .format(ex, inq_path, os.path.abspath(__file__), traceback.format_exc())
                mlog.critical(_str)

            mlog.info(
                'End of processing of download inquiries in "{}".'.format(
                    inquiries_path))

    except Exception as ex:
        # report unexpected error to log file
        _str = 'Unexpected Error "{}" occurred during processing file: {}\n{} ' \
            .format(ex, os.path.abspath(__file__), traceback.format_exc())
        mlog.critical(_str)
        raise

    sys.exit()
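Note that errors_present above is a pipe-delimited status string; a reduced sketch of how the subject-line logic consumes it:

errors_present = 'DISQUALIFY|MoveProcessedError'
parts = errors_present.split('|')     # the 1st element is the main status
main_status, extras = parts[0], parts[1:]
# main_status == 'DISQUALIFY' -> "SUCCESSFUL (with disqualifications) ..." subject
# 'MoveProcessedError' in extras -> " Error moving inquiry to processed." suffix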
Beispiel #24
0
    def __init__(self, study_cfg):
        self.cfg = ConfigData(gc.CONFIG_FILE_MAIN)  # obj_cfg
        self.s_conn = self.cfg.get_item_by_key(gc.CFG_DB_CONN).strip()
        self.study_cfg = study_cfg
Beispiel #25
0
class SubmissionForm:
    def __init__(self,
                 form_name,
                 request,
                 sub_aliquot,
                 aliquot,
                 sample,
                 form_file_name_id=None):
        self.form_name = form_name
        if not form_file_name_id:
            form_file_name_id = form_name
        self.form_file_name_id = form_file_name_id
        self.req_obj = request  # reference to the current request object
        self.sub_aliquot = sub_aliquot
        self.aliquot = aliquot
        self.sample = sample
        self.error = self.req_obj.error
        self.logger = self.req_obj.logger
        self.conf_assay = request.conf_assay

        self.fl_json = None
        self.fl_json_schema = None
        self.fl_cfg_common = None
        self.fl_cfg_assay = None
        # self.fl_cfg_dict = None

        self.prepare_form(form_name)

    def prepare_form(self, form_name):
        forms_location = Path(gc.SUBMISSION_FORMS_DIR + '/' + form_name + '/' +
                              self.req_obj.project)
        # identify paths for json and config (yaml) files
        fl_path_json_common = forms_location / (form_name + '.json')
        fl_path_json_assay = forms_location / (
            form_name + '_' + str(self.req_obj.assay).lower() + '.json')
        fl_path_json_schema = forms_location / (form_name + '_schema.json')
        fl_path_cfg_common = forms_location / (form_name + '.yaml')

        # fl_path_json_common = Path(gc.SUBMISSION_FORMS_DIR + '/' + form_name + '/' + form_name + '.json')
        # fl_path_json_assay = Path(gc.SUBMISSION_FORMS_DIR + '/' + form_name + '/' + form_name + '_' +
        #                           str(self.req_obj.assay).lower() + '.json')
        # fl_path_json_schema = Path(gc.SUBMISSION_FORMS_DIR + '/' + form_name + '/' + form_name + '_schema.json')
        # fl_path_cfg_common = Path(gc.SUBMISSION_FORMS_DIR + '/' + form_name + '/' + form_name + '.yaml')

        # check the value assigned to the current request's data_source_forms_assignment
        # and select assay config file accordingly
        if self.req_obj.data_source_forms_assignment == 'file':
            fl_path_cfg_assay = forms_location / (
                form_name + '_' + str(self.req_obj.assay).lower() + '.yaml')
        elif self.req_obj.data_source_forms_assignment == 'db':
            fl_path_cfg_assay = forms_location / (
                form_name + '_' + str(self.req_obj.assay).lower() + '_db.yaml')
        else:  # any other value falls back to the default 'file' assignment
            fl_path_cfg_assay = forms_location / (
                form_name + '_' + str(self.req_obj.assay).lower() + '.yaml')

        # check if assay specific json exists; if yes - use it, if not - use common one
        if cm.file_exists(fl_path_json_assay):
            fl_path_json = fl_path_json_assay
        else:
            fl_path_json = fl_path_json_common

        # load json and config files
        self.fl_json = FileJson(fl_path_json, self.req_obj.error,
                                self.req_obj.logger)
        self.fl_json_schema = FileJson(fl_path_json_schema, self.req_obj.error,
                                       self.req_obj.logger)
        self.fl_cfg_common = ConfigData(fl_path_cfg_common)
        self.fl_cfg_assay = ConfigData(fl_path_cfg_assay)
        # self.fl_cfg_dict = ConfigData(gc.CONFIG_FILE_DICTIONARY)

        # print(self.fl_json.json_data)
        # loop through all json keys and fill those with associated data
        self.get_json_keys(self.fl_json.json_data)
        # print(self.fl_json.json_data)

        # validate final json file against json schema (if present)
        self.validate_json(self.fl_json, self.fl_json_schema)

    def get_json_keys(self, json_node, parent_keys=''):
        for key, val in json_node.items():
            # TODO: add functionality to handle JSON arrays (if those are needed)
            if isinstance(val, dict):
                if parent_keys:
                    cur_parents = '/'.join([parent_keys, key])
                else:
                    cur_parents = key
                self.get_json_keys(val, cur_parents)
            else:
                if parent_keys:
                    full_key_name = '/'.join([parent_keys, key])
                else:
                    full_key_name = key

                # json_node[key] = 'print("{}")'.format(full_key_name)
                # json_node[key] = eval(json_node[key])
                # print("JSON file - {} : {}".format(full_key_name, val))  # val # json_node[key]
                # print("Config Common - {} = {}".format(key, self.fl_cfg_common.get_value(key)))
                # print("Config Assay - {} = {}".format(key, self.fl_cfg_assay.get_value(key)))

                val = self.eval_cfg_value(
                    full_key_name, self.fl_cfg_assay.get_value(full_key_name),
                    self.fl_cfg_common.get_value(full_key_name))
                if str(val).strip() == '':
                    # if returned value is blank, create a warning in the log file
                    self.logger.warning(
                        'Blank value was reported for field "{}" '.format(
                            full_key_name))

                # check if the assigned value is a special expected blank value that doesn't need to be reported in the log
                if str(val).strip() == gc.SUBMISSION_FORM_EXPECTED_BLANK_VALUE:  # '!!blank!!'
                    json_node[key] = ''
                    self.logger.info(
                        'Field "{}" was assigned with the expected blank ("") value'
                        .format(key))
                else:
                    # assign retrieved key back to associated json key
                    json_node[key] = val
                    self.logger.info(
                        'Field "{}" was assigned with "{}" value'.format(
                            key, val))

                # print(key, '==>', json_node[key])
                pass

    def eval_cfg_value(self, key, assay_cfg_val, common_cfg_val):
        # if assay config key is not provided, use common assay val
        if assay_cfg_val:
            cfg_val = assay_cfg_val
        else:
            cfg_val = common_cfg_val

        eval_flag = gc.SUBMISSION_YAML_EVAL_FLAG  # 'eval!'

        # check if some configuration instruction/key was retrieved for the given "key"
        if cfg_val:
            if eval_flag in str(cfg_val):
                cfg_val = cfg_val.replace(eval_flag, '')  # strip the 'eval!' flag
                try:
                    out_val = eval(cfg_val)
                except Exception as ex:
                    _str = 'Error "{}" occurred during preparing submission form "{}" for sub-aliquot "{}" ' \
                           'while attempting to interpret configuration key "{}" provided for the form\'s key ' \
                           '"{}". \n{} ' \
                        .format(ex, self.form_name, self.sub_aliquot, cfg_val, key, traceback.format_exc())
                    self.logger.error(_str)
                    self.error.add_error(_str)
                    out_val = ''
            else:
                out_val = cfg_val
        else:
            # requested "key" does not exist neither in assay or common config files
            _str = 'No value was assigned to "{}" key during preparing submission form "{}" for sub-aliquot "{}".' \
                .format(key, self.form_name, self.sub_aliquot)
            self.logger.warning(_str)
            out_val = ''
        return out_val

    def get_tarball_property(self, sa, val_type):

        value = ''
        if self.req_obj.attachments:
            tar_obj = self.req_obj.attachments.aliquots_tarball_dict[sa]
            if tar_obj:
                if val_type == 'name':
                    value = os.path.basename(tar_obj['path'])
                elif val_type == 'md5':
                    value = tar_obj['md5']
        return value

    # it will retrieve any existing property value from the request object
    def get_request_value(self, property_name, check_dict=False):
        return self.get_property_value_from_object(self.req_obj, property_name,
                                                   check_dict)

    # it will retrieve any existing property value from the submission_form object
    def get_submission_form_value(self, property_name, check_dict=False):
        return self.get_property_value_from_object(self, property_name,
                                                   check_dict)

    # it will retrieve any existing property value from the rawdata object
    def get_rawdata_value(self, property_name, check_dict=False):
        # return self.get_property_value_from_object(self.req_obj.raw_data.aliquots_data_dict[self.sub_aliquot],
        #                                            property_name, check_dict, 'dict')
        return self.get_sourcedata_value('rawdata', property_name, check_dict)

    # it will retrieve any existing property value from the assay data object
    def get_assaydata_value_by_col_number(self, col_num, check_dict=False):
        # obj = list(self.req_obj.assay_data.aliquots_data_dict[self.sub_aliquot].items())
        # val = self.get_property_value_from_object(obj, col_num - 1, check_dict, 'dict', 'number')
        # if isinstance(val, tuple):
        #     return val[1]
        # else:
        #     return val
        return self.get_sourcedata_value_by_col_number('assaydata', col_num,
                                                       check_dict)

    # it will retrieve any existing property value from the assay data object
    def get_assaydata_value(self, property_name, check_dict=False):
        # return self.get_property_value_from_object(self.req_obj.assay_data.aliquots_data_dict[self.sub_aliquot],
        #                                            property_name, check_dict, 'dict')
        return self.get_sourcedata_value('assaydata', property_name,
                                         check_dict)

    # it will retrieve any existing property value (specified by name) from the data source object
    # specified by the data_source_name
    def get_sourcedata_value(self,
                             data_source_name,
                             property_name,
                             check_dict=False):
        if data_source_name in self.req_obj.data_source_names:
            return self.get_property_value_from_object(
                self.req_obj.data_source_objects[data_source_name].
                aliquots_data_dict[self.sub_aliquot], property_name,
                check_dict, 'dict')
        else:
            _str = 'Data source name ({}) requested during populating json submission form "{}" for aliquot id ' \
                   '"{}" does not exist for the current assay.'.format(data_source_name, self.form_name, self.aliquot)
            self.logger.error(_str)
            self.error.add_error(_str)
            return '#ERROR#'

    # it will retrieve any existing property value (specified by the column number) from the data source object
    # specified by the data_source_name
    def get_sourcedata_value_by_col_number(self,
                                           data_source_name,
                                           col_num,
                                           check_dict=False):
        if data_source_name in self.req_obj.data_source_names:
            obj = list(self.req_obj.data_source_objects[data_source_name].
                       aliquots_data_dict[self.sub_aliquot].items())
            val = self.get_property_value_from_object(obj, col_num - 1,
                                                      check_dict, 'dict',
                                                      'number')
            if isinstance(val, tuple):
                return val[1]
            else:
                return val
        else:
            _str = 'Data source name ({}) requested during populating json submission form "{}" for aliquot id ' \
                   '"{}" does not exist for the current assay.'.format(data_source_name, self.form_name, self.aliquot)
            self.logger.error(_str)
            self.error.add_error(_str)
            return '#ERROR#'

    # it will retrieve the value of the property named (or indexed) by the "property_val" parameter
    # from the object passed as a reference in the "obj" parameter
    # obj_type possible values: "class" (type of "obj" is class),
    #                           "dict" (type of "obj" is dictionary)
    # property_type possible values: "name" ("property_val" is the name of the property),
    #                                "number" ("property_val" is the positional index of an item in the dictionary)
    # noinspection PyUnusedLocal
    def get_property_value_from_object(self,
                                       obj,
                                       property_val,
                                       check_dict=False,
                                       obj_type='class',
                                       property_type='name'):
        property_val = str(property_val)
        if property_type == 'name':
            # if property_val name is given, proceed here
            if obj_type == 'class':
                get_item = 'obj.' + property_val + ' if hasattr(obj, "' + property_val + '") else ""'
            elif obj_type == 'dict':
                get_item = 'obj["' + property_val + '"] if "' + property_val + '" in obj else ""'
            else:
                get_item = None
        else:
            # if column number is given, proceed here
            get_item = 'obj[' + property_val + ']'

        try:
            out = eval(get_item)

            if check_dict:
                out = cm2.get_dict_value(out, property_val)

        except Exception as ex:
            _str = 'Error "{}" occurred during preparing submission form "{}" for sub-aliquot "{}" ' \
                   'while attempting to evaluate property_val: "{}". \n{} ' \
                .format(ex, self.form_name, self.sub_aliquot, get_item, traceback.format_exc())
            self.logger.error(_str)
            self.error.add_error(_str)
            out = ''
        return out

    # converts an array of values (i.e. a list of aliquots) into a list of dictionaries with a given key name
    # For example: [1, 2, 3] => [{name: 1}, {name: 2}, {name: 3}]
    @staticmethod
    def convert_simple_list_to_list_of_dict(sm_arr, key_name):
        out = []
        for a in sm_arr:
            dict_ob = {key_name: a}
            out.append(dict_ob)
        return out

    def validate_json(self, json_file, schema_file):
        try:
            validate(json_file.json_data, schema_file.json_data)
            _str = 'Validation of "{}" against "{}" was successful.'.format(
                json_file.filepath, schema_file.filepath)
            self.logger.info(_str)
        except jsonschema.exceptions.ValidationError as ve:
            _str = 'Validation of "{}" file against schema "{}" failed with the following error: \n{}' \
                .format(json_file.filepath, schema_file.filepath, ve)
            self.logger.error(_str)
            self.error.add_error(_str)
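To make the 'eval!' convention handled by eval_cfg_value concrete, a reduced standalone sketch (the flag string matches the one used above; the config values are hypothetical):

EVAL_FLAG = 'eval!'  # gc.SUBMISSION_YAML_EVAL_FLAG

def resolve(cfg_val):
    # values prefixed with 'eval!' are evaluated as Python expressions; others pass through
    if cfg_val and EVAL_FLAG in str(cfg_val):
        return eval(str(cfg_val).replace(EVAL_FLAG, ''))
    return cfg_val if cfg_val else ''

resolve("eval!'-'.join(['a', 'b'])")   # -> 'a-b'
resolve('plain value')                 # -> 'plain value'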
    def match_inquiry_items_to_sources(self):
        cur_row = -1
        for inq_line in self.lines_arr:
            cur_row += 1  # increase row counter
            if cur_row == self.header_row_num - 1:
                continue

            # program_code = str(inq_line[0]) # get program code that must be a first column
            program_code = self.get_inquiry_value_by_field_name(
                'program_code', inq_line)

            # create a local DictConfigData object and copy there a dictionary object
            conf_dict = DictConfigData(None,
                                       self.conf_dict.get_dictionary_copy())
            # update dictionary config (conf_dict) with the program specific settings loaded into conf_dict_program
            conf_dict_program_path = gc.CONFIG_FILE_DICTIONARY_PROGRAM.replace(
                '{program}', program_code)
            conf_dict_program = ConfigData(conf_dict_program_path)
            conf_dict.update(conf_dict_program.get_whole_dictionary())

            # print (inq_study_path)
            bulk_location = self.get_inquiry_value_by_field_name(
                'bulk_location', inq_line, False)
            assay = self.get_inquiry_value_by_field_name('assay', inq_line)
            sub_al = self.get_inquiry_value_by_field_name(
                'sub-aliquot', inq_line, False)

            # inq_study_path = '/'.join([program_code, bulk_location, assay])
            inq_study_path = self.conf_main.get_value(
                'Destination/study_path_template')
            inq_study_path = inq_study_path.replace('{program_code}',
                                                    program_code)
            inq_study_path = inq_study_path.replace('{bulk_location}',
                                                    bulk_location)
            inq_study_path = inq_study_path.replace('{assay}', assay)

            # check if the current sub-aliquot is not part of the disqualified items array
            if self.disqualified_items and sub_al in self.disqualified_items:
                # if the sub-aliquot was disqualified already, skip this line
                continue

            # identify aliquot for the given sub-aliquot
            al = conf_dict.convert_sub_aliq_to_aliquot(
                sub_al, assay)  # identify aliquot for the current inquiry line

            match = False

            # get reference to the Datasource object assigned to the current row
            if cur_row in self.inq_line_sources:
                cur_source = self.inq_sources[self.inq_line_sources[cur_row]]
            else:
                # if no data source was assigned to the current row, skip this row
                continue
            # check if any source types were disqualified during loading the datasource
            if cur_source.disqualified_data_sources:
                # if at least one source of the datasource was disqualified, skip the row using this datasource
                # and disqualify the current sub-aliquot as well
                self.disqualify_inquiry_item(
                    sub_al,
                    'Datasource associated with this aliquot_id was marked as disqualified.',
                    inq_line)
                continue

            # get a copy of the source type ids of the current datasource;
            # it will track number of items found for each source type
            cur_source_types = copy.deepcopy(cur_source.source_types)

            # loop through items of the source
            for src_item in cur_source.source_content_arr:
                match_out = False
                # attempt match by the sub-aliquot
                match_out, match_details = \
                    self.is_item_found_soft_match(sub_al, src_item['name'], src_item['soft_comparisions'], sub_al)
                if match_out:
                    match = True
                # if the sub-aliquot match was not successful, attempt to match by the aliquot
                elif src_item['aliquot_match']:
                    match_out, match_details = \
                        self.is_item_found_soft_match(al, src_item['name'], src_item['soft_comparisions'], sub_al)
                    if match_out:
                        match = True
                # if a match was found using one of the above methods, record the item to inq_match_arr
                if match_out:
                    # since a match was found, verify that the source path is accessible (except for web locations)
                    web_loc = src_item['web_location']
                    # real_path = os.path.realpath(src_item['path'])  # real path of the current item

                    # web locations are exempt from the existence check; local paths must exist
                    if web_loc or os.path.exists(src_item['path']):
                        item_details = {
                            'sub-aliquot': sub_al,
                            'study': inq_study_path,
                            # 'source': src_item,
                            'source_item_name': src_item['name'],
                            'target_subfolder': src_item['target_subfolder'],
                            'real_path': src_item['path'],
                            'target_copied_item_name': src_item['target_copied_item_name'],
                            'match_details': match_details,
                            'source_type_id': src_item['source_type_id'],
                            'obj_type': src_item['obj_type'],
                            'source_name_generic': cur_source.source_name_generic
                        }
                        self.inq_match_arr.append(item_details)
                        # record the source type id of an item to track quantity of found matches for each source type
                        cur_source_types[
                            src_item['source_type_id']]['items_count'] += 1
                    else:
                        self.disqualify_inquiry_item(
                            sub_al,
                            'A match was found, but the identified source path is not accessible. Match details: {}. '
                            'Source path: "{}". Real source path: "{}".'.
                            format(match_details, src_item['path'],
                                   src_item['path']), inq_line)

            # report if no match was found and
            # verify that a match was found for each of the source types of the current datasource
            if not match:
                # no matches were found for the current datasource
                self.disqualify_inquiry_item(
                    sub_al,
                    'No matching items (files/folders) were found in the current data source.',
                    inq_line)
            else:
                if not cur_source.allow_nomatch_per_sourcetype:
                    # some matches were found; verify that a match was found for each of the source types
                    for src_type in cur_source_types:
                        if cur_source_types[src_type]['items_count'] == 0:
                            # no matches were found for this source type
                            self.disqualify_inquiry_item(
                                sub_al,
                                'No matches were found for the "{}" source type id in the datasource.'
                                .format(src_type), inq_line)
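The per-source-type bookkeeping above reduces to counting matches against a deep copy of the datasource's source_types dict; a minimal sketch, with the dict structure inferred from the code:

import copy

source_types = {'raw': {'items_count': 0}, 'qc': {'items_count': 0}}
counts = copy.deepcopy(source_types)          # never mutate the datasource itself
for matched_type_id in ('raw', 'raw'):        # source_type_id of each matched item
    counts[matched_type_id]['items_count'] += 1
missing = [t for t in counts if counts[t]['items_count'] == 0]
# missing == ['qc'] -> each entry triggers disqualify_inquiry_item(...) above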
Beispiel #27
0
    def __init__(self, cfg_monitor_path, log_obj):
        self.action_completed = False
        self.status = []

        self.mtr_cfg_path = cfg_monitor_path
        self.log = log_obj
        self.error = MonitorError(self)
        self.mtr_cfg = ConfigData(cfg_monitor_path)
        self.loaded = bool(self.validate_config_file())
        cur_cfg_dir = os.path.dirname(cfg_monitor_path)
        cur_cfg_file_name = Path(os.path.abspath(cfg_monitor_path)).name
        stamp_dir = Path(str(cur_cfg_dir) + '/' + gc.STAMPS_FILES_FOLDER_NAME)
        if not os.path.exists(stamp_dir):
            os.mkdir(stamp_dir)
        stamp_file = Path(
            str(stamp_dir) + '/' +
            cur_cfg_file_name.replace('.yaml', '_stamp.yaml'))
        self.verify_config_stamp_file(stamp_file)
        self.mtr_cfg_stamp = ConfigData(stamp_file)

        self.mtr_source = None
        self.mtr_source_path = None

        if self.loaded:
            # get config file values
            self.mtr_source_dir = Path(
                cm.eval_cfg_value(
                    self.mtr_cfg.get_value('Location/source_dir'), self.log,
                    None))
            self.mtr_source_file = Path(
                self.mtr_cfg.get_value('Location/source_file'))
            found_files = cm.find_file_in_dir(self.mtr_source_dir,
                                              self.mtr_source_file, False)
            if found_files:
                ff_stamp = None
                for file_match in found_files:
                    if not ff_stamp or ff_stamp < os.stat(
                            Path(self.mtr_source_dir) / file_match).st_mtime:
                        ff_stamp = os.stat(
                            Path(self.mtr_source_dir) / file_match).st_mtime
                        self.mtr_source = file_match
                # self.mtr_source = found_files[0]
                self.mtr_source_path = Path(
                    self.mtr_source_dir) / self.mtr_source
            # else:
            #    self.mtr_source = None
            #    self.mtr_source_path = None
            self.mtr_destin = self.mtr_cfg.get_value('Location/destination')
            self.mtr_item = self.mtr_cfg.get_value('Monitoring/item')
            self.mtr_type = self.mtr_cfg.get_value('Monitoring/type')
            self.mtr_action = self.mtr_cfg.get_value('Monitoring/action')
            self.mtr_frequency = self.mtr_cfg.get_value('Monitoring/frequency')
            # self.mtr_email = self.mtr_cfg.get_value('Monitoring/email_notification')
            # self.mtr_email_cc = self.mtr_cfg.get_value('Monitoring/email_cc')
            # load stamp info from stamp config file
            self.mtr_sync_date = self.mtr_cfg_stamp.get_value(
                'Last_sync/date_time')
            self.mtr_watch_value = self.mtr_cfg_stamp.get_value(
                'Last_sync/watch_value')
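The newest-file selection loop in this __init__ can be written more compactly. A hedged equivalent, assuming cm.find_file_in_dir returns file names relative to the source directory (as the code above suggests):

import os
from pathlib import Path

def pick_latest(source_dir, found_files):
    """Return the most recently modified file name from found_files, or None."""
    if not found_files:
        return None
    # compare candidates by their modification timestamp (st_mtime)
    return max(found_files,
               key=lambda f: os.stat(Path(source_dir) / f).st_mtime)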
Example #28
0
    def __init__(self, filepath, conf_main=None, file_type=2, sheet_name=''):

        # load_configuration (main_cfg_obj) # load global and local configurations

        File.__init__(self, filepath, file_type)

        self.sheet_name = sheet_name  # .strip()

        if conf_main:
            self.conf_main = conf_main
        else:
            self.conf_main = ConfigData(gc.CONFIG_FILE_MAIN)

        self.error = InquiryError(self)

        self.log_handler = None
        self.logger = self.setup_logger(self.wrkdir, self.filename)
        self.logger.info(
            'Start working with Download Inquiry file {}'.format(filepath))
        self.inq_match_arr = []
        self.columns_arr = []
        self.inq_sources = {}
        self.inq_line_sources = {}

        # load the dictionary config that is common for all programs
        self.conf_dict = DictConfigData(gc.CONFIG_FILE_DICTIONARY)
        if not self.conf_dict.loaded:
            # disqualify the current inquiry file
            _str = 'Aborting processing of the inquiry file - the following common dictionary config file cannot ' \
                   'be loaded: {}.'.format(gc.CONFIG_FILE_DICTIONARY)
            self.error.add_error(_str)
            self.logger.error(_str)
            return

        # save the inquiry file structure into dedicated variables
        self.file_structure_by_col_num = self.conf_dict.get_inqury_file_structure(
            'by_col_num')
        self.file_structure_by_col_name = self.conf_dict.get_inqury_file_structure(
            'by_col_name')

        self.processed_folder = gc.INQUIRY_PROCESSED_DIR
        # if a relative path is provided, convert it to an absolute path based on the application working dir
        if not os.path.isabs(self.processed_folder):
            self.processed_folder = Path(self.wrkdir) / self.processed_folder
        else:
            self.processed_folder = Path(self.processed_folder)

        self.download_request_path = None

        self.disqualified_items = {}
        self.disqualified_inquiry_path = ''  # will store the path to an inquiry file with disqualified sub-aliquots

        if not self.sheet_name or len(self.sheet_name) == 0:
            # if sheet name was not passed as a parameter, try to get it from config file
            self.sheet_name = gc.INQUIRY_EXCEL_WK_SHEET_NAME  # 'wk_sheet_name'
        # print (self.sheet_name)
        self.logger.info('Data will be loaded from worksheet: "{}"'.format(
            self.sheet_name))

        self.conf_process_entity = None

        self.db_access = DBAccess(self.logger, self.conf_main, self.error)

        self.get_file_content()
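The processed-folder handling above uses a pattern that recurs throughout these examples: a relative config path is resolved against the application working directory, while an absolute one is used as-is. A small sketch of that pattern (the function and argument names are illustrative):

import os
from pathlib import Path

def resolve_path(base_dir, raw_path):
    """Resolve raw_path against base_dir unless it is already absolute."""
    if not os.path.isabs(raw_path):
        return Path(base_dir) / raw_path
    return Path(raw_path)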
Example #29
0
class MetadataDB:

    # Names of the config parameters used by this class (kept for reference):
    # CFG_DB_CONN = 'DB/mdb_conn_str'  # DB connection string
    # CFG_DB_SQL_PROC = 'DB/mdb_sql_proc_load_sample'  # DB name of the stored proc
    # CFG_DB_STUDY_ID = 'DB/mdb_study_id'  # key of the MDB study id
    # CFG_DICT_PATH = 'DB/dict_tmpl_fields_node'  # key of the dictionary path to the list of fields
    # CFG_DB_ALLOW_DICT_UPDATE = 'DB/mdb_allow_dict_update'  # "allow dict updates" flag
    # CFG_DB_ALLOW_SAMPLE_UPDATE = 'DB/mdb_allow_sample_update'  # "allow sample updates" flag

    s_conn = ''
    conn = None

    def __init__(self, study_cfg):
        self.cfg = ConfigData(gc.CONFIG_FILE_MAIN)  # obj_cfg
        self.s_conn = self.cfg.get_item_by_key(gc.CFG_DB_CONN).strip()
        self.study_cfg = study_cfg

    def open_connection(self):
        self.conn = pyodbc.connect(self.s_conn, autocommit=True)

    def submit_row(self, row, file):  # (sample_id, row_json, dict_json, filepath)

        dict_json = file.get_file_dictionary_json(True)
        filepath = str(file.filepath)
        sample_id = row.sample_id
        row_json = row.to_json()

        if not self.conn:
            self.open_connection()
        str_proc = self.cfg.get_item_by_key(gc.CFG_DB_SQL_PROC).strip()
        study_id = self.study_cfg.get_item_by_key(gc.CFG_DB_STUDY_ID).strip()
        dict_path = '$.' + self.study_cfg.get_item_by_key(
            gc.CFG_DICT_PATH).strip()
        dict_upd = self.study_cfg.get_item_by_key(
            gc.CFG_DB_ALLOW_DICT_UPDATE).strip()
        sample_upd = self.study_cfg.get_item_by_key(
            gc.CFG_DB_ALLOW_SAMPLE_UPDATE).strip()

        # prepare stored proc string to be executed
        str_proc = str_proc.replace(
            self.cfg.get_item_by_key(gc.CFG_FLD_TMPL_STUDY_ID),
            study_id)  # '{study_id}'
        str_proc = str_proc.replace(
            self.cfg.get_item_by_key(gc.CFG_FLD_TMPL_SAMPLE_ID),
            sample_id)  # '{sample_id}'
        str_proc = str_proc.replace(
            self.cfg.get_item_by_key(gc.CFG_FLD_TMPL_ROW_JSON),
            row_json)  # '{smpl_json}'
        str_proc = str_proc.replace(
            self.cfg.get_item_by_key(gc.CFG_FLD_TMPL_DICT_JSON),
            dict_json)  # '{dict_json}'
        str_proc = str_proc.replace(
            self.cfg.get_item_by_key(gc.CFG_FLD_TMPL_DICT_PATH),
            dict_path)  # '{dict_path}'
        str_proc = str_proc.replace(
            self.cfg.get_item_by_key(gc.CFG_FLD_TMPL_FILEPATH),
            filepath)  # '{filepath}'
        str_proc = str_proc.replace(
            self.cfg.get_item_by_key(gc.CFG_FLD_TMPL_DICT_UPD),
            dict_upd)  # '{dict_update}'
        str_proc = str_proc.replace(
            self.cfg.get_item_by_key(gc.CFG_FLD_TMPL_SAMPLE_UPD), sample_upd)
        # '{sample_update}'

        # get current file_processing_log
        file.logger.debug('SQL Procedure call = {}'.format(str_proc))
        # print ('procedure (str_proc) = {}'.format(str_proc))

        try:
            cursor = self.conn.cursor()
            cursor.execute(str_proc)
            # returned recordsets
            rs_out = []
            rows = cursor.fetchall()
            columns = [column[0] for column in cursor.description]
            results = []
            for row in rows:
                results.append(dict(zip(columns, row)))
            rs_out.append(results)
            return rs_out

        except Exception as ex:
            # report an error if DB call has failed.
            _str = 'Error "{}" occurred during submitting a row (sample_id = "{}") to database; ' \
                   'used SQL script "{}". Here is the traceback: \n{} '.format(
                    ex, sample_id, str_proc, traceback.format_exc())
            row.error.add_error(_str)
            file.logger.error(_str)
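submit_row builds the stored-procedure call by substituting values straight into the SQL text, which breaks if a value contains quotes and is open to SQL injection. A hedged alternative, assuming the procedure accepts the same values as parameters, is pyodbc's ODBC call escape with placeholder binding; the procedure name and parameter order below are hypothetical stand-ins for whatever gc.CFG_DB_SQL_PROC resolves to:

def submit_row_parameterized(conn, study_id, sample_id, row_json, dict_json,
                             dict_path, filepath, dict_upd, sample_upd):
    # 'dbo.usp_load_sample' is a hypothetical name; the real one comes from config
    cursor = conn.cursor()
    cursor.execute(
        '{CALL dbo.usp_load_sample (?, ?, ?, ?, ?, ?, ?, ?)}',
        (study_id, sample_id, row_json, dict_json, dict_path, filepath,
         dict_upd, sample_upd))
    # convert the returned recordset to a list of dicts, as submit_row does
    columns = [column[0] for column in cursor.description]
    return [dict(zip(columns, row)) for row in cursor.fetchall()]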
Example #30
0
def process_submission():
    # load main config file
    m_cfg = ConfigData(gc.CONFIG_FILE_MAIN)
    if not m_cfg.loaded:
        print(
            'Specified main config file ({}) was not loaded. Aborting execution.'
            .format(gc.CONFIG_FILE_MAIN))
        return 1
    # load location config file (with local value specific for the location)
    cfg_location = ConfigData(gc.CONFIG_FILE_LOCATION)
    if not cfg_location.loaded:
        print(
            'Specified location config file ({}) was not loaded. Aborting execution.'
            .format(gc.CONFIG_FILE_LOCATION))
        return 1
    # if both configs were loaded, update the main config with the location config
    m_cfg.update(cfg_location.get_whole_dictionary())

    # assign values
    common_logger_name = gc.MAIN_LOG_NAME  # m_cfg.get_value('Logging/main_log_name')

    # get path configuration values
    logging_level = m_cfg.get_value('Logging/main_log_level')
    # path to the folder where all new request files will be posted
    requests_loc = m_cfg.get_value('Location/requests')

    gc.DISQUALIFIED_REQUESTS = m_cfg.get_value(
        'Location/requests_disqualified')
    # get path configuration values and save them to global_const module
    # path to the folder where all application level log files will be stored (one file per run)
    gc.APP_LOG_DIR = m_cfg.get_value('Location/app_logs')
    # path to the folder where all log files for processing request files will be stored
    # (one file per request)
    gc.REQ_LOG_DIR = m_cfg.get_value('Location/request_logs')
    # path to the folder where all processed (and renamed) requests will be stored
    gc.REQ_PROCESSED_DIR = m_cfg.get_value('Location/requests_processed')
    # path to the folder where created submission packages will be located. One package sub_folder per request.
    # gc.OUTPUT_PACKAGES_DIR = m_cfg.get_value('Location/output_packages')
    # tarball approach to be used for the current deployment
    gc.TARBALL_APPROACH = m_cfg.get_value('Tar_ball/approach')
    # flag to save calculated md5sum to a physical file
    gc.TARBALL_SAVE_MD5SUM_FILE = m_cfg.get_value('Tar_ball/save_md5sum_file')
    # tarball ignore directories
    ignore_dirs = m_cfg.get_value('Tar_ball/ignore_dirs')
    if ignore_dirs:
        # update default ignore_dirs value with the value from a config file
        gc.TARBALL_IGNORE_DIRS = ignore_dirs

    log_folder_name = gc.APP_LOG_DIR  # gc.LOG_FOLDER_NAME
    processed_folder_name = gc.REQ_PROCESSED_DIR  # gc.PROCESSED_FOLDER_NAME

    prj_wrkdir = os.path.dirname(os.path.abspath(__file__))

    email_msgs = []
    email_attchms = []
    transfers = []

    # requests_loc = 'E:/MounSinai/MoTrPac_API/ProgrammaticConnectivity/MountSinai_metadata_file_loader/DataFiles'
    requests_path = Path(requests_loc)

    # get current location of the script and create Log folder
    # if a relative path is provided, convert it to an absolute path based on the application working dir
    if not os.path.isabs(log_folder_name):
        logdir = Path(prj_wrkdir) / log_folder_name
    else:
        logdir = Path(log_folder_name)
    # logdir = Path(prj_wrkdir) / log_folder_name  # 'logs'
    lg_filename = time.strftime("%Y%m%d_%H%M%S", time.localtime()) + '.log'

    lg = setup_logger_common(common_logger_name, logging_level, logdir,
                             lg_filename)  # logging_level
    mlog = lg['logger']
    log_warnings = False

    mlog.info(
        'Start processing submission requests in "{}"'.format(requests_path))

    try:

        (_, _, requests) = next(walk(requests_path))
        # print('Study requests: {}'.format(requests))

        mlog.info(
            'Submission requests to be processed (count = {}): {}'.format(
                len(requests), requests))

        req_proc_cnt = 0
        errors_present = 'OK'
        req_path = ''

        # '~$' filters out the temp files Excel creates while a workbook is open
        requests = [file for file in requests if not file.startswith('~$')]

        for req_file in requests:
            if req_file.endswith(('xlsx', 'xls')):
                req_path = Path(requests_path) / req_file

                # transfer_path = ''  # set a default transfer path
                transfer_details = {
                    'transfer_path': '',
                    'request_file': req_file,
                    'process_handler': None,
                    'return_code': None,
                    'return_status': None
                }
                # email_msgs = []
                # email_attchms = []

                try:
                    # print('--------->Process file {}'.format(req_path))
                    mlog.info(
                        'Request file {} was selected for processing.'.format(
                            req_path))

                    # save timestamp of beginning of the file processing
                    ts = time.strftime("%Y%m%d_%H%M%S", time.localtime())

                    req_obj = Request(req_path, m_cfg)

                    if req_obj and req_obj.loaded:
                        # proceed processing request
                        mlog.info(
                            'Submission request loading status: Success. Submission request file: "{}".'
                            .format(req_path))
                        mlog.info(
                            'Loading local and project related configs for processing the request.'
                        )
                        req_obj.load_request_configuration()
                        if not req_obj.error.exist():
                            mlog.info(
                                'Local config files were loaded with no errors, proceeding to process '
                                'the request file.')
                            req_obj.process_request()
                        else:
                            mlog.info(
                                'Errors were reported during loading local config files. Aborting processing '
                                'this request.')

                        mlog.info(
                            'Processing of Submission request was finished for {}'
                            .format(req_path))

                    req_proc_cnt += 1

                    # print (req_obj.logger._cache)
                    # verify that the _cache attribute is present
                    if hasattr(req_obj.logger, '_cache'):
                        # check if any warnings (level 30 == logging.WARNING) were
                        # recorded to the log file and set the log_warnings flag
                        if 30 in req_obj.logger._cache and req_obj.logger._cache[30]:
                            log_warnings = True
                        # else:
                        #     log_warnings = False
                    else:
                        mlog.warning(
                            'The current logger object has no "_cache" attribute - thus cannot determine '
                            'if any Warnings were reported during the process.'
                        )

                    # determine whether any errors were reported and set the status variable accordingly
                    if not req_obj.error.exist():
                        if not req_obj.disqualified_sub_aliquots:
                            # no disqualified sub-aliquots present
                            if not log_warnings:
                                fl_status = 'OK'
                                _str = 'Processing status: "{}". Submission Request: {}'.format(
                                    fl_status, req_path)
                                # errors_present = 'OK'
                            else:
                                fl_status = 'OK with Warnings'
                                _str = 'Processing status: "{}". Submission Request: {}'.format(
                                    fl_status, req_path)
                        else:
                            # some disqualified sub-aliquots are present
                            fl_status = 'OK with Disqualifications'
                            _str = 'Processing status: "{}". Submission Request: {}'.format(
                                fl_status, req_path)
                            if not errors_present == 'ERROR':
                                errors_present = 'DISQUALIFY'
                    else:
                        fl_status = 'ERROR'
                        _str = 'Processing status: "{}". Check processing log file for this request: {}' \
                            .format(fl_status, req_obj.logger.handlers[0])
                        errors_present = 'ERROR'

                    if fl_status == "OK":
                        mlog.info(_str)
                        # if transfer on completion was requested through the command line argument
                        if gc.TRANSFER_ON_COMPLETION:
                            # update transfer details dictionary with the path to the transfer file
                            transfer_details['transfer_path'] = \
                                Path(req_obj.submission_package.submission_dir) / 'transfer_script.sh'
                            transfers.append(
                                transfer_details
                            )  # add transfer details to transfers list
                            mlog.info(
                                'Since the last request was processed with "{}" status and transfer on '
                                'completion was requested ("--execute_transfer" argument was set to "yes"), '
                                'the following path was put in queue for execution: '
                                '{}'.format(fl_status,
                                            transfer_details['transfer_path']))

                    else:
                        mlog.warning(_str)
                        # if transfer on completion was requested through the command line argument
                        if gc.TRANSFER_ON_COMPLETION:
                            mlog.info(
                                'The transfer on completion request ("--execute_transfer" argument was set to '
                                '"yes") will be ignored since the last request was processed with "{}" status.'
                                .format(fl_status))

                    processed_dir = Path(processed_folder_name)
                    req_processed_name = ts + '_' + fl_status + '_' + req_file
                    file_name_new_path = cm.move_file_to_processed(
                        req_path, req_processed_name, processed_dir,
                        req_obj.logger, req_obj.error)
                    if file_name_new_path:
                        mlog.info(
                            'Processed Submission request "{}" was moved and renamed as: "{}"'
                            .format(req_path,
                                    processed_dir / req_processed_name))
                    else:
                        mlog.warning(
                            'Moving the processed request "{}" was not successful due to some errors '
                            'reported in the request\'s log file {}.'.format(
                                req_path, req_obj.log_handler.baseFilename))

                    # deactivate the current Request logger
                    deactivate_logger_common(req_obj.logger,
                                             req_obj.log_handler)

                    if req_obj.submission_package and req_obj.submission_package.submission_dir:
                        # save transfer path to a local variable
                        transfer_path = Path(
                            req_obj.submission_package.submission_dir
                        ) / 'transfer_script.sh'
                    else:
                        transfer_path = None

                    # preps for email notification
                    email_msgs.append((
                        '-------------------------------------<br/>'
                        'Requested project: {}'.format(req_obj.project) +
                        '<br/>Requested Experiment: {}.'.format(
                            req_obj.experiment_id) +
                        ('<br/>Request file <br/>{} <br/> was processed and moved/renamed to <br/> {}.'
                         .format(req_path, processed_dir /
                                 req_processed_name) if file_name_new_path else
                         '<br/> Request file <br/>{} <br/> was processed but <font color="red">NOT moved due '
                         'to some errors</font> reported in the request\'s log file.'
                         .format(req_path)) + '<br/> <b>Errors summary:</b> {}'
                        '<br/> <b>Warning(s) reported:</b> {}'
                        '<br/> <i>Log file location: <br/>{}</i>'
                        '<br/> Submission package location:<br/>{}'
                        '<br/> Data source location:<br/>{}'
                        '<br/> Processed Aliquots:<br/>{}'
                        '<br/> Disqualified Aliquots (if present, see the log file for more details):<br/>{}'
                        '<br/> A request file for re-processing Disqualified Aliquots was prepared in:<br/>{}'
                        '<br/> Automatic data transferring: {}'
                        '<br/> Command line to run data transferring manually: <br/> {}'
                        ''.format(
                            '<font color="red">Check Errors in the log file.</font>'
                            if req_obj.error.exist() else
                            '<font color="green">No Errors</font> ',
                            '<font color="red">Yes - check the log file.</font>'
                            if log_warnings else 'No',
                            req_obj.log_handler.baseFilename,
                            req_obj.submission_package.submission_dir
                            if req_obj.submission_package else 'N/A',
                            req_obj.attachments.data_loc if req_obj.attachments
                            else 'N/A', req_obj.qualified_aliquots
                            if req_obj.qualified_aliquots else 'None', [
                                val for val in
                                req_obj.disqualified_sub_aliquots.keys()
                            ] if req_obj.disqualified_sub_aliquots else 'None',
                            req_obj.disqualified_request_path,
                            '<font color="green">Performed.</font> '
                            'Additional email should be sent upon data transfer completion.'
                            if len(
                                str(transfer_details['transfer_path']).strip())
                            > 0 else 'Not performed.',
                            str(
                                Path(req_obj.submission_package.submission_dir)
                                / 'transfer_script.sh')
                            if req_obj.submission_package else 'N/A')))
                    email_attchms.append(req_obj.log_handler.baseFilename)

                    # print ('email_msgs = {}'.format(email_msgs))

                    req_obj = None

                except Exception as ex:
                    # report the error to the log file and re-raise it to the outer handler
                    mlog.error(
                        'Error "{}" occurred during processing file: {}\n{} '.
                        format(ex, req_path, traceback.format_exc()))
                    raise

        mlog.info('Number of processed Submission requests = {}'.format(
            req_proc_cnt))

        if req_proc_cnt > 0:
            # collect final details and send email about this study results
            email_subject = 'processing of Submission Requests '
            if errors_present == 'OK':
                if not log_warnings:
                    email_subject = 'SUCCESSFUL ' + email_subject
                else:
                    email_subject = 'SUCCESSFUL (with Warnings) ' + email_subject
            elif errors_present == 'DISQUALIFY':
                email_subject = 'SUCCESSFUL (with disqualifications) ' + email_subject
            else:
                email_subject = 'ERROR(s) present during ' + email_subject

            email_body = (
                'Number of requests processed: {}.'.format(req_proc_cnt) +
                '<br/><br/>' + '<br/><br/>'.join(email_msgs))
            # print ('email_subject = {}'.format(email_subject))
            # print('email_body = {}'.format(email_body))

            try:
                if m_cfg.get_value('Email/send_emails'):
                    email.send_yagmail(
                        emails_to=m_cfg.get_value('Email/sent_to_emails'),
                        subject=email_subject,
                        message=email_body,
                        main_conf=m_cfg
                        # commented adding attachments, since some log files go over the 25MB limit and fail email sending
                        # ,attachment_path=email_attchms
                    )
            except Exception as ex:
                # report unexpected error during sending emails to a log file and continue
                _str = 'Unexpected Error "{}" occurred during an attempt to send email upon ' \
                       'finishing processing "{}" study: {}\n{} ' \
                    .format(ex, req_path, os.path.abspath(__file__), traceback.format_exc())
                mlog.critical(_str)

            # perform transfers, if anything qualifies for it
            if transfers and len(transfers) > 0:
                transfer_status_checking_delay = m_cfg.get_value(
                    'General/transfer_status_checking_delay')
                # keep only a positive numeric delay; otherwise fall back to None
                if transfer_status_checking_delay and str(
                        transfer_status_checking_delay).isnumeric():
                    transfer_status_checking_delay = int(
                        transfer_status_checking_delay)
                    if transfer_status_checking_delay <= 0:
                        transfer_status_checking_delay = None
                else:
                    transfer_status_checking_delay = None

                mlog.info(
                    'Starting processing requested transfers. Total count: {} transfers.'
                    .format(len(transfers)))
                # process all collected transfer requests
                cm.process_transfers(transfers, mlog,
                                     transfer_status_checking_delay)

                # assess results of the transfer processing
                transfer_ok = 0
                transfer_err = 0
                transfer_nd = 0
                for transfer in transfers:
                    if transfer['return_status']:
                        if transfer['return_status'][:2] == 'OK':
                            transfer_ok += 1
                        elif transfer['return_status'][:5] == 'ERROR':
                            transfer_err += 1
                        else:
                            transfer_nd += 1
                    else:
                        transfer_nd += 1

                _str = 'Finish processing transfers with the following statuses: "OK" - {} transfer(s), "ERROR" - {} ' \
                       'transfer(s)'.format(transfer_ok, transfer_err)
                if transfer_nd > 0:
                    _str = _str + ', "ND" - {}'.format(transfer_nd)
                mlog.info(_str)

                # send email with the status of the transfers
                if transfers and len(transfers) > 0:
                    if transfer_err > 0:
                        email_subject = 'Errors produced during automated transfer(s) of prepared Submission Request(s)'
                    else:
                        email_subject = 'Completion of automated transfer(s) of prepared Submission Request(s)'

                    email_transfer_msgs = []
                    for transfer in transfers:
                        email_transfer_msgs.append(
                            ('Transfer process for the request file: "{}" '
                             '<br/>Transfer script file:<br/>{}'
                             '<br/>Completion status:<br/>{}'.format(
                                 transfer['request_file'],
                                 transfer['transfer_path'],
                                 transfer['return_status'])))

                    email_body = (
                        'Summary of transfer of prepared submissions:'
                        '<br/>Total count of completed transfers: {}. '
                        '<br/>Status "OK": {} transfer(s)'
                        '<br/>Status "ERROR": {} transfer(s)'
                        '<br/>Status "Not Defined": {} transfer(s)'
                        '<br/><br/>The following are details for each performed transfer:'
                        '<br/><br/>'.format(
                            len(transfers), '<font color="green">' +
                            str(transfer_ok) + '</font>' if transfer_ok > 0
                            else transfer_ok, '<font color="red">' +
                            str(transfer_err) + '</font>' if transfer_err > 0
                            else transfer_err, transfer_nd) +
                        '<br/><br/>'.join(email_transfer_msgs))

                    try:
                        if m_cfg.get_value('Email/send_emails'):
                            email.send_yagmail(emails_to=m_cfg.get_value(
                                'Email/sent_to_emails'),
                                               subject=email_subject,
                                               message=email_body,
                                               main_conf=m_cfg)
                    except Exception as ex:
                        # report unexpected error during sending emails to a log file and continue
                        _str = 'Unexpected Error "{}" occurred during an attempt to send email upon ' \
                               'finishing automated transfers. \n{} '\
                            .format(ex, traceback.format_exc())
                        mlog.critical(_str)

    except Exception as ex:
        # report unexpected error to log file
        _str = 'Unexpected Error "{}" occurred during processing file: {}\n{} ' \
            .format(ex, os.path.abspath(__file__), traceback.format_exc())
        mlog.critical(_str)
        raise

    sys.exit()
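A typical entry point for the processor above (a sketch; the actual module may wire this up differently, e.g. behind a CLI wrapper):

if __name__ == '__main__':
    # process_submission() returns 1 on config-load failures and
    # otherwise exits via sys.exit() at the end of its run
    process_submission()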