Example #1
 def load_project_config_into_main(self, project):
     # load project specific "project_config" config file
     cfg_project = ConfigData(
         gc.CONFIG_FILE_PROJECT.replace('{project}', project))
     if cfg_project.loaded:
         # if cfg_project was loaded, update it with the environment specific settings (from project_location config)
         cfg_project_updated = self.update_cfg_dictionary_with_location_details(
             gc.CONFIG_FILE_PROJECT_LOCATION, self.project,
             cfg_project.get_whole_dictionary())
         # update main config with the outcome of the previous updates
         self.conf_main.update(cfg_project_updated)
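
These examples lean on a project-specific `ConfigData` class whose implementation is not shown. The following minimal stand-in is an assumption, not the project's real class; it only captures the interface the examples rely on: a `loaded` flag, a backing dictionary, and '/'-separated key lookup.

# Hypothetical minimal stand-in for the ConfigData interface used in these
# examples; the real project class may differ. Assumes YAML config files.
import yaml

class ConfigData:
    def __init__(self, cfg_path):
        self.cfg_path = cfg_path
        try:
            with open(cfg_path) as cfg_file:
                self.cfg = yaml.safe_load(cfg_file) or {}
            self.loaded = True
        except (OSError, yaml.YAMLError):
            self.cfg = {}
            self.loaded = False

    def get_whole_dictionary(self):
        return self.cfg

    def update(self, other_dict):
        self.cfg.update(other_dict)

    def get_value(self, key_path):
        # 'Logging/main_log_level' -> self.cfg['Logging']['main_log_level']
        node = self.cfg
        for key in key_path.split('/'):
            if not isinstance(node, dict) or key not in node:
                return None
            node = node[key]
        return node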
Example #2
 def update_cfg_dictionary_with_location_details(self, location_path,
                                                 project, cfg_to_update):
     cfg_location = ConfigData(location_path.replace('{project}', project))
     if cfg_location.loaded:
         self.logger.info(
             'Local config file "{}" was loaded and is being used.'.format(
                 cfg_location.cfg_path))
         cfg_to_update = cm.update_dictionary_matching_keys(
             cfg_to_update, cfg_location.get_whole_dictionary())
     else:
         _str = 'Local config file "{}" was NOT loaded. Aborting processing of the current request file.'\
             .format(cfg_location.cfg_path)
         self.logger.error(_str)
         self.error.add_error(_str)
     return cfg_to_update
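
`cm.update_dictionary_matching_keys` is not included in these examples. Judging by its use above, it merges location settings into the project config while only touching keys the target already defines; the sketch below is an assumption about that behavior, not the module's actual code.

# Plausible sketch of cm.update_dictionary_matching_keys (assumed behavior:
# only keys already present in the target are overwritten, nested
# dictionaries are merged recursively; unknown keys in the source are ignored).
def update_dictionary_matching_keys(target, source):
    for key, value in source.items():
        if key not in target:
            continue  # ignore keys the target does not define
        if isinstance(target[key], dict) and isinstance(value, dict):
            update_dictionary_matching_keys(target[key], value)
        else:
            target[key] = value
    return target

# usage sketch:
# base = {'Location': {'requests': ''}, 'Logging': {'main_log_level': 'INFO'}}
# update_dictionary_matching_keys(base, {'Location': {'requests': '/data'}, 'Extra': 1})
# -> base['Location']['requests'] == '/data'; 'Extra' is dropped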
Example #3
def process_submission():
    # load main config file
    m_cfg = ConfigData(gc.CONFIG_FILE_MAIN)
    if not m_cfg.loaded:
        print(
            'Specified main config file ({}) was not loaded. Aborting execution.'
            .format(gc.CONFIG_FILE_MAIN))
        return 1
    # load location config file (with local value specific for the location)
    cfg_location = ConfigData(gc.CONFIG_FILE_LOCATION)
    if not cfg_location.loaded:
        print(
            'Specified location config file ({}) was not loaded. Aborting execution.'
            .format(gc.CONFIG_FILE_LOCATION))
        return 1
    # if both configs were loaded, update the main config with the location config
    m_cfg.update(cfg_location.get_whole_dictionary())

    # assign values
    common_logger_name = gc.MAIN_LOG_NAME  # m_cfg.get_value('Logging/main_log_name')

    # get path configuration values
    logging_level = m_cfg.get_value('Logging/main_log_level')
    # path to the folder where all new request files will be posted
    requests_loc = m_cfg.get_value('Location/requests')

    gc.DISQUALIFIED_REQUESTS = m_cfg.get_value(
        'Location/requests_disqualified')
    # get path configuration values and save them to global_const module
    # path to the folder where all application level log files will be stored (one file per run)
    gc.APP_LOG_DIR = m_cfg.get_value('Location/app_logs')
    # path to the folder where all log files for processing request files will be stored
    # (one file per request)
    gc.REQ_LOG_DIR = m_cfg.get_value('Location/request_logs')
    # path to the folder where all processed (and renamed) requests will be stored
    gc.REQ_PROCESSED_DIR = m_cfg.get_value('Location/requests_processed')
    # path to the folder where created submission packages will be located. One package sub_folder per request.
    # gc.OUTPUT_PACKAGES_DIR = m_cfg.get_value('Location/output_packages')
    # tarball approach to be used for the current deployment
    gc.TARBALL_APPROACH = m_cfg.get_value('Tar_ball/approach')
    # flag to save calculated md5sum to a physical file
    gc.TARBALL_SAVE_MD5SUM_FILE = m_cfg.get_value('Tar_ball/save_md5sum_file')
    # tarball ignore directories
    ignore_dirs = m_cfg.get_value('Tar_ball/ignore_dirs')
    if ignore_dirs:
        # update default ignore_dirs value with the value from a config file
        gc.TARBALL_IGNORE_DIRS = ignore_dirs

    log_folder_name = gc.APP_LOG_DIR  # gc.LOG_FOLDER_NAME
    processed_folder_name = gc.REQ_PROCESSED_DIR  # gc.PROCESSED_FOLDER_NAME

    prj_wrkdir = os.path.dirname(os.path.abspath(__file__))

    email_msgs = []
    email_attchms = []
    transfers = []

    requests_path = Path(requests_loc)

    # get current location of the script and create Log folder
    # if a relative path provided, convert it to the absolute address based on the application working dir
    if not os.path.isabs(log_folder_name):
        logdir = Path(prj_wrkdir) / log_folder_name
    else:
        logdir = Path(log_folder_name)
    lg_filename = time.strftime("%Y%m%d_%H%M%S", time.localtime()) + '.log'

    lg = setup_logger_common(common_logger_name, logging_level, logdir,
                             lg_filename)  # logging_level
    mlog = lg['logger']
    log_warnings = False

    mlog.info(
        'Start processing submission requests in "{}"'.format(requests_path))

    try:

        (_, _, requests) = next(walk(requests_path))

        mlog.info(
            'Submission requests to be processed (count = {}): {}'.format(
                len(requests), requests))

        req_proc_cnt = 0
        errors_present = 'OK'
        req_path = ''

        # filter out temp files (names starting with '~$') created while an Excel file is open
        requests = [file for file in requests if not file.startswith('~$')]

        for req_file in requests:
            if req_file.endswith(('xlsx', 'xls')):
                req_path = Path(requests_path) / req_file

                transfer_details = {
                    'transfer_path': '',
                    'request_file': req_file,
                    'process_handler': None,
                    'return_code': None,
                    'return_status': None
                }

                try:
                    mlog.info(
                        'Request file {} was selected for processing.'.format(
                            req_path))

                    # save timestamp of beginning of the file processing
                    ts = time.strftime("%Y%m%d_%H%M%S", time.localtime())

                    req_obj = Request(req_path, m_cfg)

                    if req_obj and req_obj.loaded:
                        # proceed with processing the request
                        mlog.info(
                            'Submission request loading status: Success. Submission request file: "{}".'
                            .format(req_path))
                        mlog.info(
                            'Loading local and project related configs for processing the request.'
                        )
                        req_obj.load_request_configuration()
                        if not req_obj.error.exist():
                            mlog.info(
                                'Local config files were loaded with no errors, proceeding to process '
                                'the request file.')
                            req_obj.process_request()
                        else:
                            mlog.info(
                                'Errors were reported during loading local config files. Aborting processing '
                                'this request.')

                        mlog.info(
                            'Processing of Submission request was finished for {}'
                            .format(req_path))

                    req_proc_cnt += 1

                    # verify that the _cache attribute is present on the logger
                    if hasattr(req_obj.logger, '_cache'):
                        # check if any warnings (level 30 == logging.WARNING) were recorded
                        # to the log file and set the log_warnings flag accordingly
                        if 30 in req_obj.logger._cache and req_obj.logger._cache[30]:
                            log_warnings = True
                    else:
                        mlog.warning(
                            'The current logger object has no "_cache" attribute - thus cannot determine '
                            'if any Warnings were reported during the process.'
                        )

                    # check if any errors were reported and set the status variable accordingly
                    if not req_obj.error.exist():
                        if not req_obj.disqualified_sub_aliquots:
                            # no disqualified sub-aliquots are present
                            if not log_warnings:
                                fl_status = 'OK'
                            else:
                                fl_status = 'OK with Warnings'
                            _str = 'Processing status: "{}". Submission Request: {}'.format(
                                fl_status, req_path)
                        else:
                            # some disqualified sub-aliquots are present
                            fl_status = 'OK with Disqualifications'
                            _str = 'Processing status: "{}". Submission Request: {}'.format(
                                fl_status, req_path)
                            if not errors_present == 'ERROR':
                                errors_present = 'DISQUALIFY'
                    else:
                        fl_status = 'ERROR'
                        _str = 'Processing status: "{}". Check processing log file for this request: {}' \
                            .format(fl_status, req_obj.logger.handlers[0])
                        errors_present = 'ERROR'

                    if fl_status == "OK":
                        mlog.info(_str)
                        # if transfer on completion was requested through the command line argument
                        if gc.TRANSFER_ON_COMPLETION:
                            # update transfer details dictionary with the path to the transfer file
                            transfer_details['transfer_path'] = \
                                Path(req_obj.submission_package.submission_dir) / 'transfer_script.sh'
                            transfers.append(
                                transfer_details
                            )  # add transfer details to transfers list
                            mlog.info(
                                'Since the last request was processed with "{}" status and transfer on '
                                'completion was requested ("--execute_transfer" argument was set to "yes"), '
                                'the following path was put in queue for execution: '
                                '{}'.format(fl_status,
                                            transfer_details['transfer_path']))

                    else:
                        mlog.warning(_str)
                        # if transfer on completion was requested through the command line argument
                        if gc.TRANSFER_ON_COMPLETION:
                            mlog.info(
                                'The transfer on completion request ("--execute_transfer" argument was set to '
                                '"yes") will be ignored since the last request was processed with "{}" status.'
                                .format(fl_status))

                    processed_dir = Path(processed_folder_name)
                    req_processed_name = ts + '_' + fl_status + '_' + req_file
                    file_name_new_path = cm.move_file_to_processed(
                        req_path, req_processed_name, processed_dir,
                        req_obj.logger, req_obj.error)
                    if file_name_new_path:
                        mlog.info(
                            'Processed Submission request "{}" was moved and renamed as: "{}"'
                            .format(req_path,
                                    processed_dir / req_processed_name))
                    else:
                        mlog.warning(
                            'Moving the processed request "{}" was not successful due to some errors '
                            'reported in the request\'s log file {}.'.format(
                                req_path, req_obj.log_handler.baseFilename))

                    # deactivate the current Request logger
                    deactivate_logger_common(req_obj.logger,
                                             req_obj.log_handler)

                    if req_obj.submission_package and req_obj.submission_package.submission_dir:
                        # save transfer path to a local variable
                        transfer_path = Path(
                            req_obj.submission_package.submission_dir
                        ) / 'transfer_script.sh'
                    else:
                        transfer_path = None

                    # preps for email notification
                    email_msgs.append((
                        '-------------------------------------<br/>'
                        'Requested project: {}'.format(req_obj.project) +
                        '<br/>Requested Experiment: {}.'.format(
                            req_obj.experiment_id) +
                        ('<br/>Request file <br/>{} <br/> was processed and moved/renamed to <br/> {}.'
                         .format(req_path, processed_dir /
                                 req_processed_name) if file_name_new_path else
                         '<br/> Request file <br/>{} <br/> was processed but <font color="red">NOT moved due '
                         'to some errors</font> reported in the request\'s log file.'
                         .format(req_path)) + '<br/> <b>Errors summary:</b> {}'
                        '<br/> <b>Warning(s) reported:</b> {}'
                        '<br/> <i>Log file location: <br/>{}</i>'
                        '<br/> Submission package location:<br/>{}'
                        '<br/> Data source location:<br/>{}'
                        '<br/> Processed Aliquots:<br/>{}'
                        '<br/> Disqualified Aliquots (if present, see the log file for more details):<br/>{}'
                        '<br/> A request file for re-processing Disqualified Aliquots was prepared in:<br/>{}'
                        '<br/> Automatic data transferring: {}'
                        '<br/> Command line to run data transferring manually: <br/> {}'
                        ''.format(
                            '<font color="red">Check Errors in the log file.</font>'
                            if req_obj.error.exist() else
                            '<font color="green">No Errors</font> ',
                            '<font color="red">Yes - check the log file.</font>'
                            if log_warnings else 'No',
                            req_obj.log_handler.baseFilename,
                            req_obj.submission_package.submission_dir
                            if req_obj.submission_package else 'N/A',
                            req_obj.attachments.data_loc
                            if req_obj.attachments else 'N/A',
                            req_obj.qualified_aliquots
                            if req_obj.qualified_aliquots else 'None',
                            list(req_obj.disqualified_sub_aliquots.keys())
                            if req_obj.disqualified_sub_aliquots else 'None',
                            req_obj.disqualified_request_path,
                            '<font color="green">Performed.</font> '
                            'Additional email should be sent upon data transfer completion.'
                            if len(
                                str(transfer_details['transfer_path']).strip())
                            > 0 else 'Not performed.',
                            str(
                                Path(req_obj.submission_package.submission_dir)
                                / 'transfer_script.sh')
                            if req_obj.submission_package else 'N/A')))
                    email_attchms.append(req_obj.log_handler.baseFilename)


                    req_obj = None

                except Exception as ex:
                    # report the error to the log file; the exception is re-raised and will abort processing
                    mlog.error(
                        'Error "{}" occurred during processing file: {}\n{} '.
                        format(ex, req_path, traceback.format_exc()))
                    raise

        mlog.info('Number of processed Submission requests = {}'.format(
            req_proc_cnt))

        if req_proc_cnt > 0:
            # collect final details and send email about this study results
            email_subject = 'processing of Submission Requests '
            if errors_present == 'OK':
                if not log_warnings:
                    email_subject = 'SUCCESSFUL ' + email_subject
                else:
                    email_subject = 'SUCCESSFUL (with Warnings) ' + email_subject
            elif errors_present == 'DISQUALIFY':
                email_subject = 'SUCCESSFUL (with disqualifications) ' + email_subject
            else:
                email_subject = 'ERROR(s) present during ' + email_subject

            email_body = (
                'Number of requests processed: {}.'.format(req_proc_cnt) +
                '<br/><br/>' + '<br/><br/>'.join(email_msgs))

            try:
                if m_cfg.get_value('Email/send_emails'):
                    email.send_yagmail(
                        emails_to=m_cfg.get_value('Email/sent_to_emails'),
                        subject=email_subject,
                        message=email_body,
                        main_conf=m_cfg
                        # adding attachments is commented out, since some log files go over the 25MB limit and fail email sending
                        # ,attachment_path=email_attchms
                    )
            except Exception as ex:
                # report unexpected error during sending emails to a log file and continue
                _str = 'Unexpected Error "{}" occurred during an attempt to send email upon ' \
                       'finishing processing "{}" study: {}\n{} ' \
                    .format(ex, req_path, os.path.abspath(__file__), traceback.format_exc())
                mlog.critical(_str)

            # perform transfers, if anything qualifies for it
            if transfers:
                transfer_status_checking_delay = m_cfg.get_value(
                    'General/transfer_status_checking_delay')
                if transfer_status_checking_delay and str(
                        transfer_status_checking_delay).isnumeric():
                    # keep the delay only if it is a positive number
                    transfer_status_checking_delay = int(
                        transfer_status_checking_delay)
                    if transfer_status_checking_delay <= 0:
                        transfer_status_checking_delay = None
                else:
                    transfer_status_checking_delay = None

                mlog.info(
                    'Starting processing requested transfers. Total count: {} transfers.'
                    .format(len(transfers)))
                # process all collected transfer requests
                cm.process_transfers(transfers, mlog,
                                     transfer_status_checking_delay)

                # assess results of the transfer processing
                transfer_ok = 0
                transfer_err = 0
                transfer_nd = 0
                for transfer in transfers:
                    if transfer['return_status']:
                        if transfer['return_status'][:2] == 'OK':
                            transfer_ok += 1
                        elif transfer['return_status'][:5] == 'ERROR':
                            transfer_err += 1
                        else:
                            transfer_nd += 1
                    else:
                        transfer_nd += 1

                _str = 'Finished processing transfers with the following statuses: "OK" - {} transfer(s), "ERROR" - {} ' \
                       'transfer(s)'.format(transfer_ok, transfer_err)
                if transfer_nd > 0:
                    _str = _str + ', "ND" - {}'.format(transfer_nd)
                mlog.info(_str)

                # send an email with the status of the transfers
                if transfers:
                    if transfer_err > 0:
                        email_subject = 'Errors produced during automated transfer(s) of prepared Submission Request(s)'
                    else:
                        email_subject = 'Completion of automated transfer(s) of prepared Submission Request(s)'

                    email_transfer_msgs = []
                    for transfer in transfers:
                        email_transfer_msgs.append(
                            ('Transfer process for the request file: "{}" '
                             '<br/>Transfer script file:<br/>{}'
                             '<br/>Completion status:<br/>{}'.format(
                                 transfer['request_file'],
                                 transfer['transfer_path'],
                                 transfer['return_status'])))

                    email_body = (
                        'Summary of transfer of prepared submissions:'
                        '<br/>Total count of completed transfers: {}. '
                        '<br/>Status "OK": {} transfer(s)'
                        '<br/>Status "ERROR": {} transfer(s)'
                        '<br/>Status "Not Defined": {} transfer(s)'
                        '<br/><br/>The following are details for each performed transfer:'
                        '<br/><br/>'.format(
                            len(transfers),
                            '<font color="green">' + str(transfer_ok) + '</font>'
                            if transfer_ok > 0 else transfer_ok,
                            '<font color="red">' + str(transfer_err) + '</font>'
                            if transfer_err > 0 else transfer_err,
                            transfer_nd) +
                        '<br/><br/>'.join(email_transfer_msgs))

                    try:
                        if m_cfg.get_value('Email/send_emails'):
                            email.send_yagmail(emails_to=m_cfg.get_value(
                                'Email/sent_to_emails'),
                                               subject=email_subject,
                                               message=email_body,
                                               main_conf=m_cfg)
                    except Exception as ex:
                        # report unexpected error during sending emails to a log file and continue
                        _str = 'Unexpected Error "{}" occurred during an attempt to send email upon ' \
                               'finishing automated transfers. \n{} '\
                            .format(ex, traceback.format_exc())
                        mlog.critical(_str)

    except Exception as ex:
        # report unexpected error to log file
        _str = 'Unexpected Error "{}" occurred during processing file: {}\n{} ' \
            .format(ex, os.path.abspath(__file__), traceback.format_exc())
        mlog.critical(_str)
        raise

    sys.exit()
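
Example #3 detects emitted warnings through `Logger._cache`, an internal CPython detail: since Python 3.7 it memoizes `isEnabledFor()` results keyed by level (30 is `logging.WARNING`), so the check works only indirectly and can break across Python versions, as the `hasattr` guard acknowledges. A more explicit alternative is a counting handler; a minimal sketch, assuming a standard `logging.Logger`:

import logging

class WarningCounterHandler(logging.Handler):
    """Counts log records at or above WARNING; emits nothing itself."""
    def __init__(self):
        super().__init__(level=logging.WARNING)
        self.count = 0

    def emit(self, record):
        self.count += 1

# usage sketch: attach alongside the real handlers, then inspect .count
logger = logging.getLogger('demo')
warn_counter = WarningCounterHandler()
logger.addHandler(warn_counter)
logger.warning('something odd happened')
log_warnings = warn_counter.count > 0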
Example #4

    def match_inquiry_items_to_sources(self):
        cur_row = -1
        for inq_line in self.lines_arr:
            cur_row += 1  # increase row counter
            if cur_row == self.header_row_num - 1:
                continue

            # get the program code, which must be in the first column
            program_code = self.get_inquiry_value_by_field_name(
                'program_code', inq_line)

            # create a local DictConfigData object and copy there a dictionary object
            conf_dict = DictConfigData(None,
                                       self.conf_dict.get_dictionary_copy())
            # update dictionary config (conf_dict) with the program specific settings loaded into conf_dict_program
            conf_dict_program_path = gc.CONFIG_FILE_DICTIONARY_PROGRAM.replace(
                '{program}', program_code)
            conf_dict_program = ConfigData(conf_dict_program_path)
            conf_dict.update(conf_dict_program.get_whole_dictionary())

            bulk_location = self.get_inquiry_value_by_field_name(
                'bulk_location', inq_line, False)
            assay = self.get_inquiry_value_by_field_name('assay', inq_line)
            sub_al = self.get_inquiry_value_by_field_name(
                'sub-aliquot', inq_line, False)

            # inq_study_path = '/'.join([program_code, bulk_location, assay])
            inq_study_path = self.conf_main.get_value(
                'Destination/study_path_template')
            inq_study_path = inq_study_path.replace('{program_code}',
                                                    program_code)
            inq_study_path = inq_study_path.replace('{bulk_location}',
                                                    bulk_location)
            inq_study_path = inq_study_path.replace('{assay}', assay)

            # skip the current line if its sub-aliquot is already among the disqualified items
            if self.disqualified_items and sub_al in self.disqualified_items.keys():
                continue

            # identify the aliquot for the sub-aliquot of the current inquiry line
            al = conf_dict.convert_sub_aliq_to_aliquot(sub_al, assay)

            match = False

            # get a reference to the DataSource object assigned to the current row
            if cur_row in self.inq_line_sources:
                cur_source = self.inq_sources[self.inq_line_sources[cur_row]]
            else:
                # no datasource was assigned to the current row; skip the row
                continue
            # check if any source types were disqualified during loading the datasource
            if cur_source.disqualified_data_sources:
                # if at least one source of the datasource was disqualified, skip the row using this datasource
                # and disqualify the current sub-aliquot as well
                self.disqualify_inquiry_item(
                    sub_al,
                    'Datasource associated with this aliquot_id was marked as disqualified.',
                    inq_line)
                continue

            # get a copy of the source type ids of the current datasource;
            # it will track number of items found for each source type
            cur_source_types = copy.deepcopy(cur_source.source_types)

            # loop through items of the source
            for src_item in cur_source.source_content_arr:
                match_out = False
                # attempt match by the sub-aliquot
                match_out, match_details = \
                    self.is_item_found_soft_match(sub_al, src_item['name'], src_item['soft_comparisions'], sub_al)
                if match_out:
                    match = True
                # if the sub-aliquot match was not successful, attempt to match by the aliquot
                elif src_item['aliquot_match']:
                    match_out, match_details = \
                        self.is_item_found_soft_match(al, src_item['name'], src_item['soft_comparisions'], sub_al)
                    if match_out:
                        match = True
                # if a match was found using one of the above methods, record the item to inq_match_arr
                if match_out:
                    # since a match was found, verify that the source path is accessible
                    # (web locations are exempt from this check)
                    web_loc = src_item['web_location']
                    real_path = os.path.realpath(src_item['path'])  # real path of the current item

                    if web_loc or os.path.exists(src_item['path']):
                        item_details = {
                            'sub-aliquot': sub_al,
                            'study': inq_study_path,
                            'source_item_name': src_item['name'],
                            'target_subfolder': src_item['target_subfolder'],
                            'real_path': src_item['path'],
                            'target_copied_item_name': src_item['target_copied_item_name'],
                            'match_details': match_details,
                            'source_type_id': src_item['source_type_id'],
                            'obj_type': src_item['obj_type'],
                            'source_name_generic': cur_source.source_name_generic
                        }
                        self.inq_match_arr.append(item_details)
                        # record the source type id of the item to track the number of matches found per source type
                        cur_source_types[src_item['source_type_id']]['items_count'] += 1
                    else:
                        self.disqualify_inquiry_item(
                            sub_al,
                            'A match was found, but the identified source path is not accessible. Match details: {}. '
                            'Source path: "{}". Real source path: "{}".'.format(
                                match_details, src_item['path'], real_path),
                            inq_line)

            # report if no match was found and
            # verify that a match was found for each of the source types of the current datasource
            if not match:
                # no matches were found for the current datasource
                self.disqualify_inquiry_item(
                    sub_al,
                    'No matching items (files/folders) were found in the current data source.',
                    inq_line)
            else:
                if not cur_source.allow_nomatch_per_sourcetype:
                    # some matches were found; verify that a match was found for each of the source types
                    for src_type in cur_source_types:
                        if cur_source_types[src_type]['items_count'] == 0:
                            # no matches were found for this source type
                            self.disqualify_inquiry_item(
                                sub_al,
                                'No matches were found for the "{}" source type id in the datasource.'
                                .format(src_type), inq_line)
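
`is_item_found_soft_match` (and the `soft_comparisions` list each source item carries) is not shown in this listing. The sketch below is only a guess at the kind of normalized, "soft" comparison it performs; the normalization flag names are invented, and the real method takes an extra sub-aliquot argument used for reporting, omitted here.

# Hypothetical soft-match helper: the item matches when the normalized
# identifier occurs in the normalized source item name. The normalization
# flags ('case_insensitive', 'ignore_dashes') are illustrative assumptions.
def is_item_found_soft_match(identifier, item_name, soft_comparisons):
    norm_id, norm_name = identifier, item_name
    if 'case_insensitive' in soft_comparisons:
        norm_id, norm_name = norm_id.lower(), norm_name.lower()
    if 'ignore_dashes' in soft_comparisons:
        norm_id = norm_id.replace('-', '')
        norm_name = norm_name.replace('-', '')
    matched = norm_id in norm_name
    details = 'matched "{}" within "{}"'.format(identifier, item_name) if matched else ''
    return matched, details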
Example #5

    def process_inquiry_sources(self):
        cur_row = 0
        for inq_line in self.lines_arr:
            if cur_row == self.header_row_num - 1:
                # skip the header row
                cur_row += 1
                continue

            # get program code assigned to the current row
            program_code = self.get_inquiry_value_by_field_name(
                'program_code', inq_line)
            # get assay assigned to the current row
            assay = self.get_inquiry_value_by_field_name('assay', inq_line)
            # get source id assigned to the current row
            source_id = self.get_inquiry_value_by_field_name(
                'source_id', inq_line)

            # get the source config file path;
            # two values are saved in a tuple: the program specific path and the default one;
            # if the program specific path does not exist, the default will be used
            cfg_source_path = (
                # configuration path for the current program by name
                gc.CONFIG_FILE_SOURCE_PATH\
                    .replace('{program}', program_code)\
                    .replace('{assay}', assay)\
                    .replace('{source_id}', source_id),
                # configuration path for the default program (used if no program specific path is present)
                gc.CONFIG_FILE_SOURCE_PATH \
                    .replace('{program}', 'default') \
                    .replace('{assay}', assay) \
                    .replace('{source_id}', source_id)
            )
            # get the source location config file path
            cfg_source_location_path = gc.CONFIG_FILE_SOURCE_LOCATION_PATH.replace(
                '{source_id}', source_id)

            # attempt to load the configuration from the program specific path
            cfg_source = ConfigData(Path(cfg_source_path[0]))
            if not cfg_source.loaded:
                # if config was not loaded from the program specific path, load the default one
                cfg_source = ConfigData(Path(cfg_source_path[1]))

            if cfg_source.loaded:
                # proceed here if the source config was loaded
                # load source location config with location specific settings for the current source
                cfg_source_location = ConfigData(
                    Path(cfg_source_location_path))
                if cfg_source_location.loaded:
                    # if the source location config was loaded, update cfg_source config with the source location config
                    cfg_source.update(
                        cfg_source_location.get_whole_dictionary())

                # get the unique id of the datasource; if the same id was used already, reuse that datasource
                inq_line_datasource_id = self.get_inquiry_line_datasource_id(
                    inq_line)
                self.logger.info(
                    'Current inquiry row #{} was identified with the following data source id: {}'
                    .format(cur_row, inq_line_datasource_id))
                # assign source id (inq_line_datasource_id) to the current inquiry line
                self.inq_line_sources[cur_row] = inq_line_datasource_id
                if inq_line_datasource_id in self.inq_sources:
                    # reuse existing datasource
                    self.logger.info(
                        'The data source id identified for the current inquiry row #{} matches one '
                        'retrieved earlier (for this or another row); the existing datasource will '
                        'be re-used for the current row.'.format(cur_row))
                else:
                    # create a new datasource object
                    inq_line_datasource = DataSource(self, cfg_source,
                                                     inq_line,
                                                     inq_line_datasource_id)
                    self.inq_sources[
                        inq_line_datasource_id] = inq_line_datasource
            else:
                sub_al = self.get_inquiry_value_by_field_name(
                    'sub-aliquot', inq_line, False)
                _str = 'Datasource config file for the row #{} (sub_aliquot: {}) cannot be loaded. ' \
                       'None of the expected config files is accessible: {}'\
                    .format(cur_row, sub_al, ' | '.join(cfg_source_path))
                self.logger.warning(_str)
                self.disqualify_inquiry_item(
                    sub_al, _str, cur_row
                )  # TODO: verify if inq_line should be used here instead of cur_row
            cur_row += 1
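
`get_inquiry_line_datasource_id` is the key to the caching above: rows that resolve to the same id share one `DataSource` object. Its implementation is not shown; a plausible shape is a composite key built from the fields that select the datasource config. The function name and field choice below are hypothetical.

# Hypothetical composite-key builder for datasource caching; assumes the
# datasource is fully determined by program code, assay and source id.
def make_datasource_id(program_code, assay, source_id):
    parts = (str(program_code).strip().lower(),
             str(assay).strip().lower(),
             str(source_id).strip().lower())
    return '|'.join(parts)

# e.g. make_datasource_id('Motrpac', 'RNA-seq', 'src01') -> 'motrpac|rna-seq|src01'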
Example #6

    def validate_inquiry_file(self):
        self.logger.info(
            'Start validating the current inquiry file "{}".'.format(
                self.filepath))
        row_count = 1
        failed_cnt = 0
        valid_aliquot_flag = self.conf_main.get_value(
            'Validate/aliquot_id_vs_manifest')
        valid_inquiry_values_flag = self.conf_main.get_value(
            'Validate/inquiry_values_vs_dictionary')
        inquiry_min_number_columns = self.conf_main.get_value(
            'Validate/inquiry_min_number_columns')
        inquiry_validate_number_columns = self.conf_main.get_value(
            'Validate/inquiry_validate_number_columns')
        if not inquiry_min_number_columns or not isinstance(
                inquiry_min_number_columns, int):
            inquiry_min_number_columns = 6  # set a default value if it is not provided in the config file
        if not inquiry_validate_number_columns or not isinstance(
                inquiry_validate_number_columns, int):
            inquiry_validate_number_columns = 6  # set a default value if it is not provided in the config file

        for row in self.lines_arr:
            if row_count == self.header_row_num:  # 1
                # skip this row since it is the header
                row_count += 1
                continue

            sub_al = 'ND'  # default placeholder value
            assay = ''  # set blank value as default
            valid_aliquot_performed = False
            skip_final_check = False

            # check if the inquiry file contains the minimum number of columns
            if len(row) < inquiry_min_number_columns:
                # disqualify the current inquiry file
                _str = 'The current inquiry file has {} columns while {} are expected and will be disqualified.' \
                    .format(len(row), inquiry_min_number_columns)
                self.error.add_error(_str)
                self.logger.error(_str)
                return
            # create a local DictConfigData object and copy there a dictionary object
            conf_dict = DictConfigData(None,
                                       self.conf_dict.get_dictionary_copy())
            # get sub-aliquot value before looping through all fields, so it can be used for reporting errors
            # also get program_code assigned to the row
            program_code = self.get_inquiry_value_by_field_name(
                'program_code', row)
            sub_al = self.get_inquiry_value_by_field_name(
                'sub-aliquot', row, False)

            # validate program_code value
            if conf_dict.key_exists_in_dict(
                    str(program_code).lower(), 'program_code'):
                # update dictionary config (conf_dict) with the program specific settings loaded into conf_dict_program
                conf_dict_program_path = gc.CONFIG_FILE_DICTIONARY_PROGRAM\
                    .replace('{program}', conf_dict.get_dict_value(str(program_code).lower(), 'program_code'))
                conf_dict_program = ConfigData(conf_dict_program_path)
                conf_dict.update(conf_dict_program.get_whole_dictionary())
            else:
                _str = 'Unexpected value "{}" was provided for "program_code" (line #{})' \
                    .format(program_code, row_count)
                self.logger.critical(_str)
                # disqualify an inquiry file row, if unexpected value was provided
                self.disqualify_inquiry_item(sub_al, _str, row)
                failed_cnt += 1
                skip_final_check = True

            if not skip_final_check:
                # go through fields and validate the provided values
                for i in range(len(row)):
                    if i + 1 > inquiry_validate_number_columns:
                        # if number of columns in the inquiry file > expected maximum, exit the loop
                        break
                    col_category = conf_dict.get_dict_value(
                        str(i + 1), 'inquiry_file_structure')
                    if col_category in ('program_code', 'sub-aliquot'):
                        # no checking is needed for these fields; proceed further
                        continue
                    elif col_category == 'db_center_id':
                        # get center id value and validate it
                        db_center_id = row[i]
                        # validate center_code or center_id value
                        self.logger.info(
                            'Start validation of center value "{}" provided for the current row'
                            .format(db_center_id))
                        db = DBAccess(self.logger, self.conf_main,
                                      self.error)  # create DBAccess object
                        db.open_connection()
                        # test center value assuming center code was provided
                        dataset = db.validate_center_code(
                            db_center_id, program_code, 'code', 'code')
                        _str_err_out1, center_id_out1 = self.check_validation_dataset_outcome(
                            dataset, 'center_id', 'center_code')
                        if center_id_out1:
                            # center id was returned, meaning center was validated fine
                            db_center_id = center_id_out1
                        else:
                            # if center code was not validated at first attempt, validate it assuming the center id was given
                            dataset = db.validate_center_code(
                                db_center_id, program_code, 'id', 'code')
                            _str_err_out2, center_id_out2 = self.check_validation_dataset_outcome(
                                dataset, 'center_id', 'center_id')
                            if center_id_out2:
                                # center id was validated at the 2nd attempt, ignore the 1st validation attempt
                                db_center_id = center_id_out2
                            else:
                                # center validation attempts failed, report both failures
                                _str = 'Provided center value can be interpreted neither as a code nor as an id; ' \
                                       'here are both validation outcomes: ' + \
                                       ' | '.join([_str_err_out1, _str_err_out2])
                                self.logger.warning(_str)
                                self.disqualify_inquiry_item(sub_al, _str, row)
                                failed_cnt += 1
                                skip_final_check = True
                                break

                        # if the aliquot validation is required, validate the sub-aliquot value using the db_center_id value
                        if valid_aliquot_flag:
                            # aliquot id validation is required
                            valid_aliquot_performed = True  # flag that aliquot validation was done
                            if isinstance(db_center_id, int):
                                # since the center value is numeric, proceed here
                                # get aliquot id based on the verified earlier assay value and given sub_aliquot id
                                aliquot = conf_dict.convert_sub_aliq_to_aliquot(
                                    sub_al, assay)
                                valid_status, valid_desc = self.db_access.validate_aliquot_id(
                                    aliquot, db_center_id)
                                if valid_status != 'OK':
                                    # disqualify an inquiry file row, if returned status is not OK
                                    _str = 'No match was found for the aliquot id "{}" (row #{}) in the manifest dataset ' \
                                           'of the database. DB response => Status: "{}"; Description: "{}".'\
                                        .format(aliquot, row_count, valid_status, valid_desc)
                                    self.logger.warning(_str)
                                    self.disqualify_inquiry_item(
                                        sub_al, _str, row)
                                    failed_cnt += 1
                                    skip_final_check = True
                                    break
                            else:
                                # report unexpected center id value
                                _str = 'Unexpected value "{}" was provided for "db_center_id" (line #{}, column #{}). This is a ' \
                                       'critical error because this value is required (based on the configuration setting ' \
                                       '"Validate/aliquot_id_vs_manifest") to validate the provided aliquot id "{}"' \
                                    .format(db_center_id, row_count, i + 1, sub_al)
                                self.logger.warning(_str)
                                # disqualify an inquiry file row, if unexpected value was provided
                                self.disqualify_inquiry_item(sub_al, _str, row)
                                failed_cnt += 1
                                skip_final_check = True
                                # break
                        else:
                            self.logger.info(
                                'Validating of the provided aliquot_id "{}" is not required based on the '
                                'value of the config parameter "Validate/aliquot_id_vs_manifest": "{}".'
                                .format(sub_al, valid_aliquot_flag))
                    else:
                        if col_category == 'assay':
                            # save the assay value to a dedicated variable
                            assay = row[i].strip().lower()
                        if valid_inquiry_values_flag:
                            # if validation of the inquiry values vs dictionary is required
                            validate_values = []
                            validate_categories = []
                            if col_category == 'bulk_location':
                                # split the bulk_location value using the configured delimiter
                                bulk_value_delim = conf_dict.get_dict_value(
                                    'inquiry_file_structure_bulk_location_delim', '')
                                validate_values = str(row[i]).split(bulk_value_delim)
                                validate_categories = conf_dict.get_dict_object(
                                    'inquiry_file_structure_bulk_location', '')
                            else:
                                validate_values.append(str(row[i]).lower())
                                validate_categories.append(col_category)
                            for vv, vc in zip(validate_values,
                                              validate_categories):
                                if not conf_dict.key_exists_in_dict(
                                        vv.lower(), vc):
                                    if col_category == 'bulk_location':
                                        _str = 'Unexpected value "{}" was provided for "{}" as a part of ' \
                                               'the "bulk_location" value (line #{}, column #{})' \
                                            .format(vv, vc, row_count, i + 1)
                                    else:
                                        _str = 'Unexpected value "{}" was provided for "{}" (line #{}, column #{})'\
                                            .format(vv, vc, row_count, i+1)
                                    self.logger.critical(_str)
                                    # disqualify an inquiry file row, if unexpected value was provided
                                    self.disqualify_inquiry_item(
                                        sub_al, _str, row)
                                    failed_cnt += 1
                                    skip_final_check = True
                                    break
                    if skip_final_check:
                        break

            # check that if aliquot validation is required it was actually performed
            if not skip_final_check:
                if valid_aliquot_flag and not valid_aliquot_performed:
                    _str = 'Required aliquot validation vs. database manifest was not performed for the current row ' \
                           '(#{}) and it is considered a disqualification reason (most likely the db_center_id column ' \
                           'was not provided). ' \
                        .format(row_count)
                    self.logger.critical(_str)
                    # disqualify an inquiry file row, if unexpected value was provided
                    self.disqualify_inquiry_item(sub_al, _str, row)
                    failed_cnt += 1

            row_count += 1

        self.logger.info('Finished validating the inquiry file with{}.'.format(
            ' no errors' if failed_cnt == 0 else
            ' errors; {} records were disqualified - see earlier log entries for details'
            .format(failed_cnt)))
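
The column loop in `validate_inquiry_file` is driven by a dictionary config that maps column positions (1-based, as strings) to validation categories. The actual layout is not shown anywhere in this listing; the dictionary below is only a hypothetical example of the shape the `get_dict_value`/`get_dict_object` calls imply, with invented column order and category lists.

# Hypothetical shape of the dictionary config driving the validation loop;
# column order and category values are invented for illustration.
conf_dict_example = {
    'inquiry_file_structure': {
        '1': 'program_code',
        '2': 'db_center_id',
        '3': 'bulk_location',
        '4': 'assay',
        '5': 'sub-aliquot',
        '6': 'source_id',
    },
    'inquiry_file_structure_bulk_location_delim': '/',
    'inquiry_file_structure_bulk_location': ['center', 'study', 'batch'],
}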
Example #7

def process_download_inquiries():

    # load main config file and get required values
    m_cfg = ConfigData(gc.CONFIG_FILE_MAIN)
    if not m_cfg.loaded:
        print(
            'Specified main config file ({}) was not loaded. Aborting execution.'
            .format(gc.CONFIG_FILE_MAIN))
        return 1

    # load location config file (with local value specific for the location)
    cfg_location = ConfigData(gc.CONFIG_FILE_LOCATION)
    if not cfg_location.loaded:
        print(
            'Specified location config file ({}) was not loaded. Aborting execution.'
            .format(gc.CONFIG_FILE_LOCATION))
        return 1
    # if both configs were loaded, update the main config with the location config
    m_cfg.update(cfg_location.get_whole_dictionary())
    # assign values
    common_logger_name = gc.MAIN_LOG_NAME  # m_cfg.get_value('Logging/main_log_name')

    # get path configuration values
    logging_level = m_cfg.get_value('Logging/main_log_level')
    # path to the folder where all new inquiry files will be posted
    inquiries_loc = m_cfg.get_value('Location/inquiries')

    gc.DISQUALIFIED_INQUIRIES = m_cfg.get_value(
        'Location/inquiries_disqualified')
    # get path configuration values and save them to global_const module
    # path to the folder where all application level log files will be stored (one file per run)
    gc.APP_LOG_DIR = m_cfg.get_value('Location/app_logs')
    # path to the folder where all log files for processing inquiry files will be stored
    # (one file per inquiry)
    gc.INQUIRY_LOG_DIR = m_cfg.get_value('Location/inquiry_logs_relative_path')
    # path to the folder where all processed (and renamed) inquiries will be stored
    gc.INQUIRY_PROCESSED_DIR = m_cfg.get_value(
        'Location/inquiries_processed_relative_path')
    # get config setting for the processed_add_datestamp and save it to global const module
    processed_add_datestamp = m_cfg.get_value(
        'Location/processed_add_datestamp')
    if processed_add_datestamp:
        gc.PROCESSED_ADD_DATESTAMP = processed_add_datestamp
    # path to the folder where created submission packages will be located. One package sub_folder per inquiry.
    gc.OUTPUT_REQUESTS_DIR = m_cfg.get_value('Location/output_requests')
    # path to dir with dynamically created inquiry files for disqualified aliquots
    gc.DISQUALIFIED_INQUIRIES = m_cfg.get_value(
        'Location/inquiries_disqualified_path')

    log_folder_name = gc.APP_LOG_DIR  # gc.LOG_FOLDER_NAME

    # this variable defines whether the Data Downloader app will be executed at the end of processing inquiries
    run_data_download = m_cfg.get_value('Execute/run_data_downloader')
    # path to the Data Downloader tool
    gc.DATA_DOWNLOADER_PATH = m_cfg.get_value('Location/data_downloader_path')

    prj_wrkdir = os.path.dirname(os.path.abspath(__file__))

    email_msgs = []

    inquiries_path = Path(inquiries_loc)

    # get current location of the script and create Log folder
    # if a relative path provided, convert it to the absolute address based on the application working dir
    if not os.path.isabs(log_folder_name):
        logdir = Path(prj_wrkdir) / log_folder_name
    else:
        logdir = Path(log_folder_name)
    lg_filename = time.strftime("%Y%m%d_%H%M%S", time.localtime()) + '.log'

    lg = setup_logger_common(common_logger_name, logging_level, logdir,
                             lg_filename)  # logging_level
    mlog = lg['logger']

    mlog.info(
        'Start processing download inquiries in "{}"'.format(inquiries_path))

    try:

        (root, source_inq_dirs, _) = next(walk(inquiries_path))

        inq_proc_cnt = 0
        errors_present = 'OK'

        for inq_dir in source_inq_dirs:
            source_inquiry_path = Path(root) / inq_dir
            mlog.info(
                'Selected for processing inquiry source: "{}", full path: {}'.
                format(inq_dir, source_inquiry_path))

            (_, _, inq_files) = next(walk(source_inquiry_path))

            # filter only excel files for processing as inquiries
            inquiries = [
                fl for fl in inq_files if fl.endswith(('xlsx', 'xls'))
            ]
            # filter out temp files (starting with '~$') created when an excel file is open
            inquiries = [fl for fl in inquiries if not fl.startswith('~$')]

            mlog.info('Inquiry files presented (count = {}): "{}"'.format(
                len(inquiries), inquiries))

            for inq_file in inquiries:
                inq_path = Path(source_inquiry_path) / inq_file


                try:
                    mlog.info('The following Inquiry file was selected: "{}".'.
                              format(inq_path))

                    # save timestamp of beginning of the file processing
                    ts = time.strftime("%Y%m%d_%H%M%S", time.localtime())

                    inq_obj = Inquiry(inq_path, m_cfg)

                    if inq_obj and inq_obj.loaded:
                        # proceed with processing the inquiry
                        mlog.info('Inquiry file was successfully loaded.')
                        mlog.info(
                            'Starting processing Download Inquiry file: "{}".'.
                            format(inq_path))

                        inq_obj.process_inquiry()

                        mlog.info(
                            'Processing of Download Inquiry was finished for {}'
                            .format(inq_path))

                    inq_proc_cnt += 1

                    # check if any errors were reported and set the status variable accordingly
                    if not inq_obj.error.exist():
                        if not inq_obj.disqualified_items:
                            # no disqualified sub-aliquots present
                            fl_status = 'OK'
                            _str = 'Processing status: "{}". Download Inquiry: {}'.format(
                                fl_status, inq_path)
                            # errors_present = 'OK'  # this variable is set to OK by default, no update needed
                        else:
                            # some disqualified sub-aliquots are present
                            fl_status = 'OK_with_Disqualifications'
                            _str = 'Processing status: "{}". Download Inquiry: {}'.format(
                                fl_status, inq_path)
                            if not errors_present == 'ERROR':
                                errors_present = 'DISQUALIFY'
                    else:
                        fl_status = 'ERROR'
                        _str = 'Processing status: "{}". Check the processing log file for this inquiry: {}' \
                            .format(fl_status, inq_obj.log_handler.baseFilename)
                        errors_present = 'ERROR'

                    if fl_status == 'OK':
                        mlog.info(_str)
                    else:
                        mlog.warning(_str)

                    processed_dir = inq_obj.processed_folder  # 'Processed'
                    # combine the name of the processed file
                    inq_processed_name = fl_status + '_' + str(
                        inq_file).replace(' ', '_').replace('__', '_')
                    if gc.PROCESSED_ADD_DATESTAMP:
                        inq_processed_name = ts + '_' + inq_processed_name
                    # move processed files to Processed folder
                    fl_processed_name = cm.move_file_to_processed(
                        inq_path, inq_processed_name, processed_dir,
                        inq_obj.logger, inq_obj.error)
                    if fl_processed_name:
                        mlog.info(
                            'Processed file "{}" was moved (renamed) to: "{}"'.
                            format(inq_path,
                                   processed_dir / fl_processed_name))
                    else:
                        errors_present = errors_present + '|MoveProcessedError'
                        mlog.warning(
                            'Moving the processed file "{}" was not successful due to some errors '
                            'reported in the inquiry\'s log file {}.'.format(
                                inq_path, inq_obj.log_handler.baseFilename))

                    # preps for email notification
                    # create a dictionary to feed into template for preparing an email body
                    template_feeder = {
                        'file_num': inq_proc_cnt,
                        'file_path': str(inq_path),
                        'file_path_new': (str(processed_dir / fl_processed_name)
                                          if processed_dir and fl_processed_name
                                          else None),
                        'inq_obj_errors_cnt': inq_obj.error.count,
                        'log_file_path': inq_obj.log_handler.baseFilename,
                        'dld_request_file_path': str(inq_obj.download_request_path),
                        'inq_sources': inq_obj.inq_sources,
                        'inq_match_aliquots': inq_obj.inq_match_arr,
                        'inq_disqul_aliquots': inq_obj.disqualified_items,
                        'inq_disqul_reprocess_path': str(inq_obj.disqualified_inquiry_path)
                    }
                    email_body_part = cm.populate_email_template(
                        'processed_inquiry.html', template_feeder)
                    email_msgs.append(email_body_part)
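                    # email_msgs accumulates one HTML fragment per processed inquiry;
                    # they are joined into the body of the summary email below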

                    # deactivate the current Inquiry logger
                    deactivate_logger_common(inq_obj.logger,
                                             inq_obj.log_handler)
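                    # drop the reference so the next iteration starts with a fresh object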
                    inq_obj = None

                except Exception as ex:
                    # report the error to the log file and proceed to the next file
                    mlog.error(
                        'Error "{}" occurred during processing file: {}\n{} '.
                        format(ex, inq_path, traceback.format_exc()))
                    continue

        mlog.info('Number of processed Inquiries = {}'.format(inq_proc_cnt))

        # start Data Download request if proper config setting was provided
        dd_status = {'status': '', 'message': ''}
        if run_data_download:
            # start process
            mlog.info(
                'Asynchronously starting Data Downloader app: "{}".'.format(
                    gc.DATA_DOWNLOADER_PATH))
            try:
                dd_process = cm.start_external_process_async(
                    gc.DATA_DOWNLOADER_PATH)
                # check if it is running
                dd_status = cm.check_external_process(dd_process)
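                # check_external_process is expected to return a dict with
                # 'status' and 'message' keys (see the error fallback below)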
                mlog.info(
                    'Status of running Data Downloader app: "{}".'.format(
                        dd_status))
            except Exception as ex:
                # report unexpected error during starting Data Downloader
                _str = 'Unexpected Error "{}" occurred during an attempt to start Data Downloader app ({})\n{} ' \
                    .format(ex, gc.DATA_DOWNLOADER_PATH, traceback.format_exc())
                mlog.critical(_str)
                dd_status = {'status': 'Error', 'message': _str}
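        # A minimal sketch (an assumption, not the actual "cm" module code) of
        # what the two helpers used above could look like, built on the
        # standard subprocess module:
        #
        #     import subprocess
        #
        #     def start_external_process_async(cmd):
        #         # launch the command and return immediately, without waiting
        #         return subprocess.Popen(cmd)
        #
        #     def check_external_process(process):
        #         # poll() returns None while the process is still running
        #         if process.poll() is None:
        #             return {'status': 'Running', 'message': ''}
        #         return {'status': 'Error',
        #                 'message': 'exited with code {}'.format(process.returncode)}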

        mlog.info('Preparing to send notification email.')

        email_to = m_cfg.get_value('Email/send_to_emails')
        email_subject = 'processing of download inquiry. '

        if inq_proc_cnt > 0:
            # collect final details and send a summary email about the results

            # get all statuses into an array; the 1st element is the main status
            err_present = errors_present.split('|')
            if err_present:
                # set email subject based on the main status err_present[0]
                if err_present[0] == 'OK':
                    email_subject = 'SUCCESSFUL ' + email_subject
                elif err_present[0] == 'DISQUALIFY':
                    email_subject = 'SUCCESSFUL (with disqualifications) ' + email_subject
                else:
                    email_subject = 'ERROR(s) present during ' + email_subject
            if len(err_present) > 1:
                if err_present[1] == 'MoveProcessedError':
                    email_subject = email_subject + ' Error moving inquiry to processed.'

            if dd_status and 'status' in dd_status \
                    and dd_status['status'].lower() == 'error':
                email_subject = email_subject + ' Errors starting Data Downloader.'

            # create a dictionary to feed into template for preparing an email body
            template_feeder = {
                'inq_cnt': inq_proc_cnt,
                'run_data_download': run_data_download,
                'downloader_path': gc.DATA_DOWNLOADER_PATH,
                'downloader_start_status': dd_status['status'].lower(),
                'processed_details': '<br/>'.join(email_msgs)
            }
            email_body = cm.populate_email_template('processed_inquiries.html',
                                                    template_feeder)
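            # note: 'processed_inquiries.html' (plural) is the summary template;
            # the per-inquiry fragments above use 'processed_inquiry.html'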

            # remove return characters from the body of the email, to keep just clean html code
            email_body = email_body.replace("\r", "")
            email_body = email_body.replace("\n", "")

            mlog.info(
                'Sending a status email with subject "{}" to "{}".'.format(
                    email_subject, email_to))

            try:
                if m_cfg.get_value('Email/send_emails'):
                    email.send_yagmail(
                        emails_to=email_to,
                        subject=email_subject,
                        message=email_body
                        # attachments intentionally omitted, since some log files exceed the 25MB email size limit and fail sending
                        # ,attachment_path=email_attchms
                    )
            except Exception as ex:
                # report unexpected error during sending emails to a log file and continue
                _str = 'Unexpected Error "{}" occurred during an attempt to send email upon ' \
                       'finishing processing inquiry "{}" in {}\n{} ' \
                    .format(ex, inq_path, os.path.abspath(__file__), traceback.format_exc())
                mlog.critical(_str)

            mlog.info(
                'End of processing of download inquiries in "{}".'.format(
                    inquiries_path))

    except Exception as ex:
        # report unexpected error to log file
        _str = 'Unexpected Error "{}" occurred during processing file: {}\n{} ' \
            .format(ex, os.path.abspath(__file__), traceback.format_exc())
        mlog.critical(_str)
        raise

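    # note: sys.exit() with no argument reports exit status 0 ("success") even
    # when errors_present ended up as 'ERROR'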
    sys.exit()