Code example #1
0
def key_exists_in_dict(key, section):
    """Return True if *key* exists under *section* of the dictionary config
    file and maps to a non-None value.

    :param key: dictionary key to look up; characters listed in
        gc.ASSAY_CHARS_TO_REPLACE are normalized before the lookup.
    :param section: top node of the dictionary file to search in.
    :return: True when the key resolves to a non-None value; False otherwise,
        including any error raised during the lookup.
    """
    fl_cfg_dict = ConfigData(gc.CONFIG_FILE_DICTIONARY)
    key = replace_unacceptable_chars(key, gc.ASSAY_CHARS_TO_REPLACE)
    try:
        # a "section/key" path addresses a nested node of the dictionary file
        return fl_cfg_dict.get_item_by_key(section + "/" + key) is not None
    except Exception:
        # any lookup failure is treated as "key not present"
        return False
Code example #2
0
def get_dict_value(key, section):
    """Look up *key* under *section* in the dictionary config file.

    Returns the mapped value when one is found; otherwise (missing key,
    None value, or any lookup error) returns the normalized key itself.
    """
    cfg_dictionary = ConfigData(gc.CONFIG_FILE_DICTIONARY)
    # replace spaces and slashes with "_"
    key = replace_unacceptable_chars(key, gc.ASSAY_CHARS_TO_REPLACE)
    try:
        value = cfg_dictionary.get_item_by_key(section + "/" + key)
    except Exception:
        return key
    return value if value is not None else key
Code example #3
0
def send_yagmail(emails_to,
                 subject,
                 message,
                 main_conf=None,
                 email_from=None,
                 attachment_path=None,
                 smtp_server=None,
                 smtp_server_port=None):
    """Send an email through yagmail over an anonymous (no-login) SMTP server.

    Any of the optional parameters that are not supplied are resolved from
    the main config file / environment variables.

    :param emails_to: recipient address(es), as accepted by yagmail.
    :param subject: email subject line.
    :param message: email body contents.
    :param main_conf: pre-loaded main config object; loaded from
        gc.CONFIG_FILE_MAIN under the project root when not supplied.
    :param email_from: sender address; defaults to config
        'Email/default_from_email'.
    :param attachment_path: optional path of a file to attach.
    :param smtp_server: SMTP host; defaults to the environment variable
        named by config 'Email/smtp_server_env_name'.
    :param smtp_server_port: SMTP port; defaults to the environment variable
        named by config 'Email/smtp_server_port_env_name'.
    """
    if main_conf:
        m_cfg = main_conf
    else:
        # no config supplied - load the main config file from the project root
        project_root = cm.get_project_root()
        m_cfg = ConfigData(str(project_root.joinpath(gc.CONFIG_FILE_MAIN)))

    if not email_from:
        email_from = m_cfg.get_value('Email/default_from_email')
    if not smtp_server:
        smtp_server = cm.get_environment_variable(
            m_cfg.get_item_by_key('Email/smtp_server_env_name'))
    if not smtp_server_port:
        smtp_server_port = cm.get_environment_variable(
            m_cfg.get_item_by_key('Email/smtp_server_port_env_name'))

    # login-free SMTP session; SSL is off and address validation is skipped
    mailer = yagmail.SMTP(email_from,
                          host=smtp_server,
                          smtp_skip_login=True,
                          smtp_ssl=False,
                          soft_email_validation=False,
                          port=smtp_server_port)
    mailer.send(
        to=emails_to,
        subject=subject,
        contents=message,
        attachments=attachment_path,
    )
Code example #4
0
class MetadataDB:
    """Thin wrapper around the metadata database (MDB).

    The connection string and the stored-procedure template are read from
    the main config file (gc.CONFIG_FILE_MAIN); study-specific values come
    from the study config object passed to the constructor.
    """

    s_conn = ''  # DB connection string; set per instance in __init__
    conn = None  # pyodbc connection; created lazily by open_connection()

    def __init__(self, study_cfg):
        # main config holds the connection string and the proc template keys
        self.cfg = ConfigData(gc.CONFIG_FILE_MAIN)
        self.s_conn = self.cfg.get_item_by_key(gc.CFG_DB_CONN).strip()
        self.study_cfg = study_cfg

    def open_connection(self):
        """Open an autocommit pyodbc connection using the configured string."""
        self.conn = pyodbc.connect(self.s_conn, autocommit=True)

    def submit_row(self, row, file):
        """Submit one sample row to the DB via the configured stored proc.

        :param row: sample row object; provides sample_id, to_json() and an
            error collector (row.error).
        :param file: source file object; provides the dictionary JSON, the
            file path and a logger.
        :return: a list holding one list of result-row dicts on success;
            None when the DB call fails (the error is logged and recorded
            on the row instead of being raised).
        """
        dict_json = file.get_file_dictionary_json(True)
        filepath = str(file.filepath)
        sample_id = row.sample_id
        row_json = row.to_json()

        if not self.conn:
            self.open_connection()
        str_proc = self.cfg.get_item_by_key(gc.CFG_DB_SQL_PROC).strip()
        study_id = self.study_cfg.get_item_by_key(gc.CFG_DB_STUDY_ID).strip()
        dict_path = '$.' + self.study_cfg.get_item_by_key(
            gc.CFG_DICT_PATH).strip()
        dict_upd = self.study_cfg.get_item_by_key(
            gc.CFG_DB_ALLOW_DICT_UPDATE).strip()
        sample_upd = self.study_cfg.get_item_by_key(
            gc.CFG_DB_ALLOW_SAMPLE_UPDATE).strip()

        # prepare the stored proc string: each placeholder's literal text
        # (e.g. '{study_id}') is itself stored in config under the given key
        substitutions = (
            (gc.CFG_FLD_TMPL_STUDY_ID, study_id),      # '{study_id}'
            (gc.CFG_FLD_TMPL_SAMPLE_ID, sample_id),    # '{sample_id}'
            (gc.CFG_FLD_TMPL_ROW_JSON, row_json),      # '{smpl_json}'
            (gc.CFG_FLD_TMPL_DICT_JSON, dict_json),    # '{dict_json}'
            (gc.CFG_FLD_TMPL_DICT_PATH, dict_path),    # '{dict_path}'
            (gc.CFG_FLD_TMPL_FILEPATH, filepath),      # '{filepath}'
            (gc.CFG_FLD_TMPL_DICT_UPD, dict_upd),      # '{dict_update}'
            (gc.CFG_FLD_TMPL_SAMPLE_UPD, sample_upd),  # '{sample_update}'
        )
        for placeholder_key, value in substitutions:
            str_proc = str_proc.replace(
                self.cfg.get_item_by_key(placeholder_key), value)

        file.logger.debug('SQL Procedure call = {}'.format(str_proc))

        try:
            cursor = self.conn.cursor()
            cursor.execute(str_proc)
            # collect the returned recordset as a list of column->value dicts.
            # NOTE: the record variable must NOT be named "row" - the except
            # handler below needs the "row" parameter, which the original
            # code shadowed with the loop variable.
            rows = cursor.fetchall()
            columns = [column[0] for column in cursor.description]
            results = [dict(zip(columns, rec)) for rec in rows]
            return [results]

        except Exception as ex:
            # report an error if DB call has failed.
            _str = 'Error "{}" occurred during submitting a row (sample_id = "{}") to database; ' \
                   'used SQL script "{}". Here is the traceback: \n{} '.format(
                    ex, sample_id, str_proc, traceback.format_exc())
            row.error.add_error(_str)
            file.logger.error(_str)
Code example #5
0
class Request(File):
    def __init__(self, filepath, main_cfg, file_type=2, sheet_name=''):
        """Create a submission Request from an Excel request file.

        Sets up logging, initializes all request-state fields, then
        immediately loads and validates the file via get_file_content().

        :param filepath: path to the submission request (Excel) file.
        :param main_cfg: pre-loaded main config object; when falsy the main
            config is loaded from gc.CONFIG_FILE_MAIN instead.
        :param file_type: file type id forwarded to File.__init__
            (default 2 - assumed to denote the Excel type; TODO confirm).
        :param sheet_name: worksheet to read; when blank, falls back to
            gc.REQUEST_EXCEL_WK_SHEET_NAME.
        """

        # load_configuration (main_cfg_obj) # load global and local configurations

        File.__init__(self, filepath, file_type)

        # use the supplied main config object when given, otherwise load it
        if main_cfg:
            self.conf_main = main_cfg
        else:
            self.conf_main = ConfigData(gc.CONFIG_FILE_MAIN)
        # if cfg_path=='':
        #     self.conf_main = ConfigData(gc.CONFIG_FILE_MAIN)
        # else:
        #     self.conf_main = ConfigData(cfg_path)

        # collects request-level errors
        self.error = RequestError(self)

        # logger must exist before get_file_content() runs at the end
        self.log_handler = None
        self.logger = self.setup_logger(self.wrkdir, self.filename)
        self.logger.info(
            'Start working with Submission request file {}'.format(filepath))

        # self.file_dict = OrderedDict()
        # self.rows = OrderedDict()

        self.columnlist = []  # columns loaded from the request worksheet
        self.samples = []  # sample ids resolved from the DB
        self.sub_aliquots = []  # sub-aliquot ids provided in the request
        self.disqualified_sub_aliquots = {}
        self.aliquots_to_subaliquots_map = {
        }  # holds the map of aliquots to sub-aliquots for interpreting DB responses
        self.disqualified_request_path = ''  # will store path to a request file with disqualified sub-aliquots
        self.project = ''
        self.bulk_location = ''
        self.assay = ''
        self.center = ''
        self.center_id = None
        self.center_code = None
        self.experiment_id = ''
        self.data_source_names = ''
        self.data_source_objects = {
        }  # dictionary to store all collected data sources for the request

        self.aliquots = None
        self.qualified_aliquots = None
        self.raw_data = None
        self.assay_data = None
        self.attachments = None
        self.submission_forms = None
        self.submission_package = None
        # NOTE(review): overwrites the '' assigned to data_source_names above
        self.data_source_names = None
        # will hold value corresponding to the type of data source being used (attachments are not ignored)
        # possible value 'db' and 'file'. The value of the variable being set based on the first data source being used
        self.data_source_forms_assignment = None

        # self.sheet_name = ''
        self.sheet_name = sheet_name.strip()
        if not self.sheet_name or len(self.sheet_name) == 0:
            # if sheet name was not passed as a parameter, try to get it from config file
            self.sheet_name = gc.REQUEST_EXCEL_WK_SHEET_NAME  # 'wk_sheet_name'
        # print (self.sheet_name)
        self.logger.info('Data will be loaded from worksheet: "{}"'.format(
            self.sheet_name))

        self.conf_assay = None

        # load and validate the request file content right away
        self.get_file_content()

    def get_file_content(self):
        """Load the content of the request Excel file (only once).

        Populates self.columnlist (one value array per column) and
        self.lineList (rows rendered as quoted CSV strings), then extracts
        the request parameters and validates them, setting self.loaded
        accordingly.

        :return: self.lineList, or None when loading failed.
        """
        if not self.columnlist:
            if cm.file_exists(self.filepath):
                self.logger.debug('Loading file content of "{}"'.format(
                    self.filepath))

                with xlrd.open_workbook(self.filepath) as wb:
                    if not self.sheet_name or len(self.sheet_name) == 0:
                        # by default retrieve the first sheet in the excel file
                        sheet = wb.sheet_by_index(0)
                    else:
                        # if sheet name was provided
                        sheets = wb.sheet_names()  # get list of all sheets
                        if self.sheet_name in sheets:
                            # if given sheet name in the list of available sheets, load the sheet
                            sheet = wb.sheet_by_name(self.sheet_name)
                        else:
                            # report an error if given sheet name not in the list of available sheets
                            _str = (
                                'Given worksheet name "{}" was not found in the file "{}". '
                                'Verify that the worksheet name exists in the file.'
                            ).format(self.sheet_name, self.filepath)
                            self.error.add_error(_str)
                            self.logger.error(_str)

                            self.lineList = None
                            self.loaded = False
                            return self.lineList

                # NOTE(review): sheet/wb are used below after the "with"
                # released the file handle; xlrd keeps sheet data in memory
                # (workbook not opened on_demand), so this works.
                sheet.cell_value(0, 0)

                lines = [
                ]  # will hold content of the request file as an array of arrays (rows)
                for i in range(sheet.ncols):
                    column = []
                    for j in range(sheet.nrows):
                        if i == 0:
                            lines.append(
                                []
                            )  # adds an array for each new row in the request file

                        cell = sheet.cell(j, i)
                        cell_value = cell.value
                        # take care of number and dates received from Excel and converted to float by default
                        if cell.ctype == 2 and int(cell_value) == cell_value:
                            # the value is an integer
                            cell_value = str(int(cell_value))
                        elif cell.ctype == 2:
                            # the value is a float
                            cell_value = str(cell_value)
                        # convert date back to human readable date format
                        if cell.ctype == 3:
                            cell_value_date = xlrd.xldate_as_datetime(
                                cell_value, wb.datemode)
                            # fix: format string was corrupted to
                            # "%Y-%m-%directory"; dates render as ISO YYYY-MM-DD
                            cell_value = cell_value_date.strftime("%Y-%m-%d")
                        column.append(
                            cell_value
                        )  # adds value to the current column array
                        lines[j].append(
                            '"' + str(cell_value) + '"'
                        )  # adds value in "csv" format for a current row

                    self.columnlist.append(
                        column)  # adds a column to a list of columns

                # populate lineList property
                self.lineList = []
                for ln in lines:
                    self.lineList.append(','.join(ln))

                wb.unload_sheet(sheet.name)

                # load passed request parameters (by columns)
                self.get_request_parameters()

                # validate provided information
                self.logger.info(
                    'Validating provided request parameters. project: "{}", bulk location: "{}", '
                    'assay: "{}", db_center_code_or_id: "{}",'
                    'Sub-Aliquots: "{}"'.format(self.project,
                                                self.bulk_location, self.assay,
                                                self.center,
                                                self.sub_aliquots))
                self.validate_request_params()

                if self.error.exist():
                    # report that errors exist
                    self.loaded = False
                    _str = 'Errors ({}) were identified during validating of the request. \nError(s): {}'.format(
                        self.error.count, self.error.get_errors_to_str())
                else:
                    self.loaded = True
                    _str = 'Request parameters were successfully validated - no errors found.'
                self.logger.info(_str)

                # combine Experiment_id out of request parameters
                if self.center_code and len(self.center_code.strip()) > 0:
                    # use center code if available
                    self.experiment_id = "_".join(
                        [self.project, self.center_code, self.assay])
                else:
                    # use provided value for the center column from request, if center_code is not available
                    self.experiment_id = "_".join(
                        [self.project, self.center, self.assay])

            else:
                _str = 'Loading content of the file "{}" failed since the file does not appear to exist".'.format(
                    self.filepath)
                self.error.add_error(_str)
                self.logger.error(_str)

                self.columnlist = None
                self.lineList = None
                self.loaded = False
        return self.lineList

    # get all values provided in the request file
    def get_request_parameters(self):
        """Extract the request parameters from the loaded columns.

        Row #0 of each column is the header; row #1 holds the value.
        Column 4 supplies the whole list of sub-aliquot ids.
        """
        first_data_row = 1
        self.project = self.columnlist[0][first_data_row]
        self.bulk_location = self.columnlist[1][first_data_row]
        self.assay = self.columnlist[2][first_data_row].lower()
        # center code (if alpha numeric) or center id (if numeric)
        self.center = self.columnlist[3][first_data_row]
        self.sub_aliquots = self.columnlist[4]
        if self.sub_aliquots:
            # drop the column header entry
            del self.sub_aliquots[0]
        # self.samples = self.columnlist[5]
        # if self.samples and len(self.samples) > 0:
        #     self.samples.pop(0) # get rid of the column header

    # validates provided parameters (loaded from the submission request file)
    def validate_request_params(self):
        """Validate the parameters loaded from the submission request file.

        Checks for empty values, verifies assay/project against the
        dictionary config, validates the center (by code, then by id) and
        the aliquot ids against the database. Collected errors are added to
        self.error; warnings are only logged.
        """
        _str_err = ''
        _str_warn = ''
        if len(self.sub_aliquots) == 0:
            _str_err = '\n'.join([
                _str_err, 'List of provided sub-samples is empty. '
                'Aborting processing of the submission request.'
            ])
        # Check if empty sub-samples were provided
        if '' in self.sub_aliquots:
            i = 0
            cleaned_cnt = 0
            # NOTE(review): mutates the list while iterating it; two adjacent
            # empty entries could leave one behind - confirm inputs or iterate
            # over a copy.
            for s in self.sub_aliquots:
                # check for any empty sub-aliquot values and remove them
                if len(s.strip()) == 0:
                    self.sub_aliquots.pop(i)
                    cleaned_cnt += 1
                else:
                    i += 1
            if cleaned_cnt > 0:
                _str_warn = '\n'.join([
                    _str_warn,
                    'Empty sub-aliqouts (count = {}) were removed from the list. '
                    'Here is the list of sub-aliqouts after cleaning (count = {}): "{}" '
                    .format(cleaned_cnt, len(self.sub_aliquots),
                            self.sub_aliquots)
                ])
        # check for empty values
        if len(self.project) == 0:
            _str_err = '\n'.join([
                _str_err,
                'No Program name was provided. Aborting processing of the submission request.'
            ])
        if len(self.bulk_location) == 0:
            _str_err = '\n'.join([
                _str_err,
                'No Bulk Location was provided. Aborting processing of the submission request.'
            ])
        if len(self.assay) == 0:
            _str_err = '\n'.join([
                _str_err,
                'No Assay was provided. Aborting processing of the submission request.'
            ])
        if len(self.center) == 0:
            _str_err = '\n'.join([
                _str_err,
                'No DB Center information was provided. Aborting processing of the submission request.'
            ])

        # check for values that should match some predefined values from a dictionary
        # check assay value
        if not cm2.key_exists_in_dict(self.assay, 'assay'):
            _str_err = '\n'.join([
                _str_err,
                'Provided Assay name "{}" is not matching a list of expected assay names '
                '(as stored in "{}" dictionary file). '
                'Aborting processing of the submission request.'.format(
                    self.assay, gc.CONFIG_FILE_DICTIONARY)
            ])
        else:
            # if provided assay name is expected, convert it to the name expected by the Submission logic
            self.assay = cm2.get_dict_value(self.assay, 'assay')

        # check project value
        if not cm2.key_exists_in_dict(self.project.lower(), 'project'):
            _str_err = '\n'.join([
                _str_err,
                'Provided Program name "{}" is not matching a list of expected names '
                '(as stored in "{}" dictionary file). '
                'Aborting processing of the submission request.'.format(
                    self.project, gc.CONFIG_FILE_DICTIONARY)
            ])
        else:
            # if provided project name is expected, convert it to the name expected by the Submission logic
            self.project = cm2.get_dict_value(self.project.lower(), 'project')

        # validate center_code or center_id value
        self.logger.info(
            'Start validation of center value "{}" provided in the request'.
            format(self.center))
        db = DBAccess(self.logger, self.error,
                      self.conf_main)  # create DBAccess object
        db.open_connection()
        # test center value assuming center code was provided
        dataset = db.validate_center_code(self.center, self.project, 'code',
                                          'code')
        _str_err_out1, center_id_out1 = self.check_validation_dataset_outcome(
            dataset, 'center_id', 'center_code')
        if center_id_out1:
            # center id was returned, meaning center was validated fine
            self.center_id = center_id_out1
            # get center code value from the current DB dataset
            _str_err_out3, center_code = self.get_field_value_from_dataset(
                dataset, 'center_code')
            if center_code:
                # center code retrieved OK
                self.center_code = center_code
            else:
                # report an error during retrieving center_code
                _str_err = '\n'.join([_str_err, _str_err_out3])
        else:
            # if center code was not validated at first attempt, validate it assuming the center id was given
            dataset = db.validate_center_code(self.center, self.project, 'id',
                                              'code')
            _str_err_out2, center_id_out2 = self.check_validation_dataset_outcome(
                dataset, 'center_id', 'center_id')
            if center_id_out2:
                # center id was validated at the 2nd attempt, ignore the 1st failed center code validation
                self.center_id = center_id_out2
                # get center code value from the current DB dataset
                _str_err_out3, center_code = self.get_field_value_from_dataset(
                    dataset, 'center_code')
                if center_code:
                    # center code retrieved OK
                    self.center_code = center_code
                else:
                    # report an error during retrieving center_code
                    _str_err = '\n'.join([_str_err, _str_err_out3])
            else:
                # center validation attempts failed, report both failures
                _str_err = '\n'.join([_str_err, _str_err_out1, _str_err_out2])

        # get list of aliquots from list of sub-aliquots
        self.aliquots = [
            cm2.convert_sub_aliq_to_aliquot(al, self.assay)
            for al in self.sub_aliquots
        ]

        # create a map to convert aliquot value to sub_aliquot value (for processing DB responses given for aliquots)
        for sa, a in zip(self.sub_aliquots, self.aliquots):
            self.aliquots_to_subaliquots_map[a] = sa

        if self.center_id:
            self.logger.info('Start validation of aliquot ids vs DB')
            # if center id was validated in the above code, validate received aliquots vs manifest dataset in DB
            dataset = db.validate_aliquot_ids(self.center_id, self.aliquots)
            if dataset:
                # create dictionary of received aliquots/sample ids
                aliquots_to_samples_map = {}
                for row in dataset:
                    if '_aliquot_id' in row and '_sample_id' in row:
                        aliquots_to_samples_map[
                            row['_aliquot_id']] = row['_sample_id']
                # check if each aliquot id was returned from a database and get the sample id from the dataset
                for sa, a in zip(self.sub_aliquots, self.aliquots):
                    if a in aliquots_to_samples_map:
                        if len(str(aliquots_to_samples_map[a]).strip()) > 0:
                            self.samples.append(aliquots_to_samples_map[a])
                        else:
                            _str = 'Blank Sample Id value was returned from DB for the sub-aliquot id "{}". ' \
                                   'The sub-aliquot was disqualified'.format(sa)
                            self.disqualify_sub_aliquot(sa, _str)
                            _str_warn = '\n'.join([_str_warn, _str])
                    else:
                        _str = 'Sub-aliquot id "{}" was not found in the database and was disqualified'.format(
                            sa)
                        self.disqualify_sub_aliquot(sa, _str)
                        _str_warn = '\n'.join([_str_warn, _str])
            else:
                _str_err = '\n'.join([
                    _str_err,
                    'Aliquot ids cannot be validated since no data was returned from DB for '
                    'center_id = "{}" and aliquot ids as following: {} '.
                    format(self.center_id, self.aliquots)
                ])
        # release the DBAccess object
        db = None

        # report any collected errors
        if len(_str_err) > 0:
            _str_err = 'Validation of request parameters:' + _str_err
            self.error.add_error(_str_err)
            self.logger.error(_str_err)
        # report any collected warnings
        if len(_str_warn) > 0:
            _str_warn = 'Validation of request parameters:' + _str_warn
            self.logger.warning(_str_warn)

    def check_validation_dataset_outcome(self, dataset, validation_id_column,
                                         validation_id_name):
        """Interpret a validation dataset returned by the DB.

        Examines the first row of *dataset* (a list of dicts expected to
        carry 'status', 'description' and *validation_id_column* keys).

        :param dataset: DB response; list of dict rows, or falsy on failure.
        :param validation_id_column: name of the column holding the id to
            extract on success (e.g. 'center_id').
        :param validation_id_name: human-readable name of the validated
            value, used in error messages.
        :return: tuple (error_string, validation_id) - validation_id is None
            unless the row's status is 'OK'.
        """
        _str_err = ''
        row_num = 1
        validation_id_out = None
        # initialize to safe defaults so that missing fields in the DB
        # response produce the "unexpected status" branch instead of an
        # UnboundLocalError (bug in the original code)
        status = None
        description = None
        validation_id = None
        if dataset:
            if len(dataset) >= row_num:
                row = dataset[row_num - 1]  # get the first row of the dataset
                if 'status' in row:
                    status = row['status']
                if 'description' in row:
                    description = row['description']
                if validation_id_column in row:  # center_id
                    validation_id = row[validation_id_column]
            if status == 'OK':  # validation was successful
                validation_id_out = validation_id
            elif status == 'Failed':  # validation has failed
                _str_err = '\n'.join([
                    _str_err,
                    'Validation of the provided {} value vs DB has Failed, description: {}'
                    .format(validation_id_name, description)
                ])
            else:  # unexpected status value was returned
                _str_err = '\n'.join([
                    _str_err,
                    'Validation of the provided {} value vs DB returned unexpected status {}'
                    .format(validation_id_name, status)
                ])
        else:
            _str_err = '\n'.join([
                _str_err,
                'Unexpected error was reported during validating {} in the DB. '
                'Check earlier entries in the log file.'.format(
                    validation_id_name)
            ])

        return _str_err, validation_id_out

    def get_field_value_from_dataset(self, dataset, field_name, row_num=None):
        """Fetch one field from one row of a DB dataset.

        :param dataset: list of dict rows returned by the DB, or falsy.
        :param field_name: key of the field to retrieve.
        :param row_num: 1-based row number; defaults to the first row.
        :return: tuple (error_string, value) - value is None when the row or
            field is absent; error_string is non-empty only for a falsy
            dataset.
        """
        # default to the first row when no row number was given
        row_num = 1 if row_num is None else row_num

        _str_err = ''
        value_out = None
        if not dataset:
            _str_err = '\n'.join([
                _str_err,
                'Unexpected error was reported during retrieving value of "{}" (row #{})from the dataset. '
                .format(field_name, row_num)
            ])
        elif len(dataset) >= row_num:
            target_row = dataset[row_num - 1]
            if field_name in target_row:
                value_out = target_row[field_name]

        return _str_err, value_out

    def setup_logger(self, wrkdir, filename):
        """Create and return the request-specific logger.

        The created log handler is stored on self.log_handler so it can be
        detached later. The log level comes from the main config.

        :param wrkdir: base directory used when the configured log folder
            is a relative path.
        :param filename: stem of the log file name; a timestamp is appended.
        :return: the configured logger object.
        """
        log_dir = gc.REQ_LOG_DIR  # gc.LOG_FOLDER_NAME
        logging_level = self.conf_main.get_value('Logging/request_log_level')

        # if a relative path provided, convert it to the absolute address
        # based on the application working dir
        if os.path.isabs(log_dir):
            log_dir_path = Path(log_dir)
        else:
            log_dir_path = Path(wrkdir) / log_dir

        log_file_name = '{}_{}.log'.format(
            filename, time.strftime("%Y%m%d_%H%M%S", time.localtime()))

        lg = setup_logger_common(gc.REQUEST_LOG_NAME, logging_level,
                                 log_dir_path, log_file_name)

        self.log_handler = lg['handler']
        return lg['logger']

    def load_request_configuration(self):
        """Load project- and assay-specific configuration for this request.

        Merges project/environment details into the main config, loads the
        assay config and, when that succeeds, overlays it with location
        specific details.
        """
        # merge project specific config into the main config
        self.load_project_config_into_main(self.project)
        # load the project specific assay config file
        self.conf_assay = self.load_assay_conf(self.assay, self.project)
        if not self.conf_assay:
            return
        # overlay the assay config with project/environment specific details
        self.conf_assay = self.update_cfg_dictionary_with_location_details(
            gc.CONFIG_FILE_ASSAY_LOCATION, self.project, self.conf_assay)

    def process_request(self):
        """Process the request: instantiate its data sources, build the
        submission package, and write out the transfer script and the
        disqualified-sub-aliquot request file.

        Data source names come from the assay config; each entry is either a
        plain string or a (name, alias) tuple, and names ending in "_db" are
        DB-backed sources.
        """
        self.data_source_names = cm.get_value_from_dictionary(
            'data_sources', self.conf_assay)  # self.conf_assay['data_sources']

        # path to the folder where created submission packages will be located.
        # since this location can be provided in the project config file, this assignment is happening
        # after loading the project config
        gc.OUTPUT_PACKAGES_DIR = self.conf_main.get_value(
            'Submission_location/output_packages')

        for data_source_name in self.data_source_names:
            # if isinstance(data_source_name, tuple)
            if isinstance(data_source_name, str):
                if data_source_name == 'attachment':
                    self.attachments = Attachment(self)
                elif data_source_name[-3:] == "_db":
                    # "_db" suffix marks a database-backed data source
                    self.data_source_objects[data_source_name] = DataSourceDB(
                        self, data_source_name, data_source_name)
                    if not self.data_source_forms_assignment:
                        self.data_source_forms_assignment = 'db'
                else:
                    self.data_source_objects[data_source_name] = DataSource(
                        self, data_source_name, data_source_name)
                    if not self.data_source_forms_assignment:
                        self.data_source_forms_assignment = 'file'
            elif isinstance(data_source_name, tuple):
                # tuple form: (source name, alias); forms assignment is not
                # set on this path in the current code
                if data_source_name[0][-3:] == "_db":
                    self.data_source_objects[
                        data_source_name[0]] = DataSourceDB(
                            self, data_source_name[0], data_source_name[1])
                else:
                    self.data_source_objects[data_source_name[0]] = DataSource(
                        self, data_source_name[0], data_source_name[1])
            else:
                self.logger.error(
                    'Provided data source name ({}) is of unexpected format and cannot be processed.'
                    .format(data_source_name))

        # if data_source_forms_assignment was not assigned with any value in code before, assign a default to it
        # this a case when an assay submits only attachments and do not use any assay or QC data
        if not self.data_source_forms_assignment:
            self.data_source_forms_assignment = gc.DEFAULT_DATA_SOURCE_FORMS_ASSIGNMENT

        self.submission_package = SubmissionPackage(self)

        self.create_request_for_disqualified_sub_aliquots()

        self.create_trasfer_script_file()

        # check for errors and put final log entry for the request.
        if self.error.exist():
            _str = 'Processing of the current request was finished with the following errors: {}\n'.format(
                self.error.get_errors_to_str())
            self.logger.error(_str)
        else:
            _str = 'Processing of the current request was finished successfully.\n'
            self.logger.info(_str)

    def load_assay_conf(self, assay, project):
        """Load the section for *assay* from the project's assay config file.

        :param assay: assay name; its upper-cased form is the config section.
        :param project: project name substituted into the config file path.
        :return: the assay config section, or a falsy value when missing
            (in which case the failure is logged and recorded as an error).
        """
        assay_cfg_path = gc.CONFIG_FILE_ASSAY.replace('{project}', project)
        assay_config = ConfigData(assay_cfg_path).get_value(assay.upper())
        if not assay_config:
            _str = "Configuration for the {} assay CANNOT be loaded from the assay config file: {}. " \
                   "Aborting execution.".format(assay.upper(), assay_cfg_path)
            self.logger.error(_str)
            self.error.add_error(_str)
        else:
            self.logger.info(
                "Configuration for the {} assay was loaded from the assay config file: {}. "
                .format(assay.upper(), assay_cfg_path))

        return assay_config

    # def update_cfg_assay_with_location_details(self, project, cfg_assay):
    #     cfg_assay_location = ConfigData(gc.CONFIG_FILE_ASSAY_LOCATION.replace('{project}', project))
    #     if cfg_assay_location.loaded:
    #         self.logger.info('Local config file "{}" was loaded and being used.'.format(cfg_assay_location.cfg_path))
    #         cfg_assay = cm.update_dictionary_matching_keys(cfg_assay, cfg_assay_location.get_whole_dictionary())
    #     else:
    #         _str = 'Local config file "{}" was NOT loaded. Aborting processing of the current request file.'\
    #             .format(cfg_assay_location.cfg_path)
    #         self.logger.error(_str)
    #         self.error.add_error(_str)
    #     return cfg_assay

    def update_cfg_dictionary_with_location_details(self, location_path,
                                                    project, cfg_to_update):
        """Overlay environment-specific ("location") settings onto a config
        dictionary.

        :param location_path: path template of the location config file;
                              '{project}' inside it is replaced with *project*
        :param project: project name substituted into the path template
        :param cfg_to_update: dictionary to update with matching keys
        :return: the updated dictionary; returned unchanged when the location
                 config could not be loaded (an error is recorded in that case)
        """
        cfg_location = ConfigData(location_path.replace('{project}', project))
        if not cfg_location.loaded:
            # guard clause: record the failure and hand back the dict untouched
            _str = 'Local config file "{}" was NOT loaded. Aborting processing of the current request file.'\
                .format(cfg_location.cfg_path)
            self.logger.error(_str)
            self.error.add_error(_str)
            return cfg_to_update

        self.logger.info(
            'Local config file "{}" was loaded and being used.'.format(
                cfg_location.cfg_path))
        return cm.update_dictionary_matching_keys(
            cfg_to_update, cfg_location.get_whole_dictionary())

    def load_project_config_into_main(self, project):
        """Load the project-specific "project_config" file, overlay it with
        the environment-specific "project_location" settings and merge the
        result into the main config (self.conf_main).

        Does nothing if the project config file cannot be loaded.

        :param project: project name substituted into the config file paths
        """
        # load project specific "project_config" config file
        cfg_project = ConfigData(
            gc.CONFIG_FILE_PROJECT.replace('{project}', project))
        if cfg_project.loaded:
            # if cfg_project was loaded, update it with the environment specific settings (from project_location config)
            # BUGFIX: use the 'project' parameter consistently here; the
            # original code passed self.project, which is presumably the same
            # value but made the parameter partially ignored.
            cfg_project_updated = self.update_cfg_dictionary_with_location_details(
                gc.CONFIG_FILE_PROJECT_LOCATION, project,
                cfg_project.get_whole_dictionary())
            # update main config with the outcome of the previous updates
            self.conf_main.update(cfg_project_updated)

    def create_trasfer_script_file(self):
        """Create transfer_script.sh inside the submission package directory.

        Loads the project-specific script template, fills in its {!...!}
        placeholders from the main config and from environment variables,
        writes the result into the submission package and optionally makes
        the file executable based on the "DataTransfer/exec_permis" config
        value. Failures while setting permissions are logged and collected
        as errors; the script file itself is still written.
        """
        self.logger.info("Start preparing transfer_script.sh file.")
        # path for the script file being created
        sf_path = Path(self.submission_package.submission_dir +
                       "/transfer_script.sh")

        # get script file template
        with open('scripts/' + self.project + '/transfer_script.sh',
                  'r') as ft:
            scr_tmpl = ft.read()

        # values coming from environment variables; the names of the env
        # variables are stored in the main config
        smtp_server = cm.get_environment_variable(
            self.conf_main.get_item_by_key('Email/smtp_server_env_name'))
        smtp_port = cm.get_environment_variable(
            self.conf_main.get_item_by_key('Email/smtp_server_port_env_name'))
        ssh_server = cm.get_environment_variable(
            self.conf_main.get_item_by_key('DataTransfer/ssh_server_env_name'))
        ssh_user = cm.get_environment_variable(
            self.conf_main.get_item_by_key('DataTransfer/ssh_user_env_name'))

        # placeholder -> value map; a dict preserves insertion order
        # (Python 3.7+), so replacements are applied in exactly this sequence.
        # "{!ssh_user!}" MUST remain the very last entry, since the user name
        # can be used as part of the values substituted by earlier replacements.
        replacements = {
            "{!smtp!}": smtp_server + ":" + str(smtp_port),
            "{!to_email!}":
                ','.join(self.conf_main.get_value("Email/sent_to_emails")),
            "{!from_email!}":
                self.conf_main.get_value("Email/default_from_email"),
            "{!send_email_flag!}":
                str(self.conf_main.get_value("Email/send_emails")),
            "{!cmd!}":
                self.conf_main.get_value("DataTransfer/transfer_command"),
            # the following will be utilized if mount point is being used by
            # the transfer script (i.e. for Peerless)
            "{!mp_cmd!}":
                self.conf_main.get_value("DataTransfer/mount_point_command"),
            "{!mount_local_dir!}":
                self.conf_main.get_value("DataTransfer/mount_local_dir"),
            "{!mount_remote_dir!}":
                self.conf_main.get_value("DataTransfer/mount_remote_dir"),
            "{!source_dir!}": self.submission_package.submission_dir,
            "{!target_dir!}":
                self.conf_main.get_value("DataTransfer/remote_target_dir"),
            "{!ssh_server!}": str(ssh_server),
            "{!ssh_user!}": str(ssh_user),
        }
        for placeholder, value in replacements.items():
            scr_tmpl = cm.replace_value_in_string(scr_tmpl, placeholder, value)

        # figure out whether (and which) executable permission bits to set
        set_permissions = False
        set_perm_value = self.conf_main.get_value("DataTransfer/exec_permis")
        if set_perm_value:
            try:
                # SECURITY NOTE: eval() executes arbitrary code taken from the
                # main config file (expected to hold an expression like
                # "stat.S_IXUSR"); the config file is assumed to be trusted.
                exec_permission = eval(set_perm_value.strip())
                set_permissions = True
            except Exception as ex:
                _str = 'Unexpected error Error "{}" occurred during evaluating of "DataTransfer/exec_permis" value ' \
                       '"{}" retrieved from the main config file. Permission setup operation will be skipped. \n{} '\
                    .format(ex, set_perm_value, traceback.format_exc())
                self.logger.warning(_str)
                set_permissions = False

        with open(sf_path, "w") as sf:
            sf.write(scr_tmpl)

        if set_permissions:
            try:
                # if permissions to be set were retrieved from config file, set them here
                st = os.stat(sf_path)
                os.chmod(sf_path, st.st_mode | exec_permission)  # e.g. stat.S_IXUSR
            except Exception as ex:
                _str = 'Unexpected error Error "{}" occurred during setting up permissions "{}" for the script file ' \
                       '"{}". \n{} '\
                    .format(ex, set_perm_value, sf_path, traceback.format_exc())
                self.logger.warning(_str)
                self.error.add_error(_str)
        else:
            _str = 'Permission setup was skipped for the transfer script file. ' \
                   'Note: value of "DataTransfer/exec_permis" from main config was set to "{}".'\
                                    .format(set_perm_value)
            self.logger.warning(_str)

        self.logger.info("Finish preparing '{}' file.".format(sf_path))

    def disqualify_sub_aliquot(self, sa, details):
        """Record a disqualification reason for a sub-aliquot and log a warning.

        Adds an entry to the dictionary of disqualified sub-aliquots:
        key = sub-aliquot, value = list of disqualification details; one
        sub-aliquot can accumulate multiple detail reasons.

        :param sa: sub-aliquot identifier
        :param details: human-readable reason for the disqualification
        """
        # setdefault creates the list on the first disqualification of this
        # sub-aliquot and appends to the existing list on repeat calls
        self.disqualified_sub_aliquots.setdefault(sa, []).append(details)
        self.logger.warning(
            'Sub-aliquot "{}" was disqualified with the following details: "{}"'
            .format(sa, details))

    def populate_qualified_aliquots(self):
        """Rebuild self.qualified_aliquots with the aliquots whose matching
        sub-aliquot was not disqualified.

        Relies on self.sub_aliquots and self.aliquots being parallel lists
        (same length, same order), as the zip below pairs them positionally.
        """
        # keep only aliquots whose sub-aliquot is absent from the
        # disqualified dictionary; idiomatic 'not in' membership test
        self.qualified_aliquots = [
            a for sa, a in zip(self.sub_aliquots, self.aliquots)
            if sa not in self.disqualified_sub_aliquots
        ]

    def create_request_for_disqualified_sub_aliquots(self):
        """Prepare a submission request (.xls) covering only the disqualified
        sub-aliquots, so they can be reprocessed later.

        Does nothing when no sub-aliquots were disqualified. The file is
        written to gc.DISQUALIFIED_REQUESTS (the folder is created if needed)
        and its path is stored in self.disqualified_request_path.
        """
        # proceed only if some disqualified sub-aliquots are present
        if not self.disqualified_sub_aliquots:
            return

        self.logger.info(
            "Start preparing a request file for disqualified sub-aliquots '{}'."
            .format(list(self.disqualified_sub_aliquots.keys())))

        wb = xlwt.Workbook()  # create empty workbook object
        # sheet name can not be longer than 32 characters
        sh = wb.add_sheet('Submission_Request')

        # write headers into the first row (0-based)
        for col, val in enumerate(self.get_headers()):
            sh.write(0, col, val)

        # one row per disqualified sub-aliquot, preserving the original
        # order of self.sub_aliquots
        cur_row = 1
        for sa in self.sub_aliquots:
            if sa in self.disqualified_sub_aliquots:
                sh.write(cur_row, 0, self.project)
                sh.write(cur_row, 1, self.bulk_location)
                sh.write(cur_row, 2, self.assay)
                sh.write(cur_row, 3, self.center)
                sh.write(cur_row, 4, sa)
                cur_row += 1

        # BUGFIX: removed the stray space that previously appeared inside the
        # generated file name ('_reprocess_disqualified _')
        self.disqualified_request_path = Path(
            gc.DISQUALIFIED_REQUESTS + '/' +
            time.strftime("%Y%m%d_%H%M%S", time.localtime()) +
            '_reprocess_disqualified_' + Path(self.filename).stem +
            '.xls')

        # if DISQUALIFIED_REQUESTS folder does not exist, it will be created
        os.makedirs(gc.DISQUALIFIED_REQUESTS, exist_ok=True)

        wb.save(str(self.disqualified_request_path))

        self.logger.info(
            "Successfully prepared the request file for disqualified sub-aliquots and saved in '{}'."
            .format(str(self.disqualified_request_path)))