def __init__(self, expected_attributes=None, **kwargs):
        """
            constructor
            :param expected_attributes: optional list of additional attribute names that
                child classes pass up to be checked by the StandardData constructor
            :param **kwargs: keyword arguments forwarded to the StandardData constructor
        """
        # add expected attributes to super._attributes
        self._add_expected_attributes = []
        # This is a method of adding expected attributes to StandardData from StandardData children
        if isinstance(expected_attributes, list):
            self._add_expected_attributes.extend(expected_attributes)
        # initialize Standard data with the extended _attributes
        # recall that this will check for and/or create the directory structure found at
        super(S288C_R54QualAssessAuditObject, self).__init__(self._add_expected_attributes, **kwargs)
        # overwrite super.self_type with object type of child (this object)
        # BUGFIX: was mislabeled 'CryptoQualAssessAuditObject' (copy-paste from the crypto class),
        # which mislabeled this object and its logger
        self.self_type = 'S288C_R54QualAssessAuditObject'
        # create logger
        self.logger = utils.createStandardObjectChildLogger(self, __name__)

        # extract threshold/status settings from the config file at self.config_file
        # NOTE(review): the section name says R64 while the class name says R54 -- confirm which is intended
        qual_assess_config = configparser.ConfigParser()
        qual_assess_config.read(self.config_file)
        qual_assess_1_dict = qual_assess_config['S288C_R64QualityAssessOne']

        # extract thresholds #TODO: CLEAN UP TO DICTIONARY, AUTOMATICALLY EXTRACT
        self.library_size_threshold = int(qual_assess_1_dict['LIBRARY_SIZE_THRESHOLD'])
        self.not_aligned_total_percent_threshold = float(qual_assess_1_dict['NOT_ALIGNED_TOTAL_PERCENT_THRESHOLD'])

        # extract status bits
        self.library_size_bit_status = int(qual_assess_1_dict['LIBRARY_SIZE_STATUS'])
        self.not_aligned_total_percent_bit_status = int(qual_assess_1_dict['NOT_ALIGNED_TOTAL_PERCENT_STATUS'])

        # run the audit immediately on construction
        self.auditQualAssessDataframe()
    def __init__(self, expected_attributes=None, **kwargs):
        """
            constructor
            :param expected_attributes: optional list of additional expected attribute names
            :param **kwargs: unspecified # keyword arguments. The keywords that are currently handled, if entered:
                                logger_path = path to the directory in which to deposit the logger

            PLEASE NOTE: order of database_subdirectories is important. see comment in constructor

        """
        # Call StandardData (parent class) constructor with this class' expected attributes
        self._add_expected_attributes = [
            'database_subdirectories', 'filter_json_path', 'database_df'
        ]
        super(DatabaseObject, self).__init__(self._add_expected_attributes,
                                             **kwargs)
        self.self_type = 'DatabaseObject'
        # set DatabaseObject logger
        self.logger = utils.createStandardObjectChildLogger(self, __name__)
        # prefer an explicitly passed database_files path; otherwise fall back
        # to the attribute set by the parent constructor
        if 'database_files' in kwargs:
            self.database_directory = kwargs['database_files']
        else:
            self.database_directory = self.database_files

        # set default database subdirectories. PLEASE NOTE: order is important here -- list in the order you wish them to merge in
        if 'database_subdirectories' in kwargs:
            self.database_subdirectories = kwargs['database_subdirectories']
        else:
            self.database_subdirectories = [
                'bioSample', 'rnaSample', 's1cDNASample', 's2cDNASample',
                'library', 'fastqFiles'
            ]

        # see setter setFilterJson()
        self.filter_json = None
        self.filter_json_path = kwargs.get('filter_json_path')

        # see setter setDatabaseDataframe()
        self.database_df = kwargs.get('database_df')

        # see filterDatabaseDataframe()
        self.filtered_database_df = kwargs.get('filtered_database_df')

        # database_dict will store {database_subdirectory: [list, of, filepaths, in, each, subdir], ... }. See self.setDatabaseDict()
        self.database_dict = {}
        # see setter setDatabaseDict()
        self.concat_database_dict = {}
        # see setter setKeyColumns()
        self.database_key_columns = []
 def createOrganismDataLogger(self):
     """
         create the logger for OrganismData
         :raises: NotADirectoryError if the directory containing self.log_file_path does not exist
     """
     log_dir_path = utils.dirPath(self.log_file_path)
     # guard clause: fail fast when the log directory is missing
     if not os.path.isdir(log_dir_path):
         raise NotADirectoryError('LogDirectoryDoesNotExist')
     self.logger = utils.createStandardObjectChildLogger(self, __name__)
# Example #4
# 0
    def __init__(self, expected_attributes=None, **kwargs):
        """
            constructor -- builds the crypto quality-assessment dataframe on construction
            :param expected_attributes: optional list of extra attribute names forwarded to StandardData
            :param **kwargs: keyword arguments passed through to the StandardData constructor
        """
        # collect any child-supplied expected attributes before calling the parent constructor
        self._add_expected_attributes = []
        if isinstance(expected_attributes, list):
            self._add_expected_attributes.extend(expected_attributes)
        # StandardData checks for and/or creates the expected directory structure
        super(CryptoQualityAssessmentObject,
              self).__init__(self._add_expected_attributes, **kwargs)
        # overwrite super.self_type with this child object's type
        self.self_type = 'CryptoQualityAssessmentObject'
        # create logger
        self.logger = utils.createStandardObjectChildLogger(self, __name__)
        # output column ordering; the GENOTYPE*_COVERAGE columns only appear when
        # a coverage check was run
        self.column_order = [
            'FASTQFILENAME', 'LIBRARY_SIZE', 'EFFECTIVE_LIBRARY_SIZE',
            'EFFECTIVE_UNIQUE_ALIGNMENT', 'EFFECTIVE_UNIQUE_ALIGNMENT_PERCENT',
            'MULTI_MAP_PERCENT', 'PROTEIN_CODING_TOTAL',
            'PROTEIN_CODING_TOTAL_PERCENT', 'PROTEIN_CODING_COUNTED',
            'PROTEIN_CODING_COUNTED_PERCENT', 'AMBIGUOUS_FEATURE_PERCENT',
            'NO_FEATURE_PERCENT', 'INTERGENIC_COVERAGE',
            'NOT_ALIGNED_TOTAL_PERCENT', 'GENOTYPE1_COVERAGE',
            'GENOTYPE1_LOG2CPM', 'GENOTYPE2_COVERAGE', 'GENOTYPE2_LOG2CPM',
            'OVEREXPRESSION_FOW', 'NAT_COVERAGE', 'NAT_LOG2CPM',
            'G418_COVERAGE', 'G418_LOG2CPM', 'NO_MAP_PERCENT',
            'HOMOPOLY_FILTER_PERCENT', 'READ_LENGTH_FILTER_PERCENT',
            'TOO_LOW_AQUAL_PERCENT', 'rRNA_PERCENT', 'nctrRNA_PERCENT'
        ]

        print('Quantifying noncoding rRNA (rRNA, tRNA and ncRNA)')
        # quantify rRNA, tRNA and ncRNA from the bam files and merge the
        # result into the quality-assessment dataframe -- long-running step
        noncoding_rna_df = self.quantifyNonCodingRna(self.qual_assess_df)
        self.qual_assess_df = pd.merge(self.qual_assess_df,
                                       noncoding_rna_df,
                                       on='FASTQFILENAME')
        print('Quantifying intergenic coverage')
        self.qual_assess_df = self.calculateIntergenicCoverage(
            self.qual_assess_df)
        # when coverage_check_flag is set, check coverage of the perturbed genes;
        # a missing query_df/coverage_check_flag attribute skips the check
        try:
            if self.coverage_check_flag:
                perturbed_coverage_df = self.perturbedCheck()
                self.qual_assess_df = pd.merge(self.qual_assess_df,
                                               perturbed_coverage_df,
                                               how='left',
                                               on='FASTQFILENAME')
        except AttributeError:
            self.logger.info(
                'query_df or coverage_check_flag not present -- no coverage check'
            )
        # format the finished dataframe
        self.qual_assess_df = self.formatQualAssessDataFrame(
            self.qual_assess_df)
    def __init__(self, expected_attributes=None, **kwargs):
        """
            constructor
            :param expected_attributes: unused here; parent signature preserved for consistency
            :param **kwargs: unspecified # keyword arguments. The keywords that are currently handled, if entered:
                                logger_path = path to the directory in which to deposit the logger
                                full_report = truthy value triggers self.fullReport() on construction

        """
        # Call StandardData (parent class) constructor
        self._add_expected_attributes = []
        super(DatabaseAccuracyObject,
              self).__init__(self._add_expected_attributes, **kwargs)
        self.self_type = 'DatabaseAccuracyObject'
        # set DatabaseAccuracyObjectLogger
        self.logger = utils.createStandardObjectChildLogger(self, __name__)
        # set the database dictionary ({subdirectory: [list, of, files]} --> see DatabaseObject)
        self.setDatabaseDict()
        # run the full report immediately when requested via the constructor kwargs
        if kwargs.get('full_report'):
            self.fullReport()  # Report output to report
        # create specification dict -- see class metadataSpecificationObject below this class
        self.specification_dict = metadataSpecificationObject(
        ).specification_dict
        # set last_git_change; failures are reported but not fatal
        try:
            self.last_git_change = self.getLastGitChange()
        except FileNotFoundError:
            print(
                'Cannot find .git/FETCH_HEAD in database_files. If this is a new, or newly cloned, directory, pull from the remote.'
            )
        except AttributeError:
            print('.git/FETCH_HEAD is empty. Make a commit and try again.')
        # set accuracyCheckFilename (expecting to be overwritten by @property method below when needed)
        self.accuracy_check_output_file = self.accuracyCheckFilename()
        # per-subdirectory key columns used to join metadata sheets
        self.key_column_dict = {
            "fastqFiles":
            ['libraryDate', 'libraryPreparer', 'librarySampleNumber'],
            "library":
            ['libraryDate', 'libraryPreparer', 'librarySampleNumber'],
            "s2cDNASample":
            ['s2cDNADate', 's2cDNAPreparer', 's2cDNASampleNumber'],
            "s1cDNASample":
            ['s1cDNADate', 's1cDNAPreparer', 's1cDNASampleNumber'],
            "rnaSample": ['rnaDate', 'rnaPreparer', 'rnaSampleNumber'],
            "bioSample": ['harvestDate', 'harvester', 'bioSampleNumber']
        }
    def __init__(self, expected_attributes=None, **kwargs):
        """
            constructor
            :param expected_attributes: optional list of additional attribute names that
                child classes pass up to be checked by the StandardData constructor
            :param **kwargs: keyword arguments forwarded to the StandardData constructor
        """
        # add expected attributes to super._attributes
        self._add_expected_attributes = []
        # This is a method of adding expected attributes to StandardData from StandardData children
        if isinstance(expected_attributes, list):
            self._add_expected_attributes.extend(expected_attributes)
        # initialize Standard data with the extended _attributes
        # recall that this will check for and/or create the directory structure found at
        super(CryptoQualAssessAuditObject, self).__init__(self._add_expected_attributes, **kwargs)
        # overwrite super.self_type with object type of child (this object)
        self.self_type = 'CryptoQualAssessAuditObject'
        # create logger
        self.logger = utils.createStandardObjectChildLogger(self, __name__)

        # extract threshold/status settings from the [KN99QualityAssessOne] section of the config file
        qual_assess_config = configparser.ConfigParser()
        qual_assess_config.read(self.config_file)
        qual_assess_1_dict = qual_assess_config['KN99QualityAssessOne']

        # attribute name -> (config key, cast). Data-driven extraction replaces the
        # previous one-assignment-per-setting block (resolves the former TODO);
        # the same attributes are set with the same values and types.
        config_settings = {
            # thresholds
            'protein_coding_total_threshold': ('PROTEIN_CODING_TOTAL_THRESHOLD', int),
            'not_aligned_total_percent_threshold': ('NOT_ALIGNED_TOTAL_PERCENT_THRESHOLD', float),
            'perturbed_coverage_threshold': ('PERTURBED_COVERAGE_THRESHOLD', float),
            'nat_expected_coverage_threshold': ('NAT_EXPECTED_COVERAGE_THRESHOLD', float),
            'nat_expected_log2cpm_threshold': ('NAT_EXPECTED_LOG2CPM_THRESHOLD', float),
            'nat_unexpected_coverage_threshold': ('NAT_UNEXPECTED_COVERAGE_THRESHOLD', float),
            'nat_unexpected_log2cpm_threshold': ('NAT_UNEXPECTED_LOG2CPM_THRESHOLD', float),
            'g418_log2cpm_threshold': ('G418_LOG2CPM_THRESHOLD', float),
            'overexpression_fow_threshold': ('OVEREXPRESSION_FOW_THRESHOLD', float),
            # status bits
            'protein_coding_total_bit_status': ('PROTEIN_CODING_TOTAL_STATUS', int),
            'not_aligned_total_percent_bit_status': ('NOT_ALIGNED_TOTAL_PERCENT_STATUS', int),
            'perturbed_coverage_bit_status': ('PERTURBED_COVERAGE_STATUS', int),
            'nat_expected_marker_status': ('NAT_EXPECTED_MARKER_STATUS', int),
            'nat_unexpected_marker_status': ('NAT_UNEXPECTED_MARKER_STATUS', int),
            'g418_expected_marker_status': ('G418_EXPECTED_MARKER_STATUS', int),
            'g418_unexpected_marker_status': ('G418_UNEXPECTED_MARKER_STATUS', int),
            'overexpression_fow_status': ('OVEREXPRESSION_FOW_STATUS', int),
            'no_metadata_marker_status': ('NO_METADATA_MARKER_STATUS', int),
        }
        for attribute_name, (config_key, cast) in config_settings.items():
            setattr(self, attribute_name, cast(qual_assess_1_dict[config_key]))

        # run the audit immediately on construction
        self.auditQualAssessDataframe()
    def __init__(self, expected_attributes=None, **kwargs):
        """
            constructor
            :param expected_attributes: optional list of additional attribute names that
                children of OrganismData pass up to be checked by StandardData
            :param **kwargs: keyword arguments forwarded to the StandardData constructor
        """
        # add expected attributes to super._attributes
        self._add_expected_attributes = [
            'organism', 'output_dir', 'wildtype', 'experiment_dir',
            'norm_count_path', 'max_replicates', 'drug_marker', 'qc_config',
            'experiment_conditions'
        ]
        # TODO: This is a messy and repetitive way of adding expected attributes from children of OrganismData to add to StandardData
        if isinstance(expected_attributes, list):
            self._add_expected_attributes.extend(expected_attributes)
        # initialize Standard data with the extended _attributes
        # recall that this will check for and/or create the directory structure found at
        super(OrganismData, self).__init__(self._add_expected_attributes,
                                           **kwargs)
        # overwrite super.self_type with object type of child (this object)
        self.self_type = 'OrganismData'

        # set organism, if an organism is passed
        if hasattr(self, 'organism'):
            # set organism directory
            self.organism_directory = os.path.join(
                self.user_rnaseq_pipeline_directory, self.genome_files,
                self.organism)
            # set OrganismData config found in rnaseq_pipeline/genome_files/<organism>/OrganismData_config.ini
            self.organism_config_file = os.path.join(
                self.organism_directory, 'OrganismData_config.ini')
            if self.organism in self._configured_organisms_list:
                self.setOrganismData()
            else:
                # BUGFIX: these strings contained {self.organism} etc. placeholders but were not
                # f-strings, so the literal braces were printed; also added the missing space
                # before 'and either re-run'
                print(
                    f'\n{self.organism} is not configured. You will have to set the OrganismData attributes manually. '
                    f'See the config/rnaseq_pipeline_config.ini. Alternatively, see one of the configured genome_files (in {self.genome_files}) '
                    'and create a subdir of genomes_files with an OrganismData_config.ini file, zip it into '
                    f'/lts/mblab/Crypto/rnaseq_data/genome_files.zip, remove your genome_files in your {self.user_rnaseq_pipeline_directory} '
                    'and either re-run this script or start an interactive python session, import and instantiate a StandardData object.\n'
                )
                # see [OrganismData] in config/rnaseq_pipeline_config.ini
                utils.configure(self, self.config_file, self.self_type)

        # create OrganismData logger
        self.logger = utils.createStandardObjectChildLogger(self, __name__)