def __init__(self, expected_attributes=None, **kwargs):
    """
    Constructor for the S288C_R54 quality-assessment audit object.
    Extends the parent StandardData attribute list, reads quality-assessment
    thresholds and status bits from the [S288C_R64QualityAssessOne] section of
    the config file, and immediately audits the quality-assessment dataframe.
    :param expected_attributes: optional list of additional attribute names to
        register with the StandardData parent constructor
    :param kwargs: unspecified keyword arguments, forwarded to StandardData
    """
    # add expected attributes to super._attributes
    self._add_expected_attributes = []
    # This is a method of adding expected attributes to StandardData from StandardData children
    if isinstance(expected_attributes, list):
        self._add_expected_attributes.extend(expected_attributes)
    # initialize StandardData with the extended _attributes
    # recall that this will check for and/or create the directory structure
    super(S288C_R54QualAssessAuditObject, self).__init__(self._add_expected_attributes, **kwargs)
    # overwrite super.self_type with object type of child (this object).
    # BUGFIX: this was previously set to 'CryptoQualAssessAuditObject' (copy-paste
    # from the Crypto sibling class); self_type should name this class.
    # NOTE(review): the class name says R54 while the config section below says
    # R64 -- confirm which of the two is the typo.
    self.self_type = 'S288C_R54QualAssessAuditObject'
    # create logger
    self.logger = utils.createStandardObjectChildLogger(self, __name__)
    # extract threshold/status from config file  #TODO: move to constructor
    qual_assess_config = configparser.ConfigParser()
    qual_assess_config.read(self.config_file)
    qual_assess_1_dict = qual_assess_config['S288C_R64QualityAssessOne']
    # extract thresholds  #TODO: CLEAN UP TO DICTIONARY, AUTOMATICALLY EXTRACT
    self.library_size_threshold = int(qual_assess_1_dict['LIBRARY_SIZE_THRESHOLD'])
    self.not_aligned_total_percent_threshold = float(qual_assess_1_dict['NOT_ALIGNED_TOTAL_PERCENT_THRESHOLD'])
    # extract status bits
    self.library_size_bit_status = int(qual_assess_1_dict['LIBRARY_SIZE_STATUS'])
    self.not_aligned_total_percent_bit_status = int(qual_assess_1_dict['NOT_ALIGNED_TOTAL_PERCENT_STATUS'])
    # audit immediately on construction
    self.auditQualAssessDataframe()
def __init__(self, expected_attributes=None, **kwargs):
    """
    Constructor. Registers DatabaseObject's expected attributes with the
    StandardData parent, then initializes database directory/subdirectory and
    dataframe state from kwargs, falling back to defaults.
    :param expected_attributes: optional list of additional attribute names to
        register with StandardData. BUGFIX: this parameter was previously
        accepted but silently ignored, unlike the other StandardData children
        in this file -- it is now honored.
    :param kwargs: unspecified keyword arguments. The keywords that are
        currently handled, if entered:
        logger_path = path to the directory in which to deposit the logger
    PLEASE NOTE: order of database_subdirectories is important. see comment in constructor
    """
    # Call StandardData (parent class) constructor with this class' expected attributes
    self._add_expected_attributes = ['database_subdirectories', 'filter_json_path', 'database_df']
    # honor caller-supplied expected attributes, as the sibling constructors do
    if isinstance(expected_attributes, list):
        self._add_expected_attributes.extend(expected_attributes)
    super(DatabaseObject, self).__init__(self._add_expected_attributes, **kwargs)
    self.self_type = 'DatabaseObject'
    # set DatabaseObject logger
    self.logger = utils.createStandardObjectChildLogger(self, __name__)
    # database_directory: kwarg override, else the StandardData-provided path.
    # (conditional rather than .get() so self.database_files is only touched
    # when the kwarg is absent, preserving the original lazy fallback)
    self.database_directory = kwargs['database_files'] if 'database_files' in kwargs else self.database_files
    # set default database subdirectories. PLEASE NOTE: order is important here --
    # list in the order you wish them to merge in (concat in reverse order)
    if 'database_subdirectories' in kwargs:
        self.database_subdirectories = kwargs['database_subdirectories']
    else:
        self.database_subdirectories = ['bioSample', 'rnaSample', 's1cDNASample', 's2cDNASample', 'library', 'fastqFiles']
    # see setter setFilterJson()
    self.filter_json = None
    self.filter_json_path = kwargs.get('filter_json_path')
    # see setter setDatabaseDataframe()
    self.database_df = kwargs.get('database_df')
    # see filterDatabaseDataframe()
    self.filtered_database_df = kwargs.get('filtered_database_df')
    # database_dict will store {database_subdirectory: [list, of, filepaths, in, each, subdir], ...}. See self.setDatabaseDict()
    self.database_dict = {}
    # see setter setDatabaseDict()
    self.concat_database_dict = {}
    # see setter setKeyColumns()
    self.database_key_columns = []
def createOrganismDataLogger(self):
    """
    Create and attach the OrganismData logger.
    :raises NotADirectoryError: if the directory containing self.log_file_path
        does not exist
    """
    # guard: refuse to create a logger whose destination directory is missing
    log_dir = utils.dirPath(self.log_file_path)
    if not os.path.isdir(log_dir):
        raise NotADirectoryError('LogDirectoryDoesNotExist')
    self.logger = utils.createStandardObjectChildLogger(self, __name__)
def __init__(self, expected_attributes=None, **kwargs):
    """
    Constructor: extends StandardData with any caller-supplied expected
    attributes, then augments the quality-assessment dataframe
    (self.qual_assess_df) for Cryptococcus samples with noncoding-RNA
    quantification, intergenic coverage and (optionally) perturbed-gene
    coverage, and finally formats the dataframe.
    :param expected_attributes: optional list of additional attribute names to
        register with the StandardData parent constructor
    :param kwargs: unspecified keyword arguments, forwarded to StandardData
    """
    # add expected attributes to super._attributes
    self._add_expected_attributes = []
    # This is a method of adding expected attributes to StandardData from StandardData children
    if isinstance(expected_attributes, list):
        self._add_expected_attributes.extend(expected_attributes)
    # initialize Standard data with the extended _attributes
    # recall that this will check for and/or create the directory structure found at
    super(CryptoQualityAssessmentObject, self).__init__(self._add_expected_attributes, **kwargs)
    # overwrite super.self_type with object type of child (this object)
    self.self_type = 'CryptoQualityAssessmentObject'
    # create logger
    self.logger = utils.createStandardObjectChildLogger(self, __name__)
    # for ordering columns below. genotype1_coverage and genotype2_coverage added if coverage_check is passed
    self.column_order = ['FASTQFILENAME', 'LIBRARY_SIZE', 'EFFECTIVE_LIBRARY_SIZE',
                         'EFFECTIVE_UNIQUE_ALIGNMENT', 'EFFECTIVE_UNIQUE_ALIGNMENT_PERCENT',
                         'MULTI_MAP_PERCENT', 'PROTEIN_CODING_TOTAL', 'PROTEIN_CODING_TOTAL_PERCENT',
                         'PROTEIN_CODING_COUNTED', 'PROTEIN_CODING_COUNTED_PERCENT',
                         'AMBIGUOUS_FEATURE_PERCENT', 'NO_FEATURE_PERCENT', 'INTERGENIC_COVERAGE',
                         'NOT_ALIGNED_TOTAL_PERCENT', 'GENOTYPE1_COVERAGE', 'GENOTYPE1_LOG2CPM',
                         'GENOTYPE2_COVERAGE', 'GENOTYPE2_LOG2CPM', 'OVEREXPRESSION_FOW',
                         'NAT_COVERAGE', 'NAT_LOG2CPM', 'G418_COVERAGE', 'G418_LOG2CPM',
                         'NO_MAP_PERCENT', 'HOMOPOLY_FILTER_PERCENT', 'READ_LENGTH_FILTER_PERCENT',
                         'TOO_LOW_AQUAL_PERCENT', 'rRNA_PERCENT', 'nctrRNA_PERCENT']
    print('Quantifying noncoding rRNA (rRNA, tRNA and ncRNA)')
    # extract rRNA, tRNA and ncRNA quantification for crypto from bam files -- this takes a long time
    ncRNA_df = self.quantifyNonCodingRna(self.qual_assess_df)
    # merge this into the self.qual_assess_df (inner join on the fastq filename column)
    self.qual_assess_df = pd.merge(self.qual_assess_df, ncRNA_df, on='FASTQFILENAME')
    print('Quantifying intergenic coverage')
    self.qual_assess_df = self.calculateIntergenicCoverage(self.qual_assess_df)
    # if coverage_check_flag true, check coverage of perturbed genes.
    # NOTE: the AttributeError from a missing coverage_check_flag (or a missing
    # query_df inside perturbedCheck) is the expected "no coverage check" path.
    try:
        if self.coverage_check_flag:
            coverage_df = self.perturbedCheck()
            # left join so samples without coverage rows are retained
            self.qual_assess_df = pd.merge(self.qual_assess_df, coverage_df, how='left', on='FASTQFILENAME')
    except AttributeError:
        self.logger.info('query_df or coverage_check_flag not present -- no coverage check')
    # format the self.qual_assess_df dataframe
    self.qual_assess_df = self.formatQualAssessDataFrame(self.qual_assess_df)
def __init__(self, expected_attributes=None, **kwargs):
    """
    Constructor.
    :param **kwargs: unspecified keyword arguments. The keywords that are
        currently handled, if entered:
        logger_path = path to the directory in which to deposit the logger
    """
    # call the StandardData (parent class) constructor
    self._add_expected_attributes = []
    super(DatabaseAccuracyObject, self).__init__(self._add_expected_attributes, **kwargs)
    self.self_type = 'DatabaseAccuracyObject'
    # attach this object's logger
    self.logger = utils.createStandardObjectChildLogger(self, __name__)
    # build the database dictionary ({subdirectory: [list, of, files]} -- see DatabaseObject)
    self.setDatabaseDict()
    # when full_report is passed in the constructor, run the report immediately
    try:
        if kwargs['full_report']:
            self.fullReport()  # Report output to report
    except KeyError:
        pass
    # specification dict -- see class metadataSpecificationObject below this class
    spec_object = metadataSpecificationObject()
    self.specification_dict = spec_object.specification_dict
    # record the most recent git change, when repository metadata is available
    try:
        self.last_git_change = self.getLastGitChange()
    except FileNotFoundError:
        print('Cannot find .git/FETCH_HEAD in database_files. If this is a new, or newly cloned, directory, pull from the remote.')
    except AttributeError:
        print('.git/FETCH_HEAD is empty. Make a commit and try again.')
    # set accuracyCheckFilename (expecting to be overwritten by @property method below when needed)
    self.accuracy_check_output_file = self.accuracyCheckFilename()
    # key columns that identify a record in each metadata subdirectory sheet
    self.key_column_dict = {
        "fastqFiles": ['libraryDate', 'libraryPreparer', 'librarySampleNumber'],
        "library": ['libraryDate', 'libraryPreparer', 'librarySampleNumber'],
        "s2cDNASample": ['s2cDNADate', 's2cDNAPreparer', 's2cDNASampleNumber'],
        "s1cDNASample": ['s1cDNADate', 's1cDNAPreparer', 's1cDNASampleNumber'],
        "rnaSample": ['rnaDate', 'rnaPreparer', 'rnaSampleNumber'],
        "bioSample": ['harvestDate', 'harvester', 'bioSampleNumber'],
    }
def __init__(self, expected_attributes=None, **kwargs):
    """
    Constructor for the Cryptococcus (KN99) quality-assessment audit object.
    Extends the parent StandardData attribute list, reads quality-assessment
    thresholds and status bits from the [KN99QualityAssessOne] section of the
    config file, and immediately audits the quality-assessment dataframe.
    :param expected_attributes: optional list of additional attribute names to
        register with the StandardData parent constructor
    :param kwargs: unspecified keyword arguments, forwarded to StandardData
    """
    # add expected attributes to super._attributes
    self._add_expected_attributes = []
    # This is a method of adding expected attributes to StandardData from StandardData children
    if isinstance(expected_attributes, list):
        self._add_expected_attributes.extend(expected_attributes)
    # initialize StandardData with the extended _attributes
    # recall that this will check for and/or create the directory structure
    super(CryptoQualAssessAuditObject, self).__init__(self._add_expected_attributes, **kwargs)
    # overwrite super.self_type with object type of child (this object)
    self.self_type = 'CryptoQualAssessAuditObject'
    # create logger
    self.logger = utils.createStandardObjectChildLogger(self, __name__)
    # extract threshold/status from config file  #TODO: move to constructor
    qual_assess_config = configparser.ConfigParser()
    qual_assess_config.read(self.config_file)
    qual_assess_1_dict = qual_assess_config['KN99QualityAssessOne']
    # table-driven extraction (implements the previous in-code TODO to replace
    # the repetitive per-key assignments). Attribute names, config keys and
    # cast types are unchanged from the original hand-written assignments.
    # thresholds: {attribute_name: (config_key, cast)}
    threshold_settings = {
        'protein_coding_total_threshold': ('PROTEIN_CODING_TOTAL_THRESHOLD', int),
        'not_aligned_total_percent_threshold': ('NOT_ALIGNED_TOTAL_PERCENT_THRESHOLD', float),
        'perturbed_coverage_threshold': ('PERTURBED_COVERAGE_THRESHOLD', float),
        'nat_expected_coverage_threshold': ('NAT_EXPECTED_COVERAGE_THRESHOLD', float),
        'nat_expected_log2cpm_threshold': ('NAT_EXPECTED_LOG2CPM_THRESHOLD', float),
        'nat_unexpected_coverage_threshold': ('NAT_UNEXPECTED_COVERAGE_THRESHOLD', float),
        'nat_unexpected_log2cpm_threshold': ('NAT_UNEXPECTED_LOG2CPM_THRESHOLD', float),
        'g418_log2cpm_threshold': ('G418_LOG2CPM_THRESHOLD', float),
        'overexpression_fow_threshold': ('OVEREXPRESSION_FOW_THRESHOLD', float),
    }
    for attribute_name, (config_key, cast) in threshold_settings.items():
        setattr(self, attribute_name, cast(qual_assess_1_dict[config_key]))
    # status bits: {attribute_name: config_key} -- all cast to int
    status_settings = {
        'protein_coding_total_bit_status': 'PROTEIN_CODING_TOTAL_STATUS',
        'not_aligned_total_percent_bit_status': 'NOT_ALIGNED_TOTAL_PERCENT_STATUS',
        'perturbed_coverage_bit_status': 'PERTURBED_COVERAGE_STATUS',
        'nat_expected_marker_status': 'NAT_EXPECTED_MARKER_STATUS',
        'nat_unexpected_marker_status': 'NAT_UNEXPECTED_MARKER_STATUS',
        'g418_expected_marker_status': 'G418_EXPECTED_MARKER_STATUS',
        'g418_unexpected_marker_status': 'G418_UNEXPECTED_MARKER_STATUS',
        'overexpression_fow_status': 'OVEREXPRESSION_FOW_STATUS',
        'no_metadata_marker_status': 'NO_METADATA_MARKER_STATUS',
    }
    for attribute_name, config_key in status_settings.items():
        setattr(self, attribute_name, int(qual_assess_1_dict[config_key]))
    # audit immediately on construction
    self.auditQualAssessDataframe()
def __init__(self, expected_attributes=None, **kwargs):
    """
    Constructor: registers OrganismData's expected attributes with the
    StandardData parent, then (if an organism attribute was set by the parent)
    resolves the organism's genome_files directory and config, loading the
    organism data when the organism is configured.
    :param expected_attributes: optional list of additional attribute names to
        register with the StandardData parent constructor
    :param kwargs: unspecified keyword arguments, forwarded to StandardData
    """
    # add expected attributes to super._attributes
    self._add_expected_attributes = ['organism', 'output_dir', 'wildtype', 'experiment_dir',
                                     'norm_count_path', 'max_replicates', 'drug_marker',
                                     'qc_config', 'experiment_conditions']
    # TODO: This is a messy and repetitive way of adding expected attributes from children of OrganismData to add to StandardData
    if isinstance(expected_attributes, list):
        self._add_expected_attributes.extend(expected_attributes)
    # initialize Standard data with the extended _attributes
    # recall that this will check for and/or create the directory structure found at
    super(OrganismData, self).__init__(self._add_expected_attributes, **kwargs)
    # overwrite super.self_type with object type of child (this object)
    self.self_type = 'OrganismData'
    # set organism, if an organism is passed
    if hasattr(self, 'organism'):
        # set organism directory
        self.organism_directory = os.path.join(self.user_rnaseq_pipeline_directory,
                                               self.genome_files, self.organism)
        # set OrganismData config found in rnaseq_pipeline/genome_files/<organism>/OrganismData_config.ini
        self.organism_config_file = os.path.join(self.organism_directory, 'OrganismData_config.ini')
        if self.organism in self._configured_organisms_list:
            self.setOrganismData()
        else:
            # BUGFIX: these were plain string literals, so '{self.organism}' etc.
            # printed literally -- they are now f-strings. Also added the missing
            # space before 'and either re-run' and fixed the 'genomes_files' typo.
            print(f'\n{self.organism} is not configured. You will have to set the OrganismData attributes manually. '
                  f'See the config/rnaseq_pipeline_config.ini. '
                  f'Alternatively, see one of the configured genome_files (in {self.genome_files}) '
                  f'and create a subdir of genome_files with an OrganismData_config.ini file, zip it into '
                  f'/lts/mblab/Crypto/rnaseq_data/genome_files.zip, remove your genome_files in your {self.user_rnaseq_pipeline_directory} '
                  f'and either re-run this script or start an interactive python session, import and instantiate a StandardData object.\n')
    # see [OrganismData] in config/rnaseq_pipeline_config.ini
    utils.configure(self, self.config_file, self.self_type)
    # create OrganismData logger
    self.logger = utils.createStandardObjectChildLogger(self, __name__)