def execute(self, input_commstructs):
    """Analyze the conserved regions of every alignment of the first input.

    The processor parameters are read into instance attributes, then a PWM
    is built for each alignment found in ``bedToMA`` of the first input
    structure, using the reference species followed by the optional desired
    species. Unless the chosen algorithm equals
    ``BlockProcessor.ALGORITHM_NONE_VALUE``, the PWM is handed to
    ``self.analyzeConservedBlocks``; otherwise a single motif spanning
    ``0`` to ``alignment.totalLength`` is added to the alignment.

    Args:
        input_commstructs: sequence of input structures; only the first
            element is used.

    Returns:
        The first element of ``input_commstructs`` (the same object).

    Raises:
        ExecutionException: if ``input_commstructs`` is None or empty.
    """
    if input_commstructs is None or len(input_commstructs) == 0:
        raise ExecutionException("BlockProcessor.execute : No inputs")

    input_commstruct = input_commstructs[0]

    # retrieve the processor parameters
    self.windowSize = self.getParameterAsint(
        BlockProcessor.WINDOW_SIZE_PARAM)
    self.residuConservationLimit = self.getParameterAsfloat(
        BlockProcessor.RESIDU_CONSERVATION_LIMIT_PARAM)
    self.windowConservationLimit = self.getParameterAsfloat(
        BlockProcessor.WINDOW_CONSERVATION_LIMIT_PARAM)

    # NOTE(review): the extra False argument presumably marks the parameter
    # as optional (None is handled below) -- confirm against getParameter.
    algo = self.getParameter(BlockProcessor.ALGORITHM_PARAM, False)
    if algo is not None:
        self.algorithm = algo.lower()
    # NOTE(review): when the parameter is absent, self.algorithm keeps the
    # value it had before this call -- presumably a default set elsewhere.

    referenceSpecies = self.getParameter(
        BlockProcessor.REFERENCE_SPECIES_PARAM)
    desired_species_line = self.getParameter(
        BlockProcessor.DESIRED_SPECIES_LIST_PARAM, False)

    Log.trace("BlockProcessor.execute : Chosen Algorithm is '"
              + self.algorithm + "'")

    # The reference species always comes first in the list
    self.desiredSpeciesList = [referenceSpecies]
    if desired_species_line is not None:
        self.desiredSpeciesList.extend(desired_species_line.split())

    # Analyze the conserved region in each MSA
    # If 'None' algorithm is chosen, the entire MSA is considered as conserved
    for alignments in input_commstruct.bedToMA.values():
        for alignment in alignments:
            pwm = PWM()
            pwm.initFromAlignment(alignment, self.desiredSpeciesList)
            if self.algorithm != BlockProcessor.ALGORITHM_NONE_VALUE:
                self.analyzeConservedBlocks(pwm, alignment)
            else:
                new_block = Motif(0, alignment.totalLength, "", pwm)
                new_block.composeName(alignment.name)
                alignment.addMotif(new_block, True)

    return input_commstruct
def getMotifDefinitions(self, motif_name_list, database_file_path):
    """Create one Motif per requested name and pass them to the database reader.

    Each motif is created as ``Motif(0, 0, name, None)``. The whole list is
    then given to ``MotifUtils.getMotifsPWMFromJasparTF`` together with
    ``database_file_path`` (presumably so the reader fills in the PWM of
    each motif in place -- confirm against MotifUtils).

    Args:
        motif_name_list: iterable of motif names.
        database_file_path: path forwarded to the database reader.

    Returns:
        The list of Motif objects, in the order of ``motif_name_list``.
    """
    motifs = [Motif(0, 0, motif_name, None) for motif_name in motif_name_list]
    MotifUtils.getMotifsPWMFromJasparTF(motifs, database_file_path)
    return motifs
def getAlignmentMotifs(sub_node, bedseq, seqalign):
    """Parse the motif child elements of ``sub_node`` and add them to ``seqalign``.

    Every child must carry the tag ``BedSeqAlignmentStatsCommStruct.MOTIF_TAG``
    (compared in lower case). For each child the motif attributes are read,
    the optional PWM attribute is decoded, and the resulting Motif is added
    with ``seqalign.addMotif``.

    The PWM attribute is decoded as ``;``-separated rows of the form
    ``key:v1 v2 ...`` with integer values. Rows without a ``:`` are ignored.
    ``pwm.totalLength`` is the number of values of the last row that had a
    ``:`` and stays 0 when there is no such row.

    NOTE(review): there is no ``self`` parameter; presumably the function is
    declared static by a decorator outside this view.

    Args:
        sub_node: iterable of element nodes exposing a ``tag`` attribute.
        bedseq: object whose ``toString()`` is used in error messages.
        seqalign: object receiving the parsed motifs through ``addMotif``.

    Raises:
        ParsingException: if a child has an unexpected tag, if a PWM value
            is not an integer, or if start, end or name is None.
    """
    for node_motif in sub_node:
        tag = node_motif.tag.lower()
        if tag != BedSeqAlignmentStatsCommStruct.MOTIF_TAG:
            raise ParsingException(
                "BedSeqAlignmentStatsCommStruct.getAlignmentMotifs : "
                "The motifs of the alignment of '" + bedseq.toString()
                + "' contains an unauthorized element : '" + tag + "'")

        start = CommStruct.getAttributeAsint(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_START_ATT)
        end = CommStruct.getAttributeAsint(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_END_ATT)
        name = CommStruct.getAttribute(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_NAME_ATT)
        motif_id = CommStruct.getAttribute(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_ID_ATT, False)
        consensus = CommStruct.getAttribute(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_CONSENSUS_ATT,
            False)
        nb_species = CommStruct.getAttributeAsint(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_NBSPECIES_ATT,
            False)
        strand = CommStruct.getAttribute(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_STRAND_ATT, False)
        offset = CommStruct.getAttributeAsint(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_OFFSET_ATT)
        score = CommStruct.getAttributeAsfloat(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_SCORE_ATT)

        # Retrieve the PWM of the motif
        pwm_text = CommStruct.getAttribute(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_PWM_ATT)
        pwm = None
        if pwm_text is not None and len(pwm_text) > 0:
            pwm_matrix = {}
            # Initialised up front so an attribute without any "key:values"
            # row yields an empty PWM instead of an unbound-local error.
            length = 0
            for row in pwm_text.split(";"):
                fields = row.split(":")
                if len(fields) <= 1:
                    continue
                try:
                    counts = [int(value) for value in fields[1].split()]
                except ValueError as val_exce:
                    raise ParsingException(
                        "BedSeqAlignmentStatsCommStruct.getAlignmentMotifs : "
                        "Unable to get integer value for Motif '"
                        + BedSeqAlignmentStatsCommStruct.MOTIF_PWM_ATT
                        + "' attributes. \nFrom:\n\t---> " + str(val_exce))
                # A row without values does not create an entry in the matrix
                if counts:
                    if fields[0] not in pwm_matrix:
                        pwm_matrix[fields[0]] = []
                    pwm_matrix[fields[0]].extend(counts)
                length = len(counts)
            pwm = PWM()
            pwm.matrix = pwm_matrix
            pwm.totalLength = length
            pwm.nbSequences = nb_species

        if start is None or end is None or name is None:
            raise ParsingException(
                "BedSeqAlignmentStatsCommStruct.getAlignmentMotifs : "
                "The motifs of the alignment of '" + bedseq.toString()
                + "' is missing required attributes")

        motif = Motif(start, end, name, pwm)
        motif.offset = offset
        motif.score = score
        # Optional attributes are only set when present
        if consensus is not None:
            motif.consensus = consensus
        if motif_id is not None:
            motif.id = motif_id
        if strand is not None:
            motif.strand = strand
        seqalign.addMotif(motif)
def getMotif(node_motif, comm_struct):
    """Read one motif node and register the motif and its statistics.

    The name and the optional consensus are read from ``node_motif``. A
    ``Motif(0, 0, name, None)`` is created and added to ``comm_struct``,
    then the result of ``MotifStatisticsCommStruct.getMotifStatistics`` for
    that node is stored with ``comm_struct.addMotifStatistics``.

    Args:
        node_motif: node holding the motif attributes.
        comm_struct: structure receiving the motif and its statistics.

    Raises:
        ParsingException: if the name attribute is None.
    """
    name = CommStruct.getAttribute(
        node_motif, MotifStatisticsCommStruct.MOTIF_NAME_ATT)
    consensus = CommStruct.getAttribute(
        node_motif, MotifStatisticsCommStruct.MOTIF_CONSENSUS_ATT, False)

    if name is None:
        # The message names this method (it previously said getAlignmentMotifs)
        raise ParsingException(
            "MotifStatisticsCommStruct.getMotif : "
            "The motif is missing required attribute 'name'")

    motif = Motif(0, 0, name, None)
    if consensus is not None:
        motif.consensus = consensus
    comm_struct.addMotif(motif)

    statistics = MotifStatisticsCommStruct.getMotifStatistics(
        node_motif, motif)
    comm_struct.addMotifStatistics(motif, statistics)
def getMotifMatrices(self, motif_name_list, database_file_path,
                     database_format):
    """Create one Motif per name and hand them to the reader matching the format.

    Each motif is created as ``Motif(0, 0, name, None)``. The list is passed
    to ``MotifUtils.getMotifsPWMFromJasparTF`` when ``database_format`` is
    ``"transfac"`` or ``"tf"``, and to ``MotifUtils.getMotifsPWMFromMeme``
    when it is ``"meme"``. The comparison is case-sensitive.

    Args:
        motif_name_list: iterable of motif names.
        database_file_path: path forwarded to the reader.
        database_format: one of ``"transfac"``, ``"tf"`` or ``"meme"``.

    Returns:
        The list of Motif objects, in the order of ``motif_name_list``.

    Raises:
        ExecutionException: if ``database_format`` is none of the above.
    """
    motifs = [Motif(0, 0, motif_name, None) for motif_name in motif_name_list]

    if database_format in ("transfac", "tf"):
        MotifUtils.getMotifsPWMFromJasparTF(motifs, database_file_path)
        return motifs

    if database_format == "meme":
        MotifUtils.getMotifsPWMFromMeme(motifs, database_file_path)
        return motifs

    raise ExecutionException(
        "CompareIdentifiedMotifsProcessor.getMotifMatrices : Unknown database format : "
        + database_format)
def extendBlock(self, index_start, index_end, ratio, left_limit, pwm):
    """Grow the block [index_start, index_end) to the right, then to the left.

    The block is widened one position at a time. A step is accepted only
    when the ``Constants.MAX_INDEX`` row of ``pwm.ratioMatrix`` is >= 0 at
    the candidate position and ``self.computeBlockRatio`` of the widened
    block is not below ``self.windowConservationLimit``. The first rejected
    step ends the extension in that direction.

    Args:
        index_start: inclusive start of the initial block.
        index_end: exclusive end of the initial block.
        ratio: not used by this method.
        left_limit: the start index is never moved below this value.
        pwm: matrix object providing ``totalLength``, ``ratioMatrix`` and
            ``getPWM``.

    Returns:
        A Motif with an empty name built from the final bounds and
        ``pwm.getPWM(index_start, index_end)``.
    """
    # Extend to the right. The interval is semi-open, so position index_end
    # is not part of the current block yet: moving right means including it.
    while index_end <= pwm.totalLength - 1:
        if not pwm.ratioMatrix[Constants.MAX_INDEX][index_end] >= 0:
            break
        widened_ratio = self.computeBlockRatio(index_start, index_end + 1, pwm)
        if widened_ratio < self.windowConservationLimit:
            break
        index_end += 1

    # Extend to the left, but never below left_limit (nor below index 0).
    while index_start >= 1 and index_start > left_limit:
        if not pwm.ratioMatrix[Constants.MAX_INDEX][index_start - 1] >= 0:
            break
        widened_ratio = self.computeBlockRatio(index_start - 1, index_end, pwm)
        if widened_ratio < self.windowConservationLimit:
            break
        index_start -= 1

    # Build the resulting block from the final bounds.
    return Motif(index_start, index_end, "",
                 pwm.getPWM(index_start, index_end))