예제 #1
0
    def execute(self, input_commstructs):
        """Run the block analysis on every alignment of the first input.

        The processor parameters are read and stored on ``self``, the list of
        desired species is built (reference species first, then the optional
        whitespace-separated extra species), and a PWM is initialised for each
        alignment found in ``bedToMA`` of the first input commstruct. When the
        chosen algorithm equals ``BlockProcessor.ALGORITHM_NONE_VALUE`` the
        whole alignment is registered as a single motif; otherwise the work is
        delegated to ``self.analyzeConservedBlocks``.

        Args:
            input_commstructs: sequence of input commstructs; only the first
                element is used.

        Returns:
            The first element of ``input_commstructs``.

        Raises:
            ExecutionException: if ``input_commstructs`` is None or empty.
        """
        if input_commstructs is None or len(input_commstructs) == 0:
            raise ExecutionException("BlockProcessor.execute : No inputs")

        input_commstruct = input_commstructs[0]

        # retrieve the processor parameters
        self.windowSize = self.getParameterAsint(
            BlockProcessor.WINDOW_SIZE_PARAM)
        self.residuConservationLimit = self.getParameterAsfloat(
            BlockProcessor.RESIDU_CONSERVATION_LIMIT_PARAM)
        self.windowConservationLimit = self.getParameterAsfloat(
            BlockProcessor.WINDOW_CONSERVATION_LIMIT_PARAM)

        # NOTE(review): self.algorithm is only assigned here when the
        # parameter is provided; presumably a default value is set elsewhere
        # (e.g. in the constructor) -- confirm.
        algo = self.getParameter(BlockProcessor.ALGORITHM_PARAM, False)
        if algo is not None:
            self.algorithm = algo.lower()

        referenceSpecies = self.getParameter(
            BlockProcessor.REFERENCE_SPECIES_PARAM)

        desired_species_line = self.getParameter(
            BlockProcessor.DESIRED_SPECIES_LIST_PARAM, False)
        Log.trace("BlockProcessor.execute : Chosen Algorithm is '" +
                  self.algorithm + "'")

        # The reference species always comes first in the list
        self.desiredSpeciesList = [referenceSpecies]
        if desired_species_line is not None:
            self.desiredSpeciesList.extend(desired_species_line.split())

        # Analyze the conserved region in each MSA
        # If 'None' algorithm is chosen, the entire MSA is considered as conserved
        for alignments in input_commstruct.bedToMA.values():
            for alignment in alignments:
                pwm = PWM()
                pwm.initFromAlignment(alignment, self.desiredSpeciesList)
                if self.algorithm != BlockProcessor.ALGORITHM_NONE_VALUE:
                    self.analyzeConservedBlocks(pwm, alignment)
                else:
                    # One motif spanning the whole alignment
                    new_block = Motif(0, alignment.totalLength, "", pwm)
                    new_block.composeName(alignment.name)
                    alignment.addMotif(new_block, True)

        return input_commstruct
예제 #2
0
    def getMotifDefinitions(self, motif_name_list, database_file_path):
        """Create one Motif per requested name and load their PWMs.

        Each motif is created with start 0, end 0 and no PWM, then the whole
        list is handed to ``MotifUtils.getMotifsPWMFromJasparTF`` together
        with the database file path.

        Args:
            motif_name_list: iterable of motif names.
            database_file_path: path passed through to the MotifUtils loader.

        Returns:
            The list of Motif objects, in the order of ``motif_name_list``.
        """
        motifs = [Motif(0, 0, motif_name, None)
                  for motif_name in motif_name_list]

        MotifUtils.getMotifsPWMFromJasparTF(motifs, database_file_path)

        return motifs
예제 #3
0
    def getAlignmentMotifs(sub_node, bedseq, seqalign):
        """Parse the motif elements found under *sub_node* into *seqalign*.

        Every child of ``sub_node`` must carry the tag
        ``BedSeqAlignmentStatsCommStruct.MOTIF_TAG``. For each child the motif
        attributes are read, the optional PWM attribute is decoded, and the
        resulting Motif is added to ``seqalign``.

        The PWM attribute is expected to look like ``key:v1 v2 ...;key:...``:
        rows are separated by ';', the row key is separated from its values
        by ':' and the values are whitespace-separated integers. Rows without
        a ':' are ignored.

        Args:
            sub_node: iterable of element nodes (each exposing ``tag``).
            bedseq: object whose ``toString()`` is used in error messages.
            seqalign: alignment object receiving the motifs via ``addMotif``.

        Raises:
            ParsingException: if a child has an unexpected tag, if a PWM value
                is not an integer, or if start, end or name is missing.
        """
        for node_motif in sub_node:
            tag = node_motif.tag.lower()
            if tag != BedSeqAlignmentStatsCommStruct.MOTIF_TAG:
                raise ParsingException("BedSeqAlignmentStatsCommStruct.getAlignmentMotifs : The motifs of the alignment of '" + bedseq.toString() + "' contains an unauthorized element : '" + tag + "'")

            # NOTE(review): the trailing False argument presumably marks the
            # attribute as optional -- confirm against CommStruct.
            start = CommStruct.getAttributeAsint(node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_START_ATT)
            end = CommStruct.getAttributeAsint(node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_END_ATT)
            name = CommStruct.getAttribute(node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_NAME_ATT)
            motif_id = CommStruct.getAttribute(node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_ID_ATT, False)
            consensus = CommStruct.getAttribute(node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_CONSENSUS_ATT, False)
            nb_species = CommStruct.getAttributeAsint(node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_NBSPECIES_ATT, False)
            strand = CommStruct.getAttribute(node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_STRAND_ATT, False)
            offset = CommStruct.getAttributeAsint(node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_OFFSET_ATT)
            score = CommStruct.getAttributeAsfloat(node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_SCORE_ATT)

            # Retrieve the PWM of the motif
            pwm_text = CommStruct.getAttribute(node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_PWM_ATT)
            if pwm_text is not None and len(pwm_text) > 0:
                pwm_matrix = {}
                # Defined up front so that a PWM attribute without any
                # 'key:values' row yields a length of 0 instead of failing
                # on an unbound local.
                length = 0
                for row in pwm_text.split(";"):
                    fields = row.split(":")
                    if len(fields) <= 1:
                        continue
                    try:
                        counts = [int(value) for value in fields[1].split()]
                    except ValueError as val_exce:
                        raise ParsingException("BedSeqAlignmentStatsCommStruct.getAlignmentMotifs : Unable to get integer value for Motif '" + BedSeqAlignmentStatsCommStruct.MOTIF_PWM_ATT + "' attributes. From:\n\t---> " + str(val_exce))
                    # As before, the length reflects the last parsed row
                    length = len(counts)
                    # A key is only registered when the row holds values
                    if counts:
                        pwm_matrix.setdefault(fields[0], []).extend(counts)

                pwm = PWM()
                pwm.matrix = pwm_matrix
                pwm.totalLength = length
                pwm.nbSequences = nb_species
            else:
                pwm = None

            if start is None or end is None or name is None:
                raise ParsingException("BedSeqAlignmentStatsCommStruct.getAlignmentMotifs : The motifs of the alignment of '" + bedseq.toString() + "' is missing required attributes")

            motif = Motif(start, end, name, pwm)
            motif.offset = offset
            motif.score = score
            if consensus is not None:
                motif.consensus = consensus
            if motif_id is not None:
                motif.id = motif_id
            if strand is not None:
                motif.strand = strand
            seqalign.addMotif(motif)
    def getMotif(node_motif, comm_struct):
        """Build a Motif from *node_motif* and register it on *comm_struct*.

        The motif name and the consensus are read from the node attributes.
        The motif is created with start 0, end 0 and no PWM, added to
        ``comm_struct``, and its statistics (obtained from
        ``MotifStatisticsCommStruct.getMotifStatistics``) are attached with
        ``comm_struct.addMotifStatistics``.

        Args:
            node_motif: node holding the motif attributes.
            comm_struct: object receiving the motif and its statistics.

        Raises:
            ParsingException: if the name attribute value is None.
        """
        name = CommStruct.getAttribute(
            node_motif, MotifStatisticsCommStruct.MOTIF_NAME_ATT)
        # NOTE(review): the trailing False presumably marks the attribute as
        # optional -- confirm against CommStruct.getAttribute.
        consensus = CommStruct.getAttribute(
            node_motif, MotifStatisticsCommStruct.MOTIF_CONSENSUS_ATT, False)

        if name is None:
            # The message names this function (it previously pointed at
            # getAlignmentMotifs, which misled debugging).
            raise ParsingException(
                "MotifStatisticsCommStruct.getMotif : The motif is missing required attribute 'name'"
            )

        motif = Motif(0, 0, name, None)
        if consensus is not None:
            motif.consensus = consensus
        comm_struct.addMotif(motif)

        statistics = MotifStatisticsCommStruct.getMotifStatistics(
            node_motif, motif)
        comm_struct.addMotifStatistics(motif, statistics)
    def getMotifMatrices(self, motif_name_list, database_file_path,
                         database_format):
        """Create one Motif per name and load the PWMs from a motif database.

        The loader is selected from ``database_format``: "transfac" or "tf"
        uses ``MotifUtils.getMotifsPWMFromJasparTF`` and "meme" uses
        ``MotifUtils.getMotifsPWMFromMeme``.

        Args:
            motif_name_list: iterable of motif names.
            database_file_path: path passed through to the selected loader.
            database_format: one of "transfac", "tf" or "meme".

        Returns:
            The list of Motif objects, in the order of ``motif_name_list``.

        Raises:
            ExecutionException: if ``database_format`` is not a known format.
        """
        # Motifs start with start 0, end 0 and no PWM
        motifs = [Motif(0, 0, motif_name, None)
                  for motif_name in motif_name_list]

        if database_format in ("transfac", "tf"):
            MotifUtils.getMotifsPWMFromJasparTF(motifs, database_file_path)
        elif database_format == "meme":
            MotifUtils.getMotifsPWMFromMeme(motifs, database_file_path)
        else:
            message = "CompareIdentifiedMotifsProcessor.getMotifMatrices : Unknown database format : "
            raise ExecutionException(message + database_format)

        return motifs
예제 #6
0
    def extendBlock(self, index_start, index_end, ratio, left_limit, pwm):
        """Widen the semi-open block [index_start, index_end) and return it.

        The block is first grown to the right, then to the left, one position
        at a time. A side stops growing when the next position has a negative
        value in ``pwm.ratioMatrix[Constants.MAX_INDEX]`` or when the ratio
        of the enlarged block (``self.computeBlockRatio``) drops below
        ``self.windowConservationLimit``.

        Args:
            index_start: inclusive start index of the initial block.
            index_end: exclusive end index of the initial block.
            ratio: not used by this method.
            left_limit: the start index is never moved below this value.
            pwm: PWM providing ``totalLength``, ``ratioMatrix`` and ``getPWM``.

        Returns:
            A Motif with an empty name covering the extended block, holding
            the PWM returned by ``pwm.getPWM`` for that range.
        """
        # Right side: the interval is semi-open, so position index_end is not
        # part of the block yet; extending means taking that position in.
        while index_end <= pwm.totalLength - 1:
            if not pwm.ratioMatrix[Constants.MAX_INDEX][index_end] >= 0:
                break
            widened_ratio = self.computeBlockRatio(index_start, index_end + 1,
                                                   pwm)
            if widened_ratio < self.windowConservationLimit:
                break
            index_end += 1

        # Left side: same principle, never going below 0 nor below left_limit
        while index_start >= 1 and index_start > left_limit:
            candidate_start = index_start - 1
            if not pwm.ratioMatrix[Constants.MAX_INDEX][candidate_start] >= 0:
                break
            widened_ratio = self.computeBlockRatio(candidate_start, index_end,
                                                   pwm)
            if widened_ratio < self.windowConservationLimit:
                break
            index_start = candidate_start

        # build the resulting block
        return Motif(index_start, index_end, "",
                     pwm.getPWM(index_start, index_end))