Beispiel #1
0
 def getStatistics( statistics_node, comm_struct):
     """
     Copy the statistics found under 'statistics_node' into 'comm_struct'.

     Every child element must be one of:
       - a motif statistics element: a MotifStatistics instance is built
         from its attributes and nested parameter elements, then stored in
         comm_struct.motifStatistics under the motif name. Elements whose
         name is None or empty are ignored;
       - a parameter element: its value is stored in
         comm_struct.paramStatistics under the parameter name.

     Raises ParsingException if a child element carries any other tag.
     """
     consts = BedSeqAlignmentStatsCommStruct

     for child in statistics_node:
         tag = child.tag.lower()

         if tag == consts.MOTIF_STATS_TAG:
             name = CommStruct.getAttribute( child, consts.MOTIF_STATS_NAME_ATT)
             # A motif entry without a usable name is skipped, not rejected
             if name is None or len( name) == 0:
                 continue

             # Read every attribute before building the statistics object
             stats_id = CommStruct.getAttribute( child, consts.MOTIF_STATS_ID_ATT)
             stats_family = CommStruct.getAttribute( child, consts.MOTIF_STATS_FAMILY_ATT)
             stats_class = CommStruct.getAttribute( child, consts.MOTIF_STATS_CLASS_ATT)
             stats_type = CommStruct.getAttribute( child, consts.MOTIF_STATS_TYPE_ATT)
             stats_size = CommStruct.getAttributeAsint( child, consts.MOTIF_STATS_SIZE_ATT)

             motif_stats = MotifStatistics( name)
             motif_stats.motifID = stats_id
             motif_stats.motifFamily = stats_family
             motif_stats.motifClass = stats_class
             motif_stats.motifType = stats_type
             motif_stats.motifSize = stats_size

             # Nested parameter elements become attributes of the motif
             # statistics; nested elements with another tag are ignored
             for grandchild in child:
                 if grandchild.tag.lower() != consts.PARAM_TAG:
                     continue
                 key = CommStruct.getAttribute( grandchild, consts.PARAM_NAME_ATT)
                 value = CommStruct.getAttribute( grandchild, consts.PARAM_VALUE_ATT)
                 motif_stats.setAttribute( key, value)

             comm_struct.motifStatistics[ name] = motif_stats
             continue

         if tag == consts.PARAM_TAG:
             key = CommStruct.getAttribute( child, consts.PARAM_NAME_ATT)
             value = CommStruct.getAttribute( child, consts.PARAM_VALUE_ATT)
             comm_struct.paramStatistics[ key] = value
             continue

         raise ParsingException( "BedSeqAlignmentStatsCommStruct.getStatistics : The statistics contains an unauthorized element : '" + tag +  "'")
    def getAlignmentSequences(sub_node, bedseq, seqalign):
        """
        Add every sequence element found under 'sub_node' to 'seqalign'.

        For each child element the species and text attributes are read and
        passed to seqalign.addSequence(); the text is handed over as a list
        of its characters.

        'bedseq' is only used (through toString()) to build error messages.

        Raises ParsingException if a child element is not a sequence element
        or if its species or text attribute is None.
        """
        for node_sequence in sub_node:
            tag = node_sequence.tag.lower()
            if tag != BedSeqAlignmentStatsCommStruct.SEQUENCE_TAG:
                raise ParsingException(
                    "BedSeqAlignmentStatsCommStruct.getAlignmentSequences : The sequences of the alignment of '"
                    + bedseq.toString() +
                    "' contains an unauthorized element : '" +
                    tag + "'")

            species = CommStruct.getAttribute(
                node_sequence,
                BedSeqAlignmentStatsCommStruct.SEQUENCE_SPECIES_ATT)
            raw_text = CommStruct.getAttribute(
                node_sequence,
                BedSeqAlignmentStatsCommStruct.SEQUENCE_TEXT_ATT)

            # Test the raw value BEFORE converting it: list(None) raises a
            # TypeError, which would hide the intended ParsingException.
            if species is None or raw_text is None:
                raise ParsingException(
                    "BedSeqAlignmentStatsCommStruct.getAlignmentSequences : A sequence of the alignment of '"
                    + bedseq.toString() +
                    "' is missing required attributes")

            seqalign.addSequence(species, list(raw_text))
Beispiel #3
0
    def __init__(self):
        """
        Initialise the base CommStruct part, then create empty containers
        for the base species name, the BED sequences and the statistics
        parameters.
        """

        CommStruct.__init__(self)
        self.baseSpecies = ""
        # Dictionary of the BED sequences : key = <species>.<chromosome> / value = list of BEDSequence
        self.bedSequencesDict = {}
        # Dictionary of statistics parameters : key = parameter name / value = parameter value
        self.paramStatistics = {}
Beispiel #4
0
    def __init__(self):
        """
        Initialise the base CommStruct part, then create empty containers
        for the base species name, the BED sequences and their multiple
        alignments.
        """

        CommStruct.__init__(self)
        self.baseSpecies = ""
        # Dictionary of the BED sequences : key = chromosome / value = list of BED sequences
        self.bedSequencesDict = {}
        # Dictionary of the multiple alignments : key = BED sequence / value = list of multiple alignments
        self.bedToMA = {}
Beispiel #5
0
    def getAlignmentMotifs( sub_node, bedseq, seqalign):
        """
        Build a Motif for every motif element under 'sub_node' and add it
        to 'seqalign' through seqalign.addMotif().

        For each child element the start, end, name, id, consensus, number
        of species, strand, offset and score attributes are read. Id,
        consensus, number of species and strand are requested with a
        trailing False argument (presumably "not mandatory" - confirm
        against CommStruct) and are only copied to the motif when not None.

        The PWM attribute, when not empty, is parsed as ';'-separated rows
        of the form '<key>:<int> <int> ...'. Rows without ':' are ignored.
        The resulting PWM gets the parsed matrix, the number of values of
        the last parsed row as totalLength and the number of species as
        nbSequences. Without a PWM attribute the motif is built with
        pwm = None.

        'bedseq' is only used (through toString()) to build error messages.

        Raises ParsingException if a child element is not a motif element,
        if a PWM value is not an integer, or if start, end or name is None.
        """
        for node_motif in sub_node:
            tag = node_motif.tag.lower()
            if tag != BedSeqAlignmentStatsCommStruct.MOTIF_TAG:
                raise ParsingException( "BedSeqAlignmentStatsCommStruct.getAlignmentMotifs : The motifs of the alignment of '" + bedseq.toString() + "' contains an unauthorized element : '" + tag +  "'")

            start = CommStruct.getAttributeAsint( node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_START_ATT)
            end = CommStruct.getAttributeAsint( node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_END_ATT)
            name = CommStruct.getAttribute( node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_NAME_ATT)
            motif_id = CommStruct.getAttribute( node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_ID_ATT, False)
            consensus = CommStruct.getAttribute( node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_CONSENSUS_ATT, False)
            nb_species = CommStruct.getAttributeAsint( node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_NBSPECIES_ATT, False)
            strand = CommStruct.getAttribute( node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_STRAND_ATT, False)
            offset = CommStruct.getAttributeAsint( node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_OFFSET_ATT)
            score = CommStruct.getAttributeAsfloat( node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_SCORE_ATT)

            # Retrieve the PWM of the motif
            pwm_text = CommStruct.getAttribute( node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_PWM_ATT)
            if pwm_text is not None and len( pwm_text) > 0:
                pwm_matrix = {}
                # Bound before the loop so that a PWM string without any
                # '<key>:<values>' row yields a length of 0 instead of a
                # NameError when the PWM is filled in below.
                length = 0
                for row in pwm_text.split( ";"):
                    fields = row.split( ":")
                    if len( fields) > 1:
                        row_key = fields[0]
                        try:
                            counts = [int( token) for token in fields[1].split()]
                        except ValueError as val_exce:
                            raise ParsingException( "BedSeqAlignmentStatsCommStruct.getAlignmentMotifs : Unable to get integer value for Motif '" + BedSeqAlignmentStatsCommStruct.MOTIF_PWM_ATT + "' attributes. From:\n\t---> " + str( val_exce))
                        # The length of the last parsed row wins
                        length = len( counts)
                        # A row without values does not create a matrix entry
                        if counts:
                            pwm_matrix.setdefault( row_key, []).extend( counts)

                pwm = PWM()
                pwm.matrix = pwm_matrix
                pwm.totalLength = length
                pwm.nbSequences = nb_species
            else:
                pwm = None

            if start is None or end is None or name is None:
                raise ParsingException( "BedSeqAlignmentStatsCommStruct.getAlignmentMotifs : The motifs of the alignment of '" + bedseq.toString() + "' is missing required attributes")

            motif = Motif( start, end, name, pwm)
            motif.offset = offset
            motif.score = score
            if consensus is not None:
                motif.consensus = consensus
            if motif_id is not None:
                motif.id = motif_id
            if strand is not None:
                motif.strand = strand
            seqalign.addMotif( motif)
    def __init__(self):
        """
        Initialise the base CommStruct part, then create empty containers
        for the base species name, the BED sequences, their multiple
        alignments, the statistics parameters and the motif statistics.
        """

        CommStruct.__init__(self)
        self.baseSpecies = ""
        # Dictionary of the BED sequences : key = chromosome / value = list of BED sequences
        self.bedSequencesDict = {}
        # Dictionary of the multiple alignments : key = BED sequence / value = list of multiple alignments
        self.bedToMA = {}
        # Dictionary of statistics parameters : key = parameter name / value = parameter value
        self.paramStatistics = {}
        # Dictionary of identified motifs statistics : key = motif name / value = MotifStatistics instance
        self.motifStatistics = {}
Beispiel #7
0
    def getStatistics(statistics_node, comm_struct):
        """
        Copy the parameter elements found under 'statistics_node' into
        comm_struct.paramStatistics (key = parameter name, value =
        parameter value).

        Raises ParsingException if a child element is not a parameter
        element.
        """
        for child in statistics_node:
            tag = child.tag.lower()

            # Only parameter elements are allowed at this level
            if tag != BedSeqCommStruct.PARAM_TAG:
                raise ParsingException(
                    "BedSeqCommStruct.getStatistics : The statistics contains an unauthorized element : '"
                    + tag + "'")

            key = CommStruct.getAttribute(child,
                                          BedSeqCommStruct.PARAM_NAME_ATT)
            value = CommStruct.getAttribute(child,
                                            BedSeqCommStruct.PARAM_VALUE_ATT)
            comm_struct.paramStatistics[key] = value
Beispiel #8
0
    def getBEDSequence(node_bedseq, comm_struct):
        """
        Build a BEDSequence from the attributes of 'node_bedseq', register
        it through comm_struct.addBEDSequence() and return it.

        Species, chromosome, start and end are needed to build the
        sequence. Score, max peak and id are requested with a trailing
        False argument (presumably "not mandatory" - confirm against
        CommStruct) and are only copied to the sequence when not None.

        Raises ParsingException if species, chromosome, start or end is
        None.
        """
        fetch = CommStruct.getAttribute
        fetch_int = CommStruct.getAttributeAsint

        species = fetch(node_bedseq, BedSeqCommStruct.BEDSEQ_SPECIES_ATT)
        chrom = fetch(node_bedseq, BedSeqCommStruct.BEDSEQ_CHROM_ATT)
        start = fetch_int(node_bedseq, BedSeqCommStruct.BEDSEQ_START_ATT)
        end = fetch_int(node_bedseq, BedSeqCommStruct.BEDSEQ_END_ATT)
        score = fetch_int(node_bedseq, BedSeqCommStruct.BEDSEQ_SCORE_ATT,
                          False)
        peak_max = fetch_int(node_bedseq,
                             BedSeqCommStruct.BEDSEQ_MAX_PEAK_ATT, False)
        peak_id = fetch(node_bedseq, BedSeqCommStruct.BEDSEQ_ID_ATT, False)

        # Without these four values no BEDSequence can be built
        if species is None or chrom is None or start is None or end is None:
            raise ParsingException(
                "BedSeqCommStruct.getBEDSequence : Malformed BED Sequence - unable to retrieve sequence information"
            )

        bed_sequence = BEDSequence(species, chrom, start, end)
        if score is not None:
            bed_sequence.score = score
        if peak_max is not None:
            bed_sequence.referenceIndex = peak_max
        if peak_id is not None:
            bed_sequence.id = peak_id

        comm_struct.addBEDSequence(bed_sequence)
        return bed_sequence
Beispiel #9
0
 def getAlignment( align_node, bedseq, comm_struct):
     """
     Create a SequenceAlignment named after the name attribute of
     'align_node', using the species of 'bedseq' as reference species.

     The alignment is registered for 'bedseq' through
     comm_struct.addSequenceAlignment() and then returned.
     """
     alignment_name = CommStruct.getAttribute( align_node, BedSeqAlignmentStatsCommStruct.ALIGNMENT_NAME_ATT)

     alignment = SequenceAlignment()
     alignment.name = alignment_name
     alignment.referenceSpecies = bedseq.species

     # Attach the new alignment to its BED sequence before handing it back
     comm_struct.addSequenceAlignment( bedseq, alignment)
     return alignment
    def getMotif(node_motif, comm_struct):
        """
        Build a Motif from 'node_motif' and register it, together with its
        statistics, in 'comm_struct'.

        The motif is created with start and end set to 0 and without PWM.
        The consensus attribute is requested with a trailing False argument
        (presumably "not mandatory" - confirm against CommStruct) and is
        only copied to the motif when not None. The statistics are obtained
        from MotifStatisticsCommStruct.getMotifStatistics() and stored
        through comm_struct.addMotifStatistics().

        Raises ParsingException if the name attribute is None.
        """
        name = CommStruct.getAttribute(
            node_motif, MotifStatisticsCommStruct.MOTIF_NAME_ATT)
        consensus = CommStruct.getAttribute(
            node_motif, MotifStatisticsCommStruct.MOTIF_CONSENSUS_ATT, False)

        if name is None:
            # The message now names this method; it used to point at
            # 'getAlignmentMotifs', which sent readers to the wrong place.
            raise ParsingException(
                "MotifStatisticsCommStruct.getMotif : The motif is missing required attribute 'name'"
            )

        motif = Motif(0, 0, name, None)
        if consensus is not None:
            motif.consensus = consensus
        comm_struct.addMotif(motif)

        statistics = MotifStatisticsCommStruct.getMotifStatistics(
            node_motif, motif)
        comm_struct.addMotifStatistics(motif, statistics)
Beispiel #11
0
 def getBEDSequence( node_bedseq, comm_struct):
     """
     Read the BED sequence attributes (species, chromosome, start, end,
     score, max peak and id) of 'node_bedseq'.

     Score, max peak and id are requested with a trailing False argument
     (presumably "not mandatory" - confirm against CommStruct).

     Raises ParsingException, wrapping the original one, if reading any
     of the attributes fails with a ParsingException.

     NOTE(review): as visible here the values read are not used afterwards
     and 'comm_struct' is never touched - the remainder of this method
     looks to be missing from this excerpt; confirm against the full file.
     """
     try:
         species = CommStruct.getAttribute( node_bedseq, BedSeqAlignmentStatsCommStruct.BEDSEQ_SPECIES_ATT)
         chrom = CommStruct.getAttribute( node_bedseq, BedSeqAlignmentStatsCommStruct.BEDSEQ_CHROM_ATT)
         start = CommStruct.getAttributeAsint( node_bedseq, BedSeqAlignmentStatsCommStruct.BEDSEQ_START_ATT)
         end = CommStruct.getAttributeAsint( node_bedseq, BedSeqAlignmentStatsCommStruct.BEDSEQ_END_ATT)
         score = CommStruct.getAttributeAsint( node_bedseq, BedSeqAlignmentStatsCommStruct.BEDSEQ_SCORE_ATT, False)
         peak_max = CommStruct.getAttributeAsint( node_bedseq, BedSeqAlignmentStatsCommStruct.BEDSEQ_MAX_PEAK_ATT, False)
         peak_id = CommStruct.getAttribute( node_bedseq, BedSeqAlignmentStatsCommStruct.BEDSEQ_ID_ATT, False)
     # 'except E as name' is accepted from Python 2.6 onwards, whereas the
     # former 'except E, name' form is a syntax error on Python 3.
     except ParsingException as par_exce:
         raise ParsingException ( "BedSeqAlignmentStatsCommStruct.getBEDSequence : Malformed BED Sequence - some attributes are not numbers. From:\n\t---> " + str( par_exce))
    def __init__(self):
        """
        Initialise the base CommStruct part, then create an empty motif
        list and an empty motif-to-statistics dictionary.
        """

        CommStruct.__init__(self)
        # List of motifs (presumably Motif instances - confirm against the code filling it)
        self.motifList = []
        # Dictionary of statistics (presumably key = motif / value = its statistics - confirm)
        self.motifToStatistics = {}