def getStatistics(statistics_node, comm_struct):
    """Fill *comm_struct* with the statistics found under *statistics_node*.

    Each child of *statistics_node* is dispatched on its lower-cased tag:

    * a motif-statistics element with a non-empty name becomes a
      ``MotifStatistics`` instance stored in ``comm_struct.motifStatistics``
      under that name; its nested parameter elements are forwarded to
      ``setAttribute`` (nested elements with any other tag are ignored);
    * a parameter element is stored in ``comm_struct.paramStatistics``;
    * any other element raises ``ParsingException``.

    A motif-statistics element whose name is missing or empty is skipped.
    """
    # NOTE(review): the function takes no ``self``; presumably it is declared
    # as a static method of BedSeqAlignmentStatsCommStruct - confirm.
    owner = BedSeqAlignmentStatsCommStruct

    for son_node in statistics_node:
        tag = son_node.tag.lower()

        if tag == owner.MOTIF_STATS_TAG:
            name = CommStruct.getAttribute(son_node, owner.MOTIF_STATS_NAME_ATT)
            if name is None or len(name) == 0:
                # Unnamed motif statistics are silently dropped.
                continue

            # Read every descriptive attribute before building the object.
            stats_id = CommStruct.getAttribute(son_node, owner.MOTIF_STATS_ID_ATT)
            stats_family = CommStruct.getAttribute(son_node, owner.MOTIF_STATS_FAMILY_ATT)
            stats_class = CommStruct.getAttribute(son_node, owner.MOTIF_STATS_CLASS_ATT)
            stats_type = CommStruct.getAttribute(son_node, owner.MOTIF_STATS_TYPE_ATT)
            stats_size = CommStruct.getAttributeAsint(son_node, owner.MOTIF_STATS_SIZE_ATT)

            motif_stats = MotifStatistics(name)
            motif_stats.motifID = stats_id
            motif_stats.motifFamily = stats_family
            motif_stats.motifClass = stats_class
            motif_stats.motifType = stats_type
            motif_stats.motifSize = stats_size

            # Nested parameters become attributes of the motif statistics.
            for param_node in son_node:
                if param_node.tag.lower() != owner.PARAM_TAG:
                    continue
                motif_stats.setAttribute(
                    CommStruct.getAttribute(param_node, owner.PARAM_NAME_ATT),
                    CommStruct.getAttribute(param_node, owner.PARAM_VALUE_ATT))

            comm_struct.motifStatistics[name] = motif_stats

        elif tag == owner.PARAM_TAG:
            param_name = CommStruct.getAttribute(son_node, owner.PARAM_NAME_ATT)
            param_value = CommStruct.getAttribute(son_node, owner.PARAM_VALUE_ATT)
            comm_struct.paramStatistics[param_name] = param_value

        else:
            raise ParsingException(
                "BedSeqAlignmentStatsCommStruct.getStatistics : The statistics contains an unauthorized element : '"
                + tag + "'")
def getAlignmentSequences(sub_node, bedseq, seqalign):
    """Add every sequence element found under *sub_node* to *seqalign*.

    Each child must carry the sequence tag; its species and text attributes
    are read and passed to ``seqalign.addSequence`` (the text as a list of
    characters).

    Raises:
        ParsingException: if a child has a different tag, or if a sequence
            lacks its species or text attribute.
    """
    for node_sequence in sub_node:
        tag = node_sequence.tag.lower()
        if tag != BedSeqAlignmentStatsCommStruct.SEQUENCE_TAG:
            raise ParsingException(
                "BedSeqAlignmentStatsCommStruct.getAlignmentSequences : The sequences of the alignment of '"
                + bedseq.toString() + "' contains an unauthorized element : '" + tag + "'")

        species = CommStruct.getAttribute(
            node_sequence, BedSeqAlignmentStatsCommStruct.SEQUENCE_SPECIES_ATT)
        raw_text = CommStruct.getAttribute(
            node_sequence, BedSeqAlignmentStatsCommStruct.SEQUENCE_TEXT_ATT)

        # Validate BEFORE converting: list(None) would raise TypeError and
        # the "missing required attributes" error could never be reported.
        if species is None or raw_text is None:
            raise ParsingException(
                "BedSeqAlignmentStatsCommStruct.getAlignmentSequences : A sequence of the alignment of '"
                + bedseq.toString() + "' is missing required attributes")

        seqalign.addSequence(species, list(raw_text))
def __init__(self):
    """Initialise the base CommStruct state and the empty containers."""
    CommStruct.__init__(self)

    # Statistics parameters: parameter name -> parameter value.
    self.paramStatistics = dict()

    # BED sequences: "species.chromosome" key -> list of BEDSequence.
    self.bedSequencesDict = dict()

    self.baseSpecies = ""
def __init__(self):
    """Initialise the base CommStruct state and the empty containers."""
    CommStruct.__init__(self)

    # Multiple alignments: BED sequence -> list of multiple alignments.
    self.bedToMA = dict()

    # BED sequences: chromosome -> list of BED sequences.
    self.bedSequencesDict = dict()

    self.baseSpecies = ""
def getAlignmentMotifs(sub_node, bedseq, seqalign):
    """Parse every motif element under *sub_node* and add it to *seqalign*.

    For each child carrying the motif tag, the start, end, name, offset,
    score and PWM attributes are read, together with the optional id,
    consensus, species count and strand (read with ``False`` as third
    argument). A ``Motif`` is then built and handed to
    ``seqalign.addMotif``.

    The PWM attribute is expected as ``key:v1 v2 ...`` rows separated by
    ``;``. Rows without a ``:`` are ignored. ``PWM.totalLength`` is the
    number of values in the last parsed row (0 when no row was parsed).

    Raises:
        ParsingException: if a child has a different tag, if a PWM value is
            not an integer, or if start, end or name is missing.
    """
    for node_motif in sub_node:
        tag = node_motif.tag.lower()
        if tag != BedSeqAlignmentStatsCommStruct.MOTIF_TAG:
            raise ParsingException(
                "BedSeqAlignmentStatsCommStruct.getAlignmentMotifs : The motifs of the alignment of '"
                + bedseq.toString() + "' contains an unauthorized element : '" + tag + "'")

        start = CommStruct.getAttributeAsint(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_START_ATT)
        end = CommStruct.getAttributeAsint(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_END_ATT)
        name = CommStruct.getAttribute(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_NAME_ATT)
        motif_id = CommStruct.getAttribute(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_ID_ATT, False)
        consensus = CommStruct.getAttribute(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_CONSENSUS_ATT, False)
        nb_species = CommStruct.getAttributeAsint(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_NBSPECIES_ATT, False)
        strand = CommStruct.getAttribute(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_STRAND_ATT, False)
        offset = CommStruct.getAttributeAsint(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_OFFSET_ATT)
        score = CommStruct.getAttributeAsfloat(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_SCORE_ATT)

        # Retrieve the PWM of the motif
        pwm_text = CommStruct.getAttribute(
            node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_PWM_ATT)
        if pwm_text is not None and len(pwm_text) > 0:
            pwm_matrix = {}
            # Initialised up front so that a PWM string without any
            # "key:values" row no longer hits an unbound local below.
            length = 0
            for row in pwm_text.split(";"):
                fields = row.split(":")
                if len(fields) < 2:
                    continue
                tokens = fields[1].split()
                try:
                    counts = [int(token) for token in tokens]
                except ValueError as val_exce:
                    raise ParsingException(
                        "BedSeqAlignmentStatsCommStruct.getAlignmentMotifs : Unable to get integer value for Motif '"
                        + BedSeqAlignmentStatsCommStruct.MOTIF_PWM_ATT
                        + "' attributes. From:\n\t---> " + str(val_exce))
                length = len(tokens)
                # A key is only created once it has at least one value.
                if counts:
                    pwm_matrix.setdefault(fields[0], []).extend(counts)

            pwm = PWM()
            pwm.matrix = pwm_matrix
            pwm.totalLength = length
            pwm.nbSequences = nb_species
        else:
            pwm = None

        if start is None or end is None or name is None:
            raise ParsingException(
                "BedSeqAlignmentStatsCommStruct.getAlignmentMotifs : The motifs of the alignment of '"
                + bedseq.toString() + "' is missing required attributes")

        motif = Motif(start, end, name, pwm)
        motif.offset = offset
        motif.score = score
        # Optional attributes are only set when present in the element.
        if consensus is not None:
            motif.consensus = consensus
        if motif_id is not None:
            motif.id = motif_id
        if strand is not None:
            motif.strand = strand
        seqalign.addMotif(motif)
def __init__(self):
    """Initialise the base CommStruct state and the empty containers."""
    CommStruct.__init__(self)

    # Identified motif statistics: motif name -> MotifStatistics instance.
    self.motifStatistics = dict()

    # Statistics parameters: parameter name -> parameter value.
    self.paramStatistics = dict()

    # Multiple alignments: BED sequence -> list of multiple alignments.
    self.bedToMA = dict()

    # BED sequences: chromosome -> list of BED sequences.
    self.bedSequencesDict = dict()

    self.baseSpecies = ""
def getStatistics(statistics_node, comm_struct):
    """Copy the parameter elements of *statistics_node* into *comm_struct*.

    Every child must carry the parameter tag; its name and value attributes
    are stored as one entry of ``comm_struct.paramStatistics``.

    Raises:
        ParsingException: if a child carries any other tag.
    """
    for son_node in statistics_node:
        tag = son_node.tag.lower()
        if tag != BedSeqCommStruct.PARAM_TAG:
            raise ParsingException(
                "BedSeqCommStruct.getStatistics : The statistics contains an unauthorized element : '"
                + tag + "'")

        param_name = CommStruct.getAttribute(son_node, BedSeqCommStruct.PARAM_NAME_ATT)
        param_value = CommStruct.getAttribute(son_node, BedSeqCommStruct.PARAM_VALUE_ATT)
        comm_struct.paramStatistics[param_name] = param_value
def getBEDSequence(node_bedseq, comm_struct):
    """Build a BEDSequence from *node_bedseq* and register it.

    Species, chromosome, start and end are required; score, max peak and id
    are read with ``False`` as third argument and only copied onto the
    sequence when present. The new sequence is added to *comm_struct* via
    ``addBEDSequence`` and returned.

    Raises:
        ParsingException: if species, chromosome, start or end is missing.
    """
    species = CommStruct.getAttribute(node_bedseq, BedSeqCommStruct.BEDSEQ_SPECIES_ATT)
    chrom = CommStruct.getAttribute(node_bedseq, BedSeqCommStruct.BEDSEQ_CHROM_ATT)
    start = CommStruct.getAttributeAsint(node_bedseq, BedSeqCommStruct.BEDSEQ_START_ATT)
    end = CommStruct.getAttributeAsint(node_bedseq, BedSeqCommStruct.BEDSEQ_END_ATT)
    score = CommStruct.getAttributeAsint(
        node_bedseq, BedSeqCommStruct.BEDSEQ_SCORE_ATT, False)
    peak_max = CommStruct.getAttributeAsint(
        node_bedseq, BedSeqCommStruct.BEDSEQ_MAX_PEAK_ATT, False)
    peak_id = CommStruct.getAttribute(
        node_bedseq, BedSeqCommStruct.BEDSEQ_ID_ATT, False)

    if species is None or chrom is None or start is None or end is None:
        raise ParsingException(
            "BedSeqCommStruct.getBEDSequence : Malformed BED Sequence - unable to retrieve sequence information")

    bed_sequence = BEDSequence(species, chrom, start, end)
    if score is not None:
        bed_sequence.score = score
    if peak_max is not None:
        # The max-peak attribute is stored as the sequence reference index.
        bed_sequence.referenceIndex = peak_max
    if peak_id is not None:
        bed_sequence.id = peak_id

    comm_struct.addBEDSequence(bed_sequence)
    return bed_sequence
def getAlignment(align_node, bedseq, comm_struct):
    """Create the SequenceAlignment described by *align_node*.

    The alignment takes its name from the node's alignment-name attribute
    and its reference species from *bedseq*. It is attached to *bedseq* in
    *comm_struct* through ``addSequenceAlignment`` and then returned.
    """
    alignment_name = CommStruct.getAttribute(
        align_node, BedSeqAlignmentStatsCommStruct.ALIGNMENT_NAME_ATT)

    alignment = SequenceAlignment()
    alignment.name = alignment_name
    alignment.referenceSpecies = bedseq.species

    comm_struct.addSequenceAlignment(bedseq, alignment)
    return alignment
def getMotif(node_motif, comm_struct):
    """Register the motif described by *node_motif* and its statistics.

    A ``Motif`` is created with start and end 0, the node's name and no PWM;
    the optional consensus is copied when present. The motif is added to
    *comm_struct*, then its statistics are obtained from
    ``MotifStatisticsCommStruct.getMotifStatistics`` and attached with
    ``addMotifStatistics``.

    Raises:
        ParsingException: if the motif has no name.
    """
    name = CommStruct.getAttribute(
        node_motif, MotifStatisticsCommStruct.MOTIF_NAME_ATT)
    consensus = CommStruct.getAttribute(
        node_motif, MotifStatisticsCommStruct.MOTIF_CONSENSUS_ATT, False)

    if name is None:
        # The message previously named "getAlignmentMotifs" (copy-paste),
        # which pointed at the wrong method when debugging.
        raise ParsingException(
            "MotifStatisticsCommStruct.getMotif : The motif is missing required attribute 'name'")

    motif = Motif(0, 0, name, None)
    if consensus is not None:
        motif.consensus = consensus
    comm_struct.addMotif(motif)

    statistics = MotifStatisticsCommStruct.getMotifStatistics(node_motif, motif)
    comm_struct.addMotifStatistics(motif, statistics)
def getBEDSequence(node_bedseq, comm_struct):
    """Read the BED sequence attributes of *node_bedseq*.

    Any ``ParsingException`` raised while reading the attributes is
    re-raised with a message identifying the BED sequence as malformed.

    Raises:
        ParsingException: if reading one of the attributes fails.
    """
    # NOTE(review): in the code visible here the parsed values are never
    # used and the function implicitly returns None; *comm_struct* is not
    # touched either. The remainder of the method may be missing - confirm
    # against the full file before relying on a return value.
    try:
        species = CommStruct.getAttribute(
            node_bedseq, BedSeqAlignmentStatsCommStruct.BEDSEQ_SPECIES_ATT)
        chrom = CommStruct.getAttribute(
            node_bedseq, BedSeqAlignmentStatsCommStruct.BEDSEQ_CHROM_ATT)
        start = CommStruct.getAttributeAsint(
            node_bedseq, BedSeqAlignmentStatsCommStruct.BEDSEQ_START_ATT)
        end = CommStruct.getAttributeAsint(
            node_bedseq, BedSeqAlignmentStatsCommStruct.BEDSEQ_END_ATT)
        score = CommStruct.getAttributeAsint(
            node_bedseq, BedSeqAlignmentStatsCommStruct.BEDSEQ_SCORE_ATT, False)
        peak_max = CommStruct.getAttributeAsint(
            node_bedseq, BedSeqAlignmentStatsCommStruct.BEDSEQ_MAX_PEAK_ATT, False)
        peak_id = CommStruct.getAttribute(
            node_bedseq, BedSeqAlignmentStatsCommStruct.BEDSEQ_ID_ATT, False)
    except ParsingException as par_exce:
        # "except X as e" is valid from Python 2.6 onwards and on Python 3,
        # unlike the comma form used before.
        raise ParsingException(
            "BedSeqAlignmentStatsCommStruct.getBEDSequence : Malformed BED Sequence - some attributes are not numbers. From:\n\t---> "
            + str(par_exce))
def __init__(self):
    """Initialise the base CommStruct state and the empty motif containers."""
    CommStruct.__init__(self)

    # Filled through motif keys; starts empty.
    self.motifToStatistics = dict()

    # Starts as an empty list of motifs.
    self.motifList = list()