Beispiel #1
0
    def parseClassfreqResults(self, file_path):
        """Parse a class-frequency histogram file into a dictionary.

        Every line of the file must split into exactly nine
        whitespace-separated tokens; token 1 is read as the class and token 3
        as its frequency, both converted to int and stored in ``result``.

        Raises ParsingException when a line has a different number of tokens
        or when one of the two values cannot be converted to int.

        NOTE(review): in the lines visible here the outer ``try`` has no
        ``except``/``finally`` clause and ``result`` is never returned -- the
        end of this method appears to be missing; confirm against the
        original source before relying on it.
        """

        result = {}

        # Expected layout of one histogram line (0-based token indexes).
        total_number_colums = 9
        class_col = 1
        frequency_col = 3

        try:
            file = open(file_path, "r")
            for line in file:
                tokens = line.split()
                if len(tokens) == total_number_colums:
                    try:
                        result[int(tokens[class_col])] = int(
                            tokens[frequency_col])
                    except (TypeError, ValueError), exce:
                        raise ParsingException(
                            "HistogramProcessor.parseClassfreqResults : Unable to get int value from histogram file : '"
                            + file_path + "'. From:\n\t---> " + str(exce))
                else:
                    # Any line with another token count aborts the whole parse.
                    raise ParsingException(
                        "HistogramProcessor.parseClassfreqResults : The histogram file is not correct formatted. Number of column is abnormal : '"
                        + file_path)
            # NOTE(review): not reached when a ParsingException is raised in
            # the loop above, so the handle stays open in that case.
            file.close()
    def getAlignmentSequences(sub_node, bedseq, seqalign):
        """Add every sequence child of ``sub_node`` to ``seqalign``.

        Each child must carry the sequence tag; its species attribute and its
        text attribute (converted to a list of characters) are passed to
        ``seqalign.addSequence``.

        Raises ParsingException when a child has another tag or when the
        species or text attribute is missing.
        """

        for node_sequence in sub_node:
            tag = node_sequence.tag.lower()
            if tag != BedSeqAlignmentStatsCommStruct.SEQUENCE_TAG:
                raise ParsingException(
                    "BedSeqAlignmentStatsCommStruct.getAlignmentSequences : The sequences of the alignment of '"
                    + bedseq.toString() +
                    "' contains an unauthorized element : '" + tag + "'")

            species = CommStruct.getAttribute(
                node_sequence,
                BedSeqAlignmentStatsCommStruct.SEQUENCE_SPECIES_ATT)
            text = CommStruct.getAttribute(
                node_sequence,
                BedSeqAlignmentStatsCommStruct.SEQUENCE_TEXT_ATT)

            # The None check must come before list(): list(None) raises a
            # TypeError, which previously hid the intended ParsingException.
            if species is None or text is None:
                raise ParsingException(
                    "BedSeqAlignmentStatsCommStruct.getAlignmentSequences : A sequence of the alignment of '"
                    + bedseq.toString() +
                    "' is missing required attributes")

            seqalign.addSequence(species, list(text))
Beispiel #3
0
    def getAlignmentMotifs(sub_node, bedseq, seqalign):
        """Add every motif child of ``sub_node`` to ``seqalign``.

        For each child the motif attributes are read, the optional PWM
        attribute is decoded into a ``PWM`` object and a ``Motif`` is built
        and handed to ``seqalign.addMotif``.

        The PWM attribute is a ';'-separated list of rows, each of the form
        ``<key>:<int> <int> ...``; rows without ':' are ignored.

        Raises ParsingException when a child has another tag, when a PWM
        value is not an integer, or when start, end or name is missing.
        """

        for node_motif in sub_node:
            if node_motif.tag.lower() != BedSeqAlignmentStatsCommStruct.MOTIF_TAG:
                raise ParsingException("BedSeqAlignmentStatsCommStruct.getAlignmentMotifs : The motifs of the alignment of '" + bedseq.toString() + "' contains an unauthorized element : '" + node_motif.tag.lower() + "'")

            start = CommStruct.getAttributeAsint(node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_START_ATT)
            end = CommStruct.getAttributeAsint(node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_END_ATT)
            name = CommStruct.getAttribute(node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_NAME_ATT)
            motif_id = CommStruct.getAttribute(node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_ID_ATT, False)
            consensus = CommStruct.getAttribute(node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_CONSENSUS_ATT, False)
            nb_species = CommStruct.getAttributeAsint(node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_NBSPECIES_ATT, False)
            strand = CommStruct.getAttribute(node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_STRAND_ATT, False)
            offset = CommStruct.getAttributeAsint(node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_OFFSET_ATT)
            score = CommStruct.getAttributeAsfloat(node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_SCORE_ATT)

            # Retrieve the PWM of the motif
            pwm = None
            pwm_text = CommStruct.getAttribute(node_motif, BedSeqAlignmentStatsCommStruct.MOTIF_PWM_ATT)
            if pwm_text is not None and len(pwm_text) > 0:
                pwm_matrix = {}
                # Initialised here so it is defined even when no row contains
                # a ':' separator (it used to be unbound in that case).
                length = 0
                for row in pwm_text.split(";"):
                    fields = row.split(":")
                    if len(fields) > 1:
                        values = fields[1].split()
                        # As before, the last decoded row decides the length.
                        length = len(values)
                        try:
                            for value in values:
                                pwm_matrix.setdefault(fields[0], []).append(int(value))
                        except ValueError as val_exce:
                            raise ParsingException("BedSeqAlignmentStatsCommStruct.getAlignmentMotifs : Unable to get integer value for Motif '" + BedSeqAlignmentStatsCommStruct.MOTIF_PWM_ATT + "' attributes. From:\n\t---> " + str(val_exce))

                pwm = PWM()
                pwm.matrix = pwm_matrix
                pwm.totalLength = length
                pwm.nbSequences = nb_species

            if start is None or end is None or name is None:
                raise ParsingException("BedSeqAlignmentStatsCommStruct.getAlignmentMotifs : The motifs of the alignment of '" + bedseq.toString() + "' is missing required attributes")

            motif = Motif(start, end, name, pwm)
            motif.offset = offset
            motif.score = score
            # Optional attributes only override the Motif defaults when present.
            if consensus is not None:
                motif.consensus = consensus
            if motif_id is not None:
                motif.id = motif_id
            if strand is not None:
                motif.strand = strand
            seqalign.addMotif(motif)
Beispiel #4
0
    def executePipelines(self):
        """Process the pipeline requests waiting in ``self.serverQueue``.

        For the request at the head of the queue this reads its five
        positional fields (pipelines file path, pipeline options, verbosity,
        resume flag, working directory), prepares the output directories,
        switches the log files, then reads the pipelines definition, applies
        the options and hands the result to ``PipelineXMLParser.toXMLFile``.

        Raises ParsingException when the pipelines definition cannot be read.

        NOTE(review): in the lines visible here nothing removes the request
        from ``self.serverQueue`` and neither ``result`` nor ``resume`` is
        used afterwards -- the end of the loop body appears to be missing;
        confirm against the original source.
        """

        result = True

        while len(self.serverQueue) > 0:

            params = self.serverQueue[0]
            pipelines_filepath = params[0]
            pipeline_options = params[1]
            try:
                verbosity = int(params[2])
            except ValueError:
                # Fall back to the default verbosity on a malformed value.
                verbosity = 1
            resume = (params[3].lower() == "true")
            working_dir = params[4]

            # Modifies the config if required and initialize logs and output directory
            if working_dir is not None and len(working_dir) > 0:
                self.config[PFConstants.BASE_OUTPUT_DIR_PARAM] = working_dir

            # Verify the base output dir and the output dir are created and create them if not
            # (0o777 is the octal spelling accepted by Python 2.6+ and 3.)
            FileUtils.createDirectory(
                self.config[PFConstants.BASE_OUTPUT_DIR_PARAM], 0o777)
            self.config[PFConstants.OUTPUT_DIR_PARAM] = os.path.join(
                self.getParameter(PFConstants.BASE_OUTPUT_DIR_PARAM),
                PFConstants.OUTPUT_DIR_NAME)
            FileUtils.createDirectory(
                self.config[PFConstants.OUTPUT_DIR_PARAM], 0o777)

            # Switch log location
            Log.switchFiles(self.getParameter(PFConstants.OUTPUT_DIR_PARAM),
                            verbosity)

            # Parse the XML file to retrieve the pipelines definition
            Log.trace(
                "#################################################################################"
            )
            Log.trace(
                "# PipelineManager.executePipelines : Reading pipelines from : "
                + pipelines_filepath)
            Log.trace(
                "#################################################################################"
            )

            try:
                pipelines = PipelineXMLParser.getPipelines(pipelines_filepath)
                OptionManager.applyOptions(pipelines, pipeline_options)
                PipelineXMLParser.toXMLFile(
                    self.config[PFConstants.OUTPUT_DIR_PARAM], pipelines)
            except (SyntaxError, ParsingException) as exce:
                # Both failure kinds are reported with the same message.
                raise ParsingException(
                    "PipelineManager.executePipelines : Unable to read definition of pipelines from XML file: '"
                    + pipelines_filepath + "'. From:\n\t---> " + str(exce))
Beispiel #5
0
 def getParam(node_param, component):
     """Read one parameter node and register it on ``component``.

     The name and value attributes of ``node_param`` are both read first;
     ParsingException is raised when the name is missing or empty, then when
     the value is missing or empty. Otherwise the pair is passed to
     ``component.addParameters``.
     """

     param_name = PipelineXMLParser.getAttribute(node_param, PipelineXMLParser.PARAM_NAME_ATT)
     param_value = PipelineXMLParser.getAttribute(node_param, PipelineXMLParser.PARAM_VALUE_ATT)

     if param_name is None or len(param_name) == 0:
         raise ParsingException("PipelineXMLParser.getParam : Malformed parameter - unable to retrieve parameter name in component '" + component.processorName + "'")

     if param_value is None or len(param_value) == 0:
         raise ParsingException("PipelineXMLParser.getParam : Malformed parameter - unable to retrieve parameter value in component '" + component.processorName + "'")

     component.addParameters(param_name, param_value)
Beispiel #6
0
    def execute(self, input_comm_structs):
        """Load the BED sequences and annotate them with peak information.

        Reads the processor parameters, builds the BED sequence dictionary
        with ``BEDParser.getBEDSequenceDictionnary``, optionally limits it
        with ``self.extractPeaks`` and, when a peak file is configured, sets
        ``referenceIndex`` on every BED sequence whose id matches a peak line.

        Raises ParsingException when the peak file cannot be opened or parsed.

        NOTE(review): the lines visible here end without returning anything
        and never use ``input_comm_structs``; the end of the method appears
        to be missing -- confirm against the original source.
        NOTE(review): a peak line naming a chromosome that has no entry in
        the dictionary raises an unhandled KeyError; confirm whether such
        lines should be skipped instead.
        """

        # Retrieve the processor parameters
        bed_filepath = self.getParameter(BEDProcessor.INPUT_BED_FILE_PARAM)
        species = self.getParameter(BEDProcessor.REFERENCE_SPECIES_PARAM)
        peak_filepath = self.getParameter(BEDProcessor.INPUT_PEAK_FILE, False)
        peak_number = self.getParameterAsint(BEDProcessor.PEAK_NUMBER, False)
        extension_5p = self.getParameterAsint(BEDProcessor.EXTENSION_5P, False)
        if extension_5p is None:
            extension_5p = 0
        extension_3p = self.getParameterAsint(BEDProcessor.EXTENSION_3P, False)
        if extension_3p is None:
            extension_3p = 0

        # Parse the BED file and get the BED sequences ordered by species and chromosom
        bedseq_dictionnary = BEDParser.getBEDSequenceDictionnary(
            species, bed_filepath, extension_5p, extension_3p)

        # Extract the desired number of peak if a limit has been defined
        if peak_number is not None:
            bedseq_dictionnary = self.extractPeaks(bedseq_dictionnary,
                                                   peak_number)

        # Parse the peak info file if exists
        if peak_filepath is not None and len(peak_filepath) > 0:
            # 0-based token indexes inside one line of the peak file.
            chrom_col = 0
            max_peak_col = 10
            id_col = 13

            try:
                input_file = open(peak_filepath)
                try:
                    for line in input_file:
                        tokens = line.split()
                        # Lines too short to hold an id are ignored.
                        if len(tokens) > id_col:
                            chrom = tokens[chrom_col]
                            max_peak = self.getTokenAsint(tokens[max_peak_col])
                            peak_id = tokens[id_col]
                            # chrom and peak_id come from split() and are
                            # therefore never None; only max_peak needs a check.
                            if max_peak is not None:
                                for bed_seq in bedseq_dictionnary[species + "." +
                                                                  chrom]:
                                    if bed_seq.id == peak_id:
                                        bed_seq.referenceIndex = max_peak
                finally:
                    # Release the handle even when a line fails to parse.
                    input_file.close()
            except ParsingException as par_exce:
                raise ParsingException(
                    "BEDProcessor.execute : An error occured while parsing peak information file : '"
                    + peak_filepath + "'. From:\n\t---> " + str(par_exce))
            except IOError as io_exce:
                raise ParsingException(
                    "BEDProcessor.execute : Unable to open peak information file : '"
                    + peak_filepath + "'. From:\n\t---> " + str(io_exce))
Beispiel #7
0
    def parseFile(self, file_name, is_chrom_file):
        """Parse a MAF file, using its index file when one can be opened.

        The file is scanned for a line whose first token is '##maf'. When one
        is found, the file ``file_name + "index"`` is tried: if it opens, the
        blocks are parsed with ``parseBlockListWithIndex``, otherwise with
        ``parseBlockListWithoutIndex``.

        Raises ParsingException when no '##maf' line is found or when an
        IOError occurs while opening or reading the file.
        """

        try:
            input_file = open(file_name, 'r')
            try:
                # Verify if the token '##maf' indicating a MAF file is found.
                # readline() is kept (rather than iterating over the file
                # object) so the read position handed to the block parsers is
                # exactly the one following the '##maf' line.
                is_maf_file = False
                while True:
                    line = input_file.readline()
                    if len(line) == 0:
                        break
                    tokens = line.split()
                    if tokens and tokens[0] == "##maf":
                        is_maf_file = True
                        break

                if not is_maf_file:
                    raise ParsingException("MAFProcessor.parseFile : The file '" +
                                           file_name + "' is not a MAF file")

                # It is a MAF file: check whether an index file exists. A
                # missing index is not an error, the file is then parsed
                # sequentially.
                index_path = file_name + "index"
                try:
                    input_index_file = open(index_path, "r")
                except IOError:
                    input_index_file = None

                if input_index_file is not None:
                    Log.trace("MAFProcessor.parseFile : parsing file '" +
                              file_name + "' using index '" + index_path + "'")
                    try:
                        self.parseBlockListWithIndex(input_index_file, input_file)
                    finally:
                        self.closeFile(input_index_file)
                else:
                    Log.trace("MAFProcessor.parseFile : parsing file '" +
                              file_name + "'")
                    self.parseBlockListWithoutIndex(input_file, is_chrom_file)
            finally:
                # Always release the MAF file, also when parsing fails.
                self.closeFile(input_file)
        except IOError as io_exec:
            raise ParsingException(
                "MAFProcessor.parseFile : Unable to open file '" + file_name +
                "'. From:\n\t---> " + str(io_exec))
Beispiel #8
0
 def getComponent(node_component, prefix):
     """Build a ``Component`` from a component node.

     The processor name attribute is mandatory. ``PipelineXMLParser.RANK`` is
     incremented and its new value, as a string, is passed to the Component
     constructor together with the name and ``prefix``. Every child node
     carrying the parameter tag is then handed to ``getParam``.

     Returns the new component. Raises ParsingException when the processor
     name is missing or empty, or when the created component compares equal
     to None.
     """

     processor_name = PipelineXMLParser.getAttribute(node_component, PipelineXMLParser.COMPONENT_PROCESSOR_ATT)
     if processor_name is None or len(processor_name) == 0:
         raise ParsingException("PipelineXMLParser.getComponent : Malformed component - unable to retrieve processor name")

     PipelineXMLParser.RANK += 1
     rank_label = str(PipelineXMLParser.RANK)
     component = Component(processor_name, rank_label, prefix)

     if component != None:
         # Lazy filter: each parameter node is processed as soon as it is found.
         param_nodes = (child for child in node_component
                        if child.tag.lower() == PipelineXMLParser.PARAM_TAG)
         for param_node in param_nodes:
             PipelineXMLParser.getParam(param_node, component)
         return component

     raise ParsingException("PipelineXMLParser.getComponent : Unable to create Component '" + processor_name)
Beispiel #9
0
    def getBEDSequence(node_bedseq, comm_struct):
        """Build a ``BEDSequence`` from a node and add it to ``comm_struct``.

        Species, chromosome, start and end are mandatory; score, max peak and
        id are optional and only assigned when present.

        Returns the created sequence. Raises ParsingException when one of
        the mandatory values is None.
        """

        att = BedSeqCommStruct

        species = CommStruct.getAttribute(node_bedseq, att.BEDSEQ_SPECIES_ATT)
        chrom = CommStruct.getAttribute(node_bedseq, att.BEDSEQ_CHROM_ATT)
        start = CommStruct.getAttributeAsint(node_bedseq, att.BEDSEQ_START_ATT)
        end = CommStruct.getAttributeAsint(node_bedseq, att.BEDSEQ_END_ATT)
        score = CommStruct.getAttributeAsint(
            node_bedseq, att.BEDSEQ_SCORE_ATT, False)
        max_peak = CommStruct.getAttributeAsint(
            node_bedseq, att.BEDSEQ_MAX_PEAK_ATT, False)
        seq_id = CommStruct.getAttribute(
            node_bedseq, att.BEDSEQ_ID_ATT, False)

        if species is None or chrom is None or start is None or end is None:
            raise ParsingException(
                "BedSeqCommStruct.getBEDSequence : Malformed BED Sequence - unable to retrieve sequence information"
            )

        bed_sequence = BEDSequence(species, chrom, start, end)
        if score is not None:
            bed_sequence.score = score
        if max_peak is not None:
            bed_sequence.referenceIndex = max_peak
        if seq_id is not None:
            bed_sequence.id = seq_id

        comm_struct.addBEDSequence(bed_sequence)
        return bed_sequence
Beispiel #10
0
 def getStatistics(statistics_node, comm_struct):
     """Fill ``comm_struct`` with the statistics found under ``statistics_node``.

     Motif-statistics children become ``MotifStatistics`` objects stored in
     ``comm_struct.motifStatistics`` under their name; such a child without a
     name is silently ignored. Parameter children are stored in
     ``comm_struct.paramStatistics``.

     Raises ParsingException for a child carrying any other tag.
     """

     stats = BedSeqAlignmentStatsCommStruct

     for son_node in statistics_node:
         tag = son_node.tag.lower()

         if tag == stats.MOTIF_STATS_TAG:
             name = CommStruct.getAttribute(son_node, stats.MOTIF_STATS_NAME_ATT)
             if name is None or len(name) == 0:
                 continue

             motif_id = CommStruct.getAttribute(son_node, stats.MOTIF_STATS_ID_ATT)
             family = CommStruct.getAttribute(son_node, stats.MOTIF_STATS_FAMILY_ATT)
             classe = CommStruct.getAttribute(son_node, stats.MOTIF_STATS_CLASS_ATT)
             motif_type = CommStruct.getAttribute(son_node, stats.MOTIF_STATS_TYPE_ATT)
             size = CommStruct.getAttributeAsint(son_node, stats.MOTIF_STATS_SIZE_ATT)

             motif_stats = MotifStatistics(name)
             motif_stats.motifID = motif_id
             motif_stats.motifFamily = family
             motif_stats.motifClass = classe
             motif_stats.motifType = motif_type
             motif_stats.motifSize = size

             # Nested parameter nodes become attributes of the motif statistics.
             for param_node in son_node:
                 if param_node.tag.lower() != stats.PARAM_TAG:
                     continue
                 att_name = CommStruct.getAttribute(param_node, stats.PARAM_NAME_ATT)
                 att_value = CommStruct.getAttribute(param_node, stats.PARAM_VALUE_ATT)
                 motif_stats.setAttribute(att_name, att_value)

             comm_struct.motifStatistics[name] = motif_stats

         elif tag == stats.PARAM_TAG:
             att_name = CommStruct.getAttribute(son_node, stats.PARAM_NAME_ATT)
             att_value = CommStruct.getAttribute(son_node, stats.PARAM_VALUE_ATT)
             comm_struct.paramStatistics[att_name] = att_value

         else:
             raise ParsingException("BedSeqAlignmentStatsCommStruct.getStatistics : The statistics contains an unauthorized element : '" + tag + "'")
Beispiel #11
0
    def getIntValue(self, token):
        """Return ``token`` converted to int.

        Raises ParsingException when the conversion fails. TypeError (for
        example a None token) is reported the same way as ValueError, and the
        token is passed through str() so building the message cannot fail.
        """

        try:
            return int(token)
        except (TypeError, ValueError) as val_exce:
            raise ParsingException(
                "PipelineListener.getIntValue : Unable to get integer value of '"
                + str(token) + "'. From:\n\t---> " + str(val_exce))
Beispiel #12
0
    def getIntValue(self, token):
        """Return ``token`` converted to int.

        Raises ParsingException when the conversion fails. TypeError (for
        example a None token) is reported the same way as ValueError, and the
        token is passed through str() so building the message cannot fail.
        """

        try:
            return int(token)
        except (TypeError, ValueError) as val_exce:
            raise ParsingException(
                "MAFProcessor : Unable to get integer value of '" + str(token) +
                "'. From:\n\t---> " + str(val_exce))
Beispiel #13
0
class BEDParser:
    """Reader for BED files producing ``BEDSequence`` objects."""

    # 0-based token indexes inside one line of a BED file.
    _chrom_col = 0
    _startindex_col = 1
    _endindex_col = 2
    _id_col = 3
    _strand_col = 5

    # --------------------------------------------------------------------------------------
    # Parse the given BED file and return a dictionnary of the BED Sequences
    # grouped by sequence keys ('species'.'chromosom')
    @staticmethod
    def getBEDSequenceDictionnary(species, bed_filepath, extension_5p,
                                  extension_3p):
        """Return the BED sequences of ``bed_filepath`` grouped by key.

        Each usable line yields one ``BEDSequence`` whose start is moved back
        by ``extension_5p`` (never below 0) and whose end is moved forward by
        ``extension_3p``. Sequences are appended to the list stored under
        ``bedsequence.getKey()``.

        Lines whose first token starts with '#' are skipped. Lines with fewer
        than three tokens, and lines whose start is not lower than their end,
        are logged and skipped.

        Raises ParsingException when the file cannot be read or when a
        coordinate is not an integer.
        """

        sequence_dic = {}

        try:
            with open(bed_filepath) as input_file:
                for line in input_file:
                    tokens = line.split()
                    if len(tokens) <= BEDParser._endindex_col:
                        Log.log("No 'chr' in line :" + line)
                        continue

                    chrom = tokens[BEDParser._chrom_col].lower()
                    if chrom[0:1] == "#":
                        continue
                    # Short chromosome names get the 'chr' prefix added.
                    if len(chrom) < 4:
                        chrom = "chr" + chrom

                    start = BEDParser.getTokenAsint(
                        tokens[BEDParser._startindex_col])
                    end = BEDParser.getTokenAsint(
                        tokens[BEDParser._endindex_col])
                    if start >= end:
                        Log.log(
                            "BEDParser.getBEDSequenceDictionnary : A sequence has inversed start and end coordinates : "
                            + line)
                        continue

                    start = max(start - extension_5p, 0)
                    end = end + extension_3p
                    bedsequence = BEDSequence(species, chrom, start, end)
                    if len(tokens) > BEDParser._id_col:
                        bedsequence.id = tokens[BEDParser._id_col]
                    sequence_dic.setdefault(bedsequence.getKey(),
                                            []).append(bedsequence)
        except ParsingException as par_exce:
            raise ParsingException(
                "BEDParser.getBEDSequenceDictionnary : Some attributes are not numbers. From:\n\t-->  "
                + str(par_exce))
        except IOError as io_exce:
            raise ParsingException(
                "BEDParser.getBEDSequenceDictionnary : Unable to open the file '"
                + bed_filepath + "'. From:\n\t-->  " + str(io_exce))

        # Callers use the returned dictionary; it used to be dropped.
        return sequence_dic
Beispiel #14
0
 def getAttributeAsint(node, att_name, required=True):
     """Return attribute ``att_name`` of ``node`` converted to int.

     The raw value goes through float() first, so a value such as "3.0" is
     accepted and truncated to 3.

     When the conversion fails (missing value, non-numeric text or a
     non-finite number), ParsingException is raised if ``required`` is true;
     otherwise None is returned.
     """

     try:
         return int(float(CommStruct.getAttribute(node, att_name, required)))
     except (TypeError, ValueError, OverflowError) as val_exce:
         # OverflowError: int(float("inf")) -- treated like any bad value.
         if required:
             raise ParsingException("CommStruct.getAttributeAsint : Unable to convert the value of attribute :'" + att_name + "'. From:\n\t---> " + str(val_exce))
         return None
Beispiel #15
0
 def getAttribute(node, att_name, required=True):
     """Return ``node.get(att_name)``.

     When the lookup itself raises, ParsingException is raised if
     ``required`` is true; otherwise None is returned.

     NOTE(review): whether a missing attribute yields None depends on the
     ``get`` method of ``node`` (presumably an ElementTree element, whose
     ``get`` returns None) -- ``required`` does not turn a None result into
     an error here.
     """

     try:
         return node.get(att_name)
     except Exception as exce:
         if required:
             # getattr() keeps the message buildable for objects without .tag.
             raise ParsingException("CommStruct.getAttribute : Node '" + str(getattr(node, "tag", node)) + "' does not know the attribute :'" + att_name + "'. From:\n\t---> " + str(exce))
         return None
Beispiel #16
0
    def getPipeline(node_pipeline):
        """Build a ``Pipeline`` from a pipeline node.

        The name attribute is mandatory; the content of the node is then
        processed by ``PipelineXMLParser.analyseNode`` with an empty list of
        previous components and an empty prefix.

        Returns the pipeline. Raises ParsingException when the name is
        missing or empty.
        """

        name = PipelineXMLParser.getAttribute(node_pipeline, PipelineXMLParser.PIPELINE_NAME_ATT)
        if name is None or len(name) == 0:
            raise ParsingException("PipelineXMLParser.getPipeline : Malformed pipeline - unable to retrieve pipeline name")

        pipeline = Pipeline()
        pipeline.name = name
        PipelineXMLParser.analyseNode(node_pipeline, [], pipeline, "")
        return pipeline
    def getMotifStatistics(node_motif, motif):
        """Build a ``MotifStatistics`` from the parameter children of a node.

        Children with another tag than the parameter tag are ignored. Known
        parameter names fill the chi2, histogram graph path, histogram and
        null histogram fields; unknown names are only logged.

        Returns the statistics object. Raises ParsingException when a
        parameter has no name or no value.
        """

        struct = MotifStatisticsCommStruct
        statistics = MotifStatistics()

        for node_param in node_motif:
            if node_param.tag.lower() != struct.PARAM_TAG:
                continue

            param_name = struct.getAttribute(
                node_param, struct.PARAM_NAME_ATT, False)
            param_value = struct.getAttribute(
                node_param, struct.PARAM_VALUE_ATT, False)

            if param_name is None or len(param_name) == 0:
                raise ParsingException(
                    "MotifStatisticsCommStruct.getMotifAttributes : Malformed parameter - unable to retrieve parameter name in motif '"
                    + motif.name + "'")
            if param_value is None or len(param_value) == 0:
                raise ParsingException(
                    "MotifStatisticsCommStruct.getMotifAttributes : Malformed parameter - unable to retrieve parameter value in motif '"
                    + motif.name + "'")

            if param_name == struct.CHI2_PARAM_NAME:
                statistics.chi2 = struct.getTokenAsfloat(param_value, False)
            elif param_name == struct.HISTOGRAM_GRAPH_PATH_PARAM_NAME:
                statistics.histogramGraphPath = param_value
            elif param_name == struct.HISTOGRAM_PARAM_NAME:
                statistics.histogram = param_value.split(
                    struct.HISTOGRAM_ENTRY_SEPARATOR_CHAR)
            elif param_name == struct.NULL_HISTOGRAM_PARAM_NAME:
                statistics.nullHistogram = param_value.split(
                    struct.HISTOGRAM_ENTRY_SEPARATOR_CHAR)
            else:
                Log.log(
                    "MotifStatisticsCommStruct.getMotifAttributes : Unknown attribute name : "
                    + param_name)

        return statistics
Beispiel #18
0
    def getTokenAsfloat(self, token, required=True):
        """Return ``token`` converted to float.

        When the conversion fails, ParsingException is raised if ``required``
        is true; otherwise None is returned. The token is passed through
        str() in the message so a None token cannot break the error report.
        """

        try:
            return float(token)
        except (TypeError, ValueError) as val_exce:
            if required:
                raise ParsingException(
                    "Processor.getTokenAsfloat : Unable to convert the token to float :'"
                    + str(token) + "'. From:\n\t---> " + str(val_exce))
            return None
Beispiel #19
0
 def getCommStructFromXML(commstruct_filepath):
     """Parse the XML file at ``commstruct_filepath`` and fetch its root element.

     Raises ParsingException when an IOError occurs while opening, reading
     or closing the file.

     NOTE(review): in the lines visible here ``root_element`` is not used or
     returned; the end of this function appears to be missing -- confirm
     against the original source.
     """

     commstruct_file = None
     root_element = None

     try:
         commstruct_file = open(commstruct_filepath, "r")
         try:
             tree = parse(commstruct_file)
             root_element = tree.getroot()
         finally:
             # Close the handle even when the XML cannot be parsed.
             commstruct_file.close()
     except IOError as io_exce:
         # The message is one concatenated string (a stray comma used to
         # split it into two constructor arguments).
         raise ParsingException("BedSeqAlignmentStatsCommStruct.getCommStructFromXML : Unable to open/close XML commstruct_file '" + commstruct_filepath + "' : " + str(io_exce))
Beispiel #20
0
    def getTokenAsint(token, required=True):
        """Return ``token`` converted to int.

        When the conversion fails, ParsingException is raised if ``required``
        is true; otherwise None is returned. The token is passed through
        str() in the message so a None token cannot break the error report.
        """

        try:
            return int(token)
        except (TypeError, ValueError) as val_exce:
            if required:
                raise ParsingException(
                    "BEDParser.getTokenAsint : Unable to convert the token to int :'"
                    + str(token) + "'. From:\n\t---> " + str(val_exce))
            return None
Beispiel #21
0
 def getBEDSequence(node_bedseq, comm_struct):
     """Read the BED sequence attributes of ``node_bedseq``.

     Species, chromosome, start and end are read as required attributes;
     score, max peak and id as optional ones.

     Raises ParsingException when one of the attribute readers raises it.

     NOTE(review): in the lines visible here the values read are not used
     and ``comm_struct`` is untouched; the end of this function appears to
     be missing -- confirm against the original source.
     """

     try:
         species = CommStruct.getAttribute(node_bedseq, BedSeqAlignmentStatsCommStruct.BEDSEQ_SPECIES_ATT)
         chrom = CommStruct.getAttribute(node_bedseq, BedSeqAlignmentStatsCommStruct.BEDSEQ_CHROM_ATT)
         start = CommStruct.getAttributeAsint(node_bedseq, BedSeqAlignmentStatsCommStruct.BEDSEQ_START_ATT)
         end = CommStruct.getAttributeAsint(node_bedseq, BedSeqAlignmentStatsCommStruct.BEDSEQ_END_ATT)
         score = CommStruct.getAttributeAsint(node_bedseq, BedSeqAlignmentStatsCommStruct.BEDSEQ_SCORE_ATT, False)
         peak_max = CommStruct.getAttributeAsint(node_bedseq, BedSeqAlignmentStatsCommStruct.BEDSEQ_MAX_PEAK_ATT, False)
         peak_id = CommStruct.getAttribute(node_bedseq, BedSeqAlignmentStatsCommStruct.BEDSEQ_ID_ATT, False)
     except ParsingException as par_exce:
         raise ParsingException("BedSeqAlignmentStatsCommStruct.getBEDSequence : Malformed BED Sequence - some attributes are not numbers. From:\n\t---> " + str(par_exce))
Beispiel #22
0
    def getStatistics(statistics_node, comm_struct):
        """Copy the parameter children of a node into ``comm_struct``.

        Every child must carry the parameter tag; its name/value attributes
        are stored in ``comm_struct.paramStatistics``.

        Raises ParsingException for a child carrying any other tag.
        """

        for son_node in statistics_node:
            tag = son_node.tag.lower()
            if tag != BedSeqCommStruct.PARAM_TAG:
                raise ParsingException(
                    "BedSeqCommStruct.getStatistics : The statistics contains an unauthorized element : '"
                    + tag + "'")

            key = CommStruct.getAttribute(son_node,
                                          BedSeqCommStruct.PARAM_NAME_ATT)
            value = CommStruct.getAttribute(son_node,
                                            BedSeqCommStruct.PARAM_VALUE_ATT)
            comm_struct.paramStatistics[key] = value
    def getCommStructFromXML(commstruct_filepath):
        """Parse the XML file at ``commstruct_filepath`` and fetch its root element.

        Raises ParsingException when an IOError occurs while opening, reading
        or closing the file.

        NOTE(review): in the lines visible here ``root_element`` is not used
        or returned; the end of this function appears to be missing --
        confirm against the original source.
        """

        xml_file = None
        root_element = None

        try:
            xml_file = open(commstruct_filepath, "r")
            try:
                tree = parse(xml_file)
                root_element = tree.getroot()
            finally:
                # Close the handle even when the XML cannot be parsed.
                xml_file.close()
        except IOError as io_exce:
            # The message is one concatenated string (a stray comma used to
            # split it into two constructor arguments).
            raise ParsingException(
                "MotifStatisticsCommStruct.getCommStructFromXML : Unable to open/close XML file '"
                + commstruct_filepath + "' : " + str(io_exce))
Beispiel #24
0
 def getPipelines(pipelines_filepath):
     """Parse the pipelines XML file and fetch its root element.

     ``PipelineXMLParser.RANK`` is reset to 0 before parsing.

     Raises ParsingException when an IOError occurs while opening, reading
     or closing the file.

     NOTE(review): in the lines visible here ``root_element`` is not used or
     returned; the end of this function appears to be missing -- confirm
     against the original source.
     """

     PipelineXMLParser.RANK = 0

     xml_file = None
     root_element = None

     try:
         xml_file = FileUtils.openFile(pipelines_filepath)
         try:
             tree = parse(xml_file)
             root_element = tree.getroot()
         finally:
             # Close the handle even when the XML cannot be parsed.
             xml_file.close()
     except IOError as io_exce:
         raise ParsingException("PipelineXMLParser.getPipelines : unable to open/close XML file '" + pipelines_filepath + "'. From:\n\t---> " + str(io_exce))
Beispiel #25
0
    def getMotifsDetailsFromJaspar():
        """Read motif details from the JASPAR flat files.

        ``MATRIX.txt`` gives, per motif number, a name (tokens 2 and 3 joined
        by '.') and an id (tokens 4 and following concatenated).
        ``MATRIX_ANNOTATION.txt`` gives the family, class and type of the
        motifs whose number was found in the matrix file.

        IOError and ParsingException are not propagated: they are logged.

        NOTE(review): in the lines visible here the dictionaries that are
        built are not returned; the end of this function appears to be
        missing -- confirm against the original source.
        """

        matrix_path = os.path.join(MotifUtils.JASPAR_FLAT_DB_PATH, "MATRIX.txt")
        matrix_annotation_path = os.path.join(MotifUtils.JASPAR_FLAT_DB_PATH, "MATRIX_ANNOTATION.txt")

        names = {}
        ids = {}
        families = {}
        types = {}
        classes = {}

        try:
            # Both files are opened before anything is read, as before; the
            # with-blocks close them on every exit path.
            with open(matrix_path, "r") as matrix_file:
                with open(matrix_annotation_path, "r") as matrix_annotation_file:

                    for line in matrix_file:
                        tokens = line.split()
                        if len(tokens) < 5:
                            raise ParsingException("MotifUtils.getMotifsDetailsFromJaspar : Matrix file is not correctly formatted: 5 columns required while " + str(len(tokens)) + " columns are found")
                        current_num = tokens[0]
                        current_name = tokens[2] + "." + tokens[3]
                        names[current_num] = current_name
                        ids[current_name] = "".join(tokens[4:])

                    for line in matrix_annotation_file:
                        tokens = line.split()
                        if not tokens:
                            # Blank line: nothing to read (used to raise IndexError).
                            continue
                        current_num = tokens[0]
                        if current_num not in names:
                            Log.log("MotifUtils.getMotifsDetailsFromJaspar : Motif number was not detected in matrix file : " + current_num)
                            continue
                        if len(tokens) < 2:
                            # No annotation key on this line.
                            continue
                        current_key = tokens[1]
                        current_value = "".join(tokens[2:])
                        motif_name = names[current_num]
                        if current_key == "family":
                            families[motif_name] = current_value
                        elif current_key == "class":
                            classes[motif_name] = current_value
                        elif current_key == "type":
                            types[motif_name] = current_value
        except (IOError, ParsingException) as exce:
            Log.log("MotifUtils.getMotifsDetailsFromJaspar : unable to read motifs definition. From:\n\t---> " + str(exce))
    def getMotif(node_motif, comm_struct):
        """Create a Motif from node_motif and register it in comm_struct.

        The motif name and consensus are read from node_motif through
        CommStruct.getAttribute. The motif is added to comm_struct together
        with the statistics returned by
        MotifStatisticsCommStruct.getMotifStatistics.

        Args:
            node_motif: node holding the motif attributes, as accepted by
                CommStruct.getAttribute.
            comm_struct: object receiving the motif (addMotif) and its
                statistics (addMotifStatistics).

        Raises:
            ParsingException: if the name attribute is None.
        """
        name = CommStruct.getAttribute(
            node_motif, MotifStatisticsCommStruct.MOTIF_NAME_ATT)
        # NOTE(review): the trailing False presumably marks the consensus
        # attribute as optional -- confirm against CommStruct.getAttribute.
        consensus = CommStruct.getAttribute(
            node_motif, MotifStatisticsCommStruct.MOTIF_CONSENSUS_ATT, False)

        if name is None:
            # NOTE(review): the message names getAlignmentMotifs although this
            # function is getMotif; text kept unchanged for existing callers.
            raise ParsingException(
                "MotifStatisticsCommStruct.getAlignmentMotifs : The motif is missing required attribute 'name'"
            )

        motif = Motif(0, 0, name, None)
        if consensus is not None:
            motif.consensus = consensus
        comm_struct.addMotif(motif)
        statistics = MotifStatisticsCommStruct.getMotifStatistics(
            node_motif, motif)
        comm_struct.addMotifStatistics(motif, statistics)
Beispiel #27
0
    def getBEDSequenceDictionnary(species, bed_filepath, extension_5p,
                                  extension_3p):
        """Parse a BED file into BEDSequence objects grouped by their key.

        Each line is split on whitespace. Lines with too few columns (not
        more than BEDParser._endindex_col) are logged and skipped; lines
        whose chromosome column starts with "#" are skipped silently.
        Chromosome names are lower-cased and names shorter than 4 characters
        get a "chr" prefix. Lines where start is not lower than end are
        logged and skipped.

        Args:
            species: passed as first argument to BEDSequence.
            bed_filepath: path of the BED file to read.
            extension_5p: amount subtracted from the start coordinate
                (the result is clamped at 0).
            extension_3p: amount added to the end coordinate.

        Raises:
            ParsingException: re-raised with context when a ParsingException
                occurs while parsing (e.g. from BEDParser.getTokenAsint).

        NOTE(review): the code visible here fills sequence_dic but never
        returns it -- confirm against the complete source.
        """
        sequence_dic = {}

        try:
            # 'with' guarantees the BED file is closed, even on a parse error
            with open(bed_filepath) as input_file:
                for line in input_file:
                    tokens = line.split()
                    if len(tokens) <= BEDParser._endindex_col:
                        Log.log("No 'chr' in line :" + line)
                        continue

                    chrom = tokens[BEDParser._chrom_col].lower()
                    if chrom[0:1] == "#":
                        continue
                    if len(chrom) < 4:
                        chrom = "chr" + chrom

                    start = BEDParser.getTokenAsint(
                        tokens[BEDParser._startindex_col])
                    end = BEDParser.getTokenAsint(
                        tokens[BEDParser._endindex_col])
                    if not start < end:
                        Log.log(
                            "BEDParser.getBEDSequenceDictionnary : A sequence has inversed start and end coordinates : "
                            + line)
                        continue

                    # Extend the region on both sides, never below position 0
                    start = max(start - extension_5p, 0)
                    end = end + extension_3p

                    bedsequence = BEDSequence(species, chrom, start, end)
                    if len(tokens) > BEDParser._id_col:
                        bedsequence.id = tokens[BEDParser._id_col]

                    # Sequences sharing the same key are collected in one list
                    sequence_dic.setdefault(bedsequence.getKey(),
                                            []).append(bedsequence)
        except ParsingException as par_exce:
            raise ParsingException(
                "BEDParser.getBEDSequenceDictionnary : Some attributes are mor numbers. From:\n\t-->  "
                + str(par_exce))
Beispiel #28
0
    def getConfigFromFile(self):
        """Read the configuration file of this component into a dictionary.

        The file located at self.getConfigFilePath() is opened through
        FileUtils.openFile. Each line is split on "=" and must yield exactly
        two parts; the first is used as key and the second, minus one
        trailing newline, as value.

        Returns:
            dict: the parsed key/value pairs, or None if an IOError occurs
            while locating, opening, reading or closing the file.

        Raises:
            ParsingException: if a line does not split into exactly two
                parts on "=".
        """
        config = {}

        try:
            config_file = FileUtils.openFile(self.getConfigFilePath())
            try:
                for line in config_file:
                    tokens = line.split("=")
                    if len(tokens) != 2:
                        raise ParsingException(
                            "Component.getConfigFromFile : Wrongly formatted config file. Should have '<param_name> = <param_value>' instead of "
                            + line)
                    value = tokens[1]
                    # endswith() is safe on an empty value (e.g. a final
                    # "key=" line without newline), unlike indexing [-1]
                    if value.endswith("\n"):
                        value = value[:-1]
                    config[tokens[0]] = value
            finally:
                # Release the handle even when a malformed line aborts parsing
                config_file.close()
        except IOError:
            return None

        return config
Beispiel #29
0
 def toXMLFile(self, output_filepath):
     """Placeholder to be overridden by inheriting classes.

     This base version performs no work and always raises a
     ParsingException; output_filepath is not used here.
     """
     message = "The method toXMLFile must be implemented at the inherited class level"
     raise ParsingException(message)
         ) == BedSeqAlignmentStatsCommStruct.ALIGNMENT_TAG:
             seqalign = BedSeqAlignmentStatsCommStruct.getAlignment(
                 align_node, bedseq, comm_struct)
             for sub_node in align_node:
                 if sub_node.tag.lower(
                 ) == BedSeqAlignmentStatsCommStruct.SEQUENCES_TAG:
                     BedSeqAlignmentStatsCommStruct.getAlignmentSequences(
                         sub_node, bedseq, seqalign)
                 elif sub_node.tag.lower(
                 ) == BedSeqAlignmentStatsCommStruct.MOTIFS_TAG:
                     BedSeqAlignmentStatsCommStruct.getAlignmentMotifs(
                         sub_node, bedseq, seqalign)
                 else:
                     raise ParsingException(
                         "BedSeqAlignmentStatsCommStruct.getCommStructFromXML : The alignment of '"
                         + bedseq.toString() +
                         "' contains an unauthorized element : '" +
                         sub_node.tag.lower() + "'")
         else:
             raise ParsingException(
                 "BedSeqAlignmentStatsCommStruct.getCommStructFromXML : The BED Sequence '"
                 + bedseq.toString() +
                 "' contains an unauthorized element : '" +
                 align_node.tag.lower() + "'")
 elif root_son.tag.lower(
 ) == BedSeqAlignmentStatsCommStruct.STATISTICS_TAG:
     BedSeqAlignmentStatsCommStruct.getStatistics(
         root_son, comm_struct)
 else:
     raise ParsingException(
         "BedSeqAlignmentStatsCommStruct.getCommStructFromXML : The data contains an unauthorized element : '"