Exemple #1
0
 def fromXMLFile( input_filepath):
     """Build a CommStruct from the XML file located at input_filepath.

     The parsing is delegated to
     BedSeqAlignmentStatsCommStruct.getCommStructFromXML. When it raises a
     ParsingException, the error is logged and None is returned.
     """
     try:
         comm_struct = BedSeqAlignmentStatsCommStruct.getCommStructFromXML( input_filepath)
     except ParsingException as par_exce:
         message = "BedSeqAlignmentStatsCommStruct.fromXMLFile : Unable to get CommStruct from XML file '" + input_filepath + "'. From:\n\t---> " + str( par_exce)
         Log.log( message)
         return None
     else:
         return comm_struct
Exemple #2
0
    def toXMLFile( outpath, pipelines):
        """Serialize the given pipelines to an XML file in directory outpath.

        One PIPELINES_TAG root element is built, holding one element per
        pipeline, one child per component and one grand-child per component
        parameter. The tree is written to
        "<outpath>/<name of the last pipeline>.xml".

        Parameters:
            outpath: directory in which the XML file is written.
            pipelines: iterable of pipelines exposing `name` and
                `componentList`; each component exposes `processorName` and a
                `parameters` dictionary.

        An IOError raised while writing is logged, not propagated. Nothing is
        written when `pipelines` is empty, since no file name can be derived.
        """
        pipelines_element = Element( PipelineXMLParser.PIPELINES_TAG)

        # Name of the last pipeline seen: it gives its name to the output file
        last_pipeline_name = None

        for pipeline in pipelines:
            pipeline_element = Element( PipelineXMLParser.PIPELINE_TAG)
            pipelines_element.append( pipeline_element)
            pipeline_element.attrib[ PipelineXMLParser.PIPELINE_NAME_ATT] = pipeline.name
            last_pipeline_name = pipeline.name

            for component in pipeline.componentList:
                component_element = Element( PipelineXMLParser.COMPONENT_TAG)
                pipeline_element.append( component_element)
                component_element.attrib[ PipelineXMLParser.COMPONENT_PROCESSOR_ATT] = component.processorName
                for param_name, param_value in component.parameters.items():
                    param_element = Element( PipelineXMLParser.PARAM_TAG)
                    component_element.append( param_element)
                    param_element.attrib[ PipelineXMLParser.PARAM_NAME_ATT] = str( param_name)
                    param_element.attrib[ PipelineXMLParser.PARAM_VALUE_ATT] = str( param_value)

        if last_pipeline_name is None:
            # Without any pipeline there is no name to build the file path from
            Log.log( "PipelineXMLParser.toXMLFile : No pipeline to write to XML file")
            return

        try:
            PipelineXMLParser.indent( pipelines_element, 0)
            outfile = os.path.join( outpath, last_pipeline_name + ".xml")
            ElementTree( pipelines_element).write( outfile)
        except IOError as exce:
            Log.log( "PipelineXMLParser.toXMLFile : Unable to write Pipelines to XML file. From:\n\t---> " + str( exce))
Exemple #3
0
    def closeFile(self, file):
        """Close a file object, logging (not raising) any IOError.

        Parameters:
            file: an open file-like object exposing close().
        """
        try:
            file.close()
        except IOError as exce:
            # A file object cannot be concatenated to a string: report its
            # name when it has one, its string form otherwise.
            file_label = str(getattr(file, "name", file))
            Log.log("MAFProcessor.closeFile : Enable to close file '" +
                    file_label + "'. From:\n\t--> " + str(exce))
    def createLogos(self, input_commstruct):
        """Write a ".tf" definition file and create a logo for each motif.

        The motif names are the keys of input_commstruct.motifStatistics.
        Their definitions are looked up in the database files listed in
        self.dbFiles (located in self.dbPath). For every motif having a
        definition, the definition lines are written to
        "<logos directory>/<motif name>.tf" and RSATUtils.createLogoFromTF is
        called on that file. Motifs without definition are only logged.

        Parameters:
            input_commstruct: object exposing a `motifStatistics` dictionary
                keyed by motif name.
        """
        db_file_path = [os.path.join(self.dbPath, db_file)
                        for db_file in self.dbFiles]

        motif_name_list = input_commstruct.motifStatistics.keys()
        motif_definition = MotifUtils.getMotifsDefinitionFromTF(
            motif_name_list, db_file_path)
        logos_path = os.path.join(self.outPath,
                                  FinalOutputProcessor.LOGOS_DIR_NAME)
        FileUtils.createDirectory(logos_path)

        for motif_name in motif_name_list:
            if motif_name not in motif_definition:
                Log.log(
                    "FinalOutputProcessor.createLogos : No definition found to create logo for motif : "
                    + motif_name)
                continue

            file_name = motif_name + ".tf"
            # The context manager flushes and closes the file even when a
            # write fails, so the logo tool always reads a complete file
            with open(os.path.join(logos_path, file_name), "w") as def_file:
                def_file.writelines(motif_definition[motif_name])
            RSATUtils.createLogoFromTF(logos_path, file_name, motif_name)
Exemple #5
0
    def verifyConfig(self):
        """Tell whether the previous configuration equals the current one.

        The previous configuration is obtained from self.getConfigFromFile()
        and compared to self.parameters.

        Returns:
            True when a previous configuration exists and holds exactly the
            same parameter names and values as self.parameters. False when no
            previous configuration is found or when it differs; both cases
            are traced.
        """
        previous_config = self.getConfigFromFile()

        if previous_config is None:
            Log.trace(
                "Component.verifyConfig : No config file found for processor '"
                + self.processorName + "'. Executing it")
            return False

        current_config = self.parameters
        # Same size and every previous entry found unchanged in the current
        # parameters means both configurations are identical (two empty
        # configurations are identical too)
        same = len(previous_config) == len(current_config) and all(
            param_name in current_config
            and previous_config[param_name] == current_config[param_name]
            for param_name in previous_config)

        if not same:
            Log.trace("Component.verifyConfig : Configuration of processor '" +
                      self.processorName +
                      "' has changed since previous run: Previous = " +
                      str(previous_config) + " while current = " +
                      str(self.parameters) + ". Executing processor")

        return same
Exemple #6
0
 def toXMLFile( self, output_filepath):
     """Write this CommStruct as an indented XML document.

     The element tree comes from self.convertCommStructToElementTree() and is
     written to output_filepath. An IOError is logged, not propagated.
     """
     try:
         tree_root = self.convertCommStructToElementTree()
         self.indent( tree_root, 0)
         xml_tree = ElementTree( tree_root)
         xml_tree.write( output_filepath)
     except IOError as exce:
         Log.log( "BedSeqAlignmentStatsCommStruct.toXMLFile : Unable to write CommStruct to XML file. From:\n\t---> " + str( exce))
Exemple #7
0
    def finalizeSequences(self, keep_gaps=False):
        """Pad all sequences to a common length, then drop unwanted columns.

        Sequences shorter than the alignment length are completed in place
        with Constants.SEQUENCE_INSERTION_CHAR. Columns are then scanned from
        left to right, the residue of the reference species driving the
        decision:
          - a column whose reference residue is Constants.SEQUENCE_INIT_CHAR
            is always kept;
          - when keep_gaps is False, a column is removed only if its
            reference residue is the insertion character;
          - when keep_gaps is True, a column is removed only if every species
            holds an insertion or an initialization character in it.
        self.totalLength is finally decreased by the number of removed columns.

        Parameters:
            keep_gaps: selects the column removal rule described above.
        """

        # Compute the length of the longest sequence in the MSA
        # (self.totalLength is only ever increased here)
        for species in self.sequences.keys():
            sequence = self.sequences[species]
            if len(sequence) > self.totalLength:
                self.totalLength = len(sequence)

        # Add insertion characters at the end of the sequences that do not have the right length
        for species in self.sequences.keys():
            sequence = self.sequences[species]
            if len(sequence) != self.totalLength:
                Log.info(
                    "SequenceAlignment.finalizeSequences : Sequence does not have the right lenght for this alignment : Alignement length = "
                    + str(self.totalLength) + " DNA sequence length = " +
                    str(len(sequence)) + " for species= " + species +
                    ". Completing sequence")
                for fix_index in range(self.totalLength - len(sequence)):
                    sequence.append(Constants.SEQUENCE_INSERTION_CHAR)

        # Analyse each position of the MSA and decide if the column should be kept or removed
        species_list = self.sequences.keys()
        # Number of columns popped so far: sequences shrink while scanning, so
        # the original column 'index' now sits at 'index - removed_char'
        removed_char = 0
        for index in range(self.totalLength):
            all_insertion = True
            # if the residue on the reference species sequence indicates a missing information, it means that
            # the MAF files provide no information for this column. So the column is kept
            char = self.sequences[self.referenceSpecies][index - removed_char]
            if char == Constants.SEQUENCE_INIT_CHAR:
                continue

            # if keep_gaps is False, remove the column if the residue on the reference species sequence
            # is an insertion character
            if keep_gaps == False:
                if char == Constants.SEQUENCE_INSERTION_CHAR:
                    for index_sub_species in range(len(species_list)):
                        species = species_list[index_sub_species]
                        self.sequences[species].pop(index - removed_char)
                    removed_char += 1
                # In this mode the all-insertion test below is never applied
                continue

            # remove the columns that contain only insertion characters or initialization characters
            for index_species in range(len(species_list)):
                species = species_list[index_species]
                char = self.sequences[species][index - removed_char]
                if char != Constants.SEQUENCE_INSERTION_CHAR and char != Constants.SEQUENCE_INIT_CHAR:
                    all_insertion = False
                    break
            if all_insertion == True:
                for index_sub_species in range(len(species_list)):
                    species = species_list[index_sub_species]
                    self.sequences[species].pop(index - removed_char)
                removed_char += 1

        # The alignment length is now the number of columns that were kept
        self.totalLength = self.totalLength - removed_char
    def computeStartIndex(self, tokens,  strand):
        """Return the start position read from the columns of a MAF line.

        On the positive strand the start column is returned unchanged. On any
        other strand the start is transformed into
        source_size + 1 - (text_length + start), the three values being read
        from `tokens`.
        """
        columns = MAFIndexerProcessor

        # Positive strand: the start column can be used as is
        if strand == Constants.POSITIVE_STRAND:
            return self.getIntValue( tokens[ columns._startindex_col])

        # The sequence direction is inversed: coordinates must be transformed
        Log.trace( "MAFIndexer : Negative strand detected")
        source_size = self.getIntValue( tokens[ columns._source_size_col])
        text_length = self.getIntValue( tokens[ columns._textlength_col])
        rev_start = self.getIntValue( tokens[ columns._startindex_col])
        return source_size + 1 - (text_length + rev_start)
    def addSites(self, output_commstruct):
        """Generate sites for the configured motifs and implant them.

        Nothing is done (apart from a trace) when the requested number of
        sites is not positive. Otherwise the component output directory is
        recreated, sites are generated for each motif definition (optimal
        sites when the optimize parameter is "true", random sites otherwise)
        and handed to self.implantSites together with output_commstruct.
        """
        # Number of sites to implant: stop here when none is requested
        site_number = self.getParameterAsint(
            ImplantSitesProcessor.SITE_NUMBER_PARAM)
        if site_number <= 0:
            Log.trace(
                "ImplantSitesProcessor.addSites : Motif sites implantation not requested"
            )
            return

        # Remaining algorithm parameters
        motif_name_list = self.getParameter(
            ImplantSitesProcessor.MOTIF_LIST_PARAM).split()
        optimize_flag = self.getParameter(
            ImplantSitesProcessor.OPTIMIZE_MOTIF_PARAM)
        optimize_motif = optimize_flag.lower() == "true"
        database_file_path = self.getParameter(
            ImplantSitesProcessor.DATABASE_FILE_PATH_PARAM)
        distribution_mode = self.getParameter(
            ImplantSitesProcessor.DISTRIBUTION_MODE_PARAM).lower()

        # PWM of the requested motifs
        motif_def_list = self.getMotifDefinitions(motif_name_list,
                                                  database_file_path)

        # Start from an empty output directory
        dir_path = os.path.join(self.component.outputDir,
                                self.component.getComponentPrefix())
        shutil.rmtree(dir_path, ignore_errors=True)
        os.mkdir(dir_path)

        # Sites of each motif
        motif_sites = {}
        for motif in motif_def_list:
            if optimize_motif:
                motif_sites[motif] = self.generateOptimalSites(
                    motif, site_number)
            else:
                motif_file_path = self.outputMotifDefinition(motif, dir_path)
                motif_sites[motif] = self.generateRandomSites(
                    motif, motif_file_path, site_number)

        # Implantation of the sites in the MSA
        self.implantSites(motif_sites, distribution_mode, output_commstruct,
                          dir_path)
Exemple #10
0
    def parseFile(self, file_name, is_chrom_file):
        """Parse a MAF file, using its index file when one exists.

        The file is recognized as MAF when the first token of its first
        non-blank line is "##maf". The index file is looked up at
        file_name + "index": when it can be opened the blocks are parsed with
        self.parseBlockListWithIndex, otherwise with
        self.parseBlockListWithoutIndex. Opened files are closed whatever the
        outcome of the parsing.

        Parameters:
            file_name: path of the MAF file.
            is_chrom_file: forwarded to self.parseBlockListWithoutIndex.

        Raises:
            ParsingException: when the file is not a MAF file or when an
                IOError occurs while opening or reading it.
        """
        try:
            input_file = open(file_name, 'r')
            try:
                # Verify if the token '##maf' indicating a MAF file is found in the first lines
                is_maf_file = False
                while True:
                    line = input_file.readline()
                    if len(line) == 0:
                        break
                    if not line.isspace():
                        tokens = line.split()
                        if len(tokens) > 0 and tokens[0] == "##maf":
                            is_maf_file = True
                            break

                if not is_maf_file:
                    raise ParsingException(
                        "MAFProcessor.parseFile : The file '" + file_name +
                        "' is not a MAF file")

                # The index file is optional: failing to open it is not an error
                index_path = file_name + "index"
                try:
                    input_index_file = open(index_path, "r")
                except IOError:
                    input_index_file = None

                if input_index_file is not None:
                    Log.trace("MAFProcessor.parseFile : parsing file '" +
                              file_name + "' using index '" + index_path + "'")
                    try:
                        self.parseBlockListWithIndex(input_index_file,
                                                     input_file)
                    finally:
                        self.closeFile(input_index_file)
                else:
                    Log.trace("MAFProcessor.parseFile : parsing file '" +
                              file_name + "'")
                    self.parseBlockListWithoutIndex(input_file, is_chrom_file)
            finally:
                # Release the MAF file on success as well as on failure
                self.closeFile(input_file)
        except IOError as io_exec:
            raise ParsingException(
                "MAFProcessor.parseFile : Enable to open file '" + file_name +
                "'. From:\n\t---> " + str(io_exec))
Exemple #11
0
    def startNewThread(self, file_queue, specialized_file, thread_list):
        """Start a thread parsing the next file of the queue, if any.

        When file_queue is not empty, its next entry is taken, a thread named
        after it and running self.parseFile(entry, specialized_file) is
        created, appended to thread_list and started. Nothing happens when
        the queue is empty.
        """
        if file_queue.empty():
            return

        next_file = file_queue.get()
        worker = threading.Thread(group=None,
                                  target=self.parseFile,
                                  name=next_file,
                                  args=(next_file, specialized_file))
        thread_list.append(worker)
        active_count = str(len(thread_list))
        Log.trace(
            "MAFProcessor.startNewThread : Starting new thread to parse file : '"
            + next_file + "'. Number of active Thread = " + active_count)
        worker.start()
Exemple #12
0
    def addSequence(self, species, sequence):
        """Register the sequence of a species in this alignment.

        Nothing is done when species or sequence is None. The first sequence
        added (self.totalLength still 0) sets the alignment length. A later
        sequence of a different length is logged and, when shorter, completed
        in place with Constants.SEQUENCE_INSERTION_CHAR.
        """
        if species is None or sequence is None:
            return

        self.sequences[species] = sequence
        seq_length = len(sequence)

        if self.totalLength == 0:
            self.totalLength = seq_length
        elif seq_length != self.totalLength:
            Log.log(
                "SequenceAlignment.addSequence : Added sequence does not have the right lenght for this alignment : Alignement length = "
                + str(self.totalLength) + " DNA sequence length = " +
                str(seq_length))
            # The range is empty when the sequence is longer than the alignment
            missing_count = self.totalLength - len(sequence)
            for _ in range(missing_count):
                sequence.append(Constants.SEQUENCE_INSERTION_CHAR)
    def outputClassification(self, input_commstruct, analysis, limit_value,
                             parameter_dic):
        """Write the motif classification to XML and copy its web resources.

        The XML element built by self.toXML is indented and written, after an
        XML declaration and a stylesheet instruction, to
        "<self.outPath>/<pipeline name>_MotifClassification.xml". The XSL, JS
        and CSS files found under "resources/xsl/classification" of the
        install directory are then copied to self.outPath.

        Parameters:
            input_commstruct, analysis, limit_value, parameter_dic: forwarded
                unchanged to self.toXML.

        An IOError is logged, not propagated.
        """
        try:
            # Create the XML element
            root_element = self.toXML(input_commstruct, analysis, limit_value,
                                      parameter_dic)
            self.indent(root_element, 0)
            doc = ET.ElementTree(root_element)

            # Output the XML to file; the file is closed even if writing fails
            classification_file_path = os.path.join(
                self.outPath,
                self.component.pipelineName + "_MotifClassification.xml")
            with open(classification_file_path, 'w') as outfile:
                outfile.write('<?xml version="1.0" encoding="utf-8"?>\n')
                outfile.write(
                    '<?xml-stylesheet type="text/xsl" href="classification.xsl"?>\n'
                )
                doc.write(outfile)

            # Copy the XSL file and its resources in the same directory than the XML
            resource_dir = os.path.join(
                self.component.getParameter(Constants.INSTALL_DIR_PARAM),
                "resources/xsl/classification")
            resource_names = ("classification.xsl", "RSAT_menu.js",
                              "jquery.dataTables.js", "results.css",
                              "peak-footprints.css")
            for resource_name in resource_names:
                shutil.copy(os.path.join(resource_dir, resource_name),
                            self.outPath)
        except IOError as exce:
            Log.log(
                "ClassificationProcessor.outputClassification : Unable to write classification to XML file. From:\n\t---> "
                + str(exce))
 def execute( self, input_commstructs):
     """Index every MAF file designated by the processor parameters.

     The files are listed by FileUtils.getFileList from the input MAF path
     and the reference species. Each one is passed to self.parseFile and the
     component progression is updated after each file.

     Raises:
         ExecutionException: when FileUtils.getFileList returns None.
     """
     source_maffile = self.getParameter( MAFIndexerProcessor.INPUT_MAF_FILE_PARAM)
     self.referenceSpecies = self.getParameter( MAFIndexerProcessor.REFERENCE_SPECIES_PARAM)

     # Collect the MAF files to index
     maf_file_list = FileUtils.getFileList( source_maffile, "maf", self.referenceSpecies)
     if maf_file_list is None:
         raise ExecutionException( "MAFIndexerProcessor.execute : The path '" + source_maffile + "' does not point to a MAF file or a directory containing MAF files and does not contain a subdirectory '" + self.referenceSpecies + "' containing MAF files.")

     # Index the files one by one, reporting the fraction already done
     for count_file, maf_file_path in enumerate( maf_file_list, 1):
         Log.trace( "MAFIndexerProcessor.execute : Indexing " + maf_file_path)
         self.parseFile( maf_file_path)
         progression = count_file/float( len( maf_file_list))
         ProgressionManager.setComponentProgression( self.component, progression)
    def executePipelines(self):
        """Prepare the execution of the request at the head of self.serverQueue.

        For that request: the output directories are created, the logs are
        switched to the output directory, the pipelines are read from the XML
        file, the options are applied to them and the result is dumped back
        to XML in the output directory.

        Raises:
            ParsingException: when the pipelines definition cannot be read.

        NOTE(review): in the code visible here the processed entry is never
        removed from self.serverQueue, and `result` and `resume` are never
        used; presumably the method continues beyond this point - confirm.
        """

        result = True

        while len(self.serverQueue) > 0:

            # Each queue entry holds, in order: pipelines file path, pipeline
            # options, verbosity, resume flag and working directory
            params = self.serverQueue[0]
            pipelines_filepath = params[0]
            pipeline_options = params[1]
            try:
                verbosity = int(params[2])
            except ValueError:
                # A verbosity that is not an integer falls back to 1
                verbosity = 1
            resume = (params[3].lower() == "true")
            working_dir = params[4]

            # Modifies the config if required and initialize logs and output directory
            if working_dir != None and len(working_dir) > 0:
                self.config[PFConstants.BASE_OUTPUT_DIR_PARAM] = working_dir

            # Verify the base output dir and the output dir are created and create them if not
            FileUtils.createDirectory(
                self.config[PFConstants.BASE_OUTPUT_DIR_PARAM], 0777)
            self.config[PFConstants.OUTPUT_DIR_PARAM] = os.path.join(
                self.getParameter(PFConstants.BASE_OUTPUT_DIR_PARAM),
                PFConstants.OUTPUT_DIR_NAME)
            FileUtils.createDirectory(
                self.config[PFConstants.OUTPUT_DIR_PARAM], 0777)

            # Switch log location
            Log.switchFiles(self.getParameter(PFConstants.OUTPUT_DIR_PARAM),
                            verbosity)

            # Parse the XML file to retrieve the pipelines definition
            Log.trace(
                "#################################################################################"
            )
            Log.trace(
                "# PipelineManager.executePipelines : Reading pipelines from : "
                + pipelines_filepath)
            Log.trace(
                "#################################################################################"
            )

            try:
                pipelines = PipelineXMLParser.getPipelines(pipelines_filepath)
                OptionManager.applyOptions(pipelines, pipeline_options)
                # Keep a copy of the pipelines, options applied, in the output directory
                PipelineXMLParser.toXMLFile(
                    self.config[PFConstants.OUTPUT_DIR_PARAM], pipelines)
            except SyntaxError, syn_exce:
                raise ParsingException(
                    "PipelineManager.executePipelines : Unable to read definition of pipelines from XML file: '"
                    + pipelines_filepath + "'. From:\n\t---> " + str(syn_exce))
            except ParsingException, par_exce:
                raise ParsingException(
                    "PipelineManager.executePipelines : Unable to read definition of pipelines from XML file: '"
                    + pipelines_filepath + "'. From:\n\t---> " + str(par_exce))
    def execute(self, input_commstructs):
        """Implant motif sites in the first input CommStruct and return it.

        The task progression is set to 0.0 before and to 1.0 after the call
        to self.addSites.

        Raises:
            ExecutionException: when input_commstructs is None or empty.
        """
        if input_commstructs is None or len(input_commstructs) == 0:
            raise ExecutionException(
                "ImplantSitesProcessor.execute : No inputs")

        task_label = "Implanting motif sites"
        input_commstruct = input_commstructs[0]

        # Implant TF Motif binding sites in MSA Sequences
        Log.trace("ImplantSitesProcessor.execute : Implanting motif sites")
        ProgressionManager.setTaskProgression(task_label, self.component, 0.0)
        self.addSites(input_commstruct)
        ProgressionManager.setTaskProgression(task_label, self.component, 1.0)

        return input_commstruct
Exemple #17
0
    def execute(self, input_commstructs):
        """Mark the conserved blocks of each alignment of the first input.

        A PWM is built for every alignment of input_commstruct.bedToMA from
        the reference species followed by the desired species. With the
        'None' algorithm the whole alignment becomes a single motif;
        otherwise self.analyzeConservedBlocks is called. The first input
        CommStruct is returned.

        Raises:
            ExecutionException: when input_commstructs is None or empty.
        """
        if input_commstructs is None or len(input_commstructs) == 0:
            raise ExecutionException("BlockProcessor.execute : No inputs")

        input_commstruct = input_commstructs[0]

        # Processor parameters
        self.windowSize = self.getParameterAsint(
            BlockProcessor.WINDOW_SIZE_PARAM)
        self.residuConservationLimit = self.getParameterAsfloat(
            BlockProcessor.RESIDU_CONSERVATION_LIMIT_PARAM)
        self.windowConservationLimit = self.getParameterAsfloat(
            BlockProcessor.WINDOW_CONSERVATION_LIMIT_PARAM)

        # self.algorithm keeps its current value when the parameter is absent
        requested_algorithm = self.getParameter(BlockProcessor.ALGORITHM_PARAM,
                                                False)
        if requested_algorithm is not None:
            self.algorithm = requested_algorithm.lower()

        reference_species = self.getParameter(
            BlockProcessor.REFERENCE_SPECIES_PARAM)
        desired_species_line = self.getParameter(
            BlockProcessor.DESIRED_SPECIES_LIST_PARAM, False)
        Log.trace("BlockProcessor.execute : Chosen Algorithm is '" +
                  self.algorithm + "'")

        # The reference species always comes first in the list
        self.desiredSpeciesList = [reference_species]
        if desired_species_line is not None:
            self.desiredSpeciesList.extend(desired_species_line.split())

        # Analyze the conserved region in each MSA
        whole_alignment = (
            self.algorithm == BlockProcessor.ALGORITHM_NONE_VALUE)
        for bed_seq in input_commstruct.bedToMA.keys():
            for alignment in input_commstruct.bedToMA[bed_seq]:
                pwm = PWM()
                pwm.initFromAlignment(alignment, self.desiredSpeciesList)
                if whole_alignment:
                    # 'None' algorithm: the entire MSA is considered as conserved
                    new_block = Motif(0, alignment.totalLength, "", pwm)
                    new_block.composeName(alignment.name)
                    alignment.addMotif(new_block, True)
                else:
                    self.analyzeConservedBlocks(pwm, alignment)

        return input_commstruct
Exemple #18
0
    def getMotifsDetailsFromJaspar():
        """Read the motif details from the JASPAR flat database files.

        "MATRIX.txt" is parsed first (motif number, name and identifier), then
        "MATRIX_ANNOTATION.txt" (family, class and type of the motifs found in
        the first file). Both files are located in
        MotifUtils.JASPAR_FLAT_DB_PATH. IOError and ParsingException are
        logged, not propagated.

        NOTE(review): the dictionaries built here are not returned by the
        code as it stands; presumably a return statement is missing - confirm
        the expected result shape with the callers before adding one.
        """
        matrix_path = os.path.join( MotifUtils.JASPAR_FLAT_DB_PATH, "MATRIX.txt")
        matrix_annotation_path = os.path.join( MotifUtils.JASPAR_FLAT_DB_PATH, "MATRIX_ANNOTATION.txt")

        names = {}      # motif number -> motif name
        ids = {}        # motif name -> motif identifier
        families = {}   # motif name -> "family" annotation
        types = {}      # motif name -> "type" annotation
        classes = {}    # motif name -> "class" annotation

        try:
            # Both files are opened before any parsing and closed whatever happens
            with open( matrix_path, "r") as matrix_file:
                with open( matrix_annotation_path, "r") as matrix_annotation_file:

                    for line in matrix_file:
                        tokens = line.split()
                        if len( tokens) < 5:
                            raise ParsingException( "MotifUtils.getMotifsDetailsFromJaspar : Matrix file is not correctly formatted: 5 columns required while " + str( len( tokens)) + " columns are found")
                        current_num = tokens[ 0]
                        current_name = tokens[ 2] + "." + tokens[ 3]
                        names[ current_num] = current_name
                        ids[ current_name] = "".join( tokens[ 4:])

                    for line in matrix_annotation_file:
                        tokens = line.split()
                        if not tokens:
                            # A blank line carries no annotation
                            continue
                        current_num = tokens[ 0]
                        if current_num not in names:
                            Log.log( "MotifUtils.getMotifsDetailsFromJaspar : Motif number was not detected in matrix file : " + current_num)
                            continue
                        current_key = tokens[ 1]
                        current_value = "".join( tokens[ 2:])
                        if current_key == "family":
                            families[ names[ current_num]] = current_value
                        elif current_key == "class":
                            classes[ names[ current_num]] = current_value
                        elif current_key == "type":
                            types[ names[ current_num]] = current_value
        except (IOError, ParsingException) as exce:
            Log.log( "MotifUtils.getMotifsDetailsFromJaspar : unable to read motifs definition. From:\n\t---> " + str( exce))
Exemple #19
0
    def fixIndex(self, text_index):
        """Subtract the reference insertions located up to text_index.

        A negative index is returned unchanged. Otherwise the number of
        Constants.SEQUENCE_INSERTION_CHAR found in the reference species
        sequence at positions 0 to text_index (bounded by the sequence
        length) is subtracted from text_index. When the reference species
        has no sequence, the problem is logged and text_index is returned
        unchanged.
        """
        if text_index < 0:
            return text_index

        if self.referenceSpecies not in self.sequences.keys():
            Log.log(
                "SequenceAlignement.fixIndex : Reference species is not set for Sequence Alignement : "
                + self.name)
            return text_index

        reference = self.sequences[self.referenceSpecies]
        limit = min(text_index + 1, len(reference))
        insertion_count = sum(
            1 for position in range(limit)
            if reference[position] == Constants.SEQUENCE_INSERTION_CHAR)
        return text_index - insertion_count
Exemple #20
0
    def getBEDSequenceDictionnary(species, bed_filepath, extension_5p,
                                  extension_3p):
        """Read a BED file into a dictionary of BEDSequence lists.

        Each line having enough columns and whose chromosome column does not
        start with "#" produces a BEDSequence. The start is decreased by
        extension_5p (not below 0) and the end increased by extension_3p.
        Lines whose start is not lower than their end are logged and ignored.

        Parameters:
            species: species given to each BEDSequence.
            bed_filepath: path of the BED file to read.
            extension_5p: number subtracted from each start coordinate.
            extension_3p: number added to each end coordinate.

        Returns:
            A dictionary mapping each BEDSequence key (getKey()) to the list
            of BEDSequence sharing that key.

        Raises:
            ParsingException: when a coordinate cannot be converted by
                BEDParser.getTokenAsint.
        """
        sequence_dic = {}

        try:
            # The context manager closes the file even when parsing fails
            with open(bed_filepath) as input_file:
                for line in input_file:
                    tokens = line.split()
                    if len(tokens) <= BEDParser._endindex_col:
                        Log.log("No 'chr' in line :" + line)
                        continue

                    chrom = tokens[BEDParser._chrom_col].lower()
                    if chrom[0:1] == "#":
                        continue
                    if len(chrom) < 4:
                        chrom = "chr" + chrom

                    start = BEDParser.getTokenAsint(
                        tokens[BEDParser._startindex_col])
                    end = BEDParser.getTokenAsint(
                        tokens[BEDParser._endindex_col])
                    if start >= end:
                        Log.log(
                            "BEDParser.getBEDSequenceDictionnary : A sequence has inversed start and end coordinates : "
                            + line)
                        continue

                    start = max(start - extension_5p, 0)
                    end = end + extension_3p
                    bedsequence = BEDSequence(species, chrom, start, end)
                    if len(tokens) > BEDParser._id_col:
                        bedsequence.id = tokens[BEDParser._id_col]
                    sequence_dic.setdefault(bedsequence.getKey(),
                                            []).append(bedsequence)
        except ParsingException as par_exce:
            raise ParsingException(
                "BEDParser.getBEDSequenceDictionnary : Some attributes are mor numbers. From:\n\t-->  "
                + str(par_exce))

        return sequence_dic
Exemple #21
0
    def parseBlockListWithoutIndex(self, input_file, is_chrom_file):
        """Read input_file sequentially and parse each alignment block.

        Every non-blank line whose line-type column is "a" starts a block,
        which is handed to self.parseBlock. Reading stops at the end of the
        file, or as soon as a block is reported as not parsed while
        is_chrom_file is true.
        """
        block_count = 0
        while True:
            line = input_file.readline()
            if not line:
                # readline() returns an empty string at end of file
                return
            if line.isspace():
                continue

            fields = line.split()
            if not fields or fields[MAFProcessor._lineType_col] != "a":
                continue

            # A new alignment block starts here
            block_count += 1
            if block_count % 100000 == 0:
                Log.trace(
                    "MAFIndexerProcessor.execute : Number of MSA already parsed : "
                    + str(block_count))
            if not self.parseBlock(input_file) and is_chrom_file:
                return
    def getMotifStatistics(node_motif, motif):
        """Build a MotifStatistics from the parameter nodes of a motif node.

        Each child of node_motif whose tag is PARAM_TAG provides a name and a
        value. Known names fill the chi2, histogram graph path, histogram and
        null histogram attributes; unknown names are logged.

        Raises:
            ParsingException: when a parameter node has no name or no value.
        """
        commstruct = MotifStatisticsCommStruct
        statistics = MotifStatistics()

        for node_param in node_motif:
            if node_param.tag.lower() != commstruct.PARAM_TAG:
                continue

            param_name = commstruct.getAttribute(
                node_param, commstruct.PARAM_NAME_ATT, False)
            param_value = commstruct.getAttribute(
                node_param, commstruct.PARAM_VALUE_ATT, False)

            # Both the name and the value are mandatory
            if param_name is None or len(param_name) == 0:
                raise ParsingException(
                    "MotifStatisticsCommStruct.getMotifAttributes : Malformed parameter - unable to retrieve parameter name in motif '"
                    + motif.name + "'")
            if param_value is None or len(param_value) == 0:
                raise ParsingException(
                    "MotifStatisticsCommStruct.getMotifAttributes : Malformed parameter - unable to retrieve parameter value in motif '"
                    + motif.name + "'")

            if param_name == commstruct.CHI2_PARAM_NAME:
                statistics.chi2 = commstruct.getTokenAsfloat(
                    param_value, False)
            elif param_name == commstruct.HISTOGRAM_GRAPH_PATH_PARAM_NAME:
                statistics.histogramGraphPath = param_value
            elif param_name == commstruct.HISTOGRAM_PARAM_NAME:
                statistics.histogram = param_value.split(
                    commstruct.HISTOGRAM_ENTRY_SEPARATOR_CHAR)
            elif param_name == commstruct.NULL_HISTOGRAM_PARAM_NAME:
                statistics.nullHistogram = param_value.split(
                    commstruct.HISTOGRAM_ENTRY_SEPARATOR_CHAR)
            else:
                Log.log(
                    "MotifStatisticsCommStruct.getMotifAttributes : Unknown attribute name : "
                    + param_name)

        return statistics
    def generateRandomSites(self, motif, motif_file_path, site_number):
        """Generate random sites of a motif with the RSAT random-sites tool.

        The command writes its result to "<motif>_sites.fasta" in the
        component output directory. That file is then read back: blank lines
        and lines starting with ">" are ignored, and the first token of every
        other line is kept in upper case.

        Parameters:
            motif: motif name, used to name the output file.
            motif_file_path: path of the motif definition given to the tool.
            site_number: number of sites requested from the tool.

        Returns:
            The list of generated sites, in file order.

        Raises:
            ExecutionException: when the command returns a non-zero status or
                when its output file cannot be read.
        """
        # Retrieve method required parameters
        RSAT_PATH = self.component.getParameter(Constants.RSAT_DIR_PARAM)
        dir_path = os.path.join(self.component.outputDir,
                                self.component.getComponentPrefix())
        output_path = os.path.join(dir_path, motif + "_sites.fasta")

        # Build the RSAT random-sites command
        cmd = os.path.join(RSAT_PATH, "python-scripts/random-sites")
        cmd += " -m " + motif_file_path
        cmd += " -n " + str(site_number)
        cmd += " -o " + output_path

        # Execute the command
        cmd_result = commands.getstatusoutput(cmd)
        if cmd_result[0] != 0:
            Log.log(
                "ImplantSitesProcessor.generateSites : status returned is :" +
                str(cmd_result[0]) + " for command '" + cmd + "'")
            Log.log(
                "ImplantSitesProcessor.generateSites : command output is = \n"
                + str(cmd_result[1]))
            raise ExecutionException(
                "ImplantSitesProcessor.generateSites : Cannot execute random-sites commands. See logs for more details"
            )

        # Parse the result of the command; the file is closed even on error
        sites = []
        try:
            with open(output_path, "r") as site_file:
                for line in site_file:
                    if not line.isspace() and line[0] != ">":
                        sites.append(line.split()[0].upper())
        except IOError as io_exce:
            raise ExecutionException(
                "ImplantSitesProcessor.generateSites : Unable to read motif sites from file '"
                + output_path + "'. From:\n\t---> " + str(io_exce))

        # The caller stores this result as the sites of the motif
        return sites
    def readConfig(self, param_file):
        """Load '<param_name>=<param_value>' lines from a file into self.config.

        Blank lines and lines whose first character is
        PFConstants.COMMENT_CHAR are skipped. Every other line must split
        into exactly two parts around '='. The first part, lower-cased, is
        the key; the second part, minus one trailing newline, is the value.

        Parameters:
            param_file -- path of the config file, passed to FileUtils.openFile

        Raises:
            ConfigException -- if a line does not split into exactly two
                               parts around '='.

        An IOError is not propagated: it is reported through Log.info.
        """

        try:
            config_file = FileUtils.openFile(param_file)
            try:
                for line in config_file:
                    if line.isspace() or line[0] == PFConstants.COMMENT_CHAR:
                        continue
                    tokens = line.split("=")
                    if len(tokens) != 2:
                        raise ConfigException(
                            "PipelineManager.readConfig : wrongly formated parameter line in config file '"
                            + param_file +
                            "'. Should be '<param_name>=<param_value>' instead of '"
                            + line + "'")
                    value = tokens[1]
                    # endswith() is safe on an empty value (e.g. a final
                    # 'name=' line without newline), unlike value[-1]
                    if value.endswith("\n"):
                        value = value[:-1]
                    self.config[tokens[0].lower()] = value
            finally:
                # NOTE(review): assumes FileUtils.openFile returns a file-like
                # object exposing close() - confirm
                config_file.close()
        except IOError as io_exce:
            Log.info(
                "PipelineManager.readConfig : unable to read parameters from config file '"
                + param_file + "'. From:\n\t---> " + str(io_exce))
Exemple #25
0
    def getInputCommStructs(self):
        """Collect the CommStructs this component can take as input.

        When the Component.INPUT_FILE_PARAM parameter is not set, each
        previous component whose resultClass is one of the authorized input
        classes is loaded with resultClass.fromXMLFile() on its output file.
        A None result is skipped.

        When an input file is set, every authorized class is tried in turn
        with fromXMLFile(input_file). A class that raises is traced and the
        next one is tried.

        Returns:
            The list of loaded CommStructs. It is empty when
            getAuthorizedInputClasses() returns None.

        Raises:
            ExecutionException -- if a previous component has a result class
                                  that is not authorized, or if no authorized
                                  class could load the input file.
        """

        authorized_input_classes = self.getAuthorizedInputClasses()

        input_commstructs = []
        if authorized_input_classes is None:
            return input_commstructs

        input_file = self.getParameter(Component.INPUT_FILE_PARAM, False)
        if input_file is None:
            #Compares the list of authorized inputs to outputs of previous components
            for component in self.previousComponents:
                previous_result_class = component.resultClass
                if previous_result_class not in authorized_input_classes:
                    # str() is required: concatenating the class object
                    # itself raises TypeError and hides this error
                    raise ExecutionException(
                        "Component.getInputCommStructs : input is not of the right class. Class is '"
                        + str(previous_result_class) +
                        "' but waited classes are " +
                        str(authorized_input_classes))
                input_commstruct = previous_result_class.fromXMLFile(
                    component.getOutputFilePath())
                if input_commstruct is not None:
                    input_commstructs.append(input_commstruct)
        else:
            #Try to read the input file using classes authorized as input
            for input_class in authorized_input_classes:
                try:
                    Log.trace(
                        "Component.getInputCommStructs : Trying to load data from file : "
                        + input_file)
                    input_commstruct = input_class.fromXMLFile(input_file)
                    if input_commstruct is not None:
                        input_commstructs.append(input_commstruct)
                    Log.trace(
                        "Component.getInputCommStructs : Data correctly loaded"
                    )
                except Exception as exce:
                    # Deliberate best effort: a class that cannot read the
                    # file is traced and the next authorized class is tried
                    Log.trace(
                        "Component.getInputCommStructs : Data not loaded using class '"
                        + str(input_class) + "' : " + str(exce))
            if len(input_commstructs) == 0:
                raise ExecutionException(
                    "Component.getInputCommStructs : The provided input file does not contain information the processor '"
                    + self.processorName + "' can manage : " + input_file)

        return input_commstructs
class MotifStatisticsCommStruct(CommStruct):
    """CommStruct holding a list of motifs and one statistics object per motif."""

    # --------------------------------------------------------------------------------------
    def __init__(self):
        """Create an empty structure: no motif and no statistics."""

        CommStruct.__init__(self)
        # Motifs, in the order they were added through addMotif
        self.motifList = []
        # Maps a motif to the statistics registered through addMotifStatistics
        self.motifToStatistics = {}

    # --------------------------------------------------------------------------------------
    def addMotif(self, motif):
        """Append motif to motifList; a None motif is ignored."""

        if motif is not None:
            self.motifList.append(motif)

    # --------------------------------------------------------------------------------------
    def addMotifStatistics(self, motif, statistics):
        """Register statistics for motif, replacing any previous entry.

        Nothing is stored when either argument is None.
        """

        if motif is not None and statistics is not None:
            self.motifToStatistics[motif] = statistics

    # --------------------------------------------------------------------------------------
    def toXMLFile(self, output_filepath):
        """Write this CommStruct to output_filepath as XML.

        The element tree comes from self.convertCommStructToElementTree()
        and is passed through self.indent() before being written.
        IOError and ParsingException are not propagated: they are reported
        through Log.log.
        """

        try:
            root_element = self.convertCommStructToElementTree()
            self.indent(root_element, 0)
            ElementTree(root_element).write(output_filepath)
        except IOError as exce:
            Log.log(
                "MotifStatisticsCommStruct.toXMLFile : Unable to write CommStruct to XML file. From:\n\t---> "
                + str(exce))
        except ParsingException as par_exce:
            Log.log(
                "MotifStatisticsCommStruct.toXMLFile : Unable to save CommStruct to XML file. From:\n\t---> "
                + str(par_exce))
Exemple #27
0
    def execute(self, comm_struct, pipeline):
        """Placeholder meant to be overridden: logs a reminder, returns None."""

        reminder = "The method 'execute' must be implemented at the inherited class level"
        Log.log(reminder)
        return None
Exemple #28
0
    def getDisplayName():
        """Fallback display name, meant to be overridden by inheriting classes.

        Logs a reminder that the method must be implemented, then returns
        the Processor class name followed by " (no display name defined)".
        """
        # NOTE(review): takes no 'self'; presumably declared static outside
        # the lines visible here - confirm

        Log.log(
            "The method 'getDisplayName' must be implemented at the inherited class level"
        )
        # Processor.__name__ is the class's own name; Processor.__class__
        # would name its metaclass instead
        return Processor.__name__ + " (no display name defined)"
Exemple #29
0
    def getRequiredParameters():
        """Placeholder meant to be overridden: logs a reminder, returns None."""

        reminder = "The method 'getRequiredParameters' must be implemented at the inherited class level"
        Log.log(reminder)
        return None
Exemple #30
0
    def getOutputCommStructClass():
        """Placeholder meant to be overridden.

        Logs a reminder and returns the one-element tuple ("Not defined", ).
        """

        reminder = "The method 'getOutputCommStructClass' must be implemented at the inherited class level"
        Log.log(reminder)
        undefined_marker = ("Not defined", )
        return undefined_marker