Example #1
0
    def finalizeSequences(self, keep_gaps=False):
        """Pad the sequences to a common length and drop uninformative columns.

        Sequences shorter than the alignment are completed in place with
        Constants.SEQUENCE_INSERTION_CHAR.  Each column is then kept or
        removed according to the residue of the reference species:

        - Constants.SEQUENCE_INIT_CHAR: the column is always kept (the source
          files provide no information for it);
        - otherwise, when keep_gaps is false, the column is removed if and
          only if that residue is Constants.SEQUENCE_INSERTION_CHAR;
        - otherwise, when keep_gaps is true, the column is removed only if
          every species holds an insertion or initialisation character.

        The sequence lists are modified in place and self.totalLength is
        updated to the number of remaining columns.

        keep_gaps -- when true, keep the columns where the reference species
                     has an insertion character.
        """
        sequences = self.sequences
        insertion_char = Constants.SEQUENCE_INSERTION_CHAR
        init_char = Constants.SEQUENCE_INIT_CHAR
        blank_chars = (insertion_char, init_char)

        # The alignment is as long as its longest sequence (and never shorter
        # than the length already recorded on the alignment)
        for sequence in sequences.values():
            if len(sequence) > self.totalLength:
                self.totalLength = len(sequence)

        # Complete the sequences that are too short with insertion characters
        for species, sequence in sequences.items():
            missing = self.totalLength - len(sequence)
            if missing > 0:
                Log.info(
                    "SequenceAlignment.finalizeSequences : Sequence does not have the right lenght for this alignment : Alignement length = "
                    + str(self.totalLength) + " DNA sequence length = " +
                    str(len(sequence)) + " for species= " + species +
                    ". Completing sequence")
                sequence.extend([insertion_char] * missing)

        # Decide, column by column, what is kept.  The decision only depends
        # on the content of the column itself, so the columns are selected
        # first and the sequences rebuilt once afterwards: this avoids one
        # list.pop() per species for every removed column.
        kept_columns = []
        for column in range(self.totalLength):
            reference_char = sequences[self.referenceSpecies][column]
            if reference_char == init_char:
                keep = True
            elif not keep_gaps:
                keep = reference_char != insertion_char
            else:
                keep = any(sequence[column] not in blank_chars
                           for sequence in sequences.values())
            if keep:
                kept_columns.append(column)

        removed_char = self.totalLength - len(kept_columns)
        if removed_char > 0:
            # Compute every filtered sequence before writing any of them back,
            # so that a list object shared by several species is filtered
            # only once
            filtered = [(sequence, [sequence[column] for column in kept_columns])
                        for sequence in sequences.values()]
            for sequence, residues in filtered:
                sequence[:] = residues

        self.totalLength = self.totalLength - removed_char
Example #2
0
    def readConfig(self, param_file):
        """Load the '<name>=<value>' lines of a parameter file into self.config.

        Blank lines and lines whose first character is
        PFConstants.COMMENT_CHAR are ignored.  Parameter names are stored
        lower-cased; values are stored without their end-of-line characters.

        param_file -- path of the configuration file to read.

        Raises ConfigException when a line does not contain exactly one '='.
        An IOError raised while opening or reading the file is logged and
        not propagated.
        """
        try:
            config_file = FileUtils.openFile(param_file)
            # NOTE(review): assumes FileUtils.openFile returns a file-like
            # object exposing close() -- confirm against FileUtils
            try:
                for line in config_file:
                    if line.isspace() or line[0] == PFConstants.COMMENT_CHAR:
                        continue
                    tokens = line.split("=")
                    if len(tokens) != 2:
                        raise ConfigException(
                            "PipelineManager.readConfig : wrongly formated parameter line in config file '"
                            + param_file +
                            "'. Should be '<param_name>=<param_value>' instead of '"
                            + line + "'")
                    # rstrip() also copes with an empty value on a last line
                    # that has no end-of-line, where indexing the value with
                    # [-1] would raise an IndexError
                    self.config[tokens[0].lower()] = tokens[1].rstrip("\r\n")
            finally:
                config_file.close()
        except IOError as io_exce:
            Log.info(
                "PipelineManager.readConfig : unable to read parameters from config file '"
                + param_file + "'. From:\n\t---> " + str(io_exce))
Example #3
0
    def start(self, pipeline, pipeline_out, runtime_params, resume=False):
        """Record the run settings and, if requested, try to resume the component.

        pipeline       -- not used by the code of this method.
        pipeline_out   -- stored in self.outputDir.
        runtime_params -- stored in self.runtimeParameters.
        resume         -- when equal to True, try to reuse the output file of a
                          previous run instead of executing the processor.

        Returns True when the component has been declared as resumed.  In
        every other case where a resume was requested, the previous outputs
        are removed and nothing is returned.
        """
        self.outputDir = pipeline_out
        self.runtimeParameters = runtime_params

        # The explicit comparison is kept on purpose: a truthy value that is
        # not equal to True (e.g. a non-empty string) must not trigger a resume
        if resume == True:
            # Test if the previous components were all resumed
            if self.canResume():
                self.resumed = False
                # If the component parameters have changed since the previous
                # run, the processor cannot be resumed and must be re-run
                if self.verifyConfig():
                    if self._resumeFromOutputFile():
                        return True
                    # Here, the processor cannot be resumed, for any reason
                    # linked to its output files
                    Log.trace(
                        "Component.execute : No output file found for processor '"
                        + self.processorName + "': executing it")

                self.removePreviousOutputs()

            # The previous components were not resumed: the old output files
            # are removed
            else:
                Log.trace(
                    "Component.execute : Processor '" + self.processorName +
                    "' cannot be resumed since previous components have been executed."
                )
                self.removePreviousOutputs()

    def _resumeFromOutputFile(self):
        """Try to reload the output file of a previous run; return True when the component is resumed."""
        try:
            output_filepath = self.getOutputFilePath()
            if not os.path.isfile(output_filepath):
                return False

            authorized_output_classes = self.getAuthorizedOutputClasses()
            if authorized_output_classes is None:
                # No output class to check the file against: the component is
                # declared as resumed as soon as the file exists
                self.resumed = True
                self.executed = False
                ProgressionManager.setComponentStatus(
                    self, ProgressionManager.RESUMED_STATUS)
                return True

            # The first authorized class able to load the file wins
            for output_class in authorized_output_classes:
                try:
                    output_commstruct = output_class.fromXMLFile(
                        output_filepath)
                    if output_commstruct is not None:
                        self.resultClass = output_class
                        self.resumed = True
                        self.executed = False
                        ProgressionManager.setComponentStatus(
                            self, ProgressionManager.RESUMED_STATUS)
                        Log.trace(
                            "Component.execute : Resuming data from file : "
                            + output_filepath)
                        # The loaded structure was only needed to validate
                        # the file: release it right away
                        output_commstruct = None
                        gc.collect()
                        return True
                # Exception, not BaseException: KeyboardInterrupt and
                # SystemExit must not be swallowed while probing the classes
                except Exception as exce:
                    Log.info(
                        "Component.execute : Tried to resume output file with class '"
                        + str(output_class) + "' : " + str(exce))
        except IOError as io_exce:
            Log.trace(
                "Component.execute : Unable to open output file to resume processor '"
                + self.processorName + "'. From\n\t---> " + str(io_exce))

        return False
Example #4
0
    def parseBlockListWithIndex(self, index_file, input_file):
        """Parse the indexed blocks of input_file that overlap a BED sequence.

        The first line of index_file is read as a header.  When its first
        token is Constants.COMMENT_CHAR and its second token is present and
        different from Constants.MIXED, the file is considered specific to
        that second token, whose BED sequences are retrieved once for all; a
        third token equal to Constants.ORDERED additionally marks the index
        as ordered, which allows skipping lines and stopping early.

        Every following line made of four whitespace-separated tokens gives
        the key passed to getAssociatedBEDSequences, a start, an end and the
        position to seek in input_file.  Lines with another number of tokens
        are ignored.

        index_file -- opened index file, read with readline().
        input_file -- opened file the blocks are parsed from, at the indexed
                      positions.

        Raises ExecutionException when parseBlock returns False for an
        indexed block.
        """
        is_chrom_file = False
        ordered = False
        spec_chrom = None

        # Read the index file header to know if the file is chromosome
        # specialized and ordered
        line = index_file.readline()
        if len(line) == 0:
            Log.log("MAFProcessor.parseBlockListWithIndex : index file '" +
                    index_file.name + "' has no header line : skipping it")
            return
        tokens = line.split()
        # 'tokens' is empty when the first line is blank: test it before
        # looking at its first element
        if tokens and tokens[0] == Constants.COMMENT_CHAR:
            if len(tokens) > 1 and tokens[1] != Constants.MIXED:
                is_chrom_file = True
                spec_chrom = tokens[1]
                if len(tokens) > 2 and tokens[2] == Constants.ORDERED:
                    ordered = True

        # If the file is specialized by chromosome, get once for all the BED
        # sequences concerned by this species and chromosome
        bed_sequences = None
        if is_chrom_file:
            bed_sequences = self.getAssociatedBEDSequences(spec_chrom)
            if bed_sequences is None or len(bed_sequences) == 0:
                Log.info(
                    "MAFProcessor.parseBlockListWithIndex : No BED sequences matching for file :"
                    + index_file.name)
                return

        # If the file is ordered, compute the extrema of the BED sequences in
        # order to optimize the parsing ('ordered' implies a non-empty
        # bed_sequences list at this point)
        if ordered:
            min_start = min(bed_sequence.indexStart
                            for bed_sequence in bed_sequences)
            max_end = max(bed_sequence.indexEnd
                          for bed_sequence in bed_sequences)

        # Parse the index file
        while True:
            line = index_file.readline()
            if len(line) == 0:
                break
            tokens = line.split()
            if len(tokens) != 4:
                continue

            # Retrieve the index information
            spec_chrom = tokens[0]
            start = self.getIntValue(tokens[1])
            end = self.getIntValue(tokens[2])
            position = self.getIntValue(tokens[3])

            if ordered:
                # The block lies before every BED sequence: skip the line
                if end <= min_start:
                    continue
                # The block lies after every BED sequence: skip the rest of
                # the file
                if start >= max_end:
                    break
            elif not is_chrom_file:
                # The file is not chromosome specialized: the BED sequence
                # list must be retrieved for each new index
                bed_sequences = self.getAssociatedBEDSequences(spec_chrom)
                if bed_sequences is None or len(bed_sequences) == 0:
                    continue

            # The block is parsed (once) if it overlaps at least one of the
            # BED sequences
            overlaps = any(end > bed_sequence.indexStart
                           and start < bed_sequence.indexEnd
                           for bed_sequence in bed_sequences)
            if not overlaps:
                continue

            input_file.seek(position, 0)
            result = self.parseBlock(input_file, True)
            # Only a result equal to False reports a failure
            if result == False:
                if ordered:
                    raise ExecutionException(
                        "MAFFile.parseBlockListWithIndex : Indexed MSA block seems not correct. You should have not updated indexes. Please see logs for more information"
                    )
                Log.log(
                    "MAFFile.parseBlockListWithIndex : Indexed MSA block seems not correct. You should have not updated indexes"
                )
                raise ExecutionException(
                    "MAFFile.parseBlockListWithIndex : Indexed MSA block seems not correct. You should have not updated indexes. Please, see logs for more information"
                )
Example #5
0
    def generateRandomMSA(self, msa_length, bedseq_number, max_length,
                          output_commstruct):
        """Attach an alignment of random sequences to every BED sequence.

        The RSAT 'random-seq' script is run to write bedseq_number random DNA
        sequences of length int(max_length * 1.5) to 'random_sequences.txt'
        in the component output directory.  For each BED sequence of
        output_commstruct.bedSequencesDict, one alignment is built in which
        each of the msa_length species holds the same random sequence, cut
        to the length of the BED sequence.

        msa_length        -- number of species in each alignment; the first
                             one is output_commstruct.baseSpecies.
        bedseq_number     -- number of random sequences to generate.
        max_length        -- length used to size the random sequences.
        output_commstruct -- structure providing the BED sequences and
                             receiving the alignments.

        Raises ExecutionException when the command fails or when the
        sequence file cannot be read.
        """
        # Retrieve method required parameters
        RSAT_PATH = self.component.getParameter(Constants.RSAT_DIR_PARAM)
        dir_path = os.path.join(self.component.outputDir,
                                self.component.getComponentPrefix())
        file_path = os.path.join(dir_path, "random_sequences.txt")

        try:
            # Build the RSAT random-seq command.  The output path is quoted
            # so that a directory name containing spaces does not split it.
            cmd = os.path.join(RSAT_PATH, "perl-scripts/random-seq")
            cmd += " -l " + str(int(max_length * 1.5))
            cmd += " -n " + str(bedseq_number)
            cmd += " -a a:t 0.3 c:g 0.2"
            cmd += " -type DNA"
            cmd += " -format multi"
            cmd += " -o '" + file_path + "'"

            Log.info(
                "GenerateMSAProcessor.generateMSA : starting random sequence generation. Command used is : "
                + cmd)

            # Execute the command
            cmd_result = commands.getstatusoutput(cmd)
            if cmd_result[0] != 0:
                Log.log(
                    "GenerateMSAProcessor.generateMSA : status returned is :" +
                    str(cmd_result[0]) + " for command '" + cmd + "'")
                Log.log(
                    "GenerateMSAProcessor.generateMSA : command output is = \n"
                    + str(cmd_result[1]))
                raise ExecutionException(
                    "GenerateMSAProcessor.generateMSA : Cannot execute random-seq commands. See logs for more details"
                )

            # Read the output file to get the random sequences: the first
            # token of every non-blank line
            sequence_list = []
            with open(file_path, "r") as sequence_file:
                for line in sequence_file:
                    tokens = line.split()
                    if tokens:
                        sequence_list.append(tokens[0])

            # Generate the species list
            species_list = [output_commstruct.baseSpecies]
            for index in range(1, msa_length):
                species_list.append("Species" + str(index))

            # Create and fill the MSA for each BED sequence
            count_seq = 0
            for bedseq_list in output_commstruct.bedSequencesDict.values():
                for bedseq in bedseq_list:
                    msa = SequenceAlignment()
                    msa.name = bedseq.name + "_1"
                    msa.referenceSpecies = output_commstruct.baseSpecies
                    seq_length = bedseq.indexEnd - bedseq.indexStart
                    sequence = sequence_list[count_seq][:seq_length]
                    for index in range(msa_length):
                        # Each species gets its own list, so that editing
                        # one sequence of the alignment cannot alter the
                        # others
                        msa.addSequence(species_list[index], list(sequence))
                    msa.finalizeSequences()
                    output_commstruct.addSequenceAlignment(bedseq, msa)
                    count_seq += 1

        except IOError as io_exce:
            raise ExecutionException(
                "GenerateMSAProcessor.generateMSA : Unable to save/read random sequences file. From:\n\t---> "
                + str(io_exce))
Example #6
0
    def buildHistogramsAndGraphs(self, input_commstruct, histogram_interval):
        """Build the distance histograms, tests and graphs of every motif.

        The component output directory is emptied and re-created.  Then, for
        each motif returned by computeMotifStatistics:

        - its hit distances are written to '<motif>_Distances.tab' and binned
          with the RSAT 'classfreq' script;
        - the resulting distribution and the null distribution of the motif
          are written together to '<motif>_Distances_histograms.tab';
        - a chi2 test is run on that file ("0.0" / "1.0" are stored when no
          result is available);
        - a PNG then a PDF graph are drawn with the RSAT 'XYgraph' script;
        - a histogram of the peak scores is output when the motif has more
          than one score and at least one of them is not 0.

        The paths and test values are stored as attributes of the
        MotifStatistics of the motif.  When an RSAT command fails, its status
        and output are logged and the remaining steps of that motif are
        skipped.

        input_commstruct   -- structure providing the motif statistics.
        histogram_interval -- class interval of the histograms.

        Raises ExecutionException when an IOError occurs.
        """
        # Retrieve the algorithm parameters
        RSAT_PATH = self.component.getParameter(Constants.RSAT_DIR_PARAM)

        # Compute the statistics of the motifs
        Log.info(
            "HistogramProcessor.buildHistogramsAndGraphs : collecting motifs statistics"
        )
        statistics = self.computeMotifStatistics(input_commstruct)
        hits_distances = statistics[0]
        motif_size_min = statistics[1]
        motif_size_max = statistics[2]
        hits_peakscore = statistics[3]

        # Compute the uniform distribution probabilities
        Log.info(
            "HistogramProcessor.buildHistogramsAndGraphs : computing uniform distribution"
        )
        uniform_distributions = self.computeUniformDistributions(
            input_commstruct, histogram_interval, motif_size_min,
            motif_size_max)

        Log.info(
            "HistogramProcessor.buildHistogramsAndGraphs : building histogram and graphs"
        )

        # Execute the RSAT commands and computations
        try:
            # Prepare the output directory
            dir_path = os.path.join(self.component.outputDir,
                                    self.component.getComponentPrefix())
            shutil.rmtree(dir_path, True)
            FileUtils.createDirectory(dir_path, 0o777)

            # Parse the motif list and execute the computations and commands
            # for each of them
            ProgressionManager.setTaskProgression("Building motifs histogram",
                                                  self.component, 0.0)
            total_motif_number = len(hits_distances)
            count_motif = 0
            for motif_name in hits_distances.keys():
                count_motif += 1
                motif_stats = input_commstruct.motifStatistics[motif_name]
                motif_size = motif_stats.motifSize
                hit_number = motif_stats.getAttributeAsint(
                    MotifStatistics.MOTIF_HIT_SCORE)

                # Save the stats to a tabbed file for the classfreq command
                input_path = os.path.join(dir_path,
                                          motif_name + "_Distances.tab")
                self.outputMotifStatistics(hits_distances[motif_name],
                                           input_path)

                # Execute the classfreq command
                histo_path = os.path.join(
                    dir_path, motif_name + "_Distances_histogram.tab")
                cmd = os.path.join(RSAT_PATH, "perl-scripts/classfreq")
                cmd += " -i '" + input_path + "'"
                cmd += " -col 1"
                cmd += " -ci " + str(histogram_interval)
                cmd += " -o '" + histo_path + "'"
                if not self._runHistogramCommand(cmd):
                    continue

                # Retrieve the classfreq results from the output file
                motif_distribution = self.parseClassfreqResults(histo_path)

                # Compute the homogeneous distribution for the current motif
                null_distribution = self.computeMotifNullDistribution(
                    uniform_distributions[motif_size], hit_number)

                # Save both histograms to the same file to create a common
                # graph
                all_histo_path = os.path.join(
                    dir_path, motif_name + "_Distances_histograms.tab")
                self.outputAllHistograms(motif_distribution, motif_name,
                                         null_distribution,
                                         "Homogeneous model",
                                         histogram_interval, all_histo_path)
                motif_stats.setAttribute(
                    MotifStatistics.MOTIF_DISTANCE_HISTOGRAM, all_histo_path)

                # Execute a chi2 test on the motif distribution against the
                # motif homogeneous distribution
                chi2_test = RSATUtils.executeChi2Test(all_histo_path, 4, 5)
                if chi2_test is not None:
                    motif_stats.setAttribute(MotifStatistics.MOTIF_CHI2,
                                             chi2_test[0])
                    motif_stats.setAttribute(MotifStatistics.MOTIF_CHI2_PVALUE,
                                             chi2_test[1])
                else:
                    motif_stats.setAttribute(MotifStatistics.MOTIF_CHI2, "0.0")
                    motif_stats.setAttribute(MotifStatistics.MOTIF_CHI2_PVALUE,
                                             "1.0")

                # Build the PNG graph of the histograms
                graph_path = os.path.join(dir_path,
                                          motif_name + "_Distances.png")
                if not self._drawDistanceGraph(RSAT_PATH, all_histo_path,
                                               graph_path, "png"):
                    continue
                motif_stats.setAttribute(
                    MotifStatistics.MOTIF_DISTANCE_HISTOGRAM_GRAPH, graph_path)

                # Build the PDF graph of the histograms
                graph_path_pdf = os.path.join(dir_path,
                                              motif_name + "_Distances.pdf")
                if not self._drawDistanceGraph(RSAT_PATH, all_histo_path,
                                               graph_path_pdf, "pdf"):
                    continue
                motif_stats.setAttribute(
                    MotifStatistics.MOTIF_DISTANCE_HISTOGRAM_GRAPH_PDF,
                    graph_path_pdf)

                # Output the histogram of the motif peak scores, unless there
                # is at most one score or all the scores are 0
                peak_scores = hits_peakscore[motif_name]
                if len(peak_scores) > 1 and any(value != 0
                                                for value in peak_scores):
                    pathes = RSATUtils.outputHistogram(
                        peak_scores, histogram_interval, dir_path,
                        motif_name + "_PeakScores",
                        self.component.pipelineName,
                        "Distribution of peak score for " + motif_name,
                        "Peak Score", "Number of occurence", None, True)
                    motif_stats.setAttribute(
                        MotifStatistics.MOTIF_PEAK_SCORE_HISTOGRAM, pathes[0])
                    motif_stats.setAttribute(
                        MotifStatistics.MOTIF_PEAK_SCORE_HISTOGRAM_GRAPH,
                        pathes[1])

                # Update the progression
                if count_motif % 10 == 0:
                    ProgressionManager.setTaskProgression(
                        "Building motifs histogram", self.component,
                        count_motif / float(total_motif_number))

        except IOError as io_exce:
            raise ExecutionException(
                "HistogramProcessor.buildHistogramsAndGraphs : Unable to build histogram and graph. From:\n\t---> "
                + str(io_exce))

    def _runHistogramCommand(self, cmd):
        """Run a shell command; log its status and output and return False when it fails."""
        cmd_result = commands.getstatusoutput(cmd)
        if cmd_result[0] == 0:
            return True
        Log.log(
            "HistogramProcessor.buildHistogramsAndGraphs : status returned is :"
            + str(cmd_result[0]) + " for command '" + cmd + "'")
        Log.log("  Command output is = \n" + str(cmd_result[1]))
        return False

    def _drawDistanceGraph(self, rsat_path, histo_path, graph_path,
                           graph_format):
        """Draw histo_path to graph_path with RSAT XYgraph in the given format; return False on failure."""
        cmd = os.path.join(rsat_path, "perl-scripts/XYgraph")
        cmd += " -i '" + histo_path + "'"
        cmd += " -title1 '" + self.component.pipelineName + "'"
        cmd += " -title2 ''"
        cmd += " -xcol 3 -ycol 4"
        cmd += " -xleg1 'Distance to peak maximum'"
        cmd += " -yleg1 'Number of motif hits'"
        cmd += " -legend -header -format " + graph_format + " -fhisto"
        cmd += " -o '" + graph_path + "'"
        return self._runHistogramCommand(cmd)
    def execute(self, input_commstructs):
        
        if input_commstructs == None or len(input_commstructs) == 0:
            raise ExecutionException("BEDOutputProcessor.execute : No inputs")
        
        input_commstruct = input_commstructs[0]
        
        # Retrieve the processor parameters
        reference_motif = self.getParameter(BEDOutputProcessor.REFERENCE_MOTIF)
                
        color_method = self.getParameter(BEDOutputProcessor.COLOR_METHOD, False)
        if color_method == None:
            color_method = BEDOutputProcessor.COLOR_METHOD_SCORE
        else:
            color_method = color_method.lower()
            if color_method != BEDOutputProcessor.COLOR_METHOD_SCORE and color_method != BEDOutputProcessor.COLOR_METHOD_FAMILY:
                color_method = BEDOutputProcessor.COLOR_METHOD_SCORE
                
        score_min = self.getParameterAsfloat(BEDOutputProcessor.SCORE_MIN)
        score_max = self.getParameterAsfloat(BEDOutputProcessor.SCORE_MAX)
        
        # Prepare the processor output dir
        out_path = os.path.join(self.component.outputDir, self.component.getComponentPrefix())
        shutil.rmtree(out_path, True)
        FileUtils.createDirectory( out_path, 0777)

        # Retrieve the JASPAR motifs details
        motif_details = MotifUtils.getMotifsDetailsFromJaspar()
        motif_id = motif_details[ 0]
        motif_family = motif_details[ 1]
        family_rgb = {}

        # build the bed output file path
        bed_file_path = os.path.join(out_path, self.component.pipelineName + "_Motifs.bed")

        try:
            bed_file = open(bed_file_path, "w")

            #bed_file.write("track name='" + self.component.pipelineName + "' visibility=3 itemRgb='On' use_score=1\n")
            #bed_file.write("browser dense RSAT\n")
            #bed_file.write("browser dense\n") 
            #bed_file.write("## seq_name	start	end	feature_name	score	strand	thickStart	thickEnd	itemRgb	blockCount	blockSizes	blckStarts\n")

            current_color = None
            bedseq_list = input_commstruct.bedToMA.keys()
            bedseq_list.sort(BEDSequence.compare)
            previous_line_start = 0
            previous_line_key = ""
            for bed_seq in bedseq_list:
                for msa in input_commstruct.bedToMA[ bed_seq]:
                    for motif in msa.motifs:
                        motif_name = motif.name
                        if not input_commstruct.motifStatistics.has_key(motif_name):
                            continue
                        if motif_name in motif_id.keys():
                            out_name = motif_id[ motif_name]
                            chromosom = bed_seq.chromosom
                            start_position = bed_seq.indexStart + msa.fixIndex(motif.indexStart)
                            end_position = bed_seq.indexStart + msa.fixIndex(motif.indexEnd)
                            score = motif.score
                            
                            # Commented : Black is assigned to the reference motif
                            #if motif_name == reference_motif:
                            #    item_rgb = "0,0,0"
                            # for the other motif, color depends on the chosen method
                            #else:
                            if color_method == BEDOutputProcessor.COLOR_METHOD_FAMILY:
                                if motif_name in motif_family.keys():
                                    #print("-----------------------------")
                                    #print "Current color = " + str(current_color)
                                    #print "Motif name=" + motif_name
                                    #print "Motif family=" + motif_family[ motif_name]
                                    family_rgb = self.updateFamilyRGB(motif_family[ motif_name], family_rgb, current_color)
                                    #print "Family RGB = " + str(family_rgb)
                                    item_rgb = family_rgb[ motif_family[ motif_name]]
                                    #print "Item rgb = ", str(item_rgb)
                                    current_color = item_rgb
                                else:
                                    item_rgb = BEDOutputProcessor.COLORS[ 0]
                            else:
                                item_rgb = self.getColorForScore(score, score_min, score_max)
                            
                            # Write the lines to output file
                            if len( chromosom) <4:
                                line_out = "chr" + chromosom
                            else:
                                line_out = chromosom
                            line_out += "\t" + str(start_position)
                            line_out += "\t" + str(end_position)
                            line_out += "\t" + out_name
                            line_out += "\t" + str(int(score * 1000))
                            line_out += "\t" + motif.strand
                            line_out += "\t" + str(start_position)           # ThickStart
                            line_out += "\t" + str(end_position)            # ThickEnd
                            line_out += "\t" + item_rgb        # itemRGB
                            #line_out += "\t" + "0"            # BlockCount
                            #line_out += "\t" + "0"            # BlockSizes
                            #line_out += "\t" + "0"            # BlockStarts
                            
                            # Build a key that represents the motif chromosome, name and positions
                            line_key = chromosom + ":" + str(start_position) + ":" + str(end_position) + ":" + out_name
                            
                            # If the new line has the same key as the previous one, we must keep only one of the two lines,
                            # i.e. the one with the highest score (the tell() and seek() methods make it possible to
                            # overwrite the old line if required).
                            # If the new line and the previous one have different keys, the new line is simply written.
                            if previous_line_key != line_key:
                                previous_line_start = bed_file.tell()
                                bed_file.write(line_out)
                                bed_file.write("\n")
                                bed_file.flush
                                previous_line_key = line_key
                                previous_score = score
                            else:
                                if score > previous_score:
                                    bed_file.seek(previous_line_start)
                                    bed_file.write(line_out)
                                    bed_file.write("\n")
                                    bed_file.flush
                                    previous_score = score     

            bed_file.close()
            input_commstruct.paramStatistics[ BedSeqAlignmentStatsCommStruct.BED_OUTPUT_PATH] = bed_file_path
            
            # Sort bed_file (used for bigBed conversion)
            sorted_bed_file_path = os.path.join(out_path, self.component.pipelineName + "_Motifs_sorted.bed")
            cmd = "sort -k1,1 -k2,2n"
            cmd += " " + bed_file_path
            cmd += " > " + sorted_bed_file_path
            
            Log.info( "BEDOuputProcessor.execute : Sorting BED file")
            Log.info( "BEDOuputProcessor.execute  : command used is : " + cmd)
            
            cmd_result = commands.getstatusoutput( cmd)
            Log.trace( "BEDOuputProcessor.execute : " + threading.currentThread().getName() + " : status returned is :" + str( cmd_result[0]))
            if cmd_result[0] != 0:
                Log.log( "BEDOuputProcessor.execute : status returned is :" + str( cmd_result[0]) + " for command '" + cmd + "'" )
                Log.log( "BEDOuputProcessor.execute : command output is = \n" + str( cmd_result[1]))
                return input_commstruct
                        
            # Fetch the chrom sizes that will be used to convert the BED file to a bigBed file
            chrom_sizes_path = os.path.join(out_path, self.component.pipelineName + "_chrom_size.txt")
            
            RSAT_PATH = self.component.getParameter( Constants.RSAT_DIR_PARAM)
            cmd = os.path.join( RSAT_PATH , "contrib/peak-footprints/tools/fetchChromSizes")
            cmd += " " + input_commstruct.paramStatistics[ BedSeqAlignmentStatsCommStruct.REFERENCE_SPECIES]
            cmd += " > " + chrom_sizes_path
            
            Log.info( "BEDOuputProcessor.execute : Fetching Chrom sizes for species : " + input_commstruct.paramStatistics[ BedSeqAlignmentStatsCommStruct.REFERENCE_SPECIES])
            Log.info( "BEDOuputProcessor.execute  : command used is : " + cmd)
            
            cmd_result = commands.getstatusoutput( cmd)
            Log.trace( "BEDOuputProcessor.execute : " + threading.currentThread().getName() + " : status returned is :" + str( cmd_result[0]))
            if cmd_result[0] != 0:
                Log.log( "BEDOuputProcessor.execute : status returned is :" + str( cmd_result[0]) + " for command '" + cmd + "'" )
                Log.log( "BEDOuputProcessor.execute : command output is = \n" + str( cmd_result[1]))
                return input_commstruct
            
            # Build the bigBed file
            # sudo ln -s /lib/x86_64-linux-gnu/libssl.so.1.0.0 /usr/lib/libssl.so.10
            # sudo ln -s /lib/x86_64-linux-gnu/libcrypto.so.1.0.0 /usr/lib/libcrypto.so.10
            
            big_bed_path = os.path.join(out_path, self.component.pipelineName + "_Motifs.bb")
            
            RSAT_PATH = self.component.getParameter( Constants.RSAT_DIR_PARAM)
            cmd = os.path.join( RSAT_PATH , "contrib/peak-footprints/tools/bedToBigBed")
            cmd += " " + sorted_bed_file_path
            cmd += " " + chrom_sizes_path
            cmd += " " + big_bed_path
            
            Log.info( "BEDOuputProcessor.execute : Converting BED file to bigBed file")
            Log.info( "BEDOuputProcessor.execute  : command used is : " + cmd)
            
            cmd_result = commands.getstatusoutput( cmd)
            Log.trace( "BEDOuputProcessor.execute : " + threading.currentThread().getName() + " : status returned is :" + str( cmd_result[0]))
            if cmd_result[0] != 0:
                Log.log( "BEDOuputProcessor.execute : status returned is :" + str( cmd_result[0]) + " for command '" + cmd + "'" )
                Log.log( "BEDOuputProcessor.execute : command output is = \n" + str( cmd_result[1]))
                return input_commstruct
                        
            input_commstruct.paramStatistics[ BedSeqAlignmentStatsCommStruct.BIGBED_OUTPUT_PATH] = big_bed_path
            
        except IOError, io_exce:
            Log.log("BEDOutputProcessor.execute : Unable to save the BED file of recognized motifs : " + str(io_exce))