Exemple #1
0
    def parseClustalWResult(self, file_path, desired_species_list):
        """Parse a ClustalW result file into a SequenceAlignment.

        Only lines made of exactly two whitespace-separated tokens are used:
        the first token is taken as the species name and the second as a
        chunk of its aligned sequence.  Chunks found for the same species are
        concatenated in file order.

        Args:
            file_path: path of the ClustalW result file to read.
            desired_species_list: species names to keep; None or an empty
                collection keeps every species found in the file.

        Returns:
            A SequenceAlignment holding one sequence per retained species,
            built from the output of self.removeFirstAndLastNoInfoColumns.

        Raises:
            ExecutionException: if an IOError occurs while the file is opened
                or read, or during the subsequent processing.
        """
        try:
            result = {}
            length = 0
            keep_all = not desired_species_list

            # The context manager closes the file even when parsing fails
            with open(file_path, "r") as result_file:
                for line in result_file:
                    tokens = line.split()
                    # NOTE(review): any two-token line is accepted, so a
                    # non-sequence line that happens to split into two tokens
                    # would be read as a species when no filter list is
                    # given -- confirm against the expected file layout.
                    if len(tokens) != 2:
                        continue
                    species = tokens[0]
                    if keep_all or species in desired_species_list:
                        # One list item per alignment column (character)
                        result.setdefault(species, []).extend(tokens[1])
                        # Tracks the length of the most recently extended
                        # sequence; the final value is passed on below
                        length = len(result[species])

            result = self.removeFirstAndLastNoInfoColumns(result, length)

            alignment = SequenceAlignment()
            for species in result:
                alignment.addSequence(species, result[species])
            return alignment

        except IOError as io_exce:
            raise ExecutionException(
                "MSAProcessor.parseClustalWResult : Unable to open the ClustalW result file : '"
                + file_path + "'. From:\n\t---> " + str(io_exce))
Exemple #2
0
    def generateTrivialMSA(self, msa_length, bedseq_number, output_commstruct):
        """Attach a placeholder alignment made of '.' columns to every BED sequence.

        Each alignment gets ``msa_length`` rows: the base species of
        ``output_commstruct`` first, then "Species1", "Species2", ...  Every
        row has ``indexEnd - indexStart`` columns of the BED sequence it is
        attached to.  ``bedseq_number`` is not used by this method.
        """
        # Row labels: the base species followed by numbered placeholders
        row_names = [output_commstruct.baseSpecies]
        row_names.extend(
            ["Species" + str(rank) for rank in range(1, msa_length)])

        # One alignment per BED sequence, chromosome by chromosome
        for chrom_key in output_commstruct.bedSequencesDict.keys():
            for bed_entry in output_commstruct.bedSequencesDict[chrom_key]:
                trivial_msa = SequenceAlignment()
                trivial_msa.name = bed_entry.name + "_1"
                filler = ['.'] * (bed_entry.indexEnd - bed_entry.indexStart)
                # Every row is handed the same filler list object
                for row_name in row_names[:msa_length]:
                    trivial_msa.addSequence(row_name, filler)
                trivial_msa.finalizeSequences()
                output_commstruct.addSequenceAlignment(bed_entry, trivial_msa)
Exemple #3
0
    def generateRandomMSA(self, msa_length, bedseq_number, max_length,
                          output_commstruct):
        """Attach an alignment of random DNA to every BED sequence.

        Runs the RSAT ``random-seq`` script so that it writes
        ``bedseq_number`` sequences of length ``int(max_length * 1.5)`` to
        "random_sequences.txt" under the component output directory.  Then,
        for each BED sequence of ``output_commstruct``, builds a
        SequenceAlignment in which each of the ``msa_length`` species (the
        base species, then "Species1", "Species2", ...) receives the same
        random sequence, cut to the BED sequence length.

        Args:
            msa_length: number of species (rows) in each alignment.
            bedseq_number: number of random sequences requested from RSAT.
            max_length: reference length; 1.5 times this value is requested
                for each random sequence.
            output_commstruct: object providing baseSpecies, bedSequencesDict
                and addSequenceAlignment.

        Raises:
            ExecutionException: if the random-seq command returns a non-zero
                status, or if an IOError occurs while reading its output.
        """
        # Retrieve method required parameters
        RSAT_PATH = self.component.getParameter(Constants.RSAT_DIR_PARAM)
        dir_path = os.path.join(self.component.outputDir,
                                self.component.getComponentPrefix())
        file_path = os.path.join(dir_path, "random_sequences.txt")

        try:
            # Build the RSAT random-seq command line
            cmd = os.path.join(RSAT_PATH, "perl-scripts/random-seq")
            cmd += " -l " + str(int(max_length * 1.5))
            cmd += " -n " + str(bedseq_number)
            cmd += " -a a:t 0.3 c:g 0.2"
            cmd += " -type DNA"
            cmd += " -format multi"
            cmd += " -o " + file_path

            Log.info(
                "GenerateMSAProcessor.generateMSA : starting random sequence generation. Command used is : "
                + cmd)

            # Execute the command
            cmd_result = commands.getstatusoutput(cmd)
            if cmd_result[0] != 0:
                Log.log(
                    "GenerateMSAProcessor.generateMSA : status returned is :" +
                    str(cmd_result[0]) + " for command '" + cmd + "'")
                Log.log(
                    "GenerateMSAProcessor.generateMSA : command output is = \n"
                    + str(cmd_result[1]))
                raise ExecutionException(
                    "GenerateMSAProcessor.generateMSA : Cannot execute random-seq commands. See logs for more details"
                )

            # Read the output file to get the random sequences; the context
            # manager guarantees the file is closed
            sequence_list = []
            with open(file_path, "r") as sequence_file:
                for line in sequence_file:
                    tokens = line.split()
                    # Blank lines hold no sequence; skipping them avoids an
                    # IndexError on tokens[0]
                    if tokens:
                        sequence_list.append(tokens[0])

            # Generate the species list
            species_list = [output_commstruct.baseSpecies]
            for index in range(msa_length - 1):
                species_list.append("Species" + str(index + 1))

            # Create and fill the MSA for each BED sequence; the n-th BED
            # sequence visited uses the n-th random sequence
            count_seq = 0
            for chrom in output_commstruct.bedSequencesDict.keys():
                for bedseq in output_commstruct.bedSequencesDict[chrom]:
                    msa = SequenceAlignment()
                    msa.name = bedseq.name + "_1"
                    msa.referenceSpecies = output_commstruct.baseSpecies
                    seq_length = bedseq.indexEnd - bedseq.indexStart
                    sequence = list(sequence_list[count_seq][:seq_length])
                    for index in range(msa_length):
                        msa.addSequence(species_list[index], sequence)
                    msa.finalizeSequences()
                    output_commstruct.addSequenceAlignment(bedseq, msa)
                    count_seq += 1

        except IOError as io_exce:
            raise ExecutionException(
                "GenerateMSAProcessor.generateMSA : Unable to save/read random sequences file. From:\n\t---> "
                + str(io_exce))