Beispiel #1
0
 def getAlignment( align_node, bedseq, comm_struct):
     """Create a new, empty SequenceAlignment for `bedseq` and register it.

     The alignment name is read from `align_node` through
     CommStruct.getAttribute using the ALIGNMENT_NAME_ATT key, and the
     reference species is copied from `bedseq.species`. The alignment is
     attached to `bedseq` via `comm_struct.addSequenceAlignment` before
     being returned.
     """
     # Resolve the name first so a failing attribute lookup happens before
     # any alignment object is created.
     alignment_name = CommStruct.getAttribute(
         align_node, BedSeqAlignmentStatsCommStruct.ALIGNMENT_NAME_ATT)

     alignment = SequenceAlignment()
     alignment.name = alignment_name
     alignment.referenceSpecies = bedseq.species

     comm_struct.addSequenceAlignment(bedseq, alignment)
     return alignment
Beispiel #2
0
    def generateRandomMSA(self, msa_length, bedseq_number, max_length,
                          output_commstruct):
        """Attach a randomly generated alignment to every BED sequence.

        Runs the RSAT ``random-seq`` script to write ``bedseq_number`` random
        DNA sequences of length ``int(max_length * 1.5)`` to
        ``random_sequences.txt`` in the component output directory, then
        builds one SequenceAlignment per BED sequence found in
        ``output_commstruct.bedSequencesDict``. In each alignment, the
        ``msa_length`` species all receive the same random sequence, truncated
        to ``bedseq.indexEnd - bedseq.indexStart`` characters.

        Parameters:
            msa_length: number of species added to each alignment. The first
                one is ``output_commstruct.baseSpecies``, the others are named
                "Species1", "Species2", ...
            bedseq_number: number of random sequences requested from RSAT.
            max_length: length used (times 1.5) for the generated sequences.
            output_commstruct: structure providing ``baseSpecies`` and
                ``bedSequencesDict`` and receiving the alignments through
                ``addSequenceAlignment``.

        Raises:
            ExecutionException: if the RSAT command returns a non-zero status,
                if the sequence file cannot be read, or if it contains fewer
                sequences than there are BED sequences to fill.
        """

        # Retrieve method required parameters
        RSAT_PATH = self.component.getParameter(Constants.RSAT_DIR_PARAM)
        dir_path = os.path.join(self.component.outputDir,
                                self.component.getComponentPrefix())
        file_path = os.path.join(dir_path, "random_sequences.txt")

        try:
            # Build the RSAT random-seq command line
            cmd = os.path.join(RSAT_PATH, "perl-scripts/random-seq")
            cmd += " -l " + str(int(max_length * 1.5))
            cmd += " -n " + str(bedseq_number)
            cmd += " -a a:t 0.3 c:g 0.2"
            cmd += " -type DNA"
            cmd += " -format multi"
            cmd += " -o " + file_path

            Log.info(
                "GenerateMSAProcessor.generateMSA : starting random sequence generation. Command used is : "
                + cmd)

            # Execute the command
            cmd_result = commands.getstatusoutput(cmd)
            if cmd_result[0] != 0:
                Log.log(
                    "GenerateMSAProcessor.generateMSA : status returned is :" +
                    str(cmd_result[0]) + " for command '" + cmd + "'")
                Log.log(
                    "GenerateMSAProcessor.generateMSA : command output is = \n"
                    + str(cmd_result[1]))
                raise ExecutionException(
                    "GenerateMSAProcessor.generateMSA : Cannot execute random-seq commands. See logs for more details"
                )

            # Read the output file to get the random sequences. The first
            # whitespace-separated token of each line is kept; blank lines
            # are skipped instead of failing with an IndexError.
            sequence_list = []
            with open(file_path, "r") as sequence_file:
                for line in sequence_file:
                    tokens = line.split()
                    if tokens:
                        sequence_list.append(tokens[0])

            # Generate the species list: the base species followed by
            # (msa_length - 1) placeholder species
            species_list = [output_commstruct.baseSpecies]
            for index in range(msa_length - 1):
                species_list.append("Species" + str(index + 1))

            # Create and fill the MSA for each BED sequence
            count_seq = 0
            for chrom in output_commstruct.bedSequencesDict.keys():
                for bedseq in output_commstruct.bedSequencesDict[chrom]:
                    # Fail with an explicit error rather than a bare
                    # IndexError when RSAT produced too few sequences.
                    if count_seq >= len(sequence_list):
                        raise ExecutionException(
                            "GenerateMSAProcessor.generateMSA : Not enough random sequences in file '"
                            + file_path + "' : " + str(len(sequence_list))
                            + " sequence(s) read")
                    msa = SequenceAlignment()
                    msa.name = bedseq.name + "_1"
                    msa.referenceSpecies = output_commstruct.baseSpecies
                    seq_length = bedseq.indexEnd - bedseq.indexStart
                    sequence = list(sequence_list[count_seq][:seq_length])
                    # NOTE(review): every species receives the very same list
                    # object, as in the original code; confirm that
                    # SequenceAlignment does not mutate it in place.
                    for index in range(msa_length):
                        msa.addSequence(species_list[index], sequence)
                    msa.finalizeSequences()
                    output_commstruct.addSequenceAlignment(bedseq, msa)
                    count_seq += 1

        except IOError as io_exce:
            raise ExecutionException(
                "GenerateMSAProcessor.generateMSA : Unable to save/read random sequences file. From:\n\t---> "
                + str(io_exce))