コード例 #1
0
    def orf_prodigal_train(self):
        """Run Prodigal on ``self.input_file`` with a pre-computed training file.

        Prodigal is started with ``-t self.training_file`` and is asked to
        write four files into ``self.working_directory``, each named after
        the basename of the input file:

        * ``<name>.temp.contig.fsa``      (``-a``, translations)
        * ``<name>.temp.contigToORF.fsa`` (``-d``, nucleotide sequences)
        * ``<name>.temp.potentialGenes``  (``-s``, potential genes)
        * ``<name>.temp.draft``           (``-o``, main output; removed
          afterwards when ``self.clean`` is set)

        The command is passed to the OS as an argument list (no shell), so
        paths containing spaces or shell metacharacters are handed to
        Prodigal unchanged. Prodigal's stderr is discarded.

        Raises:
            SystemExit: with status 1 when the training file does not exist.
        """
        # Local import so this method does not depend on the file's import block.
        import subprocess

        training_file = self.training_file

        if not os.path.exists(training_file):
            logger.error("Missing training file: {} ".format(training_file))
            # A missing training file is fatal; exit with a non-zero status so
            # calling scripts can detect the failure.
            raise SystemExit(1)

        filename = os.path.basename(self.input_file)

        def temp_path(suffix):
            # All outputs share the "<input basename>.temp.<suffix>" pattern.
            return os.path.join(self.working_directory,
                                "{}.temp.{}".format(filename, suffix))

        draft_file = temp_path("draft")
        cmd = [
            "prodigal", "-q", "-m",
            "-a", temp_path("contig.fsa"),
            "-i", self.input_file,
            "-o", draft_file,
            "-d", temp_path("contigToORF.fsa"),
            "-s", temp_path("potentialGenes"),
            "-t", training_file,
        ]

        try:
            return_code = subprocess.call(cmd, stderr=subprocess.DEVNULL)
        except OSError as err:
            # e.g. prodigal is not installed / not on PATH
            logger.error("Unable to run prodigal: {}".format(err))
        else:
            if return_code != 0:
                logger.error(
                    "prodigal exited with status {}".format(return_code))

        if self.clean == True:
            os.remove(draft_file)
コード例 #2
0
    def worker(self, input_fasta):
        """Run Prodigal in ``-p meta`` mode on a single FASTA file.

        Output files are written to ``self.working_directory`` and are named
        after the basename of ``input_fasta``:
        ``<name>.temp.contigToORF.fsa`` (``-d``), ``<name>.temp.contig.fsa``
        (``-a``), ``<name>.temp.draft`` (``-o``) and
        ``<name>.temp.potentialGenes`` (``-s``).

        The command is executed without a shell, so file names containing
        spaces or shell metacharacters are passed to Prodigal verbatim.
        Prodigal's stderr is discarded.

        Args:
            input_fasta: path of the FASTA file to process.
        """
        # Local imports so this method does not depend on the file's import block.
        import logging
        import subprocess

        tmp_name = os.path.basename(os.path.abspath(input_fasta))
        prefix = "{wd}/{tmp_name}.temp".format(
            wd=self.working_directory, tmp_name=tmp_name)

        cmd = [
            "prodigal", "-p", "meta", "-q", "-m",
            "-i", input_fasta,
            "-d", prefix + ".contigToORF.fsa",
            "-a", prefix + ".contig.fsa",
            "-o", prefix + ".draft",
            "-s", prefix + ".potentialGenes",
        ]

        try:
            subprocess.call(cmd, stderr=subprocess.DEVNULL)
        except OSError as err:
            # Keep the original "do not raise" contract when prodigal cannot
            # be launched, but leave a trace of the failure.
            logging.getLogger(__name__).error(
                "Unable to run prodigal: {}".format(err))
コード例 #3
0
    def orf_prodigal(self):
        """Run Prodigal to find open reading frames in ``self.input_file``.

        The sequence statistics from ``self.min_max_sequence_length()`` are
        logged first. When the input holds more than one sequence and
        ``self.split_prodigal_jobs`` is set, the work is delegated to
        ``self.orf_prodigal_multi()`` and nothing else is done here.

        Otherwise Prodigal is run once, with ``-n -p single`` by default or
        ``-p meta`` when ``self.low_quality`` is set or the shortest sequence
        is under 20000 in length. It writes ``<name>.temp.contig.fsa``
        (``-a``), ``<name>.temp.draft`` (``-o``),
        ``<name>.temp.contigToORF.fsa`` (``-d``) and
        ``<name>.temp.potentialGenes`` (``-s``) into
        ``self.working_directory``; the draft file is removed afterwards when
        ``self.clean`` is set.

        The command is executed without a shell, so paths containing spaces
        or shell metacharacters are passed to Prodigal verbatim. Prodigal's
        stderr is discarded.
        """
        # Local import so this method does not depend on the file's import block.
        import subprocess

        (minimum_sequence_length, maximum_sequence_length,
         number_of_sequences) = self.min_max_sequence_length()
        logger.info(
            "minimum sequence length: {}, maximun sequence length {}, number of sequences: {}"
            .format(minimum_sequence_length, maximum_sequence_length,
                    number_of_sequences))

        if number_of_sequences > 1 and self.split_prodigal_jobs == True:
            # TODO validate if fasta file doesn't contain gaps
            self.orf_prodigal_multi()
            return

        if self.low_quality == True or minimum_sequence_length < 20000:
            quality = ["-p", "meta"]
        else:
            quality = ["-n", "-p", "single"]

        filename = os.path.basename(self.input_file)

        def temp_path(suffix):
            # All outputs share the "<input basename>.temp.<suffix>" pattern.
            return os.path.join(self.working_directory,
                                "{}.temp.{}".format(filename, suffix))

        draft_file = temp_path("draft")
        cmd = [
            "prodigal", "-q", "-m",
            "-a", temp_path("contig.fsa"),
            "-i", self.input_file,
            "-o", draft_file,
            "-d", temp_path("contigToORF.fsa"),
            "-s", temp_path("potentialGenes"),
        ] + quality

        try:
            return_code = subprocess.call(cmd, stderr=subprocess.DEVNULL)
        except OSError as err:
            # e.g. prodigal is not installed / not on PATH
            logger.error("Unable to run prodigal: {}".format(err))
        else:
            if return_code != 0:
                logger.error(
                    "prodigal exited with status {}".format(return_code))

        if self.clean == True:
            os.remove(draft_file)
コード例 #4
0
    def orf_prodigal(self):
        """Run Prodigal to find open reading frames in ``self.input_file``.

        The minimum and maximum sequence lengths reported by
        ``self.min_max_sequence_length()`` are logged. Prodigal is then run
        with ``-n -p single`` by default, or with ``-p meta`` when
        ``self.low_quality`` is set or the shortest sequence is under 20000
        in length.

        Prodigal writes ``<name>.temp.contig.fsa`` (``-a``),
        ``<name>.temp.draft`` (``-o``), ``<name>.temp.contigToORF.fsa``
        (``-d``) and ``<name>.temp.potentialGenes`` (``-s``) into
        ``self.working_directory``; the draft file is removed afterwards when
        ``self.clean`` is set.

        The command is executed without a shell, so paths containing spaces
        or shell metacharacters are passed to Prodigal verbatim. Prodigal's
        stderr is discarded.
        """
        # Local import so this method does not depend on the file's import block.
        import subprocess

        _min, _max = self.min_max_sequence_length()
        logger.info(
            "minimum sequence length: {}, maximun sequence length {}".format(
                _min, _max))

        if self.low_quality == True or _min < 20000:
            quality = ["-p", "meta"]
        else:
            quality = ["-n", "-p", "single"]

        filename = os.path.basename(self.input_file)

        def temp_path(suffix):
            # All outputs share the "<input basename>.temp.<suffix>" pattern.
            return os.path.join(self.working_directory,
                                "{}.temp.{}".format(filename, suffix))

        draft_file = temp_path("draft")
        cmd = [
            "prodigal", "-q", "-m",
            "-a", temp_path("contig.fsa"),
            "-i", self.input_file,
            "-o", draft_file,
            "-d", temp_path("contigToORF.fsa"),
            "-s", temp_path("potentialGenes"),
        ] + quality

        # Log the command line that is about to be executed.
        logger.info(" ".join(cmd))
        try:
            return_code = subprocess.call(cmd, stderr=subprocess.DEVNULL)
        except OSError as err:
            # e.g. prodigal is not installed / not on PATH
            logger.error("Unable to run prodigal: {}".format(err))
        else:
            if return_code != 0:
                logger.error(
                    "prodigal exited with status {}".format(return_code))

        if self.clean == True:
            os.remove(draft_file)