Ejemplo n.º 1
0
    def __init__(self, input_file, output_dir, index_name, threads):
        """Look up the salmon executable and remember the run settings.

        ``which salmon`` is executed through ``Cmd`` before anything is
        stored on the instance.
        NOTE(review): presumably ``Cmd.run()`` fails when salmon is not on
        the PATH -- confirm against the ``Cmd`` implementation.

        Args:
            input_file: Kept as ``self.input_file``.
            output_dir: Kept as ``self.output_dir``.
            index_name: Kept as ``self.index_name``.
            threads: Kept as ``self.threads``.
        """
        Cmd("which salmon").run()

        self.threads = threads
        self.index_name = index_name
        self.output_dir = output_dir
        self.input_file = input_file
Ejemplo n.º 2
0
 def build_index(self) -> None:
     """Create the salmon index by running ``salmon index``.

     The command line is assembled from ``self.threads``,
     ``self.input_file`` and ``self.index_name`` and executed through
     ``Cmd``.
     """
     logger.debug("Build salmon index.")
     # TODO: Implement check to avoid duplicate runs
     index_command = (
         f"salmon index -p {self.threads} -t {self.input_file} "
         f"-i {self.index_name} --keepDuplicates"
     )
     Cmd(index_command).run()
Ejemplo n.º 3
0
    def __init__(self, input_file, index_name, threads):
        """Look up hisat2 and record whether its index already exists.

        ``which hisat2`` is executed through ``Cmd`` first.
        NOTE(review): presumably ``Cmd.run()`` fails when hisat2 is not on
        the PATH -- confirm against the ``Cmd`` implementation.

        Args:
            input_file: Kept as ``self.input_file``.
            index_name: Prefix of the index files; kept as
                ``self.index_name``.
            threads: Kept as ``self.threads``.
        """
        Cmd("which hisat2").run()

        # The index counts as built when exactly eight files named
        # "<index_name>.*.ht2" exist. The prefix is escaped so that glob
        # metacharacters in it ("[", "*", "?") are matched literally instead
        # of being interpreted as wildcards.
        index_files = glob.glob(f"{glob.escape(str(index_name))}.*.ht2")
        self.index_build_has_run = len(index_files) == 8

        self.input_file = input_file
        self.index_name = index_name
        self.threads = threads
Ejemplo n.º 4
0
 def build_index(self):
     """Create the Hisat2 index unless it is already marked as built.

     Runs ``hisat2-build`` through ``Cmd`` and then sets
     ``self.index_build_has_run`` so later calls are skipped.
     """
     if self.index_build_has_run:
         logger.debug("Skipping index building.")
         return

     logger.debug("Build Hisat2 index.")
     build_command = (
         f"hisat2-build -q -p {self.threads} "
         f"{self.input_file} {self.index_name}"
     )
     Cmd(build_command).run()
     self.index_build_has_run = True
Ejemplo n.º 5
0
 def update_database(self, database_dir, busco_group):
     """Install the dammit databases.

     Both arguments are stored on the instance and then passed to
     ``dammit databases --install`` together with ``self.threads``.

     Args:
         database_dir: Value for ``--database-dir``; kept as
             ``self.database_dir``.
         busco_group: Value for ``--busco-group``; kept as
             ``self.busco_group``.
     """
     logger.info("Update dammit database.")
     self.database_dir, self.busco_group = database_dir, busco_group
     install_command = (
         "dammit databases --install"
         f" --n_threads {self.threads}"
         f" --database-dir {self.database_dir}"
         f" --busco-group {self.busco_group}"
     )
     Cmd(install_command).run()
Ejemplo n.º 6
0
 def run(self, reads):
     """Run the Hisat2 mapping with the given reads.

     The hisat2 output is piped through ``samtools view`` and the resulting
     text is read from the command's stdout.

     Args:
         reads: Sequence with one read file (single end) or two read files
             (paired end).

     Returns:
         A generator over the stdout lines that do not start with "@".

     Raises:
         ValueError: If ``reads`` holds neither one nor two entries.
     """
     # Reject unsupported input before doing any work; this case used to
     # fall through both branches and crash with an UnboundLocalError at
     # ``hisat.run()``.
     if len(reads) not in (1, 2):
         raise ValueError(
             f"Expected 1 (single end) or 2 (paired end) read files, got {len(reads)}."
         )

     logger.debug("Perform Hisat2 mapping.")
     if len(reads) == 1:  # single end reads
         hisat = Cmd(
             f"hisat2 -q --threads {self.threads} -k 1 -x {self.index_name} -U {reads[0]} --no-unal | \
                     samtools view --threads {self.threads} -hS -F 4 -q 1 -O SAM"
         )
     else:  # paired end reads
         hisat = Cmd(
             f"hisat2 -q --threads {self.threads} -k 1 -x {self.index_name} -1 {reads[0]} -2 {reads[1]} --no-unal | \
                         samtools view --threads {self.threads} -hS -F 4 -q 1 -O SAM"
         )
     hisat.run()
     self.mapping_has_run = True
     # Splitting on "\n" leaves one trailing element after the final newline,
     # which [:-1] drops; lines starting with "@" are filtered out as well.
     return (entry for entry in hisat.stdout.split("\n")[:-1]
             if not entry.startswith("@"))
Ejemplo n.º 7
0
 def run(self, graph_file, output_file):
     """Cluster the graph in ``graph_file`` with MCL, writing ``output_file``.

     MCL input: a file in which every line describes one edge as two labels
     (the 'A' and the 'B') followed by a numerical value (the 'C'), all
     separated by white space, e.g.

         A B 20
         A C 10

     MCL output: a file in which every line is one cluster of tab-separated
     labels.

     An already existing ``output_file`` is deleted before MCL is started.
     """
     logger.debug("MCL clustering...")
     if os.path.exists(output_file):
         os.remove(output_file)
     clustering_command = (
         f"mcl {graph_file} -I {self.inflation} --abc -o {output_file} "
         f"-te {self.threads} -resource 4 -V all"
     )
     Cmd(clustering_command).run()
Ejemplo n.º 8
0
    def run_pipe(self, graph_file):
        """Cluster with MCL using stdin as input and stdout as output.

        This is a lot faster than writing and reading a lot of files.

        MCL input: a stream in which every line describes one edge as two
        labels (the 'A' and the 'B') followed by a numerical value (the
        'C'), all separated by white space, e.g.

            A B 20
            A C 10

        MCL output: one cluster of tab-separated labels per line.

        Args:
            graph_file: Passed to ``Cmd.run`` as ``in_stream``.

        Returns:
            The ``stdout`` attribute of the finished command.
        """
        logger.debug("MCL clustering...")

        clustering = Cmd(
            f"mcl - -I {self.inflation} --abc -o - "
            f"-te {self.threads} -resource 4 -V all"
        )
        clustering.run(in_stream=graph_file)
        return clustering.stdout
Ejemplo n.º 9
0
    def run(self, reads: list) -> None:
        """Run the salmon mapping with the given reads.

        Mapping is skipped when
        ``<self.output_dir>/aux_info/eq_classes.txt`` already exists.

        Args:
            reads: List of reads. Either paired end (two entries) or single
                end (one entry).

        Raises:
            ValueError: If mapping has to run and ``reads`` holds neither
                one nor two entries.

        """
        logger.debug("Perform salmon mapping.")
        if os.path.exists(f"{self.output_dir}/aux_info/eq_classes.txt"):
            logger.info("Skipping mapping.")
            return

        if len(reads) == 1:  # single end reads
            read_args = f"--unmatedReads {reads[0]}"
        elif len(reads) == 2:  # paired end reads
            read_args = f"-1 {reads[0]} -2 {reads[1]}"
        else:
            # This case used to leave the command unbound and crash with an
            # UnboundLocalError when it was run.
            raise ValueError(
                f"Expected 1 (single end) or 2 (paired end) read files, got {len(reads)}."
            )

        salmon = Cmd(
            "salmon quant --libType A --validateMappings --dumpEq"
            f" -p {self.threads} -i {self.index_name}"
            f" {read_args} -o {self.output_dir}"
        )
        salmon.run()
Ejemplo n.º 10
0
    def run(self):
        """Annotate every transcriptome in ``self.transcriptomes`` with dammit.

        For each entry the expected ``.dammit.gff3`` and
        ``.dammit.namemap.csv`` paths are recorded in ``self.gff_files`` and
        ``self.namemaps``. ``dammit annotate`` is only started when at least
        one of the two files does not exist yet.
        """
        logger.info("Run dammit annotation.")
        for name, transcriptome in self.transcriptomes.items():
            output_dir = f"{self.output_dir}/{name}"
            result_prefix = (
                f"{output_dir}/{os.path.basename(transcriptome)}.dammit")

            annotation_file = f"{result_prefix}.gff3"
            namemap_file = f"{result_prefix}.namemap.csv"
            self.gff_files[name] = annotation_file
            self.namemaps[name] = namemap_file

            # Both result files are present: nothing left to do for this one.
            if os.path.exists(annotation_file) and os.path.exists(
                    namemap_file):
                continue

            annotate_command = (
                f"dammit annotate {transcriptome} -o {output_dir}"
                f" --database-dir {self.database_dir}"
                f" --busco-group {self.busco_group}"
                f" --n_threads {self.threads}"
            )
            Cmd(annotate_command).run()
Ejemplo n.º 11
0
 def __init__(self, threads, inflation):
     """Store the MCL settings and look up the mcl executable.

     NOTE(review): presumably ``Cmd.run()`` fails when mcl is not on the
     PATH -- confirm against the ``Cmd`` implementation.

     Args:
         threads: Kept as ``self.threads``.
         inflation: Kept as ``self.inflation``.
     """
     self.threads, self.inflation = threads, inflation
     Cmd("which mcl").run()
"""
Benjamin Weeks
CS472 Project 2
November, 1st, 2015

Command Line Interface
"""
from cmd import Cmd

# Script entry: build the command-line interface object and call its run().
# NOTE(review): the standard-library cmd.Cmd offers cmdloop() rather than
# run(); presumably `cmd` resolves to a project-local module here -- confirm.
# The variable below also rebinds the name `cmd` at module level.
cmd = Cmd()
cmd.run()