Ejemplo n.º 1
0
    def step_sample_initiation(self):
        """Check that every target carries the input slots this step needs.

        Runs after sample_data has been set. The targets are the single
        "project_data" entry when params["scope"] is "project", or every name
        listed in sample_data["samples"] when it is "sample".

        When 'use_fasta' is present in params, each target must have a
        "fasta.nucl" slot. Otherwise the fastq slots are inspected: having
        exactly one of "fastq.F"/"fastq.R" is rejected, and so is having all
        of "fastq.F", "fastq.R" and "fastq.S" together.

        Raises:
            AssertionExcept: on an unknown scope or a failed slot check.
        """
        scope = self.params["scope"]
        if scope == "project":
            targets = ["project_data"]
        elif scope == "sample":
            targets = self.sample_data["samples"]
        else:
            raise AssertionExcept(
                "'scope' must be either 'sample' or 'project'")

        mates = {"fastq.F", "fastq.R"}
        for target in targets:
            if 'use_fasta' in self.params:
                if "fasta.nucl" not in self.sample_data[target]:
                    raise AssertionExcept("No Nucleotide FASTA in: \n", target)
                continue

            slots = set(self.sample_data[target].keys())
            # A lone mate file is neither a proper pair nor a single-end run.
            if len(mates & slots) == 1:
                raise AssertionExcept(
                    "Sample has only forward or reverse reads. It must have either pairs or single reads\n",
                    target)

            # Both mates plus single reads: mixed input is rejected.
            if mates <= slots and "fastq.S" in slots:
                raise AssertionExcept(
                    "Kaiju is not defined for mixed paired and single reads\n",
                    target)
Ejemplo n.º 2
0
    def step_specific_init(self):
        """Validate redirected parameters and set per-step defaults.

        Called on initiation, so only parameters are tested here; sample data
        is not examined.

        Sets self.shell, self.file_tag and self.levels. The '-r' redirect,
        when given, is consumed (removed from redir_params) and stored as the
        list self.levels; it may be a list or a comma-separated string.
        Without '-r' a default list of ranks is used. A missing 'scope'
        parameter defaults to "sample".

        Raises:
            AssertionExcept: if '-t' or '-n' is missing from the redirects,
                or if '-r' is neither a string nor a list.
        """
        self.shell = "bash"
        self.file_tag = ".kaiju.out"

        redirects = self.params["redir_params"]
        if "-t" not in redirects:
            raise AssertionExcept(
                "Please supply Name of nodes.dmp file via '-t' argument (in redirects)"
            )
        if "-n" not in redirects:
            raise AssertionExcept(
                "Please supply Name of names.dmp file via '-n' argument (in redirects)"
            )

        if "-r" in redirects:
            levels = redirects["-r"]
            if isinstance(levels, list):
                self.levels = levels
            elif isinstance(levels, str):
                # Raw string: "\s" is not a valid str escape. Stripping first
                # keeps surrounding padding out of the first and last names.
                self.levels = re.split(r"\s*,\s*", levels.strip())
            else:
                raise AssertionExcept(
                    "Unknown format of '-r' redirects. Must be either string or list"
                )
            # '-r' is handled by this step, so it must not stay in the redirects.
            redirects.pop("-r")
        else:
            self.levels = [
                "phylum", "class", "order", "family", "genus", "species"
            ]

        if "scope" not in self.params:
            self.params["scope"] = "sample"
Ejemplo n.º 3
0
    def step_specific_init(self):
        """Load the qiime2 argument index and resolve plugin/method.

        Reads "qiime2_arguments_index.yml" from the directory of this module
        into self.qiime_args, then splits params["script_path"] on whitespace
        into self.qiime_path, self.plugin and self.method (first three
        tokens). The plugin and the method must both appear in the index; the
        method's entry is stored in self.method_index.

        Raises:
            AssertionExcept: if script_path has fewer than three tokens, or
                the plugin or method is not found in the index.
        """
        self.shell = "bash"

        # The index file sits next to this module.
        index_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            "qiime2_arguments_index.yml")
        with open(index_path, "r") as fileh:
            # NOTE(review): yaml.Loader can construct arbitrary Python
            # objects. Kept as-is because the file is read from this module's
            # own directory -- consider yaml.SafeLoader if it is plain data.
            self.qiime_args = yaml.load(fileh.read(), Loader=yaml.Loader)

        # Whitespace split tolerates repeated spaces, which split(" ") turned
        # into empty plugin/method names.
        tokens = self.params["script_path"].split()
        if len(tokens) < 3:
            raise AssertionExcept(
                "'script_path' must be of the form '<qiime path> <plugin> <method>'")
        self.qiime_path, self.plugin, self.method = tokens[:3]

        # Check plugin and method are recognized
        if self.plugin not in self.qiime_args:
            raise AssertionExcept(
                "Plugin '{plugin}' is not one of: {plugins}!".format(
                    plugin=self.plugin,
                    plugins=", ".join(self.qiime_args)))
        plugin_index = self.qiime_args[self.plugin]
        if self.method not in plugin_index:
            raise AssertionExcept(
                "Method '{method}' is not one of: {methods}!".format(
                    method=self.method,
                    methods=", ".join(plugin_index)))

        # Get argument index for method
        self.method_index = plugin_index[self.method]
Ejemplo n.º 4
0
    def step_sample_initiation(self):
        """Determine the scope if needed and check a nucleotide fasta exists.

        When 'scope' is not given in params it is inferred: "sample" if there
        is at least one sample and every sample has a "fasta.nucl" slot,
        "project" if sample_data["project_data"] has one. Having both, or
        neither, is an error.

        Afterwards the chosen scope is validated: every sample (sample scope)
        or the project data (project scope) must hold "fasta.nucl".

        Raises:
            AssertionExcept: if the scope cannot be inferred unambiguously or
                the required fasta slot is missing.
        """
        if "scope" not in self.params:
            samples = self.sample_data["samples"]
            # all() is True for an empty sequence, so an empty sample list
            # must not count as "all samples have a fasta".
            sample_has_nucl = bool(samples) and all(
                "fasta.nucl" in self.sample_data[x] for x in samples)
            # Look in "project_data", the same slot the validation below
            # reads, instead of the top level of sample_data.
            project_has_nucl = (
                "fasta.nucl" in self.sample_data.get("project_data", {}))

            if sample_has_nucl and project_has_nucl:
                raise AssertionExcept("Both sample and project fasta exists. You must specify 'scope'")
            elif sample_has_nucl:
                self.params["scope"] = "sample"
            elif project_has_nucl:
                self.params["scope"] = "project"
            else:
                raise AssertionExcept("No fasta exists in either samples or project!")

        if self.params["scope"] == "sample":
            # Assert that all samples have nucleotide fasta files:
            for sample in self.sample_data["samples"]:
                try:
                    self.sample_data[sample]["fasta.nucl"]
                except KeyError:
                    raise AssertionExcept("Sample does not have a fasta file\n", sample)
        elif self.params["scope"] == "project":
            try:
                self.sample_data["project_data"]["fasta.nucl"]
            except KeyError:
                raise AssertionExcept("Project does not have a fasta file\n")
Ejemplo n.º 5
0
    def step_specific_init(self):
        """Validate the scope parameters and normalise 'type' at initiation.

        Only parameters are tested here; sample data is not examined.

        Sets self.shell and self.file_tag. 'scope' must be "sample" or
        "project". 'src_scope' is optional and defaults to the value of
        'scope'; when given it must also be "sample" or "project", and a
        "project" src_scope cannot be combined with a "sample" scope.
        'type' defaults to ["fastq", "fasta"]; a single string is wrapped in
        a list.

        Raises:
            AssertionExcept: on an invalid 'scope' or 'src_scope', or on the
                unsupported project-to-sample combination.
        """
        self.shell = "bash"
        self.file_tag = ".msh"

        valid_scopes = ["sample", "project"]
        if self.params["scope"] not in valid_scopes:
            raise AssertionExcept("'scope' must be either 'sample' or 'project'")

        if "src_scope" in self.params:
            if self.params["src_scope"] not in valid_scopes:
                # Name the offending parameter: this is about src_scope.
                raise AssertionExcept("'src_scope' must be either 'sample' or 'project'")
            if self.params["src_scope"] == "project" and self.params["scope"] == "sample":
                raise AssertionExcept("Project 'src_scope' not defined for 'scope' sample.")
        else:
            self.params["src_scope"] = self.params["scope"]

        if "type" not in self.params:
            self.params["type"] = ["fastq", "fasta"]
        elif isinstance(self.params["type"], str):
            self.params["type"] = [self.params["type"]]
Ejemplo n.º 6
0
    def step_sample_initiation(self):
        """Remove the file type named by 'type2del' from the sample data.

        Requires both 'type2del' and 'scope' in params. With scope "sample"
        the entry is deleted from every sample in sample_data["samples"];
        with scope "project" it is deleted from sample_data itself.

        Raises:
            AssertionExcept: if a required parameter is missing, the scope is
                not 'sample' or 'project', or the type to delete is absent.
        """
        params = self.params
        if "type2del" not in params:
            raise AssertionExcept("You must pass a 'type2del' param!")
        doomed = params["type2del"]

        if "scope" not in params:
            raise AssertionExcept("You must pass a 'scope' param!")
        scope = params["scope"]
        if scope not in ("sample", "project"):
            raise AssertionExcept(
                "'scope' param must be 'sample' or 'project'")

        if scope == "sample":
            for name in self.sample_data["samples"]:
                slots = self.sample_data[name]
                if doomed in slots:
                    del slots[doomed]
                else:
                    raise AssertionExcept(
                        "type %s does not exist for sample." % doomed,
                        name)
            return

        # NOTE(review): project scope looks at the top level of sample_data,
        # not at sample_data["project_data"] -- confirm this is intended.
        if doomed in self.sample_data:
            del self.sample_data[doomed]
        else:
            raise AssertionExcept("type %s does not exist for project." %
                                  doomed)
Ejemplo n.º 7
0
    def step_specific_init(self):
        """Validate parameters and set per-step defaults at initiation.

        Only parameters are tested here. Sample data is deliberately not
        touched: this is the wrong place for sample data testing.

        Sets self.shell and self.file_tag. Requires '--db' among the
        redirected parameters. 'scope' must be "sample" or "project"; when
        absent a warning is written and "sample" is used. For backwards
        compatibility a 'ktImportTaxonomy_path' parameter is converted into a
        'ktImportTaxonomy' block with "path" and empty "redirects".

        Raises:
            AssertionExcept: if '--db' is missing or 'scope' is invalid.
        """
        self.shell = "bash"
        self.file_tag = ".kraken.out"

        # Checking this once and then applying to each sample:
        if "--db" not in self.params["redir_params"]:
            raise AssertionExcept("--db not set.\n")

        if "scope" not in self.params:
            self.write_warning("No 'scope' specified. Using 'sample' scope")
            self.params["scope"] = "sample"
        elif self.params["scope"] not in ("project", "sample"):
            # The former no-op loop over self.sample_data["samples"] was
            # dropped: it checked nothing and read sample data at init time.
            raise AssertionExcept(
                "'scope' must be either 'sample' or 'project'")

        # For backwards compatibility:
        if "ktImportTaxonomy_path" in self.params:
            self.params["ktImportTaxonomy"] = {
                "path": self.params["ktImportTaxonomy_path"],
                "redirects": "",
            }
Ejemplo n.º 8
0
    def step_sample_initiation(self):
        """Settle the scope and check sample-scope inputs.

        If 'scope' is given it must be "project" or "sample". If it is not
        given it is guessed: "project" when sample_data["project_data"] has a
        "fasta.nucl" slot, otherwise "sample".

        In sample scope every sample must have a "fasta.nucl" slot. Nothing
        further is checked in project scope.

        Raises:
            AssertionExcept: on an invalid scope, or a sample without a
                nucleotide fasta.
        """
        if "scope" in self.params:
            # Check scope is legitimate
            if self.params["scope"] not in ("project", "sample"):
                raise AssertionExcept(
                    "Scope must be either 'sample' or 'project'\n")
        else:
            # No explicit scope: prefer the project when it has a nucl fasta.
            try:
                self.sample_data["project_data"]["fasta.nucl"]
                guessed = "project"
            except KeyError:
                guessed = "sample"
            self.params["scope"] = guessed

        if self.params["scope"] != "sample":
            return

        for name in self.sample_data["samples"]:
            try:
                self.sample_data[name]["fasta.nucl"]
            except KeyError:
                raise AssertionExcept(
                    "Sample does not have a nucl fasta defined. Can't build index\n",
                    name)
Ejemplo n.º 9
0
    def step_sample_initiation(self):
        """Check each target has a fasta slot matching self.dbtype.

        'scope' is mandatory and must be "project" (the "project_data" entry
        is checked) or "sample" (every sample is checked). The required slot
        is "fasta." followed by self.dbtype.

        Raises:
            AssertionExcept: if 'scope' is missing or invalid, or a target
                lacks the required slot.
        """
        if "scope" not in self.params:
            raise AssertionExcept("No 'scope' specified.")

        scope = self.params["scope"]
        if scope == "sample":
            targets = self.sample_data["samples"]
        elif scope == "project":
            targets = ["project_data"]
        else:
            raise AssertionExcept(
                "'scope' must be either 'sample' or 'project'")

        for target in targets:
            # The slots are named fasta.nucl / fasta.prot, hence the prefix.
            try:
                self.sample_data[target]["fasta." + self.dbtype]
            except KeyError:
                raise AssertionExcept(
                    "No file exists in sample for specified -dbtype (%s)\n" %
                    self.dbtype, target)
Ejemplo n.º 10
0
 def step_sample_initiation(self):
     """Validate scope and fasta availability, with or without a reference.

     With a 'reference' parameter: a missing or unrecognised 'scope' is set
     to "project"; a "sample" scope is kept but a warning is written.

     Without a 'reference' parameter: 'scope' is mandatory and must be
     "sample" or "project", and a "fasta.nucl" slot must exist in every
     sample or in the project data respectively.

     Raises:
         AssertionExcept: without a reference, if the scope is missing or
             invalid or the required "fasta.nucl" slot is absent.
     """
     scope_help = "Please supply a scope parameter: either 'sample' or 'project'!"
     has_scope = "scope" in self.params

     if "reference" in self.params:
         if not has_scope or self.params["scope"] not in ("sample",
                                                          "project"):
             self.params["scope"] = "project"
         elif self.params["scope"] == "sample":
             self.write_warning(
                 "It makes no sense to define a sample-scope external reference!"
             )
         return

     if not has_scope:
         raise AssertionExcept(scope_help)

     scope = self.params["scope"]
     if scope == "sample":
         for name in self.sample_data["samples"]:
             if "fasta.nucl" not in self.sample_data[name]:
                 raise AssertionExcept(
                     "No fasta.nucl defined for sample", name)
     elif scope == "project":
         if "fasta.nucl" not in self.sample_data["project_data"]:
             raise AssertionExcept("No fasta.nucl defined for project")
     else:
         raise AssertionExcept(scope_help)
Ejemplo n.º 11
0
    def step_specific_init(self):
        """Validate BUSCO parameters and tidy up the redirected arguments.

        Sets self.shell and self.file_tag. 'scope' is mandatory. The
        input/output/tmp redirects ('-i', '--in', '-o', '--out', '-t',
        '--tmp') are removed with a warning each. A redirected '-m' is
        renamed to '--mode', and '--mode' must then be present.

        Raises:
            AssertionExcept: if 'scope' is missing or no mode was supplied.
        """
        self.shell = "bash"
        self.file_tag = "BUSCO"

        if "scope" not in self.params:
            raise AssertionExcept(
                "Please specify a 'scope': Either 'sample' or 'project'.")

        redirects = self.params["redir_params"]
        for managed in ("-i", "--in", "-o", "--out", "-t", "--tmp"):
            if managed not in redirects:
                continue
            del redirects[managed]
            self.write_warning(
                "You are not supposed to specify %s in redirects. We set it automatically"
                % managed)

        # Fold the short option into its long form so only '--mode' is used.
        if "-m" in redirects:
            redirects["--mode"] = redirects.pop("-m")

        # A mode is required: it is used to choose fasta.prot or fasta.nucl.
        if "--mode" not in redirects:
            raise AssertionExcept(
                "\nYou must specify a 'mode': \n"
                "- geno or genome, for genome assemblies (DNA)\n"
                "- tran or transcriptome, for transcriptome assemblies (DNA)\n"
                "- prot or proteins, for annotated gene sets (protein)\n\n")
Ejemplo n.º 12
0
    def step_sample_initiation(self):
        """Check each target can have a bowtie2 index built for it.

        The targets are the "project_data" entry (project scope) or every
        sample (sample scope). Each target must have a "fasta.nucl" slot and
        must not already have a "bowtie2.index" slot.

        Raises:
            AssertionExcept: on an invalid scope, a missing nucleotide fasta,
                or an index that already exists.
        """
        scope = self.params["scope"]
        if scope == "sample":
            targets = self.sample_data["samples"]
        elif scope == "project":
            targets = ["project_data"]
        else:
            raise AssertionExcept("'scope' must be either 'sample' or 'project'")

        for target in targets:
            try:
                slots = self.sample_data[target]
                slots["fasta.nucl"]
            except KeyError:
                raise AssertionExcept("No 'fasta.nucl' defined. Can't build index\n", target)

            if "bowtie2.index" in slots:
                raise AssertionExcept("bowtie2 index already exists for sample.\n", target)
Ejemplo n.º 13
0
    def set_bed(self, action_numbered, sample):
        """Resolve the 'bed' value to use for one action.

        Args:
            action_numbered: key into self.params holding the action's
                settings.
            sample: sample name, used when the action's 'bed' is "sample".

        Returns:
            "" when the action's settings are empty, not a dict, or have no
            'bed' entry. Otherwise the sample's "bed" slot for "sample", the
            project's "bed" slot for "project", or the configured value
            itself.

        Raises:
            AssertionExcept: if the requested sample/project "bed" slot does
                not exist, or 'bed' is set to an empty value.
        """
        settings = self.params[action_numbered]
        if not (settings and isinstance(settings, dict)
                and "bed" in settings):
            return ""

        choice = settings["bed"]
        if choice == "sample":
            if "bed" not in self.sample_data[sample]:
                raise AssertionExcept(
                    "No 'bed' defined for sample in '{action}'".format(
                        action=action_numbered), sample)
            return self.sample_data[sample]["bed"]

        if choice == "project":
            if "bed" not in self.sample_data["project_data"]:
                raise AssertionExcept(
                    "No 'bed' defined for project in '{action}'".format(
                        action=action_numbered))
            return self.sample_data["project_data"]["bed"]

        if not choice:
            raise AssertionExcept(
                "Value for 'bed' in {action} must be 'sample', 'project' or a path"
                .format(action=action_numbered))

        # Anything else is returned as given.
        return choice
Ejemplo n.º 14
0
    def step_sample_initiation(self):
        """Derive the fasta type from '--mode' and check that it is available.

        The redirected '--mode' value selects self.type: "nucl" for
        geno/genome/tran/transcriptome, "prot" for prot/proteins. A slot
        named "fasta.<type>" must then exist in every sample (sample scope)
        or in the project data (project scope).

        Raises:
            AssertionExcept: on an invalid mode or scope, or when the
                required fasta slot is missing.
        """
        mode = self.params["redir_params"]["--mode"]
        if mode in ('geno', 'genome', 'tran', 'transcriptome'):
            self.type = "nucl"
        elif mode in ('prot', 'proteins'):
            self.type = "prot"
        else:
            raise AssertionExcept(
                "The value you passed to --mode ({mode}) is not a valid value".
                format(mode=self.params["redir_params"]["--mode"]))

        wanted = "fasta.%s" % self.type
        scope = self.params["scope"]
        if scope == "sample":
            for name in self.sample_data["samples"]:
                if wanted not in self.sample_data[name]:
                    raise AssertionExcept(
                        "It seems there is no sample-wide %s fasta file." %
                        self.type, name)
        elif scope == "project":
            if wanted not in self.sample_data["project_data"]:
                raise AssertionExcept(
                    "It seems there is no project-wide %s fasta file." %
                    self.type)
        else:
            raise AssertionExcept(
                "'scope' must be either 'sample' or 'project'.")
Ejemplo n.º 15
0
    def step_sample_initiation(self):
        """Check that a bwa index can be built for the chosen scope.

        Project scope: sample_data["project_data"] must have a "fasta.nucl"
        slot and no "bwa_index" may exist yet. Sample scope: the same two
        conditions are checked for every sample.

        Raises:
            AssertionExcept: on an invalid scope, a missing nucleotide fasta,
                or an index that already exists.
        """
        scope = self.params["scope"]

        if scope == "project":
            try:
                project = self.sample_data["project_data"]
                project["fasta.nucl"]
            except KeyError:
                # No sample is involved here; the previous code passed an
                # undefined 'sample' name and died with NameError instead.
                raise AssertionExcept(
                    "Project does not have a nucl fasta defined. Check your 'scope'\n")
            # The top-level lookup is kept from the original; "project_data"
            # is also checked because that is where the project fasta is
            # read from. NOTE(review): confirm which slot the index is
            # actually written to.
            if "bwa_index" in self.sample_data or "bwa_index" in project:
                raise AssertionExcept(
                    "bwa index already seems to exist.\n")

        elif scope == "sample":
            for sample in self.sample_data["samples"]:
                try:
                    self.sample_data[sample]["fasta.nucl"]
                except KeyError:
                    raise AssertionExcept(
                        "Sample does not have a nucl fasta defined. Can't build index\n",
                        sample)
                if "bwa_index" in self.sample_data[sample]:
                    raise AssertionExcept(
                        "bwa index already exists for sample.\n", sample)

        else:
            raise AssertionExcept("Scope must be either 'sample' or 'project'")
 def step_sample_initiation(self):
     """Check the project-level inputs exist once sample_data is set.

     sample_data["project_data"] must contain both a 'fasta.nucl' and an
     'isoform.raw_counts' slot; they are checked in that order.

     Raises:
         AssertionExcept: naming the first missing project-level file.
     """
     required = (
         ('fasta.nucl', "No Project level FASTA Nucleotide File was Found"),
         ('isoform.raw_counts',
          "No Project level Isoform Raw Counts File was Found"),
     )
     for slot, complaint in required:
         if slot not in self.sample_data["project_data"]:
             raise AssertionExcept(complaint)
Ejemplo n.º 17
0
    def step_specific_init(self):
        """Separate the executable path from the 'mod' given in script_path.

        params["script_path"] is split on whitespace. The first token is
        kept as the new script_path; the second token, if any, is taken as
        the mod. Any further tokens are discarded. The mod must be supplied
        exactly once: either as the 'mod' parameter or as the second token.

        Sets self.shell, rewrites params["script_path"] and may set
        params["mod"].

        Raises:
            AssertionExcept: if the mod is given both ways, or not at all.
        """
        self.shell = "bash"

        # str.split() ignores leading/trailing whitespace. The former
        # re.split("\s+", ...) yielded an empty first token for a padded
        # path (so the path became the mod) and used an invalid "\s" escape.
        tokens = self.params["script_path"].split()
        mod = tokens[1] if len(tokens) > 1 else None

        # Reduce script_path to path only. The mod is treated separately.
        self.params["script_path"] = tokens[0] if tokens else ""

        # Make sure mod is defined only once, and if passed through
        # script_path, add it to params.
        if "mod" in self.params:
            if mod:
                raise AssertionExcept(
                    "You supplied mod as parameter as well as in script path.")
        elif mod:
            self.params["mod"] = mod
        else:
            raise AssertionExcept(
                "You must supply a 'mod' parameter or add the mod to the end of the script path.\n\te.g. /path/to/bwa mem"
            )
Ejemplo n.º 18
0
    def step_sample_initiation(self):
        """Check that every sample has reads and that the scope is valid.

        Runs after sample_data has been set. Every sample must hold at least
        one of the "fastq.F", "fastq.R" or "fastq.S" slots. 'scope' must be
        present and be either "project" or "sample".

        Raises:
            AssertionExcept: for a sample without read files, or a missing
                or invalid 'scope'.
        """
        read_slots = {"fastq.F", "fastq.R", "fastq.S"}

        # Assert that all samples have reads files:
        for name in self.sample_data["samples"]:
            if read_slots.isdisjoint(self.sample_data[name].keys()):
                raise AssertionExcept("No read files\n", name)

        if "scope" not in self.params:
            raise AssertionExcept("No 'scope' specified.")

        if self.params["scope"] not in ("project", "sample"):
            raise AssertionExcept(
                "'scope' must be either 'sample' or 'project'")
Ejemplo n.º 19
0
    def step_specific_init(self):
        """Validate parameters and prepare the optional 'blast_merge' block.

        Only parameters are tested here; sample data is not examined.

        Sets self.shell and self.file_tag. The old 'blast_merge_path'
        parameter is rejected. When a 'blast_merge' block is present it must
        contain "path" and "redirects":

        * a None "path" is derived from params["script_path"] by replacing
          "parse_blast" with "compare_blast_parsed_reports";
        * "redirects" that are not already a string are rendered as
          "key value" pairs joined by line continuations;
        * the final "redirects" string is wrapped so it starts on a new,
          tab-indented line and ends with a line continuation.

        Raises:
            AssertionExcept: if 'blast_merge_path' is used, or a required key
                is missing while processing the 'blast_merge' block.
        """
        self.shell = "bash"
        self.file_tag = ".blast.parsed"

        if "blast_merge_path" in self.params:
            raise AssertionExcept(
                "Please convert 'blast_merge_path' into the new path/redirects format!"
            )

        if "blast_merge" in self.params:
            try:
                merge = self.params["blast_merge"]
                # Testing existence of "path" and, if empty, extracting from
                # main script_path
                if merge["path"] is None:
                    merge["path"] = re.sub(
                        pattern="parse_blast",
                        repl="compare_blast_parsed_reports",
                        string=self.params["script_path"])
                # Testing existence and stringifying redirects
                if not isinstance(merge["redirects"], str):
                    # str() lets non-string values (e.g. numbers) through;
                    # plain concatenation raised TypeError on them. Falsy
                    # values are still rendered as an empty string.
                    merge["redirects"] = " \\\n\t".join([
                        "{} {}".format(key, str(val) if val else "")
                        for key, val in merge["redirects"].items()
                    ])
                merge["redirects"] = "\n\t{redirs} \\".format(
                    redirs=merge["redirects"])
            except KeyError:
                raise AssertionExcept(
                    "Please add path and redirects to `blast_merge` block")
Ejemplo n.º 20
0
    def step_sample_initiation(self):
        """Check inputs for the step once sample_data is available.

        The targets are the "project_data" entry (project scope) or every
        sample (sample scope). Each target must have a "fasta.nucl" slot.
        When this is a 'Predict' run -- a "Predict" parameter exists or the
        script path contains "Predict" -- each target must also have a
        "transdecoder.dir" slot.

        Raises:
            AssertionExcept: on an invalid scope, a missing "fasta.nucl", or
                a 'Predict' run without a "transdecoder.dir" slot.
        """
        scope = self.params["scope"]
        if scope == "sample":
            targets = self.sample_data["samples"]
        elif scope == "project":
            targets = ["project_data"]
        else:
            raise AssertionExcept(
                "'scope' must be either 'sample' or 'project'")

        for target in targets:
            slots = self.sample_data[target]
            if "fasta.nucl" not in slots:
                raise AssertionExcept("No 'fasta.nucl' defined!", target)

            predicting = "Predict" in self.params or re.search(
                "Predict", self.params["script_path"])
            # 'Predict' needs the directory recorded in "transdecoder.dir".
            if predicting and "transdecoder.dir" not in slots:
                raise AssertionExcept(
                    "Please include a 'LongOrf' TransDecoder step before the 'Predict' step.\n"
                    "You can include 'blatsp' and 'hmmscan' steps in between to make it effective"
                )
Ejemplo n.º 21
0
    def step_sample_initiation(self):
        """Check that BLAST results exist for the chosen scope.

        'scope' is mandatory. In project scope the project data, and in
        sample scope every sample, must hold at least one of the
        "blast.nucl" or "blast.prot" slots.

        Raises:
            AssertionExcept: if 'scope' is missing or invalid, or no BLAST
                results are found.
        """
        if "scope" not in self.params:
            raise AssertionExcept("No 'scope' specified.")

        blast_slots = ("blast.nucl", "blast.prot")
        scope = self.params["scope"]

        if scope == "project":
            project = self.sample_data["project_data"]
            if not any(slot in project for slot in blast_slots):
                raise AssertionExcept(
                    "There are no project BLAST results.\n")
        elif scope == "sample":
            # Every sample needs a 'blast' file type in sample_data
            for name in self.sample_data["samples"]:
                if not any(slot in self.sample_data[name]
                           for slot in blast_slots):
                    raise AssertionExcept("There are no BLAST results.\n",
                                          name)
        else:
            raise AssertionExcept(
                "'scope' must be either 'sample' or 'project'")
Ejemplo n.º 22
0
    def step_sample_initiation(self):
        """Check each target has the variants file named by params["input"].

        The targets are the "project_data" entry (project scope) or every
        sample (sample scope). An 'input' of "vcf" requires a "vcf" slot,
        "bcf" requires a "bcf" slot, and any other value requires a "gzVCF"
        slot.

        Raises:
            AssertionExcept: on an invalid scope or a missing variants file.
        """
        scope = self.params["scope"]
        if scope == "sample":
            targets = self.sample_data["samples"]
        elif scope == "project":
            targets = ["project_data"]
        else:
            raise AssertionExcept(
                "'scope' must be either 'sample' or 'project'")

        for target in targets:
            kind = self.params["input"]
            if kind == "vcf":
                slot, complaint = "vcf", "No VCF variants file."
            elif kind == "bcf":
                slot, complaint = "bcf", "No BCF variants file."
            else:
                slot = "gzVCF"
                complaint = "No 'gzVCF' (gzipped VCF) variants file."

            try:
                self.sample_data[target][slot]
            except KeyError:
                raise AssertionExcept(complaint, target)
Ejemplo n.º 23
0
    def step_sample_initiation(self):
        """Check that fasta.nucl and gtf files exist for the chosen scope.

        Runs after sample_data has been set. 'scope' is mandatory. In
        project scope the project data, and in sample scope every sample,
        must hold both a "fasta.nucl" and a "gtf" slot.

        Raises:
            AssertionExcept: if 'scope' is missing or invalid, or either
                required slot is absent.
        """
        if "scope" not in self.params:
            raise AssertionExcept("No 'scope' specified.")

        needed = ("fasta.nucl", "gtf")
        scope = self.params["scope"]

        if scope == "project":
            project = self.sample_data["project_data"]
            if not all(slot in project for slot in needed):
                raise AssertionExcept(
                    "Project does not have fasta.nucl and gtf files.")
        elif scope == "sample":
            for name in self.sample_data["samples"]:
                if not all(slot in self.sample_data[name]
                           for slot in needed):
                    raise AssertionExcept(
                        "Sample does not have fasta.nucl and gtf files.",
                        name)
        else:
            raise AssertionExcept(
                "'scope' must be either 'sample' or 'project'")
Ejemplo n.º 24
0
    def step_specific_init(self):
        """Validate the step's mandatory parameters at initiation.

        Sets self.shell. 'type' must be "nucl" or "prot", 'output_type' must
        be one of "tblout", "domtblout" or "pfamtblout", and 'hmmdb' must be
        given. The checks run in that order.

        Raises:
            AssertionExcept: for the first parameter that is missing or has
                an invalid value.
        """
        self.shell = "bash"
        params = self.params

        if "type" not in params:
            raise AssertionExcept(
                "Please specify the fasta type to use: type = nucl or prot")
        if params["type"] not in ("nucl", "prot"):
            raise AssertionExcept("'type' must be 'nucl' or 'prot'")

        # Description of the accepted output types, shared by both messages.
        output_help = (
            "* tblout     : save parseable table of per-sequence hits \n"
            "* domtblout  : save parseable table of per-domain hits \n"
            "* pfamtblout : save table of hits and domains in Pfam format \n")

        if "output_type" not in params:
            raise AssertionExcept(
                "\nPlease specify the output_type to use: \n" + output_help)
        if params["output_type"] not in ("tblout", "domtblout",
                                         "pfamtblout"):
            raise AssertionExcept(
                "\n'output_type' must be one of the following: \n" +
                output_help)

        if "hmmdb" not in params:
            raise AssertionExcept("Please specify the hmmdb to use!")
    def step_sample_initiation(self):
        """Warn when no trinity step precedes this one, then check inputs.

        Runs after sample_data has been set. The step names returned by
        self.get_depend_list() are looked up in pipe_data["names_index"];
        if none of them maps to "trinity" a warning is written. 'scope' is
        mandatory, and a "fasta.nucl" slot must exist in the project data
        (project scope) or in every sample (sample scope).

        Raises:
            AssertionExcept: if 'scope' is missing or invalid, or the
                nucleotide fasta is absent.
        """
        upstream = [
            self.pipe_data["names_index"][step]
            for step in self.get_depend_list()
        ]
        if "trinity" not in upstream:
            self.write_warning(
                "No trinity in history. Are you sure of what you are attempting to do?"
            )

        if "scope" not in self.params:
            raise AssertionExcept("No 'scope' specified.")

        scope = self.params["scope"]
        if scope == "project":
            if "fasta.nucl" not in self.sample_data["project_data"]:
                raise AssertionExcept(
                    "No fasta file of type 'nucl' in project\n")
        elif scope == "sample":
            for name in self.sample_data["samples"]:
                if "fasta.nucl" not in self.sample_data[name]:
                    raise AssertionExcept("No fasta file of type 'nucl'\n",
                                          name)
        else:
            raise AssertionExcept(
                "'scope' must be either 'sample' or 'project'")
Ejemplo n.º 26
0
    def step_sample_initiation(self):
        """Check that an assembly exists for the requested scope.

        With an explicit 'scope' parameter, requires a "fasta.nucl" slot in
        the project data (project scope) or in every sample (sample scope).
        Without one, a warning is written and the scope is guessed: "project"
        when the project data holds a "fasta.nucl" slot, otherwise "sample"
        provided every sample has one. The guessed value is stored in
        ``self.params["scope"]``.

        Raises:
            AssertionExcept: when 'scope' is invalid or a required assembly
                is missing.
        """

        def has_assembly(slot):
            # True when sample_data[slot] can be read and has "fasta.nucl"
            try:
                self.sample_data[slot]["fasta.nucl"]
            except KeyError:
                return False
            return True

        def require_sample_assemblies():
            # Make sure each sample has a ["fasta.nucl"] slot
            for sample in self.sample_data["samples"]:
                if not has_assembly(sample):
                    raise AssertionExcept(
                        "You are trying to run QUAST with no assembly.\n",
                        sample)

        if "scope" not in self.params:
            self.write_warning("'scope' not passed. Will try guessing...")

            if has_assembly("project_data"):
                self.write_warning(
                    "There is a project-wide assembly. Using it.\n")
                self.params["scope"] = "project"
            else:
                # No project-wide assembly: all samples must have their own
                require_sample_assemblies()
                self.params["scope"] = "sample"
            return

        scope = self.params["scope"]

        if scope == "project":
            if not has_assembly("project_data"):
                raise AssertionExcept("No project wide assembly!")
            if "compare_mode" in self.params:
                self.write_warning(
                    "Ignoring 'compare_mode' in project scope")
        elif scope == "sample":
            require_sample_assemblies()
        else:
            raise AssertionExcept(
                "'scope' must be either 'project' or 'sample'")
Ejemplo n.º 27
0
    def step_sample_initiation(self):
        """Check that the required dependency output exists for the scope.

        Requires a 'scope' parameter. In project scope the project data, and
        in sample scope every sample, must hold the slots "fasta.nucl",
        "fasta.prot", "hmmscan.prot", "gene_trans_map" and
        "transcripts.fasta.nucl". Slots are checked in that order and the
        first missing one is reported.

        Raises:
            AssertionExcept: when 'scope' is missing or invalid, or when a
                required slot is absent. In sample scope the offending
                sample is passed along with the message.
        """
        if "scope" not in self.params:
            raise AssertionExcept("No 'scope' specified.")

        # (slot name, wording used in the error message), in checking order
        required_slots = [
            ("fasta.nucl", "a nucl fasta"),
            ("fasta.prot", "a prot fasta"),
            ("hmmscan.prot", "a prot hmmscan output file"),
            ("gene_trans_map", "a gene_trans_map file"),
            ("transcripts.fasta.nucl", "a transcripts.fasta.nucl file"),
        ]

        scope = self.params["scope"]

        if scope == "project":
            for slot, description in required_slots:
                if slot not in self.sample_data["project_data"]:
                    raise AssertionExcept(
                        "Project does not have {what}.".format(
                            what=description))

        elif scope == "sample":
            for sample in self.sample_data["samples"]:
                for slot, description in required_slots:
                    if slot not in self.sample_data[sample]:
                        # Report the sample (not the project) that lacks
                        # the slot
                        raise AssertionExcept(
                            "Sample does not have {what}.".format(
                                what=description), sample)

        else:
            raise AssertionExcept(
                "'scope' must be either 'sample' or 'project'")
Ejemplo n.º 28
0
    def step_specific_init(self):
        """Check and normalise the step parameters at initiation.

        Good place for parameter testing; wrong place for sample data
        testing.

        * Sets the shell and the output file tag.
        * Forces the '--input_type' redirect to "fastq", warning if the user
          passed one.
        * Warns that user values for '--bowtie2out' and '--biom' are ignored.
        * Normalises 'merge_metaphlan_tables' to a dict and, when it has no
          'path', defaults it to utils/merge_metaphlan_tables.py next to
          'script_path'.
        * Requires a 'path' in the 'ktImportText' block when that block is
          given, and warns when it is absent.

        Raises:
            AssertionExcept: when 'merge_metaphlan_tables' is a non-empty
                non-dict value, or 'ktImportText' lacks a 'path'.
        """
        self.shell = "bash"  # Can be set to "bash" by inheriting instances
        self.file_tag = ".metaphlan.out"

        redirects = self.params["redir_params"]

        if "--input_type" in redirects:
            self.write_warning(
                "At the moment metaphlan supports only --input_type fastq. Ignoring the value you passed\n"
            )
        redirects["--input_type"] = "fastq"

        for option in ("--bowtie2out", "--biom"):
            if option in redirects and redirects[option]:
                self.write_warning(
                    "Ignoring the value you passed for {option}.\nWill store data in sample specific location\n"
                    .format(option=option))

        if "merge_metaphlan_tables" in self.params:
            if not isinstance(self.params["merge_metaphlan_tables"], dict):
                if self.params["merge_metaphlan_tables"]:
                    raise AssertionExcept(
                        "'merge_metaphlan_tables' must be empty or a block with 'path' and optionally 'redirects'"
                    )
                # Empty value: treat as an empty block
                self.params["merge_metaphlan_tables"] = dict()
            if "path" not in self.params["merge_metaphlan_tables"]:
                # The utils folder sits beside the script, so the default is
                # built from the script's directory (dirname). basename
                # would yield "<script file>/utils/...".
                self.params["merge_metaphlan_tables"]["path"] = os.sep.join([
                    os.path.dirname(self.params["script_path"]),
                    "utils/merge_metaphlan_tables.py"
                ])
                self.write_warning(
                    "You did not provided a path in 'merge_metaphlan_tables'. Using '{path}'"
                    .format(
                        path=self.params["merge_metaphlan_tables"]["path"]))

        if "ktImportText" in self.params:
            if not isinstance(
                    self.params["ktImportText"],
                    dict) or "path" not in self.params["ktImportText"]:
                raise AssertionExcept(
                    "Please include a 'path' in the 'ktImportText' block.")
        else:
            self.write_warning(
                "You did not supply a 'ktImportText' block. Will not create krona reports...\n"
            )
Ejemplo n.º 29
0
    def build_scripts(self):
        """Build one script per sample, or a single one for the project.

        For each target the script changes into the working directory, adds
        the constant part from ``get_script_const()``, and appends the
        --out, --in and --tmp options. A --lineage option is added from the
        project's "BUSCO.lineage" slot when none was given in the redirects.
        The result location is stored in the target's "BUSCO" slot.

        Raises:
            AssertionExcept: when 'scope' is invalid, or when no lineage is
                available from either the redirects or the project data.
        """
        # Targets to go over: the samples for sample scope, or the single
        # "project_data" entry for project scope
        scope = self.params["scope"]
        if scope == "sample":
            targets = self.sample_data["samples"]
        elif scope == "project":
            targets = ["project_data"]
        else:
            raise AssertionExcept(
                "'scope' must be either 'sample' or 'project'")

        # Template for one "--name value" line of the command
        option_line = "--%s %s \\\n\t"

        for sample in targets:

            # Name of specific script:
            self.spec_script_name = self.set_spec_script_name(sample)
            self.script = ""

            # Make a dir for the current sample:
            sample_dir = self.make_folder_for_sample(sample)

            # This line should be left before every new script. It sees to
            # local issues. Its return value is the base dir for this step.
            use_dir = self.local_start(sample_dir)

            self.script += "# Moving into output location\ncd {} \n\n".format(
                use_dir)
            self.script += self.get_script_const()

            # The results will be put in data/step_name/name/Title
            self.script += option_line % ("out", sample)
            self.script += option_line % (
                "in", self.sample_data[sample]["fasta.%s" % self.type])
            self.script += option_line % ("tmp", os.path.join(use_dir, "tmp"))

            if "--lineage" not in self.params["redir_params"]:
                project_slots = self.sample_data["project_data"]
                if "BUSCO.lineage" not in project_slots:
                    raise AssertionExcept(
                        "Please supply a lineage, either via redirects (--lineage) or via 'get_lineage'"
                    )
                self.script += option_line % ("lineage",
                                              project_slots["BUSCO.lineage"])

            # Drop the trailing line continuation
            self.script = self.script.rstrip("\\\n\t")

            # Store results to fasta and assembly slots:
            self.sample_data[sample]["BUSCO"] = os.path.join(
                sample_dir, "run_%s" % sample)

            # Move all files from temporary local dir to permanent base_dir
            # (sees to copying local files to final destination and other
            # stuff)
            self.local_finish(use_dir, sample_dir)

            self.create_low_level_script()
Ejemplo n.º 30
0
    def get_action_output_type(self, sample, action, redirects):
        """Return the output file type produced by a samtools action.

        An explicit output-format option in ``redirects`` (a flag group
        ending in 'O' followed by a value) takes precedence and must be one
        of bam, sam or cram. Otherwise the type is derived from the action
        and, for 'view' and 'mpileup', from the flags found in ``redirects``.

        Args:
            sample: sample name, passed along with the 'index' error.
            action: name of the samtools action.
            redirects: redirected arguments as a string, or None.

        Returns:
            The output type as a lower-case string. Actions not handled by
            the branches below fall through this part of the chain.

        Raises:
            AssertionExcept: for an unsupported explicit output format, or
                for 'index' when the active type is neither bam nor cram.
        """
        if redirects is not None:
            # Search once for the format option together with its value, so
            # an option with no value is skipped instead of crashing on a
            # failed second search.
            format_match = re.search(r"\-\w*O\s+(\w+)", redirects)
            if format_match:
                requested = format_match.group(1)
                if requested.lower() not in ["bam", "sam", "cram"]:
                    raise AssertionExcept(
                        "Bad value for output format ({type})".format(
                            type=requested))
                return requested.lower()

        if action == "view":
            if not redirects:
                return "sam"
            elif re.search(r"\-\w*b", redirects):
                return "bam"
            elif re.search(r"\-\w*C", redirects):
                return "cram"
            else:
                return "sam"
        elif action == "sort":
            return "bam"
        elif action == "index":
            # The index type follows the type of the file being indexed
            if self.active_type == "bam":
                return "bai"
            elif self.active_type == "cram":
                return "crai"
            else:
                raise AssertionExcept(
                    "No 'bam' or 'cram' for 'samtools index'", sample)
        elif action in ["flagstat", "stats", "idxstats", "depth", "bedcov"]:
            # These reports are stored under the name of the action itself
            return action
        elif action in ["fasta", "fastq"]:
            return self.active_type
        elif action in ["mpileup"]:
            if not redirects:
                return "mpileup"
            elif re.search(r"\-\w*v", redirects) or re.search(
                    r"\-\-VCF", redirects):
                return "vcf"
            elif re.search(r"\-\w*g", redirects) or re.search(
                    r"\-\-BCF", redirects):
                return "bcf"
            else:
                return "mpileup"
        elif action in ["merge", "addreplacerg", "fixmate", "markdup"]:
            return "bam"