コード例 #1
0
ファイル: ui.py プロジェクト: UMNPonyClub/paleomix
 def flush(self):
     """See BaseUI.flush."""
     # Nothing to do unless the base class reports pending output and
     # there is at least one node currently running.
     if not (BaseUI.flush(self) and self._running_nodes):
         return

     self._print_header(self.states, self.threads)
     # Sort by string representation for stable, readable output.
     for label in sorted(str(node) for node in self._running_nodes):
         print_info("  - %s" % (label,))
     print_info()
コード例 #2
0
 def flush(self):
     """See BaseUI.flush."""
     # Only print when the base implementation reports pending output
     # and at least one node is currently running.
     if BaseUI.flush(self) and self._running_nodes:
         self._print_header(self.states, self.threads)
         # Sort node names (string form) for stable, readable output.
         for node in sorted(map(str, self._running_nodes)):
             print_info("  - %s" % (node, ))
         # Trailing blank line separates this block from later output.
         print_info()
コード例 #3
0
ファイル: makefile.py プロジェクト: UMNPonyClub/paleomix
def read_makefiles(options, filenames, commands):
    """Reads, validates, and post-processes each makefile in `filenames`.

    `commands` is an iterable of (step-name, function) tuples; the set of
    step names is passed on to _mangle_makefile. Returns a list of the
    processed makefile dictionaries."""
    print_info("Reading makefile(s):")
    steps = frozenset(key for (key, _) in commands)

    results = []
    for fname in filenames:
        raw = pypeline.common.makefile.read_makefile(fname, _VALIDATION)
        results.append(_mangle_makefile(options, raw["Makefile"], steps))
    return results
コード例 #4
0
ファイル: makefile.py プロジェクト: KHanghoj/epiPALEOMIX
def read_makefiles(options, filenames, commands):
    """Reads, validates, and post-processes each makefile in `filenames`.

    `commands` is an iterable of (step-name, function) tuples; the set of
    step names is passed on to _mangle_makefile. Returns a list of the
    processed makefile dictionaries.
    """
    print_info("Reading makefile(s):")
    # First element of each command tuple names an analysis step.
    steps = frozenset(key for (key, _) in commands)

    makefiles = []
    for filename in filenames:
        makefile = pypeline.common.makefile.read_makefile(
            filename, _VALIDATION)
        makefile = _mangle_makefile(options, makefile["Makefile"], steps)
        makefiles.append(makefile)
    return makefiles
コード例 #5
0
ファイル: ui.py プロジェクト: CarlesV/paleomix
    def flush(self):
        """See BaseUI.flush."""
        if self._running_nodes:
            BaseUI.flush(self)
            self._print_header(self.states, self.threads)
            # Sort by string representation for stable, readable output.
            for running in sorted(self._running_nodes, key=str):
                print_info("  - %s" % running)
            print_info()
        else:
            # Nothing running; defer entirely to the verbose UI.
            VerboseUI.flush(self)
コード例 #6
0
    def flush(self):
        """See BaseUI.flush."""
        # Nothing running; defer entirely to the verbose UI.
        if not self._running_nodes:
            VerboseUI.flush(self)
            return

        BaseUI.flush(self)
        self._print_header(self.states, self.threads)
        # Sort by string representation for stable, readable output.
        for node in sorted(self._running_nodes, key=str):
            print_info("  - %s" % node)
        # Trailing blank line separates this block from later output.
        print_info()
コード例 #7
0
    def finalize(self):
        """Called by the pipeline at the termination of a run. By default,
        this function prints the location of the log-file if one was created
        during the run (e.g. if there were errors)."""
        # A log-file only exists if something was logged during the run.
        log_path = pypeline.logger.get_logfile()
        if log_path:
            print_debug("Log-file located at %r" % (log_path,))

        if not self.states[self.ERROR]:
            print_info("Done ...")
        else:
            print_err("Done; but errors were detected ...")
コード例 #8
0
ファイル: ui.py プロジェクト: CarlesV/paleomix
    def finalize(self):
        """Called by the pipeline at the termination of a run. By default,
        this function prints the location of the log-file if one was created
        during the run (e.g. if there were errors)."""
        # A log-file only exists if something was logged during the run.
        logfile = pypeline.logger.get_logfile()
        if logfile:
            print_debug("Log-file located at %r" % (logfile,))

        if self.states[self.ERROR]:
            print_err("Done; but errors were detected ...")
        else:
            print_info("Done ...")
コード例 #9
0
ファイル: config.py プロジェクト: CarlesV/paleomix
def parse_config(argv):
    """Parses command-line arguments and returns (options, args).

    Prints usage information when no command is given (other than
    "mkfile"); raises ConfigError for unknown analysis steps."""
    options, args = _run_config_parser(argv)
    pypeline.ui.set_ui_colors(options.ui_colors)

    # No command given (or just "mkfile"): show usage information instead.
    if (len(args) < 2) and (args != ["mkfile"]):
        description = _DESCRIPTION.replace("%prog", "phylo_pipeline").strip()
        console.print_info("Phylogeny Pipeline %s\n" % (pypeline.__version__,))
        console.print_info(description)
        return options, args

    commands = select_commands(args[0] if args else ())
    unknown = [key for (key, func) in commands if func is None]
    if unknown:
        raise ConfigError("Unknown analysis step(s): %s"
                          % (", ".join(map(repr, unknown)),))

    return options, args
コード例 #10
0
ファイル: makefile.py プロジェクト: KHanghoj/epiPALEOMIX
def _check_bam_sequences(options, mkfile, steps):
    """Check that the BAM files contains the reference sequences found in the
    FASTA file, matched by name and length; extra sequences are permitted. This
    check is only done if genotyping is to be carried out, to reduce the
    overhead of reading the BAM file headers.

    """
    if ("genotype" not in steps) and ("genotyping" not in steps):
        return

    print_info("    - Validating BAM files ...")
    # Maps BAM filename -> expected {contig name: length} from the FASTA.
    bam_files = {}
    for regions in mkfile["Project"]["Regions"].itervalues():
        for sample in mkfile["Project"]["Samples"].itervalues():
            filename = os.path.join(
                options.samples_root,
                "%s.%s.bam" % (sample["Name"], regions["Prefix"]))
            if regions["Realigned"]:
                filename = add_postfix(filename, ".realigned")

            # Missing BAM files are simply skipped here; presumably they
            # are reported elsewhere -- TODO confirm against callers.
            if os.path.exists(filename):
                bam_files[filename] = _collect_fasta_contigs(regions)

    for (filename, contigs) in bam_files.iteritems():
        with pysam.Samfile(filename) as handle:
            # Contig names and lengths recorded in the BAM header.
            bam_contigs = dict(zip(handle.references, handle.lengths))

            for (contig, length) in contigs.iteritems():
                bam_length = bam_contigs.get(contig)

                if bam_length is None:
                    message = ("Reference sequence missing from BAM file; "
                               "BAM file aligned against different prefix?\n"
                               "    BAM file = %s\n    Sequence name = %s") \
                               % (filename, contig)
                    raise MakefileError(message)
                elif bam_length != length:
                    message = ("Length of reference sequence in FASTA differs "
                               "from length of sequence in BAM file; BAM file "
                               "aligned against different prefix?\n"
                               "    BAM file = %s\n"
                               "    Length in FASTA = %s\n"
                               "    Length in BAM = %s") \
                               % (filename, length, bam_length)
                    raise MakefileError(message)
コード例 #11
0
ファイル: makefile.py プロジェクト: UMNPonyClub/paleomix
def _check_bam_sequences(options, mkfile, steps):
    """Check that the BAM files contains the reference sequences found in the
    FASTA file, matched by name and length; extra sequences are permitted. This
    check is only done if genotyping is to be carried out, to reduce the
    overhead of reading the BAM file headers.

    """
    if ("genotype" not in steps) and ("genotyping" not in steps):
        return

    print_info("    - Validating BAM files ...")
    # Maps BAM filename -> expected {contig name: length} from the FASTA.
    bam_files = {}
    for regions in mkfile["Project"]["Regions"].itervalues():
        for sample in mkfile["Project"]["Samples"].itervalues():
            filename = os.path.join(options.samples_root, "%s.%s.bam" % (sample["Name"], regions["Prefix"]))
            if regions["Realigned"]:
                filename = add_postfix(filename, ".realigned")

            # Missing BAM files are simply skipped here.
            if os.path.exists(filename):
                bam_files[filename] = _collect_fasta_contigs(regions)

    for (filename, contigs) in bam_files.iteritems():
        with pysam.Samfile(filename) as handle:
            # Contig names and lengths recorded in the BAM header.
            bam_contigs = dict(zip(handle.references, handle.lengths))

            for (contig, length) in contigs.iteritems():
                bam_length = bam_contigs.get(contig)

                if bam_length is None:
                    message = (
                        "Reference sequence missing from BAM file; "
                        "BAM file aligned against different prefix?\n"
                        "    BAM file = %s\n    Sequence name = %s"
                    ) % (filename, contig)
                    raise MakefileError(message)
                elif bam_length != length:
                    message = (
                        "Length of reference sequence in FASTA differs "
                        "from length of sequence in BAM file; BAM file "
                        "aligned against different prefix?\n"
                        "    BAM file = %s\n"
                        "    Length in FASTA = %s\n"
                        "    Length in BAM = %s"
                    ) % (filename, length, bam_length)
                    raise MakefileError(message)
コード例 #12
0
def parse_config(argv):
    """Parses command-line arguments and returns (options, args).

    Prints usage information when no command is given (other than
    "mkfile"); raises ConfigError for unknown analysis steps."""
    options, args = _run_config_parser(argv)
    pypeline.ui.set_ui_colors(options.ui_colors)

    # No command given (or just "mkfile"): show usage information instead.
    if (len(args) < 2) and (args != ["mkfile"]):
        description = _DESCRIPTION.replace("%prog", "phylo_pipeline").strip()
        console.print_info("Phylogeny Pipeline %s\n" %
                           (pypeline.__version__, ))
        console.print_info(description)
        return options, args

    commands = select_commands(args[0] if args else ())
    if any((func is None) for (_, func) in commands):
        unknown_commands = ", ".join(
            repr(key) for (key, func) in commands if func is None)
        raise ConfigError("Unknown analysis step(s): %s" %
                          (unknown_commands, ))

    return options, args
コード例 #13
0
ファイル: config.py プロジェクト: CarlesV/paleomix
    def _write_config_file(self, config, defaults):
        """Writes a basic config files, using the values previously found in the
        config files, and specified on the command-line."""
        parser = ConfigParser.SafeConfigParser()
        parser.add_section("Defaults")
        for key in defaults:
            value = getattr(config, key)
            # Lists/tuples are serialized as ';'-separated strings.
            if isinstance(value, (types.ListType, types.TupleType)):
                value = ";".join(value)
            parser.set("Defaults", key, str(value))

        # The last filename is the destination for new config files.
        filename = self._filenames[-1]
        make_dirs(os.path.dirname(filename))
        with open(filename, "w") as handle:
            parser.write(handle)

        print_info("Wrote config file %r" % (filename,))
        sys.exit(0)
コード例 #14
0
ファイル: config.py プロジェクト: health1987/paleomix
    def _write_config_file(self, config, defaults):
        """Writes a basic config files, using the values previously found in the
        config files, and specified on the command-line."""
        defaults_cfg = ConfigParser.SafeConfigParser()
        defaults_cfg.add_section("Defaults")
        for key in defaults:
            value = getattr(config, key)
            # Lists/tuples are serialized as ';'-separated strings.
            if isinstance(value, (types.ListType, types.TupleType)):
                value = ";".join(value)

            defaults_cfg.set("Defaults", key, str(value))

        # The last filename is the destination for new config files.
        filename = self._filenames[-1]
        make_dirs(os.path.dirname(filename))
        with open(filename, "w") as handle:
            defaults_cfg.write(handle)

        print_info("Wrote config file %r" % (filename, ))
        # NOTE: terminates the process after writing the config file.
        sys.exit(0)
コード例 #15
0
ファイル: makefile.py プロジェクト: KHanghoj/epiPALEOMIX
def _update_regions(options, mkfile):
    """Replaces the 'RegionsOfInterest' section with a 'Regions' section in
    which each entry is annotated with derived values (name, description,
    and paths to BED / FASTA / per-sample genotype files). Raises
    MakefileError if a prefix is missing or a required file is absent."""
    print_info("    - Validating regions of interest ...")
    mkfile["Project"]["Regions"] = mkfile["Project"].pop("RegionsOfInterest")

    for (name, subdd) in mkfile["Project"]["Regions"].iteritems():
        if "Prefix" not in subdd:
            raise MakefileError("No genome specified for regions %r" %
                                (name, ))

        subdd["Name"] = name
        subdd["Desc"] = "{Prefix}.{Name}".format(**subdd)
        subdd["BED"] = os.path.join(options.regions_root,
                                    subdd["Desc"] + ".bed")
        subdd["FASTA"] = os.path.join(options.prefix_root,
                                      subdd["Prefix"] + ".fasta")

        # (description, path, extra instructions shown on failure)
        required_files = (("Regions file", subdd["BED"],
                           None), ("Reference sequence", subdd["FASTA"], None),
                          ("Reference sequence index", subdd["FASTA"] + ".fai",
                           "Please index using 'samtools faidx %s'" %
                           (subdd["FASTA"], )))

        for (desc, path, instructions) in required_files:
            if not os.path.isfile(path):
                message = "%s does not exist for %r:\n  Path = %r" \
                                % (desc, name, path)
                if instructions:
                    message = "%s\n%s" % (message, instructions)
                raise MakefileError(message)

        # Collects seq. names / validate regions
        subdd["Sequences"] = {None: _collect_and_validate_regions(subdd)}
        subdd["SubsetFiles"] = {None: ()}

        # Paths of the per-sample genotype FASTAs for these regions.
        sampledd = subdd["Genotypes"] = {}
        for sample_name in mkfile["Project"]["Samples"]:
            fasta_file = ".".join((sample_name, subdd["Desc"], "fasta"))
            sampledd[sample_name] = os.path.join(options.destination,
                                                 mkfile["Project"]["Title"],
                                                 "genotypes", fasta_file)
コード例 #16
0
ファイル: makefile.py プロジェクト: UMNPonyClub/paleomix
def _update_regions(options, mkfile):
    """Replaces the 'RegionsOfInterest' section with a 'Regions' section in
    which each entry is annotated with derived values (name, description,
    and paths to BED / FASTA / per-sample genotype files). Raises
    MakefileError if a prefix is missing or a required file is absent."""
    print_info("    - Validating regions of interest ...")
    mkfile["Project"]["Regions"] = mkfile["Project"].pop("RegionsOfInterest")

    for (name, subdd) in mkfile["Project"]["Regions"].iteritems():
        if "Prefix" not in subdd:
            raise MakefileError("No genome specified for regions %r" % (name,))

        subdd["Name"] = name
        subdd["Desc"] = "{Prefix}.{Name}".format(**subdd)
        subdd["BED"] = os.path.join(options.regions_root, subdd["Desc"] + ".bed")
        subdd["FASTA"] = os.path.join(options.prefix_root, subdd["Prefix"] + ".fasta")

        # (description, path, extra instructions shown on failure)
        required_files = (
            ("Regions file", subdd["BED"], None),
            ("Reference sequence", subdd["FASTA"], None),
            (
                "Reference sequence index",
                subdd["FASTA"] + ".fai",
                "Please index using 'samtools faidx %s'" % (subdd["FASTA"],),
            ),
        )

        for (desc, path, instructions) in required_files:
            if not os.path.isfile(path):
                message = "%s does not exist for %r:\n  Path = %r" % (desc, name, path)
                if instructions:
                    message = "%s\n%s" % (message, instructions)
                raise MakefileError(message)

        # Collects seq. names / validate regions
        subdd["Sequences"] = {None: _collect_and_validate_regions(subdd)}
        subdd["SubsetFiles"] = {None: ()}

        # Paths of the per-sample genotype FASTAs for these regions.
        sampledd = subdd["Genotypes"] = {}
        for sample_name in mkfile["Project"]["Samples"]:
            fasta_file = ".".join((sample_name, subdd["Desc"], "fasta"))
            sampledd[sample_name] = os.path.join(
                options.destination, mkfile["Project"]["Title"], "genotypes", fasta_file
            )
コード例 #17
0
def _validate_hg_prefixes(makefiles):
    """Implementation of the checks included in GATK, which require that the
    FASTA for the human genome is ordered 1 .. 23, .
    """
    already_validated = set()
    print_info("  - Validating prefixes ...", file=sys.stderr)
    for makefile in makefiles:
        # The 'Realigned BAM' feature implies GATK is used downstream.
        uses_gatk = "Realigned BAM" in makefile["Options"]["Features"]
        for prefix in makefile["Prefixes"].itervalues():
            path = prefix["Path"]
            # Each FASTA file only needs to be validated once per run.
            if path in already_validated:
                continue

            if not os.path.exists(path + ".fai"):
                print_info("    - Index does not exist for %r; this may "
                           "take a while ..." % (path, ),
                           file=sys.stderr)

                if not os.access(os.path.dirname(path), os.W_OK):
                    message = \
                        "FASTA index for prefix is missing, but folder is\n" \
                        "not writable, so it cannot be created:\n" \
                        "  Prefix = %s\n\n" \
                        "Either change permissions on the folder, or move\n" \
                        "the prefix to different location." % (path,)
                    raise MakefileError(message)

                # Use pysam to index the file
                pysam.Fastafile(path).close()

            # Read (name, length) pairs from the .fai index file.
            contigs = []
            with open(path + ".fai") as handle:
                for line in handle:
                    name, size, _ = line.split('\t', 2)
                    contigs.append((name, int(size)))

            _do_validate_hg_prefix(makefile, prefix, contigs, fatal=uses_gatk)
            already_validated.add(path)
コード例 #18
0
ファイル: makefile.py プロジェクト: health1987/paleomix
def _validate_hg_prefixes(makefiles):
    """Implementation of the checks included in GATK, which require that the
    FASTA for the human genome is ordered 1 .. 23, .
    """
    already_validated = set()
    print_info("  - Validating prefixes ...", file=sys.stderr)
    for makefile in makefiles:
        # The 'Realigned BAM' feature implies GATK is used downstream.
        uses_gatk = "Realigned BAM" in makefile["Options"]["Features"]
        for prefix in makefile["Prefixes"].itervalues():
            path = prefix["Path"]
            # Each FASTA file only needs to be validated once per run.
            if path in already_validated:
                continue

            if not os.path.exists(path + ".fai"):
                print_info("    - Index does not exist for %r; this may "
                           "take a while ..." % (path,), file=sys.stderr)

                if not os.access(os.path.dirname(path), os.W_OK):
                    message = \
                        "FASTA index for prefix is missing, but folder is\n" \
                        "not writable, so it cannot be created:\n" \
                        "  Prefix = %s\n\n" \
                        "Either change permissions on the folder, or move\n" \
                        "the prefix to different location." % (path,)
                    raise MakefileError(message)

                # Use pysam to index the file
                pysam.Fastafile(path).close()

            # Read (name, length) pairs from the .fai index file.
            contigs = []
            with open(path + ".fai") as handle:
                for line in handle:
                    name, size, _ = line.split('\t', 2)
                    contigs.append((name, int(size)))

            _do_validate_hg_prefix(makefile, prefix, contigs, fatal=uses_gatk)
            already_validated.add(path)
コード例 #19
0
ファイル: makefile.py プロジェクト: UMNPonyClub/paleomix
def _update_and_check_max_read_depth(options, mkfile):
    """Resolves the 'MaxReadDepth' VCF-filter setting for every set of
    regions: explicit dicts are validated for missing samples, "auto" is
    resolved from per-sample depth histograms, and a single scalar value
    is expanded to cover every sample genotyped via SAMTools. Raises
    MakefileError when a required sample has no value."""
    if any(subdd["VCF_Filter"]["MaxReadDepth"] == "auto"
           for subdd in mkfile["Genotyping"].itervalues()):
        # Fixed typo in user-facing message ("Determinining").
        print_info("    - Determining max-depth from depth-histograms ...")

    for (key, settings) in mkfile["Genotyping"].iteritems():
        # Only samples genotyped via SAMTools require a max read depth.
        required_keys = set()
        for sample in mkfile["Project"]["Samples"].itervalues():
            if sample["GenotypingMethod"].lower() == "samtools":
                required_keys.add(sample["Name"])

        max_depths = settings["VCF_Filter"]["MaxReadDepth"]
        if isinstance(max_depths, types.DictType):
            # Extra keys are allowed, to make it easier
            # to temporarily disable a sample
            missing_keys = required_keys - set(max_depths)
            if missing_keys:
                missing_keys = "\n    - ".join(sorted(missing_keys))
                message = "MaxReadDepth not specified for the following " \
                          "samples for %r:\n    - %s" % (key, missing_keys)
                raise MakefileError(message)

        elif isinstance(max_depths, types.StringTypes):
            # Only the literal string "auto" is valid here.
            assert max_depths.lower() == "auto", max_depths
            prefix = mkfile["Project"]["Regions"][key]["Prefix"]
            max_depths = {}

            for sample in required_keys:
                fname = "%s.%s.depths" % (sample, prefix)
                fpath = os.path.join(options.samples_root, fname)
                max_depths[sample] = _read_max_depths(fpath, prefix, sample)

            settings["VCF_Filter"]["MaxReadDepth"] = max_depths
        else:
            # A single scalar value applies to every required sample.
            max_depths = dict.fromkeys(required_keys, max_depths)
            settings["VCF_Filter"]["MaxReadDepth"] = max_depths
コード例 #20
0
ファイル: makefile.py プロジェクト: KHanghoj/epiPALEOMIX
def _update_and_check_max_read_depth(options, mkfile):
    """Resolves the 'MaxReadDepth' VCF-filter setting for every set of
    regions: explicit dicts are validated for missing samples, "auto" is
    resolved from per-sample depth histograms, and a single scalar value
    is expanded to cover every sample genotyped via SAMTools. Raises
    MakefileError when a required sample has no value."""
    if any(subdd["VCF_Filter"]["MaxReadDepth"] == "auto"
           for subdd in mkfile["Genotyping"].itervalues()):
        # Fixed typo in user-facing message ("Determinining").
        print_info("    - Determining max-depth from depth-histograms ...")

    for (key, settings) in mkfile["Genotyping"].iteritems():
        # Only samples genotyped via SAMTools require a max read depth.
        required_keys = set()
        for sample in mkfile["Project"]["Samples"].itervalues():
            if sample["GenotypingMethod"].lower() == "samtools":
                required_keys.add(sample["Name"])

        max_depths = settings["VCF_Filter"]["MaxReadDepth"]
        if isinstance(max_depths, types.DictType):
            # Extra keys are allowed, to make it easier
            # to temporarily disable a sample
            missing_keys = required_keys - set(max_depths)
            if missing_keys:
                missing_keys = "\n    - ".join(sorted(missing_keys))
                message = "MaxReadDepth not specified for the following " \
                          "samples for %r:\n    - %s" % (key, missing_keys)
                raise MakefileError(message)

        elif isinstance(max_depths, types.StringTypes):
            # Only the literal string "auto" is valid here.
            assert max_depths.lower() == "auto", max_depths
            prefix = mkfile["Project"]["Regions"][key]["Prefix"]
            max_depths = {}

            for sample in required_keys:
                fname = "%s.%s.depths" % (sample, prefix)
                fpath = os.path.join(options.samples_root, fname)
                max_depths[sample] = _read_max_depths(fpath, prefix, sample)

            settings["VCF_Filter"]["MaxReadDepth"] = max_depths
        else:
            # A single scalar value applies to every required sample.
            max_depths = dict.fromkeys(required_keys, max_depths)
            settings["VCF_Filter"]["MaxReadDepth"] = max_depths
コード例 #21
0
ファイル: run.py プロジェクト: KHanghoj/epiPALEOMIX
def _print_usage():
    """Prints a usage summary for the epiPALEOMIX command-line tool."""
    basename = os.path.basename(sys.argv[0])
    if basename == "run.py":
        basename = "epiPALEOMIX"

    print_info("epiPALEOMIX Pipeline %s\n" % (__version__, ))
    print_info("Usage:")
    usage_lines = (
        "  -- %s help           -- Display this message" % basename,
        "  -- %s dryrun [...]   -- Perform dry run of pipeline on "
        "provided makefiles." % basename,
        "  -- %s makefile [...] -- Generate makefile template to"
        " std.out." % basename,
        "  -- %s run [...]      -- Run pipeline on provided "
        "makefiles." % basename,
    )
    for usage_line in usage_lines:
        print_info(usage_line)
コード例 #22
0
ファイル: pipeline.py プロジェクト: KHanghoj/epiPALEOMIX
        # NOTE(review): this fragment begins inside an enclosing `if`
        # whose header is outside this excerpt; code left byte-identical.
        try:
            os.makedirs(config.temp_root)
        except OSError, error:  # Python 2 exception syntax
            print_err("ERROR: Could not create temp root:\n\t%s" % (error, ))
            return 1

    # The temp root must be readable, writable, and traversable.
    if not os.access(config.temp_root, os.R_OK | os.W_OK | os.X_OK):
        print_err("ERROR: Insufficient permissions for temp root: '%s'" %
                  (config.temp_root, ))
        return 1

    # Init worker-threads before reading in any more data
    pipeline = Pypeline(config)

    try:
        print_info("Building BAM pipeline ...", file=sys.stderr)
        makefiles = read_makefiles(config, args)
    except (MakefileError, pypeline.yaml.YAMLError, IOError), error:
        # Re-indents multi-line error messages for readability.
        print_err("Error reading makefiles:",
                  "\n  %s:\n   " % (error.__class__.__name__, ),
                  "\n    ".join(str(error).split("\n")),
                  file=sys.stderr)
        return 1

    logfile_template = time.strftime("bam_pipeline.%Y%m%d_%H%M%S_%%02i.log")
    pypeline.logger.initialize(config, logfile_template)
    logger = logging.getLogger(__name__)

    # Build .fai files for reference .fasta files
    index_references(config, makefiles)
コード例 #23
0
    def finalize(self):
        """Called by the pipeline at the termination of a run. By default,
        this function prints the location of the log-file if one was created
        during the run (e.g. if there were errors), and a summary of all nodes.
        """
        # Falls back to 0 if start/end times were never recorded.
        runtime = (self._end_time or 0) - (self._start_time or 0)

        if self.states[self.ERROR]:
            print_err("Done; but errors were detected ...")
        else:
            print_info("Done ...")

        print_info()
        # Per-state node counts, printed as an aligned two-column table.
        rows = [("  Number of nodes:", sum(self.states)),
                ("  Number of done nodes:", self.states[self.DONE]),
                ("  Number of runable nodes:", self.states[self.RUNABLE]),
                ("  Number of queued nodes:", self.states[self.QUEUED]),
                ("  Number of outdated nodes:", self.states[self.OUTDATED]),
                ("  Number of failed nodes:", self.states[self.ERROR]),
                ("  Pipeline runtime:", _fmt_runtime(round(runtime)))]

        for line in text.padded_table(rows):
            print_info(line)

        print_info()
        print_info("Use --list-output-files to view status of output files.")

        # A log-file only exists if something was logged during the run.
        logfile = pypeline.logger.get_logfile()
        if logfile:
            print_debug("Log-file located at %r" % (logfile, ))

        print_info()
コード例 #24
0
ファイル: pipeline.py プロジェクト: UMNPonyClub/paleomix
def _print_usage():
    """Prints a usage summary for the BAM pipeline command-line tool."""
    basename = os.path.basename(sys.argv[0])
    if basename == "paleomix":
        basename = "bam_pipeline"

    print_info("BAM Pipeline %s\n" % (pypeline.__version__,))
    print_info("Usage:")
    usage_lines = (
        "  -- %s help           -- Display this message" % basename,
        "  -- %s makefile [...] -- Generate makefile from 'SampleSheet.csv' files." % basename,
        "  -- %s dryrun [...]   -- Perform dry run of pipeline on provided makefiles." % basename,
        "     %s                   Equivalent to 'bam_pipeline run --dry-run [...]'." % (" " * len(basename),),
        "  -- %s run [...]      -- Run pipeline on provided makefiles." % basename,
    )
    for usage_line in usage_lines:
        print_info(usage_line)
コード例 #25
0
ファイル: pipeline.py プロジェクト: UMNPonyClub/paleomix
    # NOTE(review): this fragment sits inside a function whose `def` line
    # is outside this excerpt; code left byte-identical.
    if not os.path.exists(config.temp_root):
        try:
            os.makedirs(config.temp_root)
        except OSError, error:  # Python 2 exception syntax
            print_err("ERROR: Could not create temp root:\n\t%s" % (error,))
            return 1

    # The temp root must be readable, writable, and traversable.
    if not os.access(config.temp_root, os.R_OK | os.W_OK | os.X_OK):
        print_err("ERROR: Insufficient permissions for temp root: '%s'" % (config.temp_root,))
        return 1

    # Init worker-threads before reading in any more data
    pipeline = Pypeline(config)

    try:
        print_info("Building BAM pipeline ...", file=sys.stderr)
        makefiles = read_makefiles(config, args)
    except (MakefileError, pypeline.yaml.YAMLError, IOError), error:
        # Re-indents multi-line error messages for readability.
        print_err(
            "Error reading makefiles:",
            "\n  %s:\n   " % (error.__class__.__name__,),
            "\n    ".join(str(error).split("\n")),
            file=sys.stderr,
        )
        return 1

    logfile_template = time.strftime("bam_pipeline.%Y%m%d_%H%M%S_%%02i.log")
    pypeline.logger.initialize(config, logfile_template)
    logger = logging.getLogger(__name__)

    # Build .fai files for reference .fasta files
コード例 #26
0
ファイル: pipeline.py プロジェクト: KHanghoj/epiPALEOMIX
def _print_usage():
    """Prints a usage summary for the BAM pipeline command-line tool."""
    basename = os.path.basename(sys.argv[0])
    # When invoked via the 'paleomix' wrapper, show the canonical name.
    if basename == "paleomix":
        basename = "bam_pipeline"

    print_info("BAM Pipeline %s\n" % (pypeline.__version__, ))
    print_info("Usage:")
    print_info("  -- %s help           -- Display this message" % basename)
    print_info(
        "  -- %s makefile [...] -- Generate makefile from 'SampleSheet.csv' files."
        % basename)
    print_info(
        "  -- %s dryrun [...]   -- Perform dry run of pipeline on provided makefiles."
        % basename)
    print_info(
        "     %s                   Equivalent to 'bam_pipeline run --dry-run [...]'."
        % (" " * len(basename), ))
    print_info(
        "  -- %s run [...]      -- Run pipeline on provided makefiles." %
        basename)
コード例 #27
0
ファイル: ui.py プロジェクト: UMNPonyClub/paleomix
    def finalize(self):
        """Called by the pipeline at the termination of a run. By default,
        this function prints the location of the log-file if one was created
        during the run (e.g. if there were errors), and a summary of all nodes.
        """
        # Falls back to 0 if start/end times were never recorded.
        runtime = (self._end_time or 0) - (self._start_time or 0)

        if self.states[self.ERROR]:
            print_err("Done; but errors were detected ...")
        else:
            print_info("Done ...")

        print_info()
        # Per-state node counts, printed as an aligned two-column table.
        rows = [("  Number of nodes:", sum(self.states)),
                ("  Number of done nodes:", self.states[self.DONE]),
                ("  Number of runable nodes:", self.states[self.RUNABLE]),
                ("  Number of queued nodes:", self.states[self.QUEUED]),
                ("  Number of outdated nodes:", self.states[self.OUTDATED]),
                ("  Number of failed nodes:", self.states[self.ERROR]),
                ("  Pipeline runtime:", _fmt_runtime(round(runtime)))]

        for line in text.padded_table(rows):
            print_info(line)

        print_info()
        print_info("Use --list-output-files to view status of output files.")

        # A log-file only exists if something was logged during the run.
        logfile = pypeline.logger.get_logfile()
        if logfile:
            print_debug("Log-file located at %r" % (logfile,))

        print_info()
コード例 #28
0
ファイル: makefile.py プロジェクト: KHanghoj/epiPALEOMIX
                    break
            else:
                raise MakefileError("Could not find MaxDepth in "
                                    "depth-histogram: %r" % (filename, ))

    except (OSError, IOError), error:
        raise MakefileError("Error reading depth-histogram (%s): %s" %
                            (filename, error))

    if max_depth == "NA":
        raise MakefileError("MaxDepth is not calculated for sample (%s);\n"
                            "cannot determine MaxDepth values automatically." %
                            (filename, ))
    max_depth = int(max_depth)

    print_info("        - %s.%s = %i" % (sample, prefix, max_depth))
    _DEPTHS_CACHE[filename] = max_depth
    return max_depth


# Cache of previously-read max depths, keyed by depth-histogram filename.
_DEPTHS_CACHE = {}


def _check_indels_and_msa(mkfile):
    msa = mkfile["MultipleSequenceAlignment"]
    regions = mkfile["Project"]["Regions"]
    for (name, subdd) in regions.iteritems():
        msa_enabled = msa[name]["Enabled"]

        if subdd["IncludeIndels"] and not msa_enabled:
            raise MakefileError(
コード例 #29
0
def main(argv):
    """Builds a makefile template from SampleSheet files found at the given
    paths and prints it to stdout; returns an exit code (0 on success)."""
    options, paths = parse_args(argv)
    samples = {}
    for root in paths:
        if os.path.isdir(root):
            sheet = os.path.join(root, _FILENAME)
        else:
            # A file was given directly; its folder becomes the root.
            root, sheet = os.path.split(root)[0], root

        if not os.path.exists(sheet):
            print_err("ERROR: Could not find SampleSheet file: %r" % sheet,
                      file=sys.stderr)
            return 1

        # Group records as sample -> library (index) -> list of lanes.
        for row in read_alignment_records(sheet):
            libraries = samples.setdefault(row["SampleID"], {})
            lanes = libraries.setdefault(row["Index"], [])

            row["Lane"] = int(row["Lane"])
            glob_tmpl = ("%(SampleID)s_%(Index)s_L%(Lane)03i_R{Pair}_*"
                         ".fastq.gz") % row
            row["Path"] = select_path(os.path.join(root, glob_tmpl))
            lanes.append(row)

    is_trim_pipeline = os.path.basename(sys.argv[0]) == "trim_pipeline"

    print_header(full_mkfile=not is_trim_pipeline,
                 sample_tmpl=not bool(samples),
                 minimal=options.minimal)

    for (sample, libraries) in samples.iteritems():
        print("%s:" % sample)
        print("  %s:" % sample)
        for (library, lanes) in libraries.iteritems():
            print("    %s:" % library)
            for row in lanes:
                print("      {FCID}_{Lane}: {Path}".format(**row))
            print()
        print()

    # Closing hints are written to stderr so they do not pollute the
    # makefile printed on stdout.
    if not argv:
        hints = ("No directories/files specified, standard makefile printed.",
                 "If the reads have associated %s files, these" % (_FILENAME,),
                 "may be used to generate a preliminary makefile:",
                 "  Usage: bam_pipeline mkfile [filename/directory] [...]",
                 "Each directory must contain a '%s' file." % _FILENAME)
    else:
        hints = ("Makefile printed. Please check for correctness "
                 "before running pipeline.",)

    for hint in hints:
        print_info(hint, file=sys.stderr)
    return 0
コード例 #30
0
ファイル: makefile.py プロジェクト: UMNPonyClub/paleomix
                    max_depth = row["MaxDepth"]
                    break
            else:
                raise MakefileError("Could not find MaxDepth in " "depth-histogram: %r" % (filename,))

    except (OSError, IOError), error:
        raise MakefileError("Error reading depth-histogram (%s): %s" % (filename, error))

    if max_depth == "NA":
        raise MakefileError(
            "MaxDepth is not calculated for sample (%s);\n"
            "cannot determine MaxDepth values automatically." % (filename,)
        )
    max_depth = int(max_depth)

    print_info("        - %s.%s = %i" % (sample, prefix, max_depth))
    _DEPTHS_CACHE[filename] = max_depth
    return max_depth


# Cache of previously-read max depths, keyed by depth-histogram filename.
_DEPTHS_CACHE = {}


def _check_indels_and_msa(mkfile):
    """Verifies that indels are only included for regions where multiple
    sequence alignment (MSA) is enabled; raises MakefileError otherwise."""
    msa = mkfile["MultipleSequenceAlignment"]
    for (name, subdd) in mkfile["Project"]["Regions"].iteritems():
        if subdd["IncludeIndels"] and not msa[name]["Enabled"]:
            raise MakefileError("Regions %r includes indels, but MSA is disabled!" % (name,))