def runControlCPC(infile, outfile):

    # farm.py is called from within cpc.sh
    assert iotools.which("farm.py"), \
        "farm.py needs to be in $PATH for cpc to run"

    # Default cpc parameters don't work with later versions of blast
    E.info("Running cpc with blast version: %s" % iotools.which("blastx"))

    result_evidence = P.snip(outfile, ".result") + ".evidence"
    working_dir = "lncRNA_control/cpc"
    statement = ("%(pipeline_scriptsdir)s/cpc.sh"
                 " %(infile)s"
                 " %(outfile)s"
                 " %(working_dir)s"
                 " %(result_evidence)s")
    P.run()
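
# For reference, once P.run() has interpolated the pipeline parameters, the
# statement above resolves to a shell command along these lines (file names
# are illustrative; the positional argument order is fixed by cpc.sh):
#
#   <pipeline_scriptsdir>/cpc.sh transcripts.fa transcripts.result \
#       lncRNA_control/cpc transcripts.evidence
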
def check_executables(filenames):
    """check for the presence/absence of executables"""

    missing = []

    for filename in filenames:
        if not iotools.which(filename):
            missing.append(filename)

    if missing:
        raise ValueError("missing executables: %s" % ",".join(missing))
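
# A minimal usage sketch for check_executables; the tool names below are
# illustrative stand-ins for whatever external dependencies a pipeline has.
def _example_check_executables():
    try:
        check_executables(["samtools", "bedtools", "wigToBigWig"])
    except ValueError as msg:
        # a single message names every tool that iotools.which() could not
        # resolve on $PATH, e.g. "missing executables: samtools,wigToBigWig",
        # so a pipeline can fail early instead of midway through a run
        E.warn(str(msg))
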
def run_report(clean=True,
               with_pipeline_status=True,
               pipeline_status_format="svg"):
    '''run cgatreport.

    This will also run ruffus to create an svg image of the pipeline
    status unless *with_pipeline_status* is set to False. The image
    will be saved into the export directory.
    '''

    params = P.get_params()

    if with_pipeline_status:
        targetdir = params["exportdir"]
        if not os.path.exists(targetdir):
            os.mkdir(targetdir)

        ruffus.pipeline_printout_graph(
            os.path.join(
                targetdir,
                "pipeline.%s" % pipeline_status_format),
            pipeline_status_format,
            ["full"],
            checksum_level=params["ruffus_checksums_level"])

    dirname, basename = os.path.split(P.get_caller().__file__)

    report_engine = params.get("report_engine", "cgatreport")
    assert report_engine in ('sphinxreport', 'cgatreport')

    docdir = os.path.join(dirname, "pipeline_docs",
                          iotools.snip(basename, ".py"))
    themedir = os.path.join(dirname, "pipeline_docs", "themes")
    relpath = os.path.relpath(docdir)
    trackerdir = os.path.join(docdir, "trackers")

    # use a fake X display in order to avoid windows popping up
    # from R plots.
    xvfb_command = iotools.which("xvfb-run")

    # permit multiple servers using -d option
    if xvfb_command:
        xvfb_command += " -d "
    else:
        xvfb_command = ""

    # if there is no DISPLAY variable set, xvfb runs, but
    # exits with error when killing process. Thus, ignore return
    # value.
    # print os.getenv("DISPLAY"), "command=", xvfb_command
    if not os.getenv("DISPLAY"):
        erase_return = "|| true"
    else:
        erase_return = ""

    if os.path.exists("conf.py"):
        conf_dir = os.path.abspath(".")
    else:
        conf_dir = os.path.join(os.path.dirname(__file__), "configuration")

    # in the current version, xvfb always returns with an error, thus
    # ignore these.
    erase_return = "|| true"

    if clean:
        clean = "rm -rf report _cache _static;"
    else:
        clean = ""

    # with sphinx >1.3.1 the PYTHONPATH needs to be set explicitly as
    # the virtual environment seems to be stripped. It is thus set to
    # the contents of the current sys.path
    syspath = ":".join(sys.path)

    statement = '''
    %(clean)s
    (export SPHINX_DOCSDIR=%(docdir)s;
    export SPHINX_THEMEDIR=%(themedir)s;
    export PYTHONPATH=%(syspath)s;
    %(xvfb_command)s
    %(report_engine)s-build
    --num-jobs=%(report_threads)s
    sphinx-build
    -b html
    -d %(report_doctrees)s
    -c %(conf_dir)s
    -j %(report_threads)s
    %(docdir)s %(report_html)s
    >& report.log %(erase_return)s )
    '''

    P.run(statement)

    E.info('the report is available at %s' % os.path.abspath(
        os.path.join(params['report_html'], "contents.html")))
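
# For reference, with report_engine == "cgatreport", clean == True and an
# available xvfb-run, the statement above resolves to a shell command along
# these lines (paths, thread counts and the PYTHONPATH come from the
# pipeline configuration and sys.path; the values here are illustrative):
#
#   rm -rf report _cache _static;
#   (export SPHINX_DOCSDIR=/path/to/pipeline_docs/my_pipeline;
#    export SPHINX_THEMEDIR=/path/to/pipeline_docs/themes;
#    export PYTHONPATH=...;
#    xvfb-run -d cgatreport-build --num-jobs=4 sphinx-build -b html
#        -d doctrees -c . -j 4 /path/to/docdir report/html
#        >& report.log || true)
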
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.ArgumentParser(description=__doc__)

    parser.add_argument(
        "--input-filename-fasta", dest="input_filename_fasta", type=str,
        help="filename with reference sequence in fasta format")

    parser.add_argument(
        "--counting-mode", dest="counting_mode", type=str,
        choices=("all", "pileup_defaults"),
        help="counting mode. all=all reads/bases. pileup_defaults="
        "use default pileup thresholds. Options will be added to "
        "--mpileup-options.")

    parser.add_argument(
        "--mpileup-options", dest="mpileup_options", type=str,
        help="pileup options to use")

    parser.set_defaults(
        mpileup_options="",
        counting_mode="all",
        input_filename_fasta=None,
        report_step=1000000,
    )

    # add common options (-h/--help, ...) and parse command line
    args = E.start(parser, argv=argv, add_output_options=True)

    bamfile = args[0]

    mpileup_options = args.mpileup_options

    if args.counting_mode == "all":
        mpileup_options += " -Q 0 -B -A"

    read_depth_histogram = collections.defaultdict(int)
    base_depth_histogram = collections.defaultdict(int)

    # deletions are marked by something like -2AA at the first
    # position and a '*' for subsequent positions
    rx_deletions = re.compile("([-][0-9]+|[*])")
    report_step = args.report_step
    npositions = 0

    samtools = iotools.which("samtools")

    statement = (
        "{samtools} mpileup "
        "-f {reference_fasta} "
        "{mpileup_options} "
        "{bamfile} ".format(
            samtools=samtools,
            reference_fasta=args.input_filename_fasta,
            mpileup_options=mpileup_options,
            bamfile=os.path.abspath(bamfile)))

    E.info("running the following statement: {}".format(statement))
    cmd_args = shlex.split(statement)
    proc = subprocess.Popen(
        cmd_args,
        shell=False,
        stderr=subprocess.PIPE,
        stdout=subprocess.PIPE,
        cwd=os.path.abspath(os.curdir))

    for line in proc.stdout:
        line = line.decode("utf-8")
        (contig, pos, base, read_depth,
         info, qualities) = line[:-1].split("\t")
        read_depth = int(read_depth)
        pos = int(pos)
        if pos % report_step == 0:
            E.info("working on {}: {}".format(contig, pos))
        ndeletions = len(rx_deletions.findall(info))
        base_depth = read_depth - ndeletions
        read_depth_histogram[read_depth] += 1
        base_depth_histogram[base_depth] += 1

    for line in proc.stderr:
        E.warn(line)

    keys = sorted(set(read_depth_histogram.keys()).union(
        base_depth_histogram.keys()))

    args.stdout.write("depth\tread_depth_positions\tbase_depth_positions\n")
    for key in keys:
        args.stdout.write("{}\t{}\t{}\n".format(
            key,
            read_depth_histogram[key],
            base_depth_histogram[key]))

    E.info("positions tested: {}".format(sum(read_depth_histogram.values())))
    E.stop()
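
# Minimal sketch of the deletion bookkeeping above, using a made-up pileup
# string: '-2AA' flags the start of a two-base deletion on one read and '*'
# marks a deleted base at a later position, so both are subtracted from the
# read depth to obtain the base depth.
def _example_deletion_counting():
    import re
    rx_deletions = re.compile("([-][0-9]+|[*])")
    info = ".,.-2AA.,*"          # 5 reads; findall() -> ["-2", "*"]
    read_depth = 5
    base_depth = read_depth - len(rx_deletions.findall(info))
    assert base_depth == 3
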
def main(argv=None):
    """script main.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-o", "--output-format", dest="output_format",
                      type="choice",
                      choices=("bedgraph", "wiggle", "bigbed",
                               "bigwig", "bed"),
                      help="output format [default=%default]")

    parser.add_option("-s", "--shift-size", dest="shift", type="int",
                      help="shift reads by a certain amount (ChIP-Seq) "
                      "[%default]")

    parser.add_option("-e", "--extend", dest="extend", type="int",
                      help="extend reads by a certain amount "
                      "(ChIP-Seq) [%default]")

    parser.add_option("-p", "--wiggle-span", dest="span", type="int",
                      help="span of a window in wiggle tracks "
                      "[%default]")

    parser.add_option("-m", "--merge-pairs", dest="merge_pairs",
                      action="store_true",
                      help="merge paired-end reads into a single "
                      "bed interval [default=%default].")

    parser.add_option("--scale-base", dest="scale_base", type="float",
                      help="number of reads/pairs to scale bigwig file to. "
                      "The default is to scale to 1M reads "
                      "[default=%default]")

    parser.add_option("--scale-method", dest="scale_method", type="choice",
                      choices=("none", "reads",),
                      help="scale bigwig output. 'reads' will normalize by "
                      "the total number of reads in the bam file that are "
                      "used to construct the bigwig file. If --merge-pairs "
                      "is used, the number of pairs output will be used for "
                      "normalization. 'none' will not scale the bigwig file "
                      "[default=%default]")

    parser.add_option("--max-insert-size", dest="max_insert_size",
                      type="int",
                      help="only merge if insert size is less than "
                      "# bases. 0 turns off this filter "
                      "[default=%default].")

    parser.add_option("--min-insert-size", dest="min_insert_size",
                      type="int",
                      help="only merge paired-end reads if they are "
                      "at least # bases apart. "
                      "0 turns off this filter. [default=%default]")

    parser.set_defaults(
        samfile=None,
        output_format="wiggle",
        shift=0,
        extend=0,
        span=1,
        merge_pairs=None,
        min_insert_size=0,
        max_insert_size=0,
        scale_method='none',
        scale_base=1000000,
    )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.start(parser, argv=argv, add_output_options=True)

    if len(args) >= 1:
        options.samfile = args[0]
    if len(args) == 2:
        options.output_filename_pattern = args[1]
    if not options.samfile:
        raise ValueError("please provide a bam file")

    # Read BAM file using Pysam
    samfile = pysam.AlignmentFile(options.samfile, "rb")

    # Create temporary files / folders
    tmpdir = tempfile.mkdtemp()
    E.debug("temporary files are in %s" % tmpdir)
    tmpfile_wig = os.path.join(tmpdir, "wig")
    tmpfile_sizes = os.path.join(tmpdir, "sizes")

    # Create dictionary of contig sizes
    contig_sizes = dict(list(zip(samfile.references, samfile.lengths)))
    # write contig sizes
    outfile_size = iotools.open_file(tmpfile_sizes, "w")
    for contig, size in sorted(contig_sizes.items()):
        outfile_size.write("%s\t%s\n" % (contig, size))
    outfile_size.close()

    # Shift and extend only available for bigwig format
    if options.shift or options.extend:
        if options.output_format != "bigwig":
            raise ValueError(
                "shift and extend only available for bigwig output")

    # Output filename required for bigwig / bigbed computation
    if options.output_format == "bigwig":
        if not options.output_filename_pattern:
            raise ValueError(
                "please specify an output file for bigwig computation.")

        # Define executable to use for binary conversion
        if options.output_format == "bigwig":
            executable_name = "wigToBigWig"
        else:
            raise ValueError("unknown output format `%s`" %
                             options.output_format)

        # check required executable file is in the path
        executable = iotools.which(executable_name)
        if not executable:
            raise OSError("could not find %s in path." % executable_name)

        # Open output file
        outfile = iotools.open_file(tmpfile_wig, "w")
        E.info("starting output to %s" % tmpfile_wig)
    else:
        outfile = iotools.open_file(tmpfile_wig, "w")
        E.info("starting output to stdout")

    # Set up output write functions
    if options.output_format in ("wiggle", "bigwig"):
        # wiggle is one-based, so add 1, also step-size is 1, so need
        # to output all bases
        if options.span == 1:
            outf = lambda outfile, contig, start, end, val: \
                outfile.write(
                    "".join(["%i\t%i\n" % (x, val)
                             for x in range(start + 1, end + 1)]))
        else:
            outf = SpanWriter(options.span)
    elif options.output_format == "bedgraph":
        # bed is 0-based, open-closed
        outf = lambda outfile, contig, start, end, val: \
            outfile.write("%s\t%i\t%i\t%i\n" % (contig, start, end, val))

    # initialise counters
    ninput, nskipped, ncontigs = 0, 0, 0

    # set output file name
    output_filename_pattern = options.output_filename_pattern
    if output_filename_pattern:
        output_filename = os.path.abspath(output_filename_pattern)

    # shift and extend or merge pairs. Output temporary bed file
    if options.shift > 0 or options.extend > 0 or options.merge_pairs:
        # Workflow 1: convert to bed intervals and use bedtools
        # genomecov to build a coverage file.
        # Convert to bigwig with the UCSC tool bedGraphToBigWig
        if options.merge_pairs:
            # merge pairs using bam2bed
            E.info("merging pairs to temporary file")
            counter = merge_pairs(
                samfile,
                outfile,
                min_insert_size=options.min_insert_size,
                max_insert_size=options.max_insert_size,
                bed_format=3)
            E.info("merging results: {}".format(counter))
            if counter.output == 0:
                raise ValueError("no pairs output after merging")
        else:
            # create bed file with shifted/extended tags
            shift, extend = options.shift, options.extend
            shift_extend = shift + extend
            counter = E.Counter()

            for contig in samfile.references:
                E.debug("output for %s" % contig)
                lcontig = contig_sizes[contig]

                for read in samfile.fetch(contig):
                    pos = read.pos
                    if read.is_reverse:
                        start = max(0, read.pos + read.alen - shift_extend)
                    else:
                        start = max(0, read.pos + shift)

                    # intervals extending beyond contig are removed
                    if start >= lcontig:
                        continue

                    end = min(lcontig, start + extend)
                    outfile.write("%s\t%i\t%i\n" % (contig, start, end))
                    counter.output += 1

        outfile.close()

        if options.scale_method == "reads":
            scale_factor = float(options.scale_base) / counter.output
            E.info("scaling: method=%s scale_quantity=%i scale_factor=%f" %
                   (options.scale_method,
                    counter.output,
                    scale_factor))
            scale = "-scale %f" % scale_factor
        else:
            scale = ""

        # Convert bed file to coverage file (bedgraph)
        tmpfile_bed = os.path.join(tmpdir, "bed")
        E.info("computing coverage")
        # calculate coverage - format is bedgraph
        statement = """bedtools genomecov -bg -i %(tmpfile_wig)s %(scale)s
        -g %(tmpfile_sizes)s > %(tmpfile_bed)s""" % locals()
        E.run(statement)

        # Convert bedgraph to bigwig
        E.info("converting to bigwig")
        tmpfile_sorted = os.path.join(tmpdir, "sorted")
        statement = ("sort -k 1,1 -k2,2n %(tmpfile_bed)s "
                     "> %(tmpfile_sorted)s;"
                     "bedGraphToBigWig %(tmpfile_sorted)s %(tmpfile_sizes)s "
                     "%(output_filename_pattern)s" % locals())
        E.run(statement)

    else:

        # Workflow 2: use pysam column iterator to build a
        # wig file. Then convert to bigwig or bedgraph file
        # with UCSC tools.
        def column_iter(iterator):
            start = None
            end = 0
            n = None
            for t in iterator:
                if t.pos - end > 1 or n != t.n:
                    if start is not None:
                        yield start, end, n
                    start = t.pos
                    end = t.pos
                    n = t.n
                end = t.pos
            yield start, end, n

        if options.scale_method != "none":
            raise NotImplementedError(
                "scaling not implemented for pileup method")

        # Bedgraph track definition
        if options.output_format == "bedgraph":
            outfile.write("track type=bedGraph\n")

        for contig in samfile.references:
            # if contig != "chrX": continue
            E.debug("output for %s" % contig)
            lcontig = contig_sizes[contig]

            # Write wiggle header
            if options.output_format in ("wiggle", "bigwig"):
                outfile.write("variableStep chrom=%s span=%i\n" %
                              (contig, options.span))

            # Generate pileup per contig using pysam and iterate over columns
            for start, end, val in column_iter(samfile.pileup(contig)):
                # patch: there was a problem with bam files and reads
                # overextending at the end. These are usually Ns, but
                # need to check as otherwise wigToBigWig fails.
                if lcontig <= end:
                    E.warn("read extending beyond contig: %s: %i > %i" %
                           (contig, end, lcontig))
                    end = lcontig
                    if start >= end:
                        continue

                if val > 0:
                    outf(outfile, contig, start, end, val)

            ncontigs += 1

        # Close output file; use isinstance rather than comparing
        # type(outf) to type(SpanWriter), which always compared against
        # the metaclass and never matched
        if isinstance(outf, SpanWriter):
            outf.flush(outfile)
        else:
            outfile.flush()

        E.info("finished output")

    # Report counters
    E.info("ninput=%i, ncontigs=%i, nskipped=%i" %
           (ninput, ncontigs, nskipped))

    # Convert to binary formats
    if options.output_format == "bigwig":
        outfile.close()

        E.info("starting %s conversion" % executable)
        try:
            retcode = subprocess.call(
                " ".join((executable,
                          tmpfile_wig,
                          tmpfile_sizes,
                          output_filename_pattern)),
                shell=True)
            if retcode != 0:
                E.warn("%s terminated with signal: %i" %
                       (executable, -retcode))
                return -retcode
        except OSError as msg:
            E.warn("Error while executing bigwig: %s" % msg)
            return 1

        E.info("finished bigwig conversion")
    else:
        with open(tmpfile_wig) as inf:
            sys.stdout.write(inf.read())

    # Cleanup temp files
    shutil.rmtree(tmpdir)

    E.stop()
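
# Sketch of the run-length compression performed by the nested column_iter
# above, using stand-in tuples for pysam pileup columns (only .pos and .n
# are accessed). column_iter is local to main(), so its logic is repeated
# here for illustration.
def _example_column_iter():
    import collections
    Column = collections.namedtuple("Column", "pos n")

    def column_iter(iterator):
        start, end, n = None, 0, None
        for t in iterator:
            if t.pos - end > 1 or n != t.n:
                if start is not None:
                    yield start, end, n
                start = t.pos
                end = t.pos
                n = t.n
            end = t.pos
        yield start, end, n

    # depth 2 at positions 10-11, a gap, then depth 1 at positions 15-16:
    # adjacent columns with equal depth collapse into one interval
    columns = [Column(10, 2), Column(11, 2), Column(15, 1), Column(16, 1)]
    assert list(column_iter(columns)) == [(10, 11, 2), (15, 16, 1)]
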
def getRunStatement(self, infile, outfile, controlfile):
    """
    Generate a specific run statement for each peakcaller class
    """

    # select location of the spp script to run
    if self.PARAMS_PEAKCALLER["spp_options_idr_script"] == "default":
        executable = iotools.which("run_spp.R")
    elif self.PARAMS_PEAKCALLER["spp_options_idr_script"] == "nodups":
        executable = iotools.which("run_spp_nodups.R")
    else:
        executable = self.PARAMS_PEAKCALLER["spp_options_idr_script"]

    # os.path.exists() returns False rather than raising, so test the
    # result directly instead of wrapping it in try/except
    if not executable or not os.path.exists(executable):
        raise IOError("SPP script not found: %s" % executable)

    # select the threshold for lax peak calling
    if self.PARAMS_PEAKCALLER["spp_options_npeaks"]:
        if self.PARAMS_PEAKCALLER["spp_options_fdr"]:
            raise Exception("Values specified for both SPP options"
                            " -npeaks and -fdr; please select one"
                            " option or the other, but not both")
        else:
            threshold = "-npeaks=" + \
                str(self.PARAMS_PEAKCALLER["spp_options_npeaks"])
    elif self.PARAMS_PEAKCALLER["spp_options_fdr"]:
        threshold = "-fdr=" + \
            str(self.PARAMS_PEAKCALLER["spp_options_fdr"])
    else:
        raise Exception("Must specify a value for one of"
                        " spp_options_npeaks or spp_options_fdr")

    # build run statement for spp.
    # -savn is output.npeak.file (passed as NULL,
    #  means filename based on infile)
    # -out is output.result.file
    # -odir defaults to os.path.dirname(infile)
    # -savn is save narrowpeak file
    # -savr is save regionpeak file
    # (run_spp.R script throws an error if region peak is not output).
    statement = [("Rscript %(executable)s"
                  " -c=%(infile)s"
                  " -i=%(controlfile)s"
                  " %(threshold)s"
                  " -savn"
                  " -savr")]

    # add additional options
    statement.append(self.PARAMS_PEAKCALLER["spp_options_parameters"])

    # specify outfile
    # MM: this was hard-coded to a non-existent directory
    # changed to stats directory
    statement.append(" -rf"
                     " -out=./stats/phantomPeakStatsReps.tab"
                     " >& %(outfile)s")

    statement = (" ".join(statement) % locals())

    return statement
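
# For reference, with spp_options_idr_script == "default", an npeaks
# threshold of 300000 and no extra parameters, the returned statement reads
# roughly as follows (paths and file names are illustrative):
#
#   Rscript /path/to/run_spp.R -c=ChIP.bam -i=Input.bam -npeaks=300000
#       -savn -savr  -rf -out=./stats/phantomPeakStatsReps.tab >& ChIP.spp
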