Example #1
0
def main(argv=None):
    """Run a command through the shell, bracketed by E.Start and E.Stop.

    Command line options are parsed from *argv*, or from ``sys.argv`` when
    *argv* is None.  Option parsing stops at the first positional argument;
    that argument and everything after it form the command to execute.  The
    first argument is handed to the shell unchanged, all further arguments
    are wrapped in single quotes.

    The function does not return: it calls ``sys.exit`` with the return
    code of the command, or with 0 when no command was given.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    # stop parsing options at the first argument
    parser.disable_interspersed_args()

    # Pass argv on explicitly (as the other scripts do) so that a
    # caller-supplied argument list is honoured.
    (options, args) = E.Start(parser, argv=argv, add_pipe_options=True)

    returncode = 0
    if args:
        cmd = args[0]
        if len(args) > 1:
            # NOTE(review): arguments that themselves contain a single
            # quote will break this quoting.
            cmd += " '" + "' '".join(args[1:]) + "'"

        # stdout/stderr are inherited from this process, so there is no
        # output to collect - waiting for the return code is sufficient.
        returncode = subprocess.call(cmd,
                                     shell=True,
                                     cwd=os.getcwd(),
                                     close_fds=True)

    E.Stop()

    sys.exit(returncode)
Example #2
0
def main(argv=None):
    """Write a listing of every file below the current directory.

    Command line options are parsed from sys.argv, unless *argv* is given.

    The directory tree is walked first; afterwards the collected paths are
    written, one per line, into a file called ``CWD_<timestamp>`` in the
    current directory.
    """

    argv = sys.argv if argv is None else argv

    # command line parser with the common options (-h/--help, ...)
    parser = E.OptionParser(usage=globals()["__doc__"],
                            version="%prog version: $Id$")
    (options, args) = E.Start(parser, argv=argv)

    # the walk is finished before the output file is created
    listing = {dirpath: names for dirpath, _subdirs, names in os.walk(".")}

    stamp = datetime.datetime.fromtimestamp(
        time.time()).strftime('%Y-%m-%d_%H:%M:%S')
    target = "CWD_%s" % stamp
    E.info("outputting directory state to %s" % target)

    with iotools.openFile(target, "w") as out:
        out.write("##contents of cwd on %s\n\n" % stamp)
        for dirpath, names in listing.items():
            for name in names:
                out.write(os.path.join(dirpath, name) + "\n")

    # write footer and output benchmark information.
    E.Stop()
Example #3
0
def main(argv=None):
    """Create a test directory with a ``tests.yaml`` stub per argument.

    Command line options are parsed from sys.argv, unless *argv* is given.

    For every positional argument a directory named after the argument's
    basename is created next to this file and YAML_TEMPLATE is written to
    ``tests.yaml`` inside it.  Directories that already exist are skipped.

    Raises ValueError if no positional argument is given.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(
        version=
        "%prog version: $Id: script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-m", "--method", dest="method", type="choice",
                      choices=("script", "module"),
                      help="type of tests to create [%default].")

    parser.set_defaults(method="script")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    if not args:
        raise ValueError(
            "setup_test.py requires one or more command line arguments")

    # test directories live next to this script
    targetdir = os.path.dirname(__file__)

    counter = E.Counter()

    for arg in args:
        counter.input += 1
        basename = os.path.basename(arg)
        testdir = os.path.join(targetdir, basename)

        if os.path.exists(testdir):
            E.warn("%s already exists - skipping" % basename)
            counter.skipped += 1
        else:
            os.mkdir(testdir)
            with open(os.path.join(testdir, "tests.yaml"), "w") as outf:
                outf.write(YAML_TEMPLATE)
            counter.created += 1

    E.info(str(counter))

    # write footer and output benchmark information.
    E.Stop()
Example #4
0
def main(argv=None):
    """Delete files that are not complete.

    Command line options are parsed from sys.argv, unless *argv* is given.

    Each positional argument is a file name.  A file is kept when its
    companion ``<filename>.log`` exists and is complete, or when the file
    itself is complete, according to ``iotools.isComplete``.  Every other
    file is deleted, unless ``--dry-run`` is given.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-n", "--dry-run", dest="dry_run",
                      action="store_true",
                      help="dry run, do not delete any files [%default]")

    parser.set_defaults(dry_run=False)

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    c = E.Counter()
    for filename in args:
        c.checked += 1

        # the log file is consulted first, then the file itself
        logfile = filename + ".log"
        if ((os.path.exists(logfile) and iotools.isComplete(logfile))
                or iotools.isComplete(filename)):
            c.complete += 1
            continue

        c.incomplete += 1
        # the message is logged in dry-run mode as well
        E.info('deleting %s' % filename)
        if not options.dry_run:
            os.unlink(filename)
            c.deleted += 1

    E.info(c)

    # write footer and output benchmark information.
    E.Stop()
Example #5
0
def main(argv=None):
    """Run the command given on the command line locally through the shell.

    Command line options are parsed from *argv*, or from ``sys.argv`` when
    *argv* is None, using the option parser provided by
    ``farm.getOptionParser``.  The first positional argument is the
    command; further arguments are appended wrapped in single quotes.  Any
    ``%DIR%`` placeholder in the command is removed before execution.

    Raises ValueError if no command is given.
    """

    if argv is None:
        argv = sys.argv

    parser = farm.getOptionParser()

    (options, args) = E.Start(parser, argv=argv, add_cluster_options=True)

    # fail with a helpful message instead of an IndexError on args[0]
    if not args:
        raise ValueError(
            "command line argument missing - see usage information")

    cmd = args[0]
    if len(args) > 1:
        cmd += " '" + "' '".join(args[1:]) + "'"

    # strip the %DIR% placeholder before running the command
    cmd = re.sub("%DIR%", "", cmd)
    subprocess.call(cmd,
                    shell=True,
                    stdin=sys.stdin,
                    stdout=sys.stdout,
                    cwd=os.getcwd(),
                    close_fds=True)
    E.Stop()
Example #6
0
def main(argv=None):
    """Kill cluster jobs whose name matches a pattern.

    Command line options are parsed from sys.argv, unless *argv* is given.

    The job list is obtained from ``qstat -xml``.  Every ``job_list``
    entry whose ``JB_name`` matches ``--pattern-identifier`` is selected
    and its ``JB_job_number`` is passed to ``qdel``.  With ``--dry-run``,
    or when no job matches, ``qdel`` is not called.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-p",
                      "--pattern-identifier",
                      dest="pattern",
                      type="string",
                      help="jobs matching `pattern` in their job "
                      "description will be killed [default=%default].")

    parser.add_option("-n",
                      "--dry-run",
                      dest="dry_run",
                      action="store_true",
                      help="do dry run, do not kill [default=%default].")

    parser.set_defaults(
        pattern=None,
        dry_run=False,
    )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    qstat_xml = subprocess.Popen(["qstat", "-xml"],
                                 stdout=subprocess.PIPE).communicate()[0]

    # parse the document directly from the captured output; this works
    # for both byte and text strings without an intermediate file object
    root = xml.etree.ElementTree.fromstring(qstat_xml)

    pattern = re.compile(options.pattern) if options.pattern else None

    ntested = 0
    to_kill = set()

    # Element.iter() replaces getiterator(), which was removed in
    # Python 3.9
    for job in root.iter("job_list"):
        ntested += 1
        job_id = job.find("JB_job_number").text
        name = job.find("JB_name").text
        if pattern and pattern.search(name):
            to_kill.add(job_id)

    # as before, the count is reported in dry-run mode as well
    nkilled = len(to_kill)

    # only call qdel if there is something to delete - an empty job list
    # is not a valid argument
    if to_kill and not options.dry_run:
        p = subprocess.Popen(["qdel", ",".join(sorted(to_kill))],
                             stdout=subprocess.PIPE)
        p.communicate()

    E.info("ntested=%i, nkilled=%i" % (ntested, nkilled))

    # write footer and output benchmark information.
    E.Stop()
Example #7
0
def main(argv=None):
    """Submit job scripts whose result file is missing, stale or incomplete.

    Command line options are parsed from *argv*, or from ``sys.argv`` when
    *argv* is None.

    Job scripts are taken from the positional arguments or, if there are
    none, collected with ``--glob``.  For each script a job name is
    extracted with ``--input-pattern`` and turned into the name of the
    expected result file with ``--output-filename-pattern``.  A script is
    submitted with ``qsub`` when the result file does not exist, is older
    than the script (``isNewer``) or is not complete (``isComplete``).
    Stale and incomplete result files are removed before submission.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option(
        "-g",
        "--glob",
        dest="glob_pattern",
        type="string",
        help="glob pattern to use for collecting cluster jobs descriptions "
        "[%default]")

    parser.add_option(
        "-i",
        "--input-pattern",
        dest="input_pattern",
        type="string",
        help="regular expression to extract job id from filename [%default].")

    parser.add_option(
        "-o",
        "--output-filename-pattern",
        dest="output_pattern",
        type="string",
        help="string to convert a job id to a filename [%default].")

    parser.set_defaults(
        glob_pattern="job*.qsub",
        # raw string: same pattern, without the invalid "\S" escape
        input_pattern=r"(\S+).qsub",
        output_pattern="%s.stdout",
        remove_old=True,
        force=False,
        check_completeness="python",
    )

    (options, args) = E.Start(parser, argv=argv, add_pipe_options=True)

    if args:
        filenames = args
    elif options.glob_pattern:
        filenames = glob.glob(options.glob_pattern)
    else:
        # neither arguments nor a glob pattern: nothing to do
        filenames = []

    ninput, nrun, nskipped, nerrors = 0, 0, 0, 0
    ndeleted = 0

    if options.check_completeness == "python":
        isComplete = checkPythonRuns
    else:
        raise ValueError("unknown completeness check '%s'" %
                         options.check_completeness)

    ##############################################################
    # decide what to do
    ##############################################################
    jobs = []
    files_to_delete = []

    for filename in filenames:

        ninput += 1
        match = re.search(options.input_pattern, filename)
        if match is None:
            options.stderr.write(
                "# could not extract invariant job name from %s\n" % filename)
            nerrors += 1
            continue
        job_name = match.groups()[0]

        result_filename = options.output_pattern % job_name

        do = False
        status = "up-to-date"

        if options.force:
            status = "force"
            do = True

        if not do:
            if os.path.exists(result_filename):
                if isNewer(filename, result_filename):
                    status = "newer"
                    do = True
                    if options.remove_old:
                        files_to_delete.append(result_filename)
                # completeness is only checked for up-to-date results
                if not do and not isComplete(result_filename):
                    status = "incomplete"
                    do = True
                    if options.remove_old:
                        files_to_delete.append(result_filename)
            else:
                status = "missing"
                do = True

        E.info("%s->%s (%s)\n" % (filename, result_filename, status))

        if not do:
            nskipped += 1
            continue

        jobs.append(filename)

    ##############################################################
    # delete old files
    ##############################################################
    for filename in files_to_delete:
        if os.path.exists(filename):
            os.remove(filename)
            ndeleted += 1

    ##############################################################
    # start jobs
    ##############################################################
    for filename in jobs:

        cmd = "qsub %s" % filename
        try:
            retcode = subprocess.call(cmd, shell=True)
        except OSError as e:
            if options.loglevel >= 1:
                options.stdlog.write(
                    "# ERROR: failed to execute %s with msg %s\n" % (cmd, e))
            # a job that could not be started is an error, not a run
            nerrors += 1
            continue

        if retcode != 0:
            if options.loglevel >= 1:
                options.stdlog.write("# ERROR: failed to execute %s\n" %
                                     cmd)
            nerrors += 1
            continue

        nrun += 1

    E.info("ninput=%i, nrun=%i, nskipped=%i, ndeleted=%i, nerrors=%i" %
           (ninput, nrun, nskipped, ndeleted, nerrors))

    E.Stop()
Example #8
0
def main(argv=None):
    """Plot or run an IDR analysis.

    Command line options are parsed from sys.argv, unless *argv* is given.

    With ``--action=plot`` (the default) the positional arguments are
    passed to ``plotIDR`` together with ``<output-prefix>.pdf``.  With
    ``--action=run`` exactly two positional arguments (the replicates) are
    required and passed to ``runIDR``.

    Raises ValueError if ``--action=run`` is given without exactly two
    positional arguments.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(
        version=
        "%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-o",
                      "--output-prefix",
                      dest="output_prefix",
                      type="string",
                      help="output filename prefix [default=%default].")

    parser.add_option(
        "-c",
        "--chromosome-table",
        dest="filename_chromosome_table",
        type="string",
        help=
        "filename with tab separated list of chromosome names [default=%default]."
    )

    parser.add_option("--action",
                      dest="action",
                      type="choice",
                      choices=("plot", "run"),
                      help="action to perform [default=%default]")

    parser.add_option(
        "-s",
        "--signal-value",
        dest="signal_value",
        type="string",
        help=
        "use either p.value or sig.value as ranking measure [default=%default]"
    )

    # The ratio is documented as a value between 0 and 1, so it has to be
    # parsed as a float - an "int" option rejects values such as 0.5.
    parser.add_option(
        "-r",
        "--overlap-ratio",
        dest="overlap_ratio",
        type="float",
        help=
        "a value between 0 and 1 that controls how much two peaks have to overlap to be called as the same [default=%default]"
    )

    parser.set_defaults(
        action="plot",
        output_prefix="output",
        half_width=None,
        overlap_ratio=0,
        is_broadpeak=False,
        signal_value="signal.value",
        filename_chromosome_table="genome_table.txt",
    )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    if options.action == "plot":
        plotIDR(options.output_prefix + ".pdf", args)
    elif options.action == "run":
        if len(args) != 2:
            raise ValueError("require exactly two replicates")
        runIDR(options, args[0], args[1])

    # write footer and output benchmark information.
    E.Stop()
Example #9
0
def main(argv=sys.argv):
    """Summarise task and job timings found in a pipeline log file.

    Command line options are parsed from *argv*.

    The log file (``--logfile``) is read line by line.  Only lines that
    start with a digit, have at least five whitespace-separated fields and
    whose fourth field starts with ``task.`` are examined.  Recognised
    start/completion messages of tasks and jobs are fed into one
    ``Counter`` per task/job name.  When a line containing
    "output generated by" is seen, all counters are reset unless
    ``--no-reset`` is given.

    For each section a tab-separated table (section, object, ncalls,
    duration, percall, running) is written to ``options.stdout``, followed
    by the list of objects that are still running.

    Raises ValueError if a message cannot be accounted for, unless
    ``--ignore-errors`` is set.
    """

    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-l", "--logfile", dest="logfile", type="string",
                      help="name of logfile [default=%default]")

    parser.add_option("-t", "--time", dest="time", type="choice",
                      choices=("seconds", "milliseconds"),
                      help="time to show [default=%default]")

    parser.add_option(
        "--no-reset", dest="reset", action="store_false",
        help="do not reset counters when a new pipeline run started "
        "The default is to reset so that only the counts from the latest "
        "pipeline execution are show "
        "[default=%default]")

    parser.add_option(
        "-f", "--filter-method", dest="filter", type="choice",
        choices=("unfinished", "running", "completed", "all"),
        help="apply filter to output [default=%default]")

    parser.add_option(
        "-i", "--ignore-errors", dest="ignore_errors", action="store_true",
        help="ignore errors [default=%default]")

    parser.set_defaults(sections=[],
                        logfile="pipeline.log",
                        filter="all",
                        reset=True,
                        time="seconds")

    (options, args) = E.Start(parser, argv)

    rx_timestamp = re.compile("^[0-9]+")
    rx_task_source = re.compile(r"task\.")

    # Recognised messages, tried in this order.  Whitespace has been
    # removed from the message before matching.  Each entry is
    # (pattern, section, is_start, group holding the object name); a
    # section of None marks a message that is recognised but cannot be
    # counted.
    events = (
        (re.compile(r"task.log_at_level.\d+Task=(\S+)"), None, None, 1),
        (re.compile(r"Job=\[(\S+)->(\S+)\]Missingfile[s]*\[(\S+)\]"),
         "job", True, 2),
        (re.compile(r"Job=\[(\S+)->(\S+)\]Missingfile[s]*"),
         "job", True, 2),
        # multi-line log messages
        (re.compile(r"Job=\[(\S+)->(\S+)\]\s*\.\.\."), "job", True, 2),
        (re.compile(r"Taskentersqueue=(\S+)"), "task", True, 1),
        (re.compile(r"Job=\[(\S+)->(\S+)\]completed"), "job", False, 2),
        (re.compile(r"CompletedTask=(\S+)"), "task", False, 1),
        (re.compile(r"UptodateTask=(\S+)"), "task", False, 1),
    )

    if options.sections:
        profile_sections = options.sections
    else:
        profile_sections = ("task", "job")

    counts = {}
    for section in profile_sections:
        counts[section] = collections.defaultdict(Counter)

    # the context manager closes the log file once it has been read
    with iotools.openFile(options.logfile) as infile:
        for line in infile:
            if not rx_timestamp.match(line):
                continue
            data = line[:-1].split()
            if len(data) < 5:
                continue
            date, timestamp, level, source = data[:4]

            if re.search("output generated by", line):
                if options.reset:
                    E.info("resetting counts at line=%s" % line[:-1])
                    for section in profile_sections:
                        counts[section] = collections.defaultdict(Counter)
                continue

            if not rx_task_source.match(source):
                continue

            dt = datetime.datetime.strptime(
                " ".join((date, timestamp)), "%Y-%m-%d %H:%M:%S,%f")

            msg = "".join(data[4:])

            # find the first pattern that applies; skip unknown messages
            for rx_event, section, is_start, group in events:
                match = rx_event.search(msg)
                if match:
                    break
            else:
                continue

            try:
                if section is None:
                    # NOTE(review): "log_at_level" messages are matched
                    # but not counted, so they end up here - confirm this
                    # is intended.
                    raise ValueError("unknown action")
                name = match.group(group)
                counts[section][name].add(is_start, dt, name)
            except ValueError as error:
                if not options.ignore_errors:
                    raise ValueError(str(error) + "\nat line %s" % line)

    # convert a duration into the requested unit
    if options.time == "milliseconds":
        # seconds have to be scaled to milliseconds before adding
        def f(d):
            return d.seconds * 1000 + d.microseconds / 1000.0
    elif options.time == "seconds":
        def f(d):
            return d.seconds + d.microseconds / 1000000.0

    for section in profile_sections:
        options.stdout.write("\t".join(
            ("section", "object", "ncalls",
             "duration", "percall", "running")) + "\n")

        running = []
        for objct, c in counts[section].items():

            # apply filters
            if options.filter in ("unfinished", "running") and c.running == 0:
                continue

            d = f(c.duration)
            if c.calls > 0:
                percall = "%6.3f" % (d / float(c.calls))
            else:
                percall = "na"

            options.stdout.write("\t".join(
                map(str, (section, objct, c.calls, d, percall,
                          c.running))) + "\n")

            running.extend([x for x, y in c._started.items() if y != 0])

        options.stdout.write("#//\n\n")

        if running:
            options.stdout.write("# running %ss\n" % section)
            options.stdout.write("\n".join(map(str, running)) + "\n")
            options.stdout.write("#//\n\n")

    E.Stop()
Example #10
0
def main(argv=None):
    """Summarise the contents of log files per file or per node.

    Command line options are parsed from *argv*, or from ``sys.argv`` when
    *argv* is None.

    Log files are taken from the positional arguments or, if there are
    none, collected with ``--glob``.  ``-`` stands for stdin and files
    ending in ``.gz`` are opened with gzip.

    In ``file`` mode every line of a file is added to a
    ``Logfile.LogFileData`` and one row per file plus a total is written
    to ``options.stdout``.  In ``node`` mode the lines following a
    "# job started at ... on <node>" line are collected per node and one
    row per node plus a total is written.

    Raises ValueError if there are no files to analyse.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option(
        "-g",
        "--glob",
        dest="glob_pattern",
        type="string",
        help="glob pattern to use for collecting files [%default].")

    parser.add_option(
        "-f",
        "--file-pattern",
        dest="file_pattern",
        type="string",
        help="only check files matching this pattern [%default].")

    parser.add_option("-m",
                      "--mode",
                      dest="mode",
                      type="choice",
                      choices=("file", "node"),
                      help="analysis mode [%default].")

    parser.add_option(
        "-r",
        "--recursive",
        action="store_true",
        help="recursively look for logfiles from current directory "
        "[%default].")

    parser.set_defaults(
        truncate_sites_list=0,
        glob_pattern="*.log",
        mode="file",
        recursive=False,
    )

    (options, args) = E.Start(parser, argv=argv)

    if args:
        filenames = args
    elif options.glob_pattern:
        filenames = glob.glob(options.glob_pattern)
    else:
        # neither arguments nor a glob pattern were supplied
        filenames = []

    if len(filenames) == 0:
        raise ValueError("no files to analyse")

    def iter_lines(filename):
        """Yield the lines of *filename* and close the file afterwards."""
        if filename == "-":
            infile = sys.stdin
        elif filename.endswith(".gz"):
            infile = gzip.open(filename, "r")
        else:
            infile = open(filename, "r")
        try:
            for line in infile:
                yield line
        finally:
            # stdin is not ours to close
            if infile is not sys.stdin:
                infile.close()

    if options.mode == "file":
        totals = Logfile.LogFileData()

        options.stdout.write("file\t%s\n" % totals.getHeader())

        for filename in filenames:
            subtotals = Logfile.LogFileData()
            for line in iter_lines(filename):
                subtotals.add(line)

            options.stdout.write("%s\t%s\n" % (filename, str(subtotals)))
            totals += subtotals

        options.stdout.write("%s\t%s\n" % ("total", str(totals)))

    elif options.mode == "node":

        chunks_per_node = {}

        rx_node = re.compile(r"# job started at .* \d+ on (\S+)")

        for filename in filenames:
            # lines seen before the first "job started" line go into a
            # chunk that is not attached to any node
            data = Logfile.LogFileDataLines()

            for line in iter_lines(filename):
                match = rx_node.match(line)
                if match:
                    # a new job starts: open a new chunk for its node
                    node_id = match.groups()[0]
                    data = Logfile.LogFileDataLines()
                    chunks_per_node.setdefault(node_id, []).append(data)
                    continue

                data.add(line)

        options.stdout.write("node\t%s\n" % data.getHeader())
        total = Logfile.LogFileDataLines()

        for node, chunks in sorted(chunks_per_node.items()):
            subtotal = Logfile.LogFileDataLines()
            for chunk in chunks:
                subtotal += chunk

            options.stdout.write("%s\t%s\n" % (node, str(subtotal)))

            total += subtotal

        options.stdout.write("%s\t%s\n" % ("total", str(total)))

    E.Stop()
def main(argv=None):
    """Build an HTML overview page of exported project reports.

    Command line options are parsed from sys.argv, unless *argv* is given.

    All ``index.html`` files below ``--path`` are located with ``find``.
    For each of them the project identifier (``proj<number>``), the report
    name and the page title are extracted and written as one table row to
    ``index.html`` in the ``--destination`` directory.  Files whose path
    contains no project identifier are skipped with a warning.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-p",
                      "--path",
                      dest="path",
                      type="string",
                      help="path to scan for files [%default]")

    # The option has to store into "dest": that is the attribute with a
    # default and the one that is read below.
    parser.add_option("-d",
                      "--destination",
                      dest="dest",
                      type="string",
                      help="path to deposit files into [%default]")

    parser.set_defaults(path='/ifs/projects/sftp',
                        url='http://www.cgat.org/downloads/',
                        dest='/ifs/projects/overview')

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    statement = "find %s -name 'index.html'" % options.path

    # universal_newlines makes communicate() return text, not bytes
    process = subprocess.Popen(statement,
                               shell=True,
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               universal_newlines=True)

    stdout, stderr = process.communicate()

    files = sorted(x for x in stdout.split('\n') if x != '')

    with iotools.openFile(os.path.join(options.dest, "index.html"),
                          "w") as outfile:

        outfile.write('''
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html>
<head>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <title>cgat project reports</title>
    <link rel="stylesheet" href="cgat.css" type="text/css" />
    <link rel="stylesheet" href="pygments.css" type="text/css" />
    <link rel="shortcut icon" href="http://cgatwiki.anat.ox.ac.uk/favicon.ico">
    <script type="text/javascript" src="sorttable.js"></script>
</head>

  <body>
    <div class="related">
      <h3>Navigation</h3>
      <ul>
        <li><a href="index.html">cgat Projects Overview</a> &raquo;</li>
      </ul>
    </div>

    <div class="document">
      <div class="documentwrapper">
        <div class="bodywrapper">
          <div class="body">
 <div class="section" id="cgat-pipelines">
<H1>cgat exported project pages</H1>

<p> 
This page is for internal use only. Do not distribute outside of
cgat and do not make this page available on the world wide web.
</p>

<table class="sortable">\n''')

        outfile.write(
            '''<tr><th>Project</th><th>Report</th><th>Title</th></tr>\n''')

        for f in files:
            project_match = re.search(r'(proj\d+)', f)
            if project_match is None:
                E.warn("no project identifier in %s - skipping" % f)
                continue

            proj = project_match.groups()[0]
            relpath = re.sub(r'.*proj\d+/', '', f)
            report = re.sub('^[^/]*/', '', os.path.dirname(relpath))

            with iotools.openFile(f) as inf:
                titles = [x for x in inf if "<title>" in x]

            # fall back to "NA" if there is no title or if the title
            # element is not closed on the same line
            title = "NA"
            if titles:
                title_match = re.search("<title>(.*)</title>", titles[0])
                if title_match:
                    title = title_match.groups()[0]

            if title.endswith("documentation"):
                title = title[:-len("documentation")]

            url = os.path.join(options.url, relpath)
            outfile.write(
                '<tr><td>%s</td><td><a HREF="%s">%s</a></td><td>%s</td></tr>\n'
                % (proj, url, report, title))

        outfile.write('''
</table>

</div>
</div>


          </div>
        </div>
      </div>
      <div class="sphinxsidebar">
        <div class="sphinxsidebarwrapper">
            <p class="logo"><a href="contents.html">
              <img class="logo" src="cgat_logo.png" alt="Logo"/>
            </a></p>





</body>
</html>\n''')

    E.info('created output file %s' % outfile.name)
    # write footer and output benchmark information.
    E.Stop()
Example #12
0
def main(argv=None):
    """Run a command on chunks of stdin and merge the results.

    The command line is parsed by ``E.Start`` with the parser returned by
    ``getOptionParser``; *argv* is accepted but not used here.  The first
    positional argument is the command, further arguments are appended
    wrapped in single quotes.

    * With ``--dry-run`` the command is executed once locally (with the
      ``%DIR%`` placeholder removed) and the process exits.
    * Otherwise ``options.stdin`` is split into chunks in a temporary
      directory according to the chosen splitting option, the command is
      run per chunk with the selected method and failed chunks are
      recorded.  With ``--collect`` no jobs are run; the ``*.out`` files
      of the given directory are used instead.
    * If nothing failed, the per-chunk outputs, log files and (with
      ``--subdirs``) additional output files are merged by the
      ``ResultBuilder*`` class that matches the file type.

    Raises ValueError if no command is given or no way to chunk the input
    is specified.
    """

    parser = getOptionParser()

    (options, args) = E.Start(parser, add_cluster_options=True)

    if len(args) == 0:
        raise ValueError(
            "command line argument missing - see usage information")

    # each entry is "pattern:column"
    options.renumber_column = [x.split(":") for x in options.renumber_column]

    cmd = args[0]
    if len(args) > 1:
        cmd += " '" + "' '".join(args[1:]) + "'"

    if options.dry_run:

        cmd = re.sub("%DIR%", "", cmd)
        subprocess.call(cmd,
                        shell=True,
                        stdin=sys.stdin,
                        stdout=sys.stdout,
                        cwd=os.getcwd(),
                        close_fds=True)
        E.Stop()
        sys.exit(0)

    failed_requests = []
    started_requests = []
    niterations = 0

    if not options.collect:
        tmpdir = os.path.abspath(tempfile.mkdtemp(dir=options.tmpdir))

        E.info(" working in directory %s" % tmpdir)

        # select the chunking strategy and its arguments
        if options.split_at_lines:
            chunk_iterator = chunk_iterator_lines
            args = (options.split_at_lines, )
        elif options.split_at_column:
            chunk_iterator = chunk_iterator_column
            args = (options.split_at_column - 1, options.max_files)
        elif options.split_at_regex:
            chunk_iterator = chunk_iterator_regex_split
            args = (re.compile(options.split_at_regex), 0, options.chunksize,
                    options.max_lines)
        elif options.group_by_regex:
            chunk_iterator = chunk_iterator_regex_group
            args = (re.compile(options.group_by_regex), 0, options.chunksize)
        else:
            raise ValueError("please specify a way to chunk input data")

        data = [(x, cmd, options, None, options.subdirs)
                for x in chunk_iterator(options.stdin,
                                        args,
                                        prefix=tmpdir,
                                        use_header=options.input_header)]

        started_requests = [(x[0], x[0] + ".out") for x in data]

        if len(data) == 0:
            E.warn("no data received")
            E.Stop()
            sys.exit(0)

        if options.method == "multiprocessing":
            pool = Pool(options.cluster_num_jobs)
            results = pool.map(runCommand, data, chunksize=1)
        elif options.method == "drmaa":
            results = []
            runDRMAA(data, environment=options.environment)
        elif options.method == "threads":
            pool = ThreadPool(options.cluster_num_jobs)
            results = pool.map(runCommand, data, chunksize=1)

        niterations = 0
        # NOTE: this loop rebinds cmd, filename and logfile; cmd is read
        # again further down when looking for the log file option.
        for retcode, filename, cmd, logfile, iterations in results:
            niterations += iterations
            if not hasFinished(retcode, filename, options.output_tag, logfile):
                failed_requests.append((filename, cmd))

    else:
        tmpdir = options.collect
        started_requests = [(x[:-4], x) for x in glob.glob(tmpdir + "/*.out")]

        E.info("collecting %i files from %s" % (len(started_requests), tmpdir))

    if failed_requests:
        for fn, cmd in failed_requests:
            E.error("failed request: filename= %s, cmd= %s" % (fn, cmd))
    else:
        E.info("building result from %i parts" % len(started_requests))

        if options.renumber:
            mapper = MapperLocal(pattern=options.renumber)
        else:
            mapper = MapperEmpty()

        # deal with stdout
        name = None
        index = None

        for pattern, column in options.renumber_column:

            if re.search(pattern, "stdout"):
                # a numeric column is a 1-based index, anything else is
                # a column name
                try:
                    index = int(column) - 1
                except ValueError:
                    name = column
                # the first matching pattern wins, as in the loop over
                # the other output files below
                break

        if options.binary:
            ResultBuilderBinary()(started_requests, options.stdout, options)
        else:
            regex = None
            if options.output_regex_header:
                regex = re.compile(options.output_regex_header)
            ResultBuilder(mapper=mapper,
                          field_index=index,
                          field_name=name,
                          header_regex=regex)(started_requests, options.stdout,
                                              options)

        # deal with logfiles : combine them into a single file
        rr = (re.search(r"'--log=(\S+)'", cmd) or
              re.search(r"'--L\s+(\S+)'", cmd))
        if rr:
            E.info("logging output goes to %s" % rr.groups()[0])
            logfile = iotools.openFile(rr.groups()[0], "a")
            ResultBuilderLog()([(x[0], "%s.log" % x[0])
                                for x in started_requests], logfile, options)
            logfile.close()

        # deal with other files
        if options.subdirs:

            files = glob.glob("%s/*.dir/*" % tmpdir)
            # remove directory
            filenames = set([os.path.basename(x) for x in files])
            xx = len(".out")

            for filename in filenames:

                _, filetype = os.path.splitext(filename)

                name = None
                index = None

                for pattern, column in options.renumber_column:
                    if re.search(pattern, filename):
                        try:
                            index = int(column) - 1
                        except ValueError:
                            name = column
                        break

                # Choose the builder by file extension.  The
                # single-extension cases must be tuples: a bare string
                # would turn "in" into a substring test that also
                # matches files without an extension.
                if options.binary:
                    builder = ResultBuilderBinary(mapper=mapper)
                elif filetype in (".fa", ".fasta"):
                    builder = ResultBuilderFasta(mapper=mapper)
                elif filetype in (".mali", ):
                    builder = ResultBuilderFasta(mapper=MapperEmpty())
                elif filetype in (".psl", ):
                    builder = ResultBuilderPSL(mapper=mapper)
                elif filetype in (".gtf", ".gff"):
                    builder = ResultBuilderGFF(mapper=mapper,
                                               field_index=index,
                                               field_name=name)
                elif filetype in (".png", ):
                    builder = ResultBuilderCopies(mapper=mapper)
                else:
                    builder = ResultBuilder(mapper=mapper,
                                            field_index=index,
                                            field_name=name)

                E.debug("chose the following builder for %s: %s: %s" %
                        (filename, filetype, str(builder)))

                E.info("collecting results for %s" % filename)

                # the chunk directory is the .out file name with ".out"
                # replaced by ".dir"
                input_filenames = []
                for fi, fn in started_requests:
                    fn = fn[:-xx] + ".dir/" + filename
                    if os.path.exists(fn):
                        input_filenames.append((fi, fn))

                # report the number of parts merged into this file
                E.info("output of %i files goes to %s" %
                       (len(input_filenames), filename))

                outfile = iotools.openFile(options.output_pattern % filename,
                                           "w")
                builder(input_filenames, outfile, options)
                outfile.close()

    if not options.debug and (not options.resume or not options.collect):
        if len(failed_requests) == 0:
            E.info("removing directory %s" % tmpdir)
            shutil.rmtree(tmpdir)
        else:
            E.info("directory %s not removed due to %i failed jobs" %
                   (tmpdir, len(failed_requests)))

    E.info("job control: nstarted=%i, nfinished=%i, nerrors=%i, nrepeats=%i" %
           (len(started_requests), len(started_requests) -
            len(failed_requests), len(failed_requests), niterations))

    E.Stop()
Example #13
0
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Every file named on the command line is emptied while its access
    time, modification time and mode are restored afterwards. A
    symbolic link is replaced by an empty regular file unless
    ``--ignore-links`` is given, in which case the link is left alone
    and no row is reported for it. With ``--dry-run`` nothing on disk
    is changed.

    One tab-separated row per processed file (file name, former link
    destination and the ``os.stat`` fields collected before zapping)
    is written to ``options.stdout``.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-n", "--dry-run", dest="dry_run", action="store_true",
                      help="do dry run, do not kill [default=%default].")

    parser.add_option("-l", "--ignore-links", dest="ignore_links",
                      action="store_true",
                      help="do not zap symbolic links [default=%default].")

    parser.set_defaults(
        dry_run=False,
        ignore_links=False,
    )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    outfile = options.stdout

    fields = ('st_atime', 'st_blksize', 'st_blocks',
              'st_ctime', 'st_dev', 'st_gid', 'st_ino',
              'st_mode', 'st_mtime', 'st_nlink',
              'st_rdev', 'st_size', 'st_uid')

    outfile.write("filename\tlinkdest\t%s\n" % "\t".join(fields))

    # remove any duplicates and sort
    args = sorted(set(args))

    for fn in args:

        # os.stat follows symbolic links, so for a link these are the
        # attributes of the link destination.
        original = os.stat(fn)

        if os.path.islink(fn):
            if options.ignore_links:
                # Nothing is zapped for this entry. Falling through
                # would use ``linkdest`` before assignment (or reuse
                # the value of the previous file), so skip the entry.
                E.info('ignoring link %s' % fn)
                continue

            linkdest = os.readlink(fn)
            E.info('breaking link from %s to %s' % (fn, linkdest))
            if not options.dry_run:
                os.unlink(fn)
                # create an empty regular file in place of the link
                with open(fn, "w"):
                    pass
        else:
            E.info('truncating file %s' % fn)
            linkdest = ""
            if not options.dry_run:
                # opening in "w" mode truncates the file to zero length
                with open(fn, "w"):
                    pass

        outfile.write("%s\t%s\t%s\n" % (
            fn,
            linkdest,
            "\t".join([str(getattr(original, x)) for x in fields])))

        if not options.dry_run:
            # Set original times
            os.utime(fn, (original.st_atime, original.st_mtime))
            os.chmod(fn, original.st_mode)

    # write footer and output benchmark information.
    E.Stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless a non-empty *argv*
    is given.

    The files/directories given as positional arguments are copied
    into ``<scratch-dir>/<collection>`` on every node returned by
    ``getNodes``; one ssh/rsync statement is run per node.

    Raises ValueError if no files or directories are given.
    """

    argv = argv or sys.argv

    # setup command line parser
    parser = E.OptionParser(usage=globals()["__doc__"],
                            version="%prog version: $Id$")

    parser.add_option(
        "-s", "--scratch-dir", dest="scratchdir", type="string",
        help="the scratch directory on the nodes [default=%default].")

    parser.add_option(
        "-c", "--collection", dest="collection", type="string",
        help="files will be put into collection. This is a directory that "
        "will be created just below the scratch directory [default=%default].")

    parser.set_defaults(scratchdir="/scratch", collection="", nodes=[])

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    if not args:
        raise ValueError("please specify a collection of files/directories "
                         "that should be mirrored.")

    targetdir = os.path.join(options.scratchdir, options.collection)
    nodes = getNodes(options.nodes)

    E.info("copying to %s on nodes %s" % (targetdir, ",".join(nodes)))

    filenames = " ".join(args)

    # The shell statement is the same for every node apart from the
    # substituted values, so the template is built once.
    command_template = '''
               ssh %(node)s mkdir %(targetdir)s >& /dev/null;
               rsync --progress -az %(filenames)s %(node)s:%(targetdir)s
        '''

    n_in = n_out = n_skip = 0

    for node in nodes:
        E.info("copying to node %s" % node)
        n_in += 1
        E.run(command_template % {"node": node,
                                  "targetdir": targetdir,
                                  "filenames": filenames})
        n_out += 1

    E.info("ninput=%i, noutput=%i, nskipped=%i" % (n_in, n_out, n_skip))

    E.Stop()
Example #15
0
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Processes a GTF file in up to two stages:

    1. contig selection (``--remove-contigs`` / ``--keep-contigs``),
       run as a ``zcat ... | gzip`` shell pipeline whose output name is
       ``--outfile`` with ``.gz`` appended if not already present;
    2. the ``--filter`` strings, applied one after the other through
       ``filterGTF``; the final result is moved to ``--outfile``.

    When both stages are requested the filters operate on the output
    of the contig stage.

    Raises ValueError if neither ``--gtf`` nor ``--gtfpath`` is given,
    if ``--outfile`` is missing, or if no stage was requested.
    """
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-g",
                      "--gtf",
                      dest="gtf",
                      type="string",
                      help="path to input gtf")

    parser.add_option("-p",
                      "--gtfpath",
                      dest="gtfpath",
                      type="string",
                      help="path to online gtf")

    parser.add_option("-c",
                      "--remove-contigs",
                      dest="remove_contigs",
                      type="string",
                      help="contigs to remove, delimited by |")

    parser.add_option("-k",
                      "--keep-contigs",
                      dest="keep_contigs",
                      type="string",
                      help="""all contigs to keep, delimited by |.  Contigs
                      specified in --remove-contigs will still be removed""")

    parser.add_option("-o",
                      "--outfile",
                      dest="outfile",
                      type="string",
                      help="path to processed output gtf")

    parser.add_option("-f",
                      "--filter",
                      dest="filters",
                      type="string",
                      action="append",
                      help="""List of filters to apply to your GTF""")

    parser.set_defaults(
        remove_contigs=None,
        keep_contigs=None,
    )

    # honour an explicitly supplied argument vector
    (options, args) = E.Start(parser, argv=argv)

    if options.gtf:
        gtf = options.gtf
    elif options.gtfpath:
        getGTF(options.gtfpath)
        # getGTF is given the full path; the local copy is addressed
        # by its base name.
        gtf = options.gtfpath.split("/")[-1]
    else:
        raise ValueError("Please provide a GTF or the path to an online GTF")

    if not options.outfile:
        raise ValueError("Please provide an output file name")

    # d counts the processing stages that were requested
    d = 0

    # T1 always names the most recent result; it starts as the input.
    T1 = gtf

    if options.remove_contigs or options.keep_contigs:
        d += 1
        statement = 'zcat %s |' % gtf

        if options.remove_contigs:
            statement += removeNamedContigs(options.remove_contigs)

        if options.keep_contigs:
            statement += keepOnlyNamedContigs(options.keep_contigs)

        if options.outfile.endswith(".gz"):
            outfile = options.outfile
        else:
            outfile = options.outfile + ".gz"

        statement += "gzip > %s " % outfile

        os.system(statement)

        # Feed the contig-filtered file into the filter stage below.
        # Restarting from the raw input would silently drop the contig
        # selection from the final output.
        T1 = outfile

    if options.filters:
        d += 1
        for filterstring in options.filters:
            T2 = P.getTempFilename(".")
            T2 = T2 + ".gtf"
            filterGTF(T1, filterstring, T2)
            # the output of this filter is the input of the next one
            T1 = T2

        shutil.move(T2, options.outfile)

    if d == 0:
        raise ValueError("No filters provided")

    # write footer and output benchmark information.
    E.Stop()
Example #16
0
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Imports the module given by ``--module`` and calls the function
    named by ``--function`` with one of these argument shapes:

    * ``function(infiles, outfiles, params)``
    * ``function(infiles, outfiles)``
    * ``function(params)``

    A single input or output file is passed as a plain string instead
    of a one-element list. The literal value ``None`` for ``--input``
    or ``--output-section`` counts as "not given".

    Raises ValueError if module or function is missing or if the
    supplied arguments match none of the call shapes, and
    AttributeError if the module has no attribute with the requested
    name.
    """

    if argv is None:
        argv = sys.argv

    # Parse the options
    parser = E.OptionParser(version="%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
                            usage=globals()["__doc__"])

    parser.add_option("-p", "--params", dest="params", type="string",
                      help="comma separated list of addtional parameter strings")

    parser.add_option("-m", "--module", dest="module", type="string",
                      help="the full path to the module file", default=None)

    parser.add_option("-i", "--input", dest="input_filenames", type="string", action="append",
                      help="input filename")

    parser.add_option("-o", "--output-section", dest="output_filenames", type="string", action="append",
                      help="output filename")

    parser.add_option("-f", "--function", dest="function", type="string",
                      help="the module function", default=None)

    parser.set_defaults(
        input_filenames=[],
        output_filenames=[],
        params=None
    )

    (options, args) = E.Start(parser, argv=argv)

    # Check a module and function have been specified
    if not options.module or not options.function:
        raise ValueError("Both a function and Module must be specified")

    # If a full path was given, add this path to the system path
    location = os.path.dirname(options.module)
    if location != "":
        sys.path.append(location)

    # Establish the module name, accommodating cases where the
    # .py extension has been included in the module name
    module_name = os.path.basename(options.module)
    if module_name.endswith(".py"):
        module_base_name = module_name[:-3]
    else:
        module_base_name = module_name

    # Import the specified module and map the specified function
    E.info("importing module '%s' " % module_base_name)
    E.debug("sys.path is: %s" % sys.path)

    module = importlib.import_module(module_base_name)
    try:
        function = getattr(module, options.function)
    except AttributeError as msg:
        # Exceptions have no ``message`` attribute on Python 3; using
        # it here would mask the real problem with a second
        # AttributeError. str() works on every version.
        raise AttributeError(
            "%s: unknown function, available functions are: %s" % (
                str(msg),
                ",".join([x for x in dir(module) if not x.startswith("_")])))

    if options.input_filenames and options.input_filenames != ["None"]:
        infiles = options.input_filenames
    else:
        infiles = False

    if options.output_filenames and options.output_filenames != ["None"]:
        outfiles = options.output_filenames
    else:
        outfiles = False

    # Parse the parameters into an array
    if options.params:
        params = [param.strip() for param in options.params.split(",")]
    else:
        params = False

    # deal with single file case
    if infiles and len(infiles) == 1:
        infiles = infiles[0]
    if outfiles and len(outfiles) == 1:
        outfiles = outfiles[0]

    # Make the function call
    if infiles and outfiles and params:
        function(infiles, outfiles, params)
    elif infiles and outfiles and not params:
        function(infiles, outfiles)
    elif params:
        function(params)
    else:
        raise ValueError(
            "Expecting infile+outfile+params or infile+outfile or params")

    E.Stop()
Example #17
0
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Each positional argument that names a supported query
    (``getCancerStudies``, ``getGeneticProfiles``, ``getCaseLists``,
    ``getProfileData``, ``getMutationData``, ``getClinicalData``,
    ``getProteinArrayInfo``, ``getProteinArrayData``,
    ``getPercentAltered``, ``getLink``, ``getOncoprintHTML``) is run
    against a ``CBioPortal`` instance. The results are written to
    ``--output_file`` or, if that is not given, to stdout.

    If no recognised query is given a message is written to stderr and
    the script exits.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id$",
        usage=globals()["__doc__"])

    parser.add_option("-o", "--output_file", type="string", default=None,
                      help="[Optional] Filename to output results to. [default=STDOUT]")
    parser.add_option("-u", "--url", type="string", default="http://www.cbioportal.org/public-portal/webservice.do",
                      help="[Optional] Url to the cBioPortal webservice [default=%default]")

    cqueryopts = optparse.OptionGroup(
        parser, "Common parameters", "Common arguments to the query")
    cqueryopts.add_option("-s", "--study_id", dest="study_id", type="string", default=None,
                          help="[Required/OPtional]  cBioPortal ID for study [default=%default].\n This or study_name required for: getGeneticProfiles, getCaseLists, getProteinArrayInfo, getLink,getOncoprintHTML, getPercentAltered, getTotalAltered")
    cqueryopts.add_option("-n", "--study_name", dest="study_name", type="string", default=None,
                          help="[Required/Optional] cBioPortal Name for study [defualt=%default].\n See above for which commands require this.")
    cqueryopts.add_option("-c", "--case_set_id", dest="case_set_id", type="string", default=None,
                          help="[Required for some] cBioPortal case_set_id specifying the case list to use.\nRequired for getProfileData, getMutationData, getClincalData, getProteinArrayData, getPercentAltered, getTotalAltered. Default is case_set_id for case list 'All Tumours' ")
    cqueryopts.add_option("-g", "--gene_list", dest="gene_list", type="string", default=None,
                          help="[Required for some] Comma seperated list of HUGO gene symbols or Entrez gene IDs.\nRequired for getProfileData, getMutationData, getLink, getOncoprintHTML")
    cqueryopts.add_option("-f", "--gene_list_file", dest="gene_list_file", type="string", default=None,
                          help="[Optional] Filename to read in gene_list from")
    cqueryopts.add_option("-p", "--profile_id", dest="profile_id", type="string",
                          help="[Optional] Comma seperated list of cBioPortal genetic_profile_ids. If none are specified then the list of profiles for the study where display in analysis is True is used.")

    squeryopts = optparse.OptionGroup(
        parser, "Query specific parameters", "Arguments specific to a particular query")
    squeryopts.add_option("--protein_array_type", dest="protein_array_type", type="string", default="protein_level",
                          help="[Optional] Either protein_level or phosphorylation [default=%default]")
    squeryopts.add_option("--protein_array_id", dest="protein_array_id", type="string",
                          help="[Required for some] comma seperated list of one or more protein array IDs")
    squeryopts.add_option("--array_info", dest="protein_array_info", type="int",  default=0,
                          help="[Optional] If 1, antibody infomation will also be exported in a getProteinArrayData query [default=%default]")
    squeryopts.add_option("--output-report", dest="report", type="string", default="full",
                          help="[Optional] Report type to display for getLink. Either full or oncoprint_html [default=%default] ")
    squeryopts.add_option("--threshold", dest="threshold", type="int", default=2,
                          help="[Optional] Threshold for deciding if an alteration is significant for continuous metrics [default=%default]")

    parser.add_option_group(cqueryopts)
    parser.add_option_group(squeryopts)

    (options, args) = E.Start(
        parser, add_pipe_options=False, add_output_options=False, argv=argv)

    portal = CBioPortal(url=options.url, study=options.study_id,
                        study_name=options.study_name, case_list_id=options.case_set_id)

    results = []

    # Default to None so that queries run without -g/-f receive an
    # explicit "no gene list" instead of failing on an unbound name.
    gene_list = None
    if options.gene_list_file:
        # a gene list file takes precedence over --gene_list
        with iotools.open_file(options.gene_list_file) as infile:
            gene_list = [x.strip() for x in infile]
    elif options.gene_list:
        gene_list = options.gene_list.split(",")

    if options.profile_id:
        profile_id = options.profile_id.split(",")
    else:
        profile_id = None

    if "getCancerStudies" in args:
        results.append(portal.getCancerStudies())

    if "getGeneticProfiles" in args:
        results.append(portal.getGeneticProfiles())

    if "getCaseLists" in args:
        results.append(portal.getCaseLists())

    if "getProfileData" in args:
        results.append(
            portal.getProfileData(gene_list=gene_list,
                                  genetic_profile_id=profile_id))

    if "getMutationData" in args:
        results.append(
            portal.getMutationData(gene_list=gene_list,
                                   genetic_profile_id=profile_id))

    if "getClinicalData" in args:
        results.append(portal.getClinicalData())

    if "getProteinArrayInfo" in args:
        results.append(portal.getProteinArrayInfo(
            gene_list=gene_list,
            protein_array_type=options.protein_array_type))

    if "getProteinArrayData" in args:
        # --array_info is stored under dest "protein_array_info";
        # there is no "array_info" attribute on options.
        results.append(portal.getProteinArrayData(
            protein_array_id=options.protein_array_id,
            array_info=options.protein_array_info))

    if "getPercentAltered" in args:
        results.append(portal.getPercentAltered(
            gene_list=gene_list, genetic_profile_id=profile_id,
            threshold=options.threshold))

    if "getLink" in args:
        results.append(
            portal.getLink(gene_list=gene_list, report=options.report))

    if "getOncoprintHTML" in args:
        results.append(portal.getOncoprintHTML(gene_list=gene_list))

    if len(results) == 0:
        sys.stderr.write("No recognised query commands provided")
        sys.exit()

    if options.output_file:
        outf = iotools.open_file(options.output_file, "w")
    else:
        outf = sys.stdout

    try:
        for result in results:
            # Results that cannot be rendered as a table are written
            # as they are.
            try:
                outf.write(tableToString(result))
            except Exception:
                outf.write(result)
    finally:
        # only close what was opened here, never sys.stdout
        if outf is not sys.stdout:
            outf.close()

    E.Stop()
Example #18
0
'''

# reST stub consisting of a single Sphinx ``automodule`` directive.
# ``%(prefix)s`` is a %-formatting placeholder; presumably it receives
# the module name when the stub is rendered -- confirm against the code
# that fills in the template.
TEMPLATE_PIPELINEMODULE = '''
.. automodule:: %(prefix)s
   :members:
   :show-inheritance:
'''

import glob
import os
import cgatcore.experiment as E

if __name__ == "__main__":

    E.Start()

    dirs = (("../scripts/*.py", TEMPLATE_SCRIPT,
             'scripts'), ("../cgat/*.py", TEMPLATE_MODULE, 'modules'),
            ("../cgatPipelines/pipeline*.py", TEMPLATE_PIPELINE,
             'pipelines'), ("../cgatPipelines/[A-Z]*.py",
                            TEMPLATE_PIPELINEMODULE, 'pipelinemodules'))

    ncreated, nskipped = 0, 0

    for glob_expression, template, dest in dirs:

        if not os.path.exists(dest):
            os.mkdir(dest)

        files = glob.glob(os.path.abspath(glob_expression))