Example #1
    print(E.GetParams())

    if param_trans:
        parser = PredictionParser.PredictionParserBlatTrans()
    else:
        parser = PredictionParser.PredictionParserBlatCDNA()

    nmatches = 1
    for line in sys.stdin:
        if line[0] == "#": continue
        if not re.match("^[0-9]", line): continue

        try:
            entries = parser.Parse((line, ))
        except PredictionParser.AlignmentError as e:
            print("# %s" % str(e))
            print("#", line[:-1])
            sys.exit(1)

        for entry in entries:
            entry.mPredictionId = nmatches
            nmatches += 1

        print(str(entries))

    print(E.GetFooter())


if __name__ == "__main__":
    sys.exit(main(sys.argv))
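The fragment above filters comment lines from stdin, hands each remaining data line to a CGAT PredictionParser, and numbers the resulting predictions. Below is a minimal, self-contained sketch of the same read-filter-parse-number loop; StubParser is a hypothetical stand-in for the CGAT parser classes, which are not shown here.

import re
import sys


class StubParser:
    """Hypothetical stand-in for PredictionParser.PredictionParserBlatCDNA."""

    def Parse(self, lines):
        # split each tab-separated line into fields; the real parser
        # builds prediction objects instead
        return [line.rstrip("\n").split("\t") for line in lines]


def number_matches(stream, parser):
    nmatches = 1
    for line in stream:
        if line.startswith("#"):            # skip comments
            continue
        if not re.match("^[0-9]", line):    # keep numeric data lines only
            continue
        for entry in parser.Parse((line, )):
            print(nmatches, entry)
            nmatches += 1


# usage: number_matches(sys.stdin, StubParser())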
Example #2
def main(args=sys.argv):
    """command line control function for a pipeline.

    This method defines command line options for the pipeline and
    updates the global configuration dictionary correspondingly.

    It then provides a command parser to execute particular tasks
    using the ruffus pipeline control functions. See the generated
    command line help for usage.

    To use it, add::

        import CGAT.Pipeline as P

        if __name__ == "__main__":
            sys.exit(P.main(sys.argv))

    to your pipeline script.

    Arguments
    ---------
    args : list
        List of command line arguments.

    """

    global GLOBAL_OPTIONS
    global GLOBAL_ARGS

    parser = E.OptionParser(version="%prog version: $Id$", usage=USAGE)

    parser.add_option("--pipeline-action",
                      dest="pipeline_action",
                      type="choice",
                      choices=("make", "show", "plot", "dump", "config",
                               "clone", "check", "regenerate", "printconfig"),
                      help="action to take [default=%default].")

    parser.add_option("--pipeline-format",
                      dest="pipeline_format",
                      type="choice",
                      choices=("dot", "jpg", "svg", "ps", "png"),
                      help="pipeline format [default=%default].")

    parser.add_option("-n",
                      "--dry-run",
                      dest="dry_run",
                      action="store_true",
                      help="perform a dry run (do not execute any shell "
                      "commands) [default=%default].")

    parser.add_option("-f",
                      "--force-output",
                      dest="force",
                      action="store_true",
                      help="force running the pipeline even if there "
                      "are uncommited changes "
                      "in the repository [default=%default].")

    parser.add_option("-p",
                      "--multiprocess",
                      dest="multiprocess",
                      type="int",
                      help="number of parallel processes to use on "
                      "submit host "
                      "(different from number of jobs to use for "
                      "cluster jobs) "
                      "[default=%default].")

    parser.add_option("-e",
                      "--exceptions",
                      dest="log_exceptions",
                      action="store_true",
                      help="echo exceptions immediately as they occur "
                      "[default=%default].")

    parser.add_option("-i",
                      "--terminate",
                      dest="terminate",
                      action="store_true",
                      help="terminate immediately at the first exception "
                      "[default=%default].")

    parser.add_option("-d",
                      "--debug",
                      dest="debug",
                      action="store_true",
                      help="output debugging information on console, "
                      "and not the logfile "
                      "[default=%default].")

    parser.add_option("-s",
                      "--set",
                      dest="variables_to_set",
                      type="string",
                      action="append",
                      help="explicitly set paramater values "
                      "[default=%default].")

    parser.add_option("-c",
                      "--checksums",
                      dest="ruffus_checksums_level",
                      type="int",
                      help="set the level of ruffus checksums"
                      "[default=%default].")

    parser.add_option("-t",
                      "--is-test",
                      dest="is_test",
                      action="store_true",
                      help="this is a test run"
                      "[default=%default].")

    parser.add_option("--rabbitmq-exchange",
                      dest="rabbitmq_exchange",
                      type="string",
                      help="RabbitMQ exchange to send log messages to "
                      "[default=%default].")

    parser.add_option("--rabbitmq-host",
                      dest="rabbitmq_host",
                      type="string",
                      help="RabbitMQ host to send log messages to "
                      "[default=%default].")

    parser.add_option("--input-validation",
                      dest="input_validation",
                      action="store_true",
                      help="perform input validation before starting "
                      "[default=%default].")

    parser.set_defaults(pipeline_action=None,
                        pipeline_format="svg",
                        pipeline_targets=[],
                        multiprocess=40,
                        logfile="pipeline.log",
                        dry_run=False,
                        force=False,
                        log_exceptions=False,
                        exceptions_terminate_immediately=False,
                        debug=False,
                        variables_to_set=[],
                        is_test=False,
                        ruffus_checksums_level=0,
                        rabbitmq_host="saruman",
                        rabbitmq_exchange="ruffus_pipelines",
                        input_validation=False)

    (options, args) = E.Start(parser, add_cluster_options=True)

    GLOBAL_OPTIONS, GLOBAL_ARGS = options, args
    E.info("Started in: %s" % PARAMS.get("workingdir"))
    # At this point, the PARAMS dictionary has already been
    # built. It now needs to be updated with selected command
    # line options as these should always take precedence over
    # configuration files.
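    # For example, "dryrun" may be False in the configuration file, but
    # passing --dry-run on the command line flips PARAMS["dryrun"] to
    # True below.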

    PARAMS["dryrun"] = options.dry_run
    PARAMS["input_validation"] = options.input_validation

    # use cli_cluster_* keys in PARAMS to ensure highest priority
    # of cluster_* options passed with the command-line
    if options.cluster_memory_default is not None:
        PARAMS["cli_cluster_memory_default"] = options.cluster_memory_default
        PARAMS["cluster_memory_default"] = options.cluster_memory_default
    if options.cluster_memory_resource is not None:
        PARAMS["cli_cluster_memory_resource"] = options.cluster_memory_resource
        PARAMS["cluster_memory_resource"] = options.cluster_memory_resource
    if options.cluster_num_jobs is not None:
        PARAMS["cli_cluster_num_jobs"] = options.cluster_num_jobs
        PARAMS["cluster_num_jobs"] = options.cluster_num_jobs
    if options.cluster_options is not None:
        PARAMS["cli_cluster_options"] = options.cluster_options
        PARAMS["cluster_options"] = options.cluster_options
    if options.cluster_parallel_environment is not None:
        PARAMS["cli_cluster_parallel_environment"] = \
            options.cluster_parallel_environment
        PARAMS["cluster_parallel_environment"] = \
            options.cluster_parallel_environment
    if options.cluster_priority is not None:
        PARAMS["cli_cluster_priority"] = options.cluster_priority
        PARAMS["cluster_priority"] = options.cluster_priority
    if options.cluster_queue is not None:
        PARAMS["cli_cluster_queue"] = options.cluster_queue
        PARAMS["cluster_queue"] = options.cluster_queue
    if options.cluster_queue_manager is not None:
        PARAMS["cli_cluster_queue_manager"] = options.cluster_queue_manager
        PARAMS["cluster_queue_manager"] = options.cluster_queue_manager

    PARAMS["ruffus_checksums_level"] = options.ruffus_checksums_level

    for variables in options.variables_to_set:
        # split on the first "=" only so that values may contain "="
        variable, value = variables.split("=", 1)
        PARAMS[variable.strip()] = IOTools.str2val(value.strip())

    if args:
        options.pipeline_action = args[0]
        if len(args) > 1:
            options.pipeline_targets.extend(args[1:])

    # see inputValidation function in Parameters.py
    if options.input_validation:
        inputValidation(PARAMS, sys.argv[0])

    if options.pipeline_action == "check":
        counter, requirements = Requirements.checkRequirementsFromAllModules()
        for requirement in requirements:
            E.info("\t".join(map(str, requirement)))
        E.info("version check summary: %s" % str(counter))
        E.Stop()
        return

    elif options.pipeline_action == "debug":
        # create the session proxy
        startSession()

        method_name = options.pipeline_targets[0]
        caller = getCaller()
        method = getattr(caller, method_name)
        method(*options.pipeline_targets[1:])

    elif options.pipeline_action in ("make", "show", "svg", "plot", "touch",
                                     "regenerate"):

        # set up extra file logger
        handler = logging.FileHandler(filename=options.logfile, mode="a")
        handler.setFormatter(
            MultiLineFormatter(
                '%(asctime)s %(levelname)s %(module)s.%(funcName)s.%(lineno)d %(message)s'
            ))
        logger = logging.getLogger()
        logger.addHandler(handler)
        messenger = None

        try:
            if options.pipeline_action == "make":

                # get tasks to be done. This essentially replicates
                # the state information within ruffus.
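                # pipeline_printout writes the list of pending tasks into
                # `stream`; the RabbitMQ filter below parses that text to
                # report task status.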
                stream = io.StringIO()
                pipeline_printout(
                    stream,
                    options.pipeline_targets,
                    verbose=5,
                    checksum_level=options.ruffus_checksums_level)

                messenger = LoggingFilterRabbitMQ(
                    stream.getvalue(),
                    project_name=getProjectName(),
                    pipeline_name=getPipelineName(),
                    host=options.rabbitmq_host,
                    exchange=options.rabbitmq_exchange)

                logger.addFilter(messenger)

                if not options.without_cluster and HAS_DRMAA:
                    global task
                    # use threading instead of multiprocessing in order to
                    # limit the number of concurrent jobs by using the
                    # GIL
                    #
                    # Note that threading might cause problems with rpy.
                    task.Pool = ThreadPool

                    # create the session proxy
                    startSession()

                elif not options.without_cluster and not HAS_DRMAA:
                    E.critical(
                        "DRMAA API not found so cannot talk to a cluster.")
                    E.critical("Please use --local to run the pipeline"
                               " on this host: {}".format(os.uname()[1]))
                    sys.exit(-1)

                # make sure we are not logging at the same time in
                # different processes
                # session_mutex = manager.Lock()
                E.info(E.GetHeader())
                E.info("code location: %s" % PARAMS["pipeline_scriptsdir"])
                E.info("Working directory is: %s" % PARAMS["workingdir"])

                pipeline_run(
                    options.pipeline_targets,
                    multiprocess=options.multiprocess,
                    logger=logger,
                    verbose=options.loglevel,
                    log_exceptions=options.log_exceptions,
                    exceptions_terminate_immediately=(
                        options.exceptions_terminate_immediately),
                    checksum_level=options.ruffus_checksums_level,
                )

                E.info(E.GetFooter())

                closeSession()

            elif options.pipeline_action == "show":
                pipeline_printout(
                    options.stdout,
                    options.pipeline_targets,
                    verbose=options.loglevel,
                    checksum_level=options.ruffus_checksums_level)

            elif options.pipeline_action == "touch":
                pipeline_run(options.pipeline_targets,
                             touch_files_only=True,
                             verbose=options.loglevel,
                             checksum_level=options.ruffus_checksums_level)

            elif options.pipeline_action == "regenerate":
                pipeline_run(options.pipeline_targets,
                             touch_files_only=options.ruffus_checksums_level,
                             verbose=options.loglevel)

            elif options.pipeline_action == "svg":
                pipeline_printout_graph(
                    options.stdout.buffer,
                    options.pipeline_format,
                    options.pipeline_targets,
                    checksum_level=options.ruffus_checksums_level)

            elif options.pipeline_action == "plot":
                outf, filename = tempfile.mkstemp()
                pipeline_printout_graph(
                    os.fdopen(outf, "wb"),
                    options.pipeline_format,
                    options.pipeline_targets,
                    checksum_level=options.ruffus_checksums_level)
                execute("inkscape %s" % filename)
                os.unlink(filename)

        except ruffus_exceptions.RethrownJobError as value:

            if not options.debug:
                E.error("%i tasks with errors, please see summary below:" %
                        len(value.args))
                for idx, e in enumerate(value.args):
                    task, job, error, msg, traceback = e

                    if task is None:
                        # errors originating within ruffus itself, such
                        # as a missing dependency; msg then contains a
                        # RethrownJobError
                        msg = str(msg)
                    else:
                        task = re.sub(r"__main__\.", "", task)
                        job = re.sub(r"\s", "", job)

                    if messenger:
                        messenger.send_error(task, job, error, msg)

                    # show only single-line messages on the console;
                    # the full traceback goes to the logfile
                    if len([x for x in msg.split("\n") if x != ""]) > 1:
                        msg = ""

                    E.error("%i: Task=%s Error=%s %s: %s" %
                            (idx, task, error, job, msg))

                E.error("full traceback is in %s" % options.logfile)

                # write full traceback to log file only by removing the stdout
                # handler
                lhStdout = logger.handlers[0]
                logger.removeHandler(lhStdout)
                logger.error("start of error messages")
                logger.error(value)
                logger.error("end of error messages")
                logger.addHandler(lhStdout)

                # raise error
                raise ValueError("pipeline failed with %i errors" %
                                 len(value.args))
            else:
                raise

    elif options.pipeline_action == "dump":
        print(json.dumps(PARAMS))

    elif options.pipeline_action == "printconfig":
        print("Printing out pipeline parameters: ")
        for k in sorted(PARAMS):
            print(k, "=", PARAMS[k])
        printConfigFiles()

    elif options.pipeline_action == "config":
        f = sys._getframe(1)
        caller = f.f_globals["__file__"]
        pipeline_path = os.path.splitext(caller)[0]
        general_path = os.path.join(os.path.dirname(pipeline_path),
                                    "configuration")
        writeConfigFiles(pipeline_path, general_path)

    elif options.pipeline_action == "clone":
        clonePipeline(options.pipeline_targets[0])

    else:
        raise ValueError("unknown pipeline action %s" %
                         options.pipeline_action)

    E.Stop()
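A recurring pattern in main above is layering command-line overrides on top of a configuration dictionary, as the --set key=value loop does with PARAMS. The sketch below isolates that pattern; str2val here is a hypothetical stand-in for CGAT's IOTools.str2val and simply attempts int and float conversion.

def str2val(value):
    """Hypothetical stand-in for IOTools.str2val: coerce a string to
    int or float where possible, else return it unchanged."""
    for cast in (int, float):
        try:
            return cast(value)
        except ValueError:
            pass
    return value


def apply_overrides(params, assignments):
    """Apply "key=value" strings to a configuration dictionary, letting
    command-line values take precedence over file-based ones."""
    for assignment in assignments:
        # split on the first "=" only so values may contain "="
        key, value = assignment.split("=", 1)
        params[key.strip()] = str2val(value.strip())
    return params


params = {"multiprocess": 40, "dryrun": False}
apply_overrides(params, ["multiprocess=10", "label=test"])
# params is now {"multiprocess": 10, "dryrun": False, "label": "test"}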
Example #3
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    try:
        optlist, args = getopt.getopt(argv[1:], param_short_options,
                                      param_long_options)

    except getopt.error as msg:
        print(globals()["__doc__"], msg)
        sys.exit(1)

    for o, a in optlist:
        if o in ("--help", ):
            print(globals()["__doc__"])
            sys.exit(0)
        elif o in ("--version", ):
            print("version=")
            sys.exit(0)
        elif o in ("-h", "--header-names"):
            param_headers = a.split(",")
        elif o in ("-n", "--normalize"):
            param_normalize = 1
        elif o in ("-m", "--missing-value"):
            param_missing_value = a
        elif o == "--no-titles":
            param_titles = False
        elif o == "--no-titles":
            param_titles = False
        elif o in ("-f", "--format"):
            param_format = a
        elif o == "--format-value":
            param_format_value = a
        elif o == "--bin-format":
            param_format_bin = a
        elif o in ("-s", "--method=sort --sort-order"):
            if a in ("numerical", "alphabetic"):
                param_sort = a
            else:
                param_sort = a.split(",")

    if len(args) < 1:
        print(globals()["__doc__"], "please specify at least one histogram.")
        sys.exit(1)

    param_filenames = args

    print(E.GetHeader())
    print(E.GetParams())

    histograms = []

    # the first column holds the bins
    headers = ["bin"]
    if param_headers and param_headers != "auto":
        headers = [param_headers[0]]
        del param_headers[0]

    for x in range(len(param_filenames)):

        filename = param_filenames[x]
        if not os.path.exists(filename):
            print("# skipped because file not present: %s" % filename)
            continue

        infile = IOTools.openFile(filename, "r")

        lines = [x for x in infile if x[0] != "#"]

        if len(lines) == 0:
            continue

        if param_titles:
            h = lines[0][:-1].split("\t")[1:]
            del lines[0]

        if param_headers == "auto":
            headers.append(os.path.basename(filename))
        elif param_headers:
            headers.append(param_headers[x])
        elif param_titles:
            headers += h

        data = [list(map(float, x[:-1].split("\t"))) for x in lines]

        # add empty data point for empty histograms
        if len(data) == 0:
            data = [(0, 0)]

        histograms.append(data)

    # sort the whole thing:
    if param_sort:
        sort_order = []

        if param_sort == "numerical":
            t = sorted(zip(map(int, headers[1:]),
                           range(1, len(headers) + 1)))
            for tt in t:
                sort_order.append(headers[tt[1]])

        elif param_sort == "alphabetical":
            t = sorted(zip(headers[1:], range(1, len(headers) + 1)))
            for tt in t:
                sort_order.append(headers[tt[1]])
        else:
            sort_order = param_sort

        # map header to old position
        map_header2pos = {}
        for x in range(1, len(headers)):
            map_header2pos[headers[x]] = x

        order = []
        for x in sort_order:
            if x in map_header2pos:
                order.append(map_header2pos[x])

        new_headers = [headers[0]]
        new_histograms = []

        for x in order:
            new_headers.append(headers[x])
            new_histograms.append(histograms[x - 1])

        histograms = new_histograms
        headers = new_headers

    combined_histogram = Histogram.Combine(histograms, param_missing_value)

    if headers:
        print("\t".join(headers))

    if param_normalize:
        combined_histogram = Histogram.Normalize(combined_histogram)

    Histogram.Print(
        combined_histogram,
        format_bin=param_format_bin,
        format_value=param_format_value,
    )

    print(E.GetFooter())
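Histogram.Combine and Histogram.Print are CGAT helpers that are not shown here. The sketch below illustrates what the combine step presumably does, assuming each histogram is a list of (bin, value) rows and missing bins are padded with the missing-value marker; the real implementation may differ.

def combine_histograms(histograms, missing_value="na"):
    """Align histograms on their bin values, filling gaps with
    ``missing_value``. Each histogram is a list of (bin, value) rows.
    A sketch only: the real Histogram.Combine may behave differently."""
    # collect the union of all bins, in sorted order
    bins = sorted({row[0] for h in histograms for row in h})
    # index each histogram by bin for O(1) lookup
    indexed = [dict((row[0], row[1:]) for row in h) for h in histograms]
    combined = []
    for b in bins:
        row = [b]
        for h in indexed:
            row.extend(h.get(b, (missing_value,)))
        combined.append(row)
    return combined


h1 = [(0, 10), (1, 20)]
h2 = [(1, 5), (2, 15)]
print(combine_histograms([h1, h2]))
# [[0, 10, 'na'], [1, 20, 5], [2, 'na', 15]]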
Example #4
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    param_long_options = [
        "verbose=", "help", "split-regex=", "after", "pattern-output=", "skip",
        "column=", "map=", "dry-run", "header", "remove-key", "append",
        "pattern-identifier=", "version", "chunk-size="
    ]

    param_short_options = "v:hr:ap:sc:dekm:"

    param_loglevel = 1
    param_split_at_regex = None
    param_after = None
    param_skip = None
    param_pattern_output = "%s.chunk"
    param_split_column = None
    param_filename_map = None
    param_dry_run = False
    param_header = False
    param_remove_key = False
    param_append = "w"
    param_pattern_identifier = None
    param_chunk_size = 1

    try:
        optlist, args = getopt.getopt(argv[1:], param_short_options,
                                      param_long_options)

    except getopt.error as msg:
        print(USAGE, msg)
        sys.exit(1)

    for o, a in optlist:
        if o in ("-v", "--verbose"):
            param_loglevel = int(a)
        elif o in ("--version", ):
            print("version=")
            sys.exit(0)
        elif o in ("-h", "--help"):
            print(USAGE)
            sys.exit(0)
        elif o in ("-r", "--split-regex"):
            param_split_at_regex = re.compile(a)
        elif o in ("-a", "--after"):
            param_after = 1
        elif o in ("-s", "--skip"):
            param_skip = 1
        elif o in ("-p", "--pattern-output"):
            param_pattern_output = a
        elif o in ("-c", "--column"):
            param_split_column = int(a) - 1
        elif o in ("-m", "--map"):
            param_filename_map = a
        elif o in ("-d", "--dry-run"):
            param_dry_run = True
        elif o in ("-e", "--header-names"):
            param_header = True
        elif o in ("-r", "--remove-key"):
            param_remove_key = True
        elif o == "--append":
            param_append = "a"
        elif o == "--pattern-identifier":
            param_pattern_identifier = re.compile(a)
        elif o == "--chunk-size":
            param_chunk_size = int(a)

    print(E.GetHeader())
    print(E.GetParams())

    mymap = {}
    if param_filename_map:
        infile = IOTools.openFile(param_filename_map, "r")
        for line in infile:
            if line[0] == "#":
                continue
            data = line[:-1].split("\t")[:2]
            mymap[data[0]] = data[1]

    filenames = set()
    found = set()
    ninput, noutput = 0, 0

    if param_split_column is not None:

        header = None
        files = {}
        for line in sys.stdin:

            if line[0] == "#":
                continue

            ninput += 1

            if param_header:
                if not header:
                    header = line[:-1]
                    continue
            else:
                header = None

            data = line[:-1].split("\t")

            try:
                key = data[param_split_column]
            except IndexError:
                continue

            if param_pattern_identifier:
                key = param_pattern_identifier.search(key).groups()[0]

            if mymap:
                if key in mymap:
                    key = mymap[key]
                else:
                    continue

            found.add(key)

            filename = re.sub("%s", key, param_pattern_output)
            filenames.add(filename)

            if filename not in files:

                # reset if too many files are open
                if len(files) > 1000:
                    if param_loglevel >= 1:
                        print("# resetting all files.")
                        sys.stdout.flush()

                    for f in list(files.values()):
                        f.close()
                    files = {}

                files[filename] = CreateOpen(filename, "a", param_dry_run,
                                             header)

            if param_remove_key:
                del data[param_split_column]
                files[filename].write("\t".join(data) + "\n")
            else:
                files[filename].write(line)

            noutput += 1

        for f in list(files.values()):
            f.close()

    else:
        file_id = 0

        filename = re.sub("%s", str(file_id), param_pattern_output)
        outfile = CreateOpen(filename, param_append, param_dry_run)
        nlines = 0

        header = param_header
        split = 0

        for line in sys.stdin:

            if param_split_at_regex and param_split_at_regex.search(line[:-1]):
                split += 1

            if split == param_chunk_size:
                if param_after:
                    nlines += 1
                    outfile.write(line)
                if nlines > 0:
                    outfile.close()
                    file_id += 1
                    filename = re.sub("%s", str(file_id), param_pattern_output)
                    outfile = CreateOpen(filename, param_append, param_dry_run,
                                         header)
                    filenames.add(filename)
                    split = 0

                nlines = 0
                if param_after or param_skip:
                    continue

            outfile.write(line)
            nlines += 1

        outfile.close()

    if param_loglevel >= 1:
        sys.stdout.write(
            "# ninput=%i, noutput=%i, nfound=%i, nnotfound=%i, nfiles=%i\n" %
            (ninput, noutput, len(found),
             len(set(mymap).difference(found)), len(filenames)))

    print(E.GetFooter())
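The split-by-column branch above keeps one output file per key and, once more than 1000 handles accumulate, closes them all and reopens on demand in append mode. The sketch below isolates that handle-capping pattern; split_by_column is illustrative and does not reproduce the script's CreateOpen helper, map file, or dry-run handling.

import sys


def split_by_column(stream, column=0, pattern="%s.chunk", max_open=1000):
    """Write each line of `stream` to a file named after one of its
    columns, keeping at most `max_open` files open at once. Files are
    opened in append mode so they survive being closed and reopened."""
    files = {}
    for line in stream:
        if line.startswith("#"):
            continue
        key = line.rstrip("\n").split("\t")[column]
        filename = pattern % key
        if filename not in files:
            if len(files) > max_open:
                # close everything and start over; append mode keeps
                # previously written data intact
                for f in files.values():
                    f.close()
                files = {}
            files[filename] = open(filename, "a")
        files[filename].write(line)
    for f in files.values():
        f.close()


# usage: split stdin on its first column
# split_by_column(sys.stdin, column=0)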