Example #1
    def submit_function(*args, **kwargs):

        if "submit" in kwargs and kwargs["submit"]:
            del kwargs["submit"]
            submit_args, args_file = _pickle_args(args, kwargs)
            module_file = os.path.abspath(
                sys.modules[func.__module__].__file__)
            submit(snip(__file__),
                   "run_pickled",
                   params=[snip(module_file), function_name, args_file],
                   **submit_args)
        else:
            # remove job control options before running the function
            for x in ("submit", "job_options", "job_queue"):
                if x in kwargs:
                    del kwargs[x]
            return func(*args, **kwargs)
Example #2
    def submit_function(*args, **kwargs):

        if "submit" in kwargs and kwargs["submit"]:
            del kwargs["submit"]
            submit_args, args_file = _pickle_args(args, kwargs)
            module_file = os.path.abspath(
                sys.modules[func.__module__].__file__)
            submit(snip(__file__),
                   "run_pickled",
                   params=[snip(module_file), function_name, args_file],
                   **submit_args)
        else:
            # remove job control options before running the function
            for x in ("submit", "job_options", "job_queue", "job_memory",
                      "job_threads"):
                if x in kwargs:
                    del kwargs[x]
            return func(*args, **kwargs)
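
Both variants above are the inner wrapper created by a decorator: if the
decorated function is called with submit=True, its arguments are pickled and
the call is handed off to the cluster via the run_pickled entry point;
otherwise the job-control keywords are stripped and the function runs in the
current process. A minimal usage sketch, assuming the enclosing decorator is
named cluster_runnable (as in the CGAT pipeline code); the decorated function
and its arguments are hypothetical:

    @cluster_runnable
    def count_words(infile, outfile):
        # placeholder for long-running work
        with open(infile) as inf, open(outfile, "w") as outf:
            outf.write("%i\n" % len(inf.read().split()))

    # runs in-process; submit/job_* keywords are removed before the call
    count_words("input.txt", "words.count")

    # pickles args/kwargs and submits the call as a cluster job
    count_words("input.txt", "words.count",
                submit=True, job_memory="4G", job_threads=2)
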
Example #3
def run_report(clean=True,
               with_pipeline_status=True,
               pipeline_status_format="svg"):
    '''run CGATreport.

    This will also run ruffus to create an svg image of the pipeline
    status unless *with_pipeline_status* is set to False. The image
    will be saved into the export directory.

    '''

    if with_pipeline_status:
        targetdir = PARAMS["exportdir"]
        if not os.path.exists(targetdir):
            os.mkdir(targetdir)

        pipeline_printout_graph(
            os.path.join(
                targetdir,
                "pipeline.%s" % pipeline_status_format),
            pipeline_status_format,
            ["full"],
            checksum_level=PARAMS["ruffus_checksums_level"]
        )

    dirname, basename = os.path.split(getCaller().__file__)

    report_engine = PARAMS.get("report_engine", "cgatreport")
    assert report_engine in ('sphinxreport', 'cgatreport')

    docdir = os.path.join(dirname, "pipeline_docs", snip(basename, ".py"))
    themedir = os.path.join(dirname, "pipeline_docs", "themes")
    relpath = os.path.relpath(docdir)
    trackerdir = os.path.join(docdir, "trackers")

    # warning: memory gets multiplied by threads, so do not set it
    # too high
    job_memory = "1G"
    job_threads = PARAMS["report_threads"]

    # use a fake X display in order to avoid windows popping up
    # from R plots.
    xvfb_command = IOTools.which("xvfb-run")

    # permit multiple servers using -a option
    if xvfb_command:
        xvfb_command += " -a "
    else:
        xvfb_command = ""

    # if no DISPLAY variable is set, xvfb runs but exits with an
    # error when the process is killed. Thus, ignore the return value.
    # print os.getenv("DISPLAY"), "command=", xvfb_command
    if not os.getenv("DISPLAY"):
        erase_return = "|| true"
    else:
        erase_return = ""

    # in the current version, xvfb always returns with an error, so
    # ignore it.
    erase_return = "|| true"

    if clean:
        clean = """rm -rf report _cache _static;"""
    else:
        clean = ""

    # with sphinx >1.3.1 the PYTHONPATH needs to be set explicitly as
    # the virtual environment seems to be stripped. It is thus set to
    # the contents of the current sys.path
    syspath = ":".join(sys.path)

    statement = '''
    %(clean)s
    (export SPHINX_DOCSDIR=%(docdir)s;
    export SPHINX_THEMEDIR=%(themedir)s;
    export PYTHONPATH=%(syspath)s;
    %(xvfb_command)s
    %(report_engine)s-build
    --num-jobs=%(report_threads)s
    sphinx-build
    -b html
    -d %(report_doctrees)s
    -c .
    -j %(report_threads)s
    %(docdir)s %(report_html)s
    >& report.log %(erase_return)s )
    '''

    run()

    E.info('the report is available at %s' % os.path.abspath(
        os.path.join(PARAMS['report_html'], "contents.html")))
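
In a pipeline, run_report is usually exposed through dedicated tasks. A
minimal sketch, assuming a ruffus-based pipeline module in which run_report
and PARAMS are already set up; the task names are illustrative:

    from ruffus import follows, mkdir

    @follows(mkdir("report"))
    def build_report():
        '''build the report from scratch.'''
        run_report(clean=True)

    @follows(mkdir("report"))
    def update_report():
        '''update the report without removing cached results.'''
        run_report(clean=False)
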
Example #4
def mergeAndLoad(infiles,
                 outfile,
                 suffix=None,
                 columns=(0, 1),
                 regex=None,
                 row_wise=True,
                 retry=True,
                 options="",
                 prefixes=None):
    '''merge multiple categorical tables and load into a database.

    The tables are merged and entered row-wise, i.e., the contents of
    each file are a row.

    For example, the statement::

        mergeAndLoad(['file1.txt', 'file2.txt'],
                     "test_table.load")

    with the two files::

        > cat file1.txt
        Category    Result
        length      12
        width       100

        > cat file2.txt
        Category    Result
        length      20
        width       50

    will be added into table ``test_table`` as::

        track   length   width
        file1   12       100
        file2   20       50

    If ``row_wise`` is set to False::

        mergeAndLoad(['file1.txt', 'file2.txt'],
                     "test_table.load", row_wise=False)

    each file becomes a column and ``test_table`` will look like this::

        track    file1 file2
        length   12    20
        width    100   50

    Arguments
    ---------
    infiles : list
        Filenames of the input data
    outfile : string
        Output filename. This will contain the logging information. The
        table name is derived from `outfile`.
    suffix : string
        If `suffix` is given, the suffix will be removed from the filenames.
    columns : list
        The columns to be taken. By default, the first two columns are
        taken with the first being the key. Filenames are stored in a
        ``track`` column. Directory names are chopped off.  If
        `columns` is set to None, all columns will be taken. Here,
        column names will receive a prefix given by `prefixes`. If
        `prefixes` is None, the filename will be added as a prefix.
    regex : string
        If set, the full filename will be used to extract a
        track name via the supplied regular expression.
    row_wise : bool
        If set to False, each table will be a column in the resulting
        table.  This is useful if histograms are being merged.
    retry : bool
        If True, multiple attempts will be made if the data cannot
        be loaded on the first try, for example if a table is locked.
    options : string
        Command line options for the `csv2db.py` script.
    prefixes : list
        If given, the respective prefix will be added to each
        column. The number of `prefixes` and `infiles` needs to be the
        same.

    '''
    PARAMS = getParams()
    if len(infiles) == 0:
        raise ValueError("no files for merging")

    if suffix:
        header = ",".join([os.path.basename(snip(x, suffix)) for x in infiles])
    elif regex:
        header = ",".join(["-".join(re.search(regex, x).groups())
                          for x in infiles])
    else:
        header = ",".join([os.path.basename(x) for x in infiles])

    header_stmt = "--header-names=%s" % header

    if columns:
        column_filter = "| cut -f %s" % ",".join(map(str,
                                                 [x + 1 for x in columns]))
    else:
        column_filter = ""
        if prefixes:
            assert len(prefixes) == len(infiles)
            header_stmt = "--prefixes=%s" % ",".join(prefixes)
        else:
            header_stmt = "--add-file-prefix"

    if infiles[0].endswith(".gz"):
        filenames = " ".join(
            ["<( zcat %s %s )" % (x, column_filter) for x in infiles])
    else:
        filenames = " ".join(
            ["<( cat %s %s )" % (x, column_filter) for x in infiles])

    if row_wise:
        transform = """| perl -p -e "s/bin/track/"
        | cgat table2table --transpose""" % PARAMS
    else:
        transform = ""

    load_statement = build_load_statement(
        toTable(outfile),
        options="--add-index=track " + options,
        retry=retry)

    statement = """cgat combine_tables
    %(header_stmt)s
    --skip-titles
    --missing-value=0
    --ignore-empty
    %(filenames)s
    %(transform)s
    | %(load_statement)s
    > %(outfile)s
    """

    to_cluster = False

    run()
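
A typical call site for mergeAndLoad is a ruffus merge task that collects
per-track summary files and loads them into a single table. A minimal sketch
under that assumption; the glob pattern, file suffix and output name are
hypothetical:

    from ruffus import merge

    @merge("*.counts", "word_counts.load")
    def loadWordCounts(infiles, outfile):
        # one row per input file; strip ".counts" so the track column
        # holds clean sample names
        mergeAndLoad(infiles, outfile, suffix=".counts")
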
Example #5
def mergeAndLoad(infiles,
                 outfile,
                 suffix=None,
                 columns=(0, 1),
                 regex=None,
                 row_wise=True,
                 retry=True,
                 options="",
                 prefixes=None):
    '''merge multiple categorical tables and load into a database.

    The tables are merged and entered row-wise, i.e., the contents of
    each file are a row.

    For example, the statement::

        mergeAndLoad(['file1.txt', 'file2.txt'],
                     "test_table.load")

    with the two files::

        > cat file1.txt
        Category    Result
        length      12
        width       100

        > cat file2.txt
        Category    Result
        length      20
        width       50

    will be added into table ``test_table`` as::

        track   length   width
        file1   12       100
        file2   20       50

    If ``row_wise`` is set to False::

        mergeAndLoad(['file1.txt', 'file2.txt'],
                     "test_table.load", row_wise=False)

    each file becomes a column and ``test_table`` will look like this::

        track    file1 file2
        length   12    20
        width    100   50

    Arguments
    ---------
    infiles : list
        Filenames of the input data
    outfile : string
        Output filename. This will contain the logging information. The
        table name is derived from `outfile`.
    suffix : string
        If `suffix` is given, the suffix will be removed from the filenames.
    columns : list
        The columns to be taken. By default, the first two columns are
        taken with the first being the key. Filenames are stored in a
        ``track`` column. Directory names are chopped off.  If
        `columns` is set to None, all columns will be taken. Here,
        column names will receive a prefix given by `prefixes`. If
        `prefixes` is None, the filename will be added as a prefix.
    regex : string
        If set, the full filename will be used to extract a
        track name via the supplied regular expression.
    row_wise : bool
        If set to False, each table will be a column in the resulting
        table.  This is useful if histograms are being merged.
    retry : bool
        If True, multiple attempts will be made if the data cannot
        be loaded on the first try, for example if a table is locked.
    options : string
        Command line options for the `csv2db.py` script.
    prefixes : list
        If given, the respective prefix will be added to each
        column. The number of `prefixes` and `infiles` needs to be the
        same.

    '''
    PARAMS = getParams()
    if len(infiles) == 0:
        raise ValueError("no files for merging")

    if suffix:
        header = ",".join([os.path.basename(snip(x, suffix)) for x in infiles])
    elif regex:
        header = ",".join(
            ["-".join(re.search(regex, x).groups()) for x in infiles])
    else:
        header = ",".join([os.path.basename(x) for x in infiles])

    header_stmt = "--header-names=%s" % header

    if columns:
        column_filter = "| cut -f %s" % ",".join(
            map(str, [x + 1 for x in columns]))
    else:
        column_filter = ""
        if prefixes:
            assert len(prefixes) == len(infiles)
            header_stmt = "--prefixes=%s" % ",".join(prefixes)
        else:
            header_stmt = "--add-file-prefix"

    if infiles[0].endswith(".gz"):
        filenames = " ".join(
            ["<( zcat %s %s )" % (x, column_filter) for x in infiles])
    else:
        filenames = " ".join(
            ["<( cat %s %s )" % (x, column_filter) for x in infiles])

    if row_wise:
        transform = """| perl -p -e "s/bin/track/"
        | cgat table2table --transpose""" % PARAMS
    else:
        transform = ""

    load_statement = build_load_statement(toTable(outfile),
                                          options="--add-index=track " +
                                          options,
                                          retry=retry)

    statement = """cgat combine_tables
    %(header_stmt)s
    --skip-titles
    --missing-value=0
    --ignore-empty
    %(filenames)s
    %(transform)s
    | %(load_statement)s
    > %(outfile)s
    """

    to_cluster = False

    run()
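
Both run_report and mergeAndLoad end with a bare run() call. In the CGAT
pipeline framework this works by frame introspection: run() builds the shell
command by interpolating the %(...)s placeholders in `statement` from the
caller's local variables and from PARAMS, and picks up job options such as
job_memory, job_threads or to_cluster in the same way. A minimal sketch of
the idiom, assuming run() is imported from the pipeline module as in the
examples above; the function and file names are illustrative:

    def compressFile(infile, outfile):
        # local variables below are picked up by run()
        to_cluster = False
        job_memory = "1G"
        statement = "gzip -c %(infile)s > %(outfile)s"
        run()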