Python nested_iterの例、cgatcore.iotools.nested_iter Pythonの例

コード例 #1

0

ファイルを表示

ファイル: workflow.py プロジェクト: cgat-developers/cgat-daisy

def expand_generators(config):
    """Expand generator expressions in option lists.

    A generator expression is a string value that starts with
    ``generate=`` followed by valid python syntax, for example::

      options: generate=["--chrom={}".format(x) for x in [1,2,3,4,5]]

    The text after the prefix is evaluated with :func:`eval`. If the
    container holding the value is a list, the evaluated items are
    appended to that list and the original ``generate=`` entry is
    removed. Otherwise the entry is replaced by the evaluated result.

    :param config: nested structure traversed with
        ``IOTools.nested_iter``. Modified in-place.
    :return: the (modified) ``config`` object.
    :raises ValueError: if the expression is not valid python syntax.
    """

    to_delete = []
    for d, key, value in IOTools.nested_iter(config):
        if not isinstance(value, str) or not value.startswith("generate="):
            continue

        expression = re.sub(r"^generate=\s*", "", value)

        # Strip surrounding single quotes only if the expression is
        # quoted at *both* ends; an expression that merely starts with
        # a quote (e.g. "'{}'.format(1)") must be left untouched.
        if (len(expression) >= 2
                and expression.startswith("'")
                and expression.endswith("'")):
            expression = expression[1:-1]

        # NOTE(security): eval() executes arbitrary code. This is only
        # acceptable as long as the configuration comes from a trusted
        # source.
        try:
            argument_list = eval(expression)
        except SyntaxError as ex:
            raise ValueError(
                "error occured while evaluating generator "
                "expression {}: {}".format(expression, ex)) from ex

        if isinstance(d, list):
            d.extend(argument_list)
            # Deletion is deferred so that positions in the list stay
            # valid while the structure is still being traversed.
            to_delete.append((d, key))
        else:
            d[key] = argument_list

    # Delete in reverse order so that earlier indices remain valid.
    for d, key in to_delete[::-1]:
        del d[key]

    return config

コード例 #2

0

ファイルを表示

ファイル: ToolRunner.py プロジェクト: cgat-developers/cgat-daisy

    def __call__(self, infiles, outfile, only_info=False):
        """Run the tool on ``infiles`` and record meta data and benchmark.

        :param infiles: nested structure of input files. If
            ``self.mountpoint`` is set, occurrences of the mountpoint in
            string values are replaced in-place by the ``arv=`` prefix.
        :param outfile: output file; passed to ``save_meta``,
            ``build_params``, ``run`` and ``save_benchmark``.
        :param only_info: ignored; the value is taken from the presence
            of ``only_info`` in ``P.PARAMS`` instead (see note below).
        """

        # NOTE: extras not implemented in ruffus 2.6.3, thus
        # use parameter:
        only_info = "only_info" in P.PARAMS

        if self.mountpoint:
            # revert mount redirection for arvados to allow redirection
            # on individual cluster nodes
            # The mountpoint is a literal path prefix, not a regular
            # expression, so escape it before using it as a pattern.
            mount_pattern = re.escape(self.mountpoint)
            for d, key, value in IOTools.nested_iter(infiles):
                # only strings can contain a path; leave other leaf
                # values (numbers, booleans, None) untouched.
                if isinstance(value, str):
                    d[key] = re.sub(mount_pattern, "arv=", value)

        self.instantiate_input(infiles)
        self.save_meta(outfile, output_file=outfile)

        if only_info:
            E.warn("only_info - meta information has been updated")
            return

        params = self.build_params(output_file=outfile)
        benchmark = self.run(outfile, as_namedtuple(params))
        self.save_benchmark(outfile, benchmark)

コード例 #3

0

ファイルを表示

ファイル: benchmark_rename_files.py プロジェクト: cgat-developers/cgat-daisy

def main(argv=sys.argv):
    """Rename (or link) existing result directories to their new names.

    Reads ``benchmark.yml``, rebuilds the tool/input combinations and
    derives the result directory name of each combination. Every
    existing ``*.dir/tool.info`` is then matched against the new
    combinations by counting shared properties (input files, tool name,
    options). The best unique match determines the new directory name.

    A tab-separated table with the columns ``old``, ``new`` and
    ``matching`` is written to ``options.stdout``.

    :param argv: command line arguments.
    :raises ValueError: if the target directory already exists.
    """

    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option(
        "-n",
        "--dry-run",
        dest="dry_run",
        action="store_true",
        help="only show what will be done, don't do it [%default]")

    parser.add_option("-l",
                      "--link",
                      dest="link",
                      action="store_true",
                      help="link instead of rename [%default]")

    parser.set_defaults(dry_run=False, link=False)

    (options, args) = E.start(parser, argv)

    config = P.get_parameters("benchmark.yml")

    old_data, new_data = [], []

    # collect meta information of all existing result directories
    for info_path in glob.glob("*.dir/tool.info"):
        old_dir = os.path.dirname(info_path)
        old_data.append((old_dir, toolkit.read_data(info_path)))

    tool_functions = workflow.build_tool_functions(map_tool_to_runner, config)

    config_files = workflow.expand_globs(config["input"])
    input_combos = workflow.build_combinations(config_files)

    # maps a (property, value) pair to all new directories having it
    map_property_to_dir = collections.defaultdict(list)

    for toolf, input_files in itertools.product(tool_functions, input_combos):

        # create a copy of the task function and give it its unique name
        # by mangling it with the input_files
        taskf = copy.copy(toolf)
        taskf.register_input(input_files)
        result_dir = os.path.basename(taskf.__name__ + ".dir")
        new_data.append((result_dir, taskf))
        for a, x, y in IOTools.nested_iter(taskf.input_files):
            map_property_to_dir[(x, y)].append(result_dir)
        map_property_to_dir[("name", taskf.name)].append(result_dir)
        for x, y in taskf._option_dict.items():
            map_property_to_dir[(x, y)].append(result_dir)

    # match by input_files
    options.stdout.write("\t".join(("old", "new", "matching")) + "\n")

    for old_dir, old_info in old_data:
        targets = []
        for a, x, y in IOTools.nested_iter(old_info["input_files"]):
            if (x, y) in map_property_to_dir:
                targets.extend(map_property_to_dir[(x, y)])
        for x, y in old_info.items():
            try:
                # .get() avoids inserting empty entries into the
                # defaultdict for properties that have no match.
                targets.extend(map_property_to_dir.get((x, y), []))
            except TypeError:
                # unhashable values can not be used as a key
                pass

        if not targets:
            # without this guard max() below fails on an empty sequence
            E.warn("no matches for {}, ignored".format(old_dir))
            continue

        counts = collections.Counter(targets)
        max_count = max(counts.values())
        max_count_items = [
            x for x, y in counts.items() if y == max_count
        ]

        if len(max_count_items) > 1:
            E.warn("multiple matches for {}, ignored".format(old_dir))
            continue

        new_dir = max_count_items[0]

        options.stdout.write("\t".join(map(str, (old_dir, new_dir,
                                                 max_count))) + "\n")

        if os.path.exists(new_dir):
            raise ValueError("directory {} already exists".format(new_dir))

        if options.dry_run:
            continue

        if options.link:
            os.symlink(old_dir, new_dir)
        else:
            os.rename(old_dir, new_dir)

    E.stop()

コード例 #4

0

ファイルを表示

ファイル: toolkit.py プロジェクト: cgat-developers/cgat-daisy

def redirect2mounts(config,
                    mountpoint=None,
                    debug=None,
                    mount_write=False,
                    substitute_only=False,
                    always_mount=False):
    """redirect filenames in dictionary config to a mount-point.

    Mount points in the config are indicated by the `arv=` prefix. If
    no option in config requires mounting, no mounting will be done and
    the method returns None.

    Note that substitution of the `arv=` prefix is only performed in
    read-only mode (``mount_write=False``). Mounting in read-only mode
    is currently disabled and raises ``NotImplementedError``.

    :param config: dictionary with config values. Will be modified in-place.
    :param mountpoint: if given, paths will be substituted by mountpoint. If None,
        a new mountpoint will be created.
    :param debug: if given, mount in debug mode and save log to filename.
    :param mount_write: if True, mount in --read-write mode.
    :param substitute_only: if True, only perform substitution, do not mount anything
        even if mountpoint is None.
    :param always_mount: if True, always mount, no matter if arv= prefix is present.

    :return: the mountpoint

    :raises NotImplementedError: if a read-only mount would be required.

    """
    arvados_options = ["--disable-event-listening"]
    if debug:
        arvados_options.append(" --debug --logfile={}".format(debug))

    if mount_write:
        arvados_options.append("--read-write")
        arvados_options = " ".join(arvados_options)
        if not mountpoint:
            mountpoint = P.get_temp_dir() + "/"
            E.info("redirect2mounts: mounting arvados at {} with --read-write".
                   format(mountpoint))
            E.run("arv-mount {} {}".format(arvados_options, mountpoint))
            E.info("redirect2mounts: arvados mounted at {} with --read-write".
                   format(mountpoint))
    else:
        arvados_options.append("--read-only")
        if always_mount:
            mountpoint = P.get_temp_dir() + "/"
            do_mount = True
        else:
            do_mount = False

        for d, key, value in IOTools.nested_iter(config):
            if not isinstance(value, str) or "arv=" not in value:
                continue
            if substitute_only and mountpoint is None:
                continue
            if not mountpoint:
                # first value requiring a mount: create the mountpoint
                mountpoint = P.get_temp_dir() + "/"
                do_mount = True
            # Literal replacement: the mountpoint is a path and must
            # not be interpreted as a regular expression replacement
            # template (backslashes, group references).
            d[key] = value.replace("arv=", mountpoint)

        if do_mount:
            raise NotImplementedError("arvados support disabled")
            # NOTE: everything below is unreachable while arvados
            # support is disabled; kept for when it is re-enabled.
            # if not arvados.have_arvados():
            #     raise ValueError(
            #         "config file requires arvados access, but arvados not available")
            arvados_options = " ".join(arvados_options)
            E.debug("redirect2mounts: mounting arvados at {} with options {}".
                    format(mountpoint, arvados_options))
            E.run("arv-mount {} {}".format(arvados_options, mountpoint))
            E.debug(
                "redirect2mounts: arvados mounted at {}".format(mountpoint))

    return mountpoint

コード例 #5

0

ファイルを表示

ファイル: workflow.py プロジェクト: cgat-developers/cgat-daisy

def expand_globs(config, is_test=False):
    """detect and expand glob expressions in the input section.

    A glob expression is any filename that contains a '*'. Multiple
    glob expressions can be combined on the same line by a ','.

    A "find" expression is detected starting with 'find'. These
    expressions will be evaluated in a shell and the results inserted
    into the dictionary.

    If a filename starts with "file=", the contents of the file
    following the "=" are read and inserted. Multiple files can be
    separated by a ','.

    If a glob or find expression is evaluated to nothing, an exception
    is raised unless ``is_test`` is set. In that case, two files will be
    returned called "test1" and "test2".

    String values stored under a key ending in ``_regex`` are not
    expanded.

    :param config: nested structure traversed with
        ``IOTools.nested_iter``. Modified in-place.
    :param is_test: if True, substitute placeholder filenames instead
        of raising when an expression expands to nothing.
    :return: the (modified) ``config`` object.
    :raises ValueError: if an expression expands to nothing and
        ``is_test`` is not set.
    """

    for d, key, value in IOTools.nested_iter(config):
        if isinstance(
                value,
                str) and not (isinstance(key, str) and key.endswith("_regex")):
            if value.startswith("find"):
                try:
                    data = E.run(value, return_stdout=True)
                except Exception as e:
                    # best effort: use output attached to the exception
                    # if there is any; otherwise surface the original
                    # error instead of masking it with an AttributeError.
                    data = getattr(e, "output", None)
                    if data is None:
                        raise
                d[key] = [x for x in data.split("\n") if x]
            elif "*" in value:
                if "," in value:
                    v = [glob.glob(x.strip()) for x in value.split(",")]
                    v = [item for sublist in v for item in sublist]
                else:
                    v = glob.glob(value)
                d[key] = v
            elif value.startswith("file="):
                # split on the first "=" only, so that filenames
                # containing a "=" are not truncated.
                filenames = [
                    x.strip() for x in value.split("=", 1)[1].split(",")
                ]
                paths = []
                for fn in filenames:
                    with IOTools.open_file(fn) as inf:
                        paths.extend([x.strip() for x in inf if x.strip()])
                d[key] = paths
            # NOTE: this check also applies to plain string values that
            # were not expanded above, i.e. an empty string.
            if len(d[key]) == 0:
                if not is_test:
                    raise ValueError(
                        "expression '{}' expanded to nothing".format(value))
                else:
                    # insert some random files for testing purposes:
                    if "*" in value:
                        # replace glob expressions
                        value = re.sub(",.*", "", value)
                        d[key] = [
                            re.sub("[*]", "test1", value),
                            re.sub("[*]", "test2", value)
                        ]
                    else:
                        if "bam" in value:
                            d[key] = ["test1.bam", "test2.bam"]
                        elif "vcf" in value:
                            d[key] = ["test1.vcf.gz", "test2.vcf.gz"]
                        else:
                            d[key] = ["test1.txt", "test2.txt"]
    return config