コード例 #1
0
def test_groupby_regex_with_combinatorial_option(tmp_path):
    """Check regex grouping combined with an ungrouped list option.

    ``files_a`` and ``files_b`` each come with a ``*_regex`` pattern, while
    ``files_x`` has none. The expected result pairs the a/b files that share
    the same captured number and repeats each pair once per ``files_x``
    value, with ``name`` set to the captured number.
    """
    a_0 = "{}/data_0.a".format(tmp_path)
    a_1 = "{}/data_1.a".format(tmp_path)
    b_0 = "{}/data_0.b".format(tmp_path)
    b_1 = "{}/data_1.b".format(tmp_path)

    config = {
        "groupby": "regex",
        "files_x": ["y.x", "z.x"],
        "files_a": [a_0, a_1],
        "files_b": [b_0, b_1],
        "files_a_regex": r"data_(\d+).a",
        "files_b_regex": r"data_(\d+).b",
    }

    expected = [
        {'files_a': a_0, 'files_b': b_0, 'files_x': "y.x", 'name': "0"},
        {'files_a': a_0, 'files_b': b_0, 'files_x': "z.x", 'name': "0"},
        {'files_a': a_1, 'files_b': b_1, 'files_x': "y.x", 'name': "1"},
        {'files_a': a_1, 'files_b': b_1, 'files_x': "z.x", 'name': "1"},
    ]

    assert build_combinations(config) == expected
コード例 #2
0
def test_groupby_design_with_combinatorial_option(tmp_path):
    """Check design-file grouping combined with an ungrouped list option.

    A four-row tab-separated design file is written to ``tmp_path``.
    ``option1``/``option2`` refer to its columns, ``label`` names the column
    used for ``name``, and ``option3`` is a plain list. The expected result
    holds one entry per design row and ``option3`` value, in row order.
    """
    # (label, c_option1, c_option2) for every row of the design file.
    design_rows = [
        ("label1", "value1", "valueA"),
        ("label2", "value1", "valueB"),
        ("label3", "value2", "valueA"),
        ("label4", "value2", "valueB"),
    ]
    extra_values = ["valueX", "valueY"]

    design_file = tmp_path / "design.tsv"
    with open(design_file, "w") as outf:
        outf.write("label\tc_option1\tc_option2\n")
        for row in design_rows:
            outf.write("\t".join(row) + "\n")

    config = {
        "groupby": "file",
        "label": "label",
        "input": design_file,
        "option1": "c_option1",
        "option2": "c_option2",
        "option3": extra_values,
    }

    # Rows vary slowest, the combinatorial option fastest.
    expected = [
        {'option1': first, 'option2': second, "name": label, "option3": extra}
        for label, first, second in design_rows
        for extra in extra_values
    ]

    assert build_combinations(config) == expected
コード例 #3
0
def test_two_options():
    """Check that two list-valued options yield all four pairings in order."""
    config = {
        'option1': ["value1", "value2"],
        'option2': ["valueA", "valueB"],
    }

    expected = [
        {'option1': 'value1', 'option2': 'valueA'},
        {'option1': 'value1', 'option2': 'valueB'},
        {'option1': 'value2', 'option2': 'valueA'},
        {'option1': 'value2', 'option2': 'valueB'},
    ]

    assert build_combinations(config) == expected
コード例 #4
0
def test_groupby_regex_filters_when_data_point_missing(tmp_path):
    """Check that regex grouping drops keys missing from one of the lists.

    ``files_b`` contains ``data_1.b`` but ``files_a`` has no ``data_1.a``,
    so only the ``0`` group is expected in the result.
    """
    a_0 = "{}/data_0.a".format(tmp_path)
    b_0 = "{}/data_0.b".format(tmp_path)
    b_1 = "{}/data_1.b".format(tmp_path)

    config = {
        "groupby": "regex",
        "files_a": [a_0],
        "files_b": [b_0, b_1],
        "files_a_regex": r"data_(\d+).a",
        "files_b_regex": r"data_(\d+).b",
    }

    expected = [{'files_a': a_0, 'files_b': b_0, 'name': "0"}]

    assert build_combinations(config) == expected
コード例 #5
0
def test_groupby_named_regex(tmp_path):
    """Check regex grouping when the patterns use a named group.

    Both patterns capture the number as ``key1``; the expected result pairs
    files sharing that number and uses the captured value as ``name``.
    """
    a_0 = "{}/data_0.a".format(tmp_path)
    a_1 = "{}/data_1.a".format(tmp_path)
    b_0 = "{}/data_0.b".format(tmp_path)
    b_1 = "{}/data_1.b".format(tmp_path)

    config = {
        "groupby": "regex",
        "files_a": [a_0, a_1],
        "files_b": [b_0, b_1],
        "files_a_regex": r"data_(?P<key1>\d+).a",
        "files_b_regex": r"data_(?P<key1>\d+).b",
    }

    expected = [
        {'files_a': a_0, 'files_b': b_0, 'name': "0"},
        {'files_a': a_1, 'files_b': b_1, 'name': "1"},
    ]

    assert build_combinations(config) == expected
コード例 #6
0
def main(argv=sys.argv):
    """Rename or link existing result directories to their new names.

    Reads ``benchmark.yml``, derives the result directory name of every
    tool/input combination in the current configuration, and matches each
    existing ``*.dir/tool.info`` against them by counting shared
    (key, value) properties. The new directory sharing the most properties
    wins. A tab-separated ``old / new / matching`` table is written to
    ``options.stdout``.

    Old directories with no match at all, or with several equally good
    matches, are reported with a warning and left untouched.

    Args:
        argv: command line arguments; ``-n/--dry-run`` only reports the
            planned moves, ``-l/--link`` creates symlinks instead of
            renaming.

    Raises:
        ValueError: if the chosen target directory already exists.
    """
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option(
        "-n",
        "--dry-run",
        dest="dry_run",
        action="store_true",
        help="only show what will be done, don't do it [%default]")

    parser.add_option("-l",
                      "--link",
                      dest="link",
                      action="store_true",
                      help="link instead of rename [%default]")

    parser.set_defaults(dry_run=False, link=False)

    (options, args) = E.start(parser, argv)

    config = P.get_parameters("benchmark.yml")

    # Collect (directory, parsed tool.info) for all existing results.
    old_data = []
    for info_path in glob.glob("*.dir/tool.info"):
        old_dir = os.path.dirname(info_path)
        old_data.append((old_dir, toolkit.read_data(info_path)))

    tool_functions = workflow.build_tool_functions(map_tool_to_runner, config)

    config_files = workflow.expand_globs(config["input"])
    input_combos = workflow.build_combinations(config_files)

    # Index from a (key, value) property to all new directories having it.
    map_property_to_dir = collections.defaultdict(list)

    for toolf, input_files in itertools.product(tool_functions, input_combos):

        # create a copy of the task function and give it its unique name
        # by mangling it with the input_files
        taskf = copy.copy(toolf)
        taskf.register_input(input_files)
        result_dir = os.path.basename(taskf.__name__ + ".dir")
        for _, key, value in IOTools.nested_iter(taskf.input_files):
            map_property_to_dir[(key, value)].append(result_dir)
        map_property_to_dir[("name", taskf.name)].append(result_dir)
        for key, value in taskf._option_dict.items():
            map_property_to_dir[(key, value)].append(result_dir)

    # match by input_files
    options.stdout.write("\t".join(("old", "new", "matching")) + "\n")

    for old_dir, old_info in old_data:
        targets = []
        for _, key, value in IOTools.nested_iter(old_info["input_files"]):
            if (key, value) in map_property_to_dir:
                targets.extend(map_property_to_dir[(key, value)])
        for key, value in old_info.items():
            try:
                # .get() avoids inserting empty entries into the defaultdict
                # for properties that no new directory has.
                targets.extend(map_property_to_dir.get((key, value), ()))
            except TypeError:
                # Unhashable values (e.g. nested containers) cannot be used
                # as lookup keys and are skipped.
                pass

        if not targets:
            # Nothing in the new configuration shares a property with this
            # directory; max() below would fail on an empty sequence.
            E.warn("no match for {}, ignored".format(old_dir))
            continue

        counts = collections.Counter(targets)
        max_count = max(counts.values())
        max_count_items = [
            name for name, count in counts.items() if count == max_count
        ]

        if len(max_count_items) > 1:
            E.warn("multiple matches for {}, ignored".format(old_dir))
            continue

        new_dir = max_count_items[0]

        options.stdout.write("\t".join(map(str, (old_dir, new_dir,
                                                 max_count))) + "\n")

        if os.path.exists(new_dir):
            raise ValueError("directory {} already exists".format(new_dir))

        if options.dry_run:
            continue

        if options.link:
            os.symlink(old_dir, new_dir)
        else:
            os.rename(old_dir, new_dir)

    E.stop()
コード例 #7
0
def test_one_option():
    """Check that a single list-valued option yields one entry per value."""
    config = {"option1": ["value1", "value2"]}

    expected = [
        {'option1': 'value1'},
        {'option1': 'value2'},
    ]

    assert build_combinations(config) == expected
コード例 #8
0
def test_complex_values():
    """Check that dict-valued list items are passed through unchanged.

    Each element of the ``option1`` list is itself a dict holding a list;
    the expected result has one entry per element with the dict intact.
    """
    first_value = {"value1": [1, 2, 3]}
    second_value = {"value2": [4, 5, 6]}

    config = {'option1': [first_value, second_value]}

    expected = [
        {'option1': {'value1': [1, 2, 3]}},
        {'option1': {'value2': [4, 5, 6]}},
    ]

    assert build_combinations(config) == expected