Ejemplo n.º 1
0
    def get_mismatches(self, reference_metadata,
                       target_metadata,
                       traverse_sub_workflows=False):
        """
        Compare the output files recorded in two metadata files and
        return the ones that do not match.

        Both metadata files are expected to belong to executions of a
        common workflow. The outputs of each call are filtered down to
        the file types listed in ``self.filetypes_to_compare``, and each
        reference file is compared against the test file at the same
        position using the comparer registered for its extension
        (``self.filetypes_to_compare[extension].equals``). According to
        the original documentation, objects are downloaded to the
        working directory if they are not already there; that happens
        outside this method.

        Calls present in only one of the two metadata files are reported
        on stdout and skipped. Progress and per-call results are also
        printed to stdout.

        :param reference_metadata: the reference metadata, passed as-is
            to ``Metadata``.
        :param target_metadata: the test metadata, passed as-is to
            ``Metadata``.
        :param traverse_sub_workflows: forwarded to
            ``Metadata.get_outputs``.
        :return: a dictionary mapping a call name to a list of
            ``[reference, target]`` pairs that did not match. When the
            number of files of an extension differs between reference
            and test (including the extension being absent from the
            test outputs), the pair holds the two lists of objects
            rather than individual files.
        """
        def record_mismatch(call, reference, target):
            # Group the mismatching pairs by the call they belong to.
            mismatches.setdefault(call, []).append([reference, target])

        # First we define a method that takes a list
        # of a task outputs, and keeps only those that
        # are files and their extension match the
        # file types that we want to compare
        # (e.g., filter only VCF files).
        filter_method = FilterBasedOnExtensions(
            self.filetypes_to_compare.keys()).filter

        # Then we create two instances of the Metadata
        # class, one for each metadata file, and we
        # invoke the `get_outputs` method which traverses
        # the outputs of task, and returns those filtered
        # by the above-defined filter.
        ref_output_files = Metadata(reference_metadata).get_outputs(
            traverse_sub_workflows, filter_method)
        test_output_files = Metadata(target_metadata).get_outputs(
            traverse_sub_workflows, filter_method)

        mismatches = {}

        # Calls that exist in only one of the two metadata files
        # cannot be compared; report them and skip them.
        r_t = ref_output_files.keys() - test_output_files.keys()
        t_r = test_output_files.keys() - ref_output_files.keys()
        if r_t or t_r:
            print(f"\n{COLOR_BLINKING}WARNING!{COLOR_ENDC}")
            print(f"The reference and test metadata files differ "
                  f"in their outputs; "
                  f"{COLOR_ULINE}the differences will be skipped.{COLOR_ENDC}")
            if r_t:
                print(f"\t{len(r_t)}/{len(ref_output_files)} "
                      f"outputs of the reference are not in the test:")
                for x in r_t:
                    print(f"\t\t- {x}")
            if t_r:
                print(f"\t{len(t_r)}/{len(test_output_files)} "
                      f"outputs of the test are not in the reference:")
                for x in t_r:
                    print(f"\t\t- {x}")
            print("\n")

        # Keep only the calls common to both, so the loop below
        # can safely look every call up in the test outputs.
        for x in r_t:
            ref_output_files.pop(x)

        total = len(ref_output_files)
        print(f"{COLOR_YELLOW}Comparing {total} "
              f"files that are common between reference and test "
              f"metadata files and their respective task is executed "
              f"successfully.{COLOR_ENDC}")
        for i, (call, ref_outputs) in enumerate(
                ref_output_files.items(), start=1):
            matched = True
            print(f"Comparing\t{i}/{total}\t{call} ... ", end="")
            test_outputs = test_output_files[call]
            for extension, objs in ref_outputs.items():
                # A call can be present in both metadata files and
                # still lack an extension group in the test outputs;
                # treat that as a mismatch rather than raising KeyError.
                test_objs = test_outputs.get(extension, [])
                if len(objs) != len(test_objs):
                    record_mismatch(call, objs, test_objs)
                    matched = False
                    continue
                comparer = self.filetypes_to_compare[extension]
                # Lengths are equal here, so pairing by position
                # with zip drops nothing.
                for ref_obj, test_obj in zip(objs, test_objs):
                    equals, x, y = comparer.equals(ref_obj, test_obj)
                    if not equals:
                        record_mismatch(call, x, y)
                        matched = False
            if matched:
                print(f"{COLOR_GREEN}match{COLOR_ENDC}")
            else:
                print(f"{COLOR_RED}mismatch{COLOR_ENDC}")
        return mismatches