    def setUp(self):
        self.generator = OntologyCodeGenerator()
        self.dir_path = None

        curr_dir = os.path.dirname(__file__)
        self.spec_dir = os.path.join(curr_dir, "test_specs/")
        self.test_output = os.path.join(curr_dir, "test_outputs/")
Example #2
def create(args_):
    """
    Function for the `create` mode. Generates the ontology.
    Args:
        args_: parsed args for the `create` mode
    """
    spec_path = normalize_path(args_.spec)
    dest_path = normalize_path(args_.dest_path)
    spec_paths = ([normalize_path(config) for config in args_.spec_paths]
                  if args_.spec_paths is not None else None)
    merged_path = normalize_path(args_.merged_path)
    lenient_prefix = args_.lenient_prefix

    generator = OntologyCodeGenerator(spec_paths, args_.gen_all)
    if args_.no_dry_run is None:
        log.info("Ontology will be generated in a temporary directory as "
                 "--no_dry_run is not specified by the user.")
        args_.no_dry_run = False

    if lenient_prefix:
        log.info("Will not enforce prefix check.")

    is_dry_run = not args_.no_dry_run
    include_init = not args_.exclude_init
    generated_folder = generator.generate(spec_path, dest_path, is_dry_run,
                                          include_init, merged_path,
                                          lenient_prefix)
    log.info("Ontology generated in the directory %s.", generated_folder)
Example #3
def build_ontology(sql_db, project_name):
    """
    Find the ontology specification from the project, and then create the
    ontologies.

    Args:
        sql_db: The SQLite database containing the project.
        project_name: The name of the project.
    """
    onto_path = "./stave_test_onto"
    res = query(
        sql_db,
        f"SELECT ontology FROM stave_backend_project "
        f"WHERE stave_backend_project.name = "
        f'"{project_name}"',
    ).fetchone()[0]
    with tempfile.NamedTemporaryFile("w") as onto_file:
        onto_file.write(res)
        # Flush so the generator reads the complete spec from the named file.
        onto_file.flush()
        OntologyCodeGenerator().generate(onto_file.name,
                                         onto_path,
                                         lenient_prefix=True)
    # Make sure the newly created path is in the python path.
    sys.path.append(onto_path)

    # Try to import the newly generated modules (import failures are
    # tolerated).
    try:
        importlib.import_module("edu.cmu")
    except Exception:
        pass
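
A usage sketch for `build_ontology`; the database file and project name are hypothetical:

import sqlite3

sql_db = sqlite3.connect("stave_db.sqlite3")  # hypothetical database file
build_ontology(sql_db, "my_project")          # hypothetical project name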
Example #4
def clean(args_):
    """
        Function for the `clean` mode. Cleans the given directory of generated
        files.
        Args:
            args_: parsed args for the `clean` mode
        """
    dir_ = normalize_path(args_.dir)
    generator = OntologyCodeGenerator()
    is_empty, del_dir = generator.cleanup_generated_ontology(dir_, args_.force)
    if not is_empty:
        log.info("Directory %s not empty, cannot delete completely.", dir_)
    else:
        log.info("Directory %s deleted.", dir_)
    if not args_.force:
        log.info("Deleted files moved to %s.", del_dir)
Example #5
    def test_include_and_exclude_init(self):
        json_file_path = os.path.join(
            self.spec_dir, "example_import_ontology.json"
        )
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_filename = _get_temp_filename(json_file_path, temp_dir)
            # Test with include_init = True
            folder_path = self.generator.generate(
                temp_filename, temp_dir, is_dry_run=False, include_init=True
            )
            gen_files = sorted(utils.get_generated_files_in_dir(folder_path))

            # Assert the generated python files
            exp_file_path = [
                "ft/__init__",
                "ft/onto/__init__",
                "ft/onto/example_import_ontology",
            ]
            exp_files = sorted(
                [
                    f"{os.path.join(folder_path, file)}.py"
                    for file in exp_file_path
                ]
            )

            self.assertEqual(gen_files, exp_files)

            # Now, corrupt one of the init files
            corrupted_path = os.path.join(folder_path, "ft/__init__.py")
            with open(corrupted_path, "w") as f:
                f.write("# ***corrupted file***\n")

            # Re-generate using include_init = False
            self.generator = OntologyCodeGenerator()
            folder_path = self.generator.generate(
                temp_filename, folder_path, is_dry_run=False, include_init=False
            )
            gen_files = sorted(utils.get_generated_files_in_dir(folder_path))

            # Assert the generated python files after removing the corrupted
            # file which should not have been regenerated
            exp_files = [file for file in exp_files if file != corrupted_path]
            self.assertEqual(gen_files, exp_files)
Example #7
        def build_ontology():
            onto_path = "./stave_test_onto"
            res = self._query(
                f'SELECT ontology FROM nlpviewer_backend_project '
                f'WHERE nlpviewer_backend_project.name = '
                f'"{project_name}"').fetchone()[0]
            with tempfile.NamedTemporaryFile('w') as onto_file:
                onto_file.write(res)
                # Flush so the generator reads the complete spec from the
                # named file.
                onto_file.flush()
                OntologyCodeGenerator().generate(onto_file.name,
                                                 onto_path,
                                                 lenient_prefix=True)
            # Make sure the newly created path is in the python path.
            sys.path.append(onto_path)

            # Try to import the newly generated modules (import failures are
            # tolerated).
            try:
                importlib.import_module('edu.cmu')
            except Exception:
                pass
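
Note that reopening a `NamedTemporaryFile` by name while it is still open is not portable (it fails on Windows). A sketch of a more portable variant of the pattern above, assuming the same `res` and `onto_path`:

import os
import tempfile

with tempfile.TemporaryDirectory() as temp_dir:
    spec_file = os.path.join(temp_dir, "onto_spec.json")  # hypothetical name
    with open(spec_file, "w") as f:
        f.write(res)
    OntologyCodeGenerator().generate(spec_file, onto_path, lenient_prefix=True)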
Example #8
class GenerateOntologyTest(unittest.TestCase):
    def setUp(self):
        self.generator = OntologyCodeGenerator()
        self.dir_path = None

        curr_dir = os.path.dirname(__file__)
        self.spec_dir = os.path.join(curr_dir, "test_specs/")
        self.test_output = os.path.join(curr_dir, "test_outputs/")

    def tearDown(self):
        """
        Cleans up any files generated during the test case. Cleanup only runs
        if ontology generation completed successfully.
        """
        if self.dir_path is not None:
            self.generator.cleanup_generated_ontology(self.dir_path,
                                                      is_forced=True)

    @data(('example_ontology',
           ['ft/onto/example_import_ontology', 'ft/onto/example_ontology']),
          ('example_complex_ontology', ['ft/onto/example_complex_ontology']),
          ('example_multi_module_ontology',
           ['ft/onto/ft_module', 'custom/user/custom_module']),
          ('race_qa_onto', ['ft/onto/race_qa_ontology']))
    def test_generated_code(self, value):
        input_file_name, file_paths = value
        file_paths = sorted(file_paths + _get_init_paths(file_paths))

        # Read json and generate code in a file.
        with tempfile.TemporaryDirectory() as tempdir:
            json_file_path = os.path.join(self.spec_dir,
                                          f'{input_file_name}.json')
            folder_path = self.generator.generate(json_file_path,
                                                  tempdir,
                                                  is_dry_run=True)
            self.dir_path = folder_path

            # Reorder code.
            generated_files = sorted(
                utils.get_generated_files_in_dir(folder_path))
            expected_files = [
                f"{os.path.join(folder_path, file)}.py" for file in file_paths
            ]

            self.assertEqual(generated_files, expected_files)

            for i, generated_file in enumerate(generated_files):
                with open(generated_file, 'r') as f:
                    generated_code = f.read()

                # assert if generated code matches with the expected code
                expected_code_path = os.path.join(self.test_output,
                                                  f'{file_paths[i]}.py')
                with open(expected_code_path, 'r') as f:
                    expected_code = f.read()

                self.assertEqual(generated_code, expected_code)

    def test_dry_run_false(self):
        json_file_path = os.path.join(self.spec_dir,
                                      "example_import_ontology.json")
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_filename = _get_temp_filename(json_file_path, temp_dir)
            self.generator.generate(temp_filename, temp_dir, is_dry_run=False)
            folder_path = temp_dir
            for name in ["ft", "onto", "example_import_ontology.py"]:
                self.assertTrue(name in os.listdir(folder_path))
                folder_path = os.path.join(folder_path, name)

    def test_include_and_exclude_init(self):
        json_file_path = os.path.join(self.spec_dir,
                                      "example_import_ontology.json")
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_filename = _get_temp_filename(json_file_path, temp_dir)
            # Test with include_init = True
            folder_path = self.generator.generate(temp_filename,
                                                  temp_dir,
                                                  is_dry_run=False,
                                                  include_init=True)
            gen_files = sorted(utils.get_generated_files_in_dir(folder_path))

            # Assert the generated python files
            exp_file_path = [
                'ft/__init__', 'ft/onto/__init__',
                'ft/onto/example_import_ontology'
            ]
            exp_files = sorted([
                f"{os.path.join(folder_path, file)}.py"
                for file in exp_file_path
            ])

            self.assertEqual(gen_files, exp_files)

            # Now, corrupt one of the init files
            corrupted_path = os.path.join(folder_path, 'ft/__init__.py')
            with open(corrupted_path, 'w') as f:
                f.write('# ***corrupted file***\n')

            # Re-generate using include_init = False
            self.generator = OntologyCodeGenerator()
            folder_path = self.generator.generate(temp_filename,
                                                  folder_path,
                                                  is_dry_run=False,
                                                  include_init=False)
            gen_files = sorted(utils.get_generated_files_in_dir(folder_path))

            # Assert the generated python files after removing the corrupted
            # file which should not have been regenerated
            exp_files = [file for file in exp_files if file != corrupted_path]
            self.assertEqual(gen_files, exp_files)

    @data(
        (True, 'test_duplicate_entry.json', DuplicateEntriesWarning),
        (True, 'test_duplicate_attr_name.json', DuplicatedAttributesWarning),
        (False, 'example_ontology.json', OntologySourceNotFoundException),
        (False, 'test_invalid_parent.json', ParentEntryNotSupportedException),
        (False, 'test_invalid_attribute.json', TypeNotDeclaredException),
        (False, 'test_nested_item_type.json', UnsupportedTypeException),
        (False, 'test_no_item_type.json', TypeNotDeclaredException),
        (False, 'test_unknown_item_type.json', TypeNotDeclaredException))
    def test_warnings_errors(self, value):
        expected_warning, file, msg_type = value
        temp_dir = tempfile.mkdtemp()
        json_file_name = os.path.join(self.spec_dir, file)
        temp_filename = _get_temp_filename(json_file_name, temp_dir)
        if expected_warning:
            with warnings.catch_warnings(record=True) as w:
                warnings.simplefilter("always")
                self.generator.generate(temp_filename,
                                        temp_dir,
                                        is_dry_run=True)
                self.assertEqual(len(w), 1)
                self.assertTrue(issubclass(w[0].category, msg_type))
        else:
            with self.assertRaises(msg_type):
                self.generator.generate(temp_filename,
                                        temp_dir,
                                        is_dry_run=True)

    @log_capture()
    def test_directory_already_present(self):
        json_file_path = os.path.join(self.spec_dir,
                                      "example_import_ontology.json")

        with tempfile.TemporaryDirectory() as temp_dir:
            os.mkdir(os.path.join(temp_dir, "ft"))
            temp_filename = _get_temp_filename(json_file_path, temp_dir)
            with LogCapture() as lc:
                self.generator.generate(temp_filename, temp_dir, False)
                lc.check_present(
                    ('root', 'WARNING',
                     f'The directory with the name ft is already present in '
                     f'{temp_dir}. New files will be merge into the existing '
                     f'directory.'))

    def test_top_ontology_parsing_imports(self):
        temp_dir = tempfile.mkdtemp()
        temp_filename = os.path.join(temp_dir, 'temp.py')
        sys.path.append(temp_dir)
        with open(temp_filename, 'w') as temp_file:
            temp_file.write('import os.path\n'
                            'import os.path as os_path\n'
                            'from os import path\n')
        temp_module = importlib.import_module('temp')

        manager = ImportManager(None, None)

        gen = OntologyCodeGenerator()
        gen.initialize_top_entries(manager, temp_module)

        imports = manager.get_import_statements()

        expected_imports = ["from os import path"]

        self.assertListEqual(imports, expected_imports)

    @data("example_ontology.json", "example_import_ontology.json",
          "example_multi_module_ontology.json",
          "example_complex_ontology.json", "test_unknown_item_type.json")
    def test_valid_json(self, input_filepath):
        input_filepath = os.path.join(self.spec_dir, input_filepath)
        utils.validate_json_schema(input_filepath)

    @data(("test_duplicate_attribute.json", "non-unique elements"),
          ("test_additional_properties.json",
           "Additional properties are not allowed"))
    def test_invalid_json(self, value):
        input_filepath, error_msg = value
        input_filepath = os.path.join(self.spec_dir, input_filepath)
        with self.assertRaises(jsonschema.exceptions.ValidationError) as cm:
            utils.validate_json_schema(input_filepath)
        self.assertTrue(error_msg in cm.exception.args[0])
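
The `@data` decorators in this class come from the `ddt` package and only expand into separate test cases when the class carries the `@ddt` decorator; `LogCapture` and `@log_capture` come from `testfixtures`. A sketch of the third-party imports the class appears to rely on (Forte-internal imports omitted):

import unittest

import jsonschema
from ddt import data, ddt
from testfixtures import LogCapture, log_capture


@ddt
class GenerateOntologyTest(unittest.TestCase):
    ...  # test methods as above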
Example #9
    def initialize(self) -> "Pipeline":
        """
        This function should be called before the pipeline is used to process
        actual data. It calls the `initialize` method of every component in
        this pipeline.

        Returns:
            The pipeline itself.
        """
        # Create an `EntryTree` object to hold the entry tree parsed from the
        # ontology specification file supplied via the resource, then store
        # the result back in the resource under the key `merged_entry_tree`.
        merged_entry_tree = EntryTree()
        if self.resource.get("onto_specs_path"):
            OntologyCodeGenerator().parse_schema_for_no_import_onto_specs_file(
                ontology_path=self.resource.get("onto_specs_path"),
                ontology_dict=self.resource.get("onto_specs_dict"),
                merged_entry_tree=merged_entry_tree,
            )
            self.resource.update(merged_entry_tree=merged_entry_tree)

        # The process manager needs to be assigned first.
        self._proc_mgr = ProcessManager(len(self._components))

        if self._initialized:
            # The pipeline has already been initialized, so we are doing
            # re-initialization here.
            logging.info("Re-initializing the Pipeline.")

        # Reset the flags of the components before initializing them.
        self._reader.reset_flags()
        for c in self._components:
            c.reset_flags()

        # Handle the reader.
        if not self._reader.is_initialized:
            self._reader.initialize(self.resource, self._reader_config)
        else:
            logging.info(
                "The reader [%s] has already initialized, "
                "will skip its initialization.",
                self._reader.name,
            )

        self.reader.enforce_consistency(enforce=self._check_type_consistency)

        # Handle other components.
        self.initialize_components()
        self._initialized = True

        # Create profiler
        if self._enable_profiling:
            self.reader.set_profiling(True)
            self._profiler = [0.0] * len(self.components)

        # Check record types and attributes of each pipeline component
        if self._do_init_type_check:
            current_records: Dict[str, Set[str]] = {}
            self._reader.record(current_records)
            for component in self.components:
                if hasattr(component, "expected_types_and_attributes"):
                    record_types_and_attributes_check(
                        component.expected_types_and_attributes(),  # type: ignore
                        current_records,
                    )
                if hasattr(component, "record"):
                    component.record(current_records)  # type: ignore

        return self
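
Since `initialize` returns `self`, it composes with pipeline construction. A minimal usage sketch, assuming the chaining style where `set_reader` and `add` also return the pipeline; the reader and processor instances are placeholders:

from forte.pipeline import Pipeline

pipeline = (
    Pipeline()
    .set_reader(my_reader)   # placeholder reader instance
    .add(my_processor)       # placeholder processor instance
    .initialize()            # calls initialize() on every component
)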