Ejemplo n.º 1
0
    def test_write_read_graph_to_events_two(self):
        """This loads two different graphs corresponding to two different URLs.
        They must be properly reloaded. """
        # Each case: (input RDF/XML file, URL used as context, expected triple count).
        cases = [
            ("tests/input_test_data/test_events_enumerate_CIM_Process.xml",
             "http://vps516494.ovh.net/Survol/survol/sources_types/enumerate_CIM_Process.py?xid=.",
             682),
            ("tests/input_test_data/test_events_enumerate_python_package.xml",
             "http://vps516494.ovh.net/Survol/survol/sources_types/test_events_enumerate_python_package.py?xid=.",
             1090),
        ]

        # First store every graph under its own URL context.
        written_counts = []
        for xml_path, context_url, expected_count in cases:
            input_graph = rdflib.Graph().parse(xml_path)
            written = lib_kbase.write_graph_to_events(context_url, input_graph)
            self.assertEqual(written, expected_count)
            written_counts.append(written)

        # Then each context must be reloadable independently, with the same count.
        for (xml_path, context_url, expected_count), written in zip(cases, written_counts):
            output_graph = rdflib.Graph()
            read_back = lib_kbase.read_events_to_graph(context_url, output_graph)
            self.assertEqual(written, read_back)
Ejemplo n.º 2
0
    def test_sqlalchemy_sqlite_write_two_urls(self):
        """Stores dummy graphs under two URLs, then reads each context back,
        checking the global events count shrinks accordingly."""

        # Flush any leftover events so the counts below start from a clean store.
        lib_kbase.retrieve_all_events_to_graph_then_clear(rdflib.Graph())

        size_a = 1000
        size_b = 2000
        url_a = "http://dummy.xyz/url_a"
        url_b = "http://dummy.xyz/url_b"

        written_a = lib_kbase.write_graph_to_events(url_a, _create_dummy_graph(size_a))
        self.assertEqual(lib_kbase.events_count(), size_a)

        written_b = lib_kbase.write_graph_to_events(url_b, _create_dummy_graph(size_b))
        self.assertEqual(lib_kbase.events_count(), size_a + size_b)

        # Reading a context consumes its events: the first context disappears ...
        read_a = lib_kbase.read_events_to_graph(url_a, rdflib.Graph())
        self.assertEqual(written_a, read_a)
        self.assertEqual(lib_kbase.events_count(), size_b)

        # ... and after the second context is read, the store is empty.
        read_b = lib_kbase.read_events_to_graph(url_b, rdflib.Graph())
        self.assertEqual(written_b, read_b)
        self.assertEqual(lib_kbase.events_count(), 0)
Ejemplo n.º 3
0
    def test_read_graph_to_events_twice(self):
        """Writes then reads twice a RDF graph and its URL as a context.
        The second time, it must be empty."""

        test_url = "http://vps516494.ovh.net/Survol/survol/sources_types/enumerate_CIM_Process.py?xid=."

        # The input file is format="application/rdf+xml".
        input_graph = rdflib.Graph().parse(
            "tests/input_test_data/test_events_enumerate_CIM_Process.xml")

        written_count = lib_kbase.write_graph_to_events(test_url, input_graph)
        self.assertEqual(written_count, 682)

        # The first read returns everything which was written under this URL ...
        first_read_count = lib_kbase.read_events_to_graph(test_url, rdflib.Graph())
        self.assertEqual(written_count, first_read_count)

        # ... and consumes it, so a second read must yield nothing.
        second_read_count = lib_kbase.read_events_to_graph(test_url, rdflib.Graph())
        self.assertEqual(0, second_read_count)
Ejemplo n.º 4
0
    def test_pure_memory_write_two_urls_plus_none(self):
        """Writes events under two URL contexts plus the default (None) context,
        then reads them back context by context, checking the events counts."""

        # Start from an empty events store.
        graph_cleanup = rdflib.Graph()
        lib_kbase.retrieve_all_events_to_graph_then_clear(graph_cleanup)

        triples_count_a = 10
        test_graph_input_a = _create_dummy_graph(triples_count_a)
        test_url_a = "http://dummy.xyz/url_a"
        count_events_url_a = lib_kbase.write_graph_to_events(
            test_url_a, test_graph_input_a)
        self.assertEqual(count_events_url_a, triples_count_a)
        actual_events_count_1 = lib_kbase.events_count()
        self.assertEqual(actual_events_count_1, triples_count_a)

        triples_count_b = 20
        test_graph_input_b = _create_dummy_graph(triples_count_b)
        test_url_b = "http://dummy.xyz/url_b"
        count_events_url_b = lib_kbase.write_graph_to_events(
            test_url_b, test_graph_input_b)
        # Dummy graphs share their leading triples, so the store keeps the union:
        # the count is the size of the largest graph written so far.
        self.assertEqual(count_events_url_b,
                         max(triples_count_a, triples_count_b))
        actual_events_count_2 = lib_kbase.events_count()
        self.assertEqual(actual_events_count_2, count_events_url_b)

        # A None URL stores the triples in the default context.
        triples_count_z = 100
        test_graph_input_z = _create_dummy_graph(triples_count_z)
        count_events_url_z = lib_kbase.write_graph_to_events(
            None, test_graph_input_z)
        self.assertEqual(
            count_events_url_z,
            max(triples_count_a, triples_count_b, triples_count_z))
        actual_events_count_3 = lib_kbase.events_count()
        # BUG FIX: the original line compared count_events_url_z with itself,
        # a tautology which never checked anything. Mirror the check made after
        # writing url_b: the store count must match the last returned count.
        self.assertEqual(actual_events_count_3, count_events_url_z)

        test_graph_output_a = rdflib.Graph()
        count_events_output_a = lib_kbase.read_events_to_graph(
            test_url_a, test_graph_output_a)
        self.assertEqual(count_events_url_a, count_events_output_a)
        actual_events_count_3 = lib_kbase.events_count()
        self.assertEqual(actual_events_count_3,
                         max(triples_count_b, triples_count_z))

        test_graph_output_b = rdflib.Graph()
        count_events_output_b = lib_kbase.read_events_to_graph(
            test_url_b, test_graph_output_b)
        self.assertEqual(count_events_url_b, count_events_output_b)
        actual_events_count_4 = lib_kbase.events_count()
        self.assertEqual(actual_events_count_4, triples_count_z)

        # Retrieving everything empties the store.
        test_graph_output_z = rdflib.Graph()
        count_events_output_z = lib_kbase.retrieve_all_events_to_graph_then_clear(
            test_graph_output_z)
        self.assertEqual(count_events_url_z, count_events_output_z)
        actual_events_count_5 = lib_kbase.events_count()
        self.assertEqual(actual_events_count_5, 0)
Ejemplo n.º 5
0
    def test_write_read_graph_to_events_one(self):
        """Writes then reads a RDF graph and its URL as a context."""
        test_url = "http://vps516494.ovh.net/Survol/survol/sources_types/enumerate_CIM_Process.py?xid=."

        input_graph = rdflib.Graph().parse(
            "tests/input_test_data/test_events_enumerate_CIM_Process.xml")

        # This test file is known to contain 682 triples.
        written_count = lib_kbase.write_graph_to_events(test_url, input_graph)
        self.assertEqual(written_count, 682)

        # Reading back the same URL must return the same number of events.
        output_graph = rdflib.Graph()
        read_back_count = lib_kbase.read_events_to_graph(test_url, output_graph)
        self.assertEqual(written_count, read_back_count)
Ejemplo n.º 6
0
    def test_write_read_graph_to_events_with_other_nodes(self):
        """Checks that events stored as JSON triples stay in the store after
        a URL context, written separately, has been read back and consumed."""
        # Two updates: a scalar property of a process, and a link between
        # a directory and a file.
        triples_data_set = [
            [("CIM_Process", {"Handle": 123}), "ParentProcessId", 1],
            [
                ("CIM_Directory", {"Name": "/tmp"}),
                # CIM_DirectoryContainsFile.GroupComponent or CIM_DirectoryContainsFile.PartComponent
                "CIM_DirectoryContainsFile",
                ("CIM_DataFile", {"Name": "/tmp/anyfile.tmp"}),
            ],
        ]

        updates_total_number = lib_event.store_events_as_json_triples_list(
            triples_data_set)

        test_url = "http://vps516494.ovh.net/Survol/survol/sources_types/enumerate_CIM_Process.py?xid=."

        input_graph = rdflib.Graph().parse(
            "tests/input_test_data/test_events_enumerate_CIM_Process.xml")

        written_count = lib_kbase.write_graph_to_events(test_url, input_graph)
        self.assertEqual(written_count, 682)

        # Reading the URL context consumes its 682 events.
        output_graph = rdflib.Graph()
        read_back_count = lib_kbase.read_events_to_graph(test_url, output_graph)
        self.assertEqual(written_count, read_back_count)

        # Only the two JSON-triples updates must remain in the store.
        remaining_graph = rdflib.Graph()
        lib_kbase.retrieve_all_events_to_graph_then_clear(remaining_graph)
        print("files_updates_total_number=", updates_total_number)
        print("len(triples_list)=", len(remaining_graph))
        self.assertEqual(updates_total_number, 2)
        self.assertEqual(len(remaining_graph), 2)
Ejemplo n.º 7
0
    def __init__(self,
                 parameters=None,
                 can_process_remote=False,
                 layout_style="",
                 collapsed_properties=None):
        """Initializes a Survol script object from its CGI execution context.

        Parses the calling URL into an entity (class, id, host), stores the
        display parameters, creates or fetches the RDF graph, and - for
        scripts whose basename starts with "events_feeder_" - manages the
        associated events-feeder daemon (possibly exiting the process after
        serving stored events).

        :param parameters: optional dict of the script's CGI parameters.
        :param can_process_remote: whether this script can process remote hosts.
        :param layout_style: display layout hint, stored as-is.
        :param collapsed_properties: properties displayed collapsed, stored as-is.
        """
        # It is possible to run these scripts as CGI scripts, so this transforms
        # command line arguments into CGI arguments. This is very helpful for debugging.

        # The HTTP server can set the logging level with the environment variable SURVOL_LOGGING_LEVEL.
        try:
            logging_level = os.environ["SURVOL_LOGGING_LEVEL"]
            logging.getLogger().setLevel(logging_level)
            logging.info("logging_level set with SURVOL_LOGGING_LEVEL=%s" % logging_level)
        except KeyError:
            # No override requested: keep the current logging level.
            logging.info("logging_level is not forced with SURVOL_LOGGING_LEVEL.")

        lib_command_line.command_line_to_cgi_args()
        assert "QUERY_STRING" in os.environ

        # Some limitations of cgiserver.py and Python2:
        # TODO: When running from cgiserver.py, and if QUERY_STRING is finished by a dot ".", this dot
        # TODO: is removed. Workaround: Any CGI variable added after.
        # TODO: Also: Several slashes "/" are merged into one.
        # TODO: Example: "xid=http://192.168.1.83:5988/." becomes "xid=http:/192.168.1.83:5988/"
        # TODO: ... or "xx.py?xid=smbshr.Id=////WDMyCloudMirror///jsmith" ...
        # TODO: ... becomes "xx.py?xid=smbshr.Id=/WDMyCloudMirror/jsmith"
        # TODO: Replace by "xid=http:%2F%2F192.168.1.83:5988/."

        # Display mode, e.g. "edit" or "daemon"; tested further down.
        mode = lib_util.GuessDisplayMode()
        logging.debug("mode=%s" % mode)

        # Contains the optional arguments of the script, entered as CGI arguments.
        self.m_parameters = parameters if parameters else {}

        self.m_parameterized_links = dict()

        self.m_layout_style = layout_style
        self.m_collapsed_properties = collapsed_properties if collapsed_properties else []

        # When in merge mode, the display parameters must be stored in a place accessible by the graph.

        doc_modu_all = _get_calling_module_doc()

        # Take only the first non-empty line. See lib_util.FromModuleToDoc()
        self.m_page_title, self.m_page_subtitle = lib_util.SplitTextTitleRest(doc_modu_all)

        # Title page contains __doc__ plus object label.

        # Example: REQUEST_URI=/Survol/survol/print_environment_variables.py
        # This does NOT contain the host and the port, which implies a confusion if several Survol agents
        # use the same database. It makes sense, because the result should not depend on the agent.
        self.m_calling_url = lib_util.RequestUri()
        self.m_url_without_mode = lib_util.url_mode_replace(self.m_calling_url, "")

        full_title, entity_class, entity_id, entity_host = lib_naming.parse_entity_uri_with_host(
            self.m_calling_url,
            long_display=False,
            force_entity_ip_addr=None)
        # Here, the commas separating the CGI arguments are intact, but the commas in the arguments are encoded.
        entity_id_dict = lib_util.SplitMoniker(entity_id)

        self._concatenate_entity_documentation(full_title, entity_class, entity_id)

        # Global CanProcessRemote has precedence over parameter can_process_remote
        # which should probably be deprecated, although they do not have exactly the same role:
        # * Global CanProcessRemote is used by entity.py to display scripts which have this capability.
        # * Parameter can_process_remote is used to inform, at execution time, of this capability.
        # Many scripts are not enumerated by entity.py so a global CanProcessRemote is not necessary.
        # For clarity, it might be fine to replace the parameter can_process_remote by the global value.
        # There cannot be nasty consequences except that some scripts might not be displayed
        # when they should be, and vice-versa.
        try:
            globalCanProcessRemote = globals()["CanProcessRemote"]
        except KeyError:
            globalCanProcessRemote = False

        if can_process_remote != globalCanProcessRemote:
            # Inconsistency between the global flag and the parameter: the
            # capability is granted anyway, which is not an issue (see above).
            can_process_remote = True

        self.m_can_process_remote = can_process_remote

        self.m_arguments = cgi.FieldStorage()

        self.m_entity_type = entity_class
        self.m_entity_id = entity_id
        self.m_entity_host = entity_host
        self.m_entity_id_dict = entity_id_dict

        self._create_or_get_graph()

        # Depending on the caller module, maybe the arguments should be 64decoded. See "sql/query".
        # As the entity type is available, it is possible to import it and check if it encodes its arguments.
        # See presence of source_types.sql.query.DecodeCgiArg(keyWord,cgiArg) for example.

        # This is probably too generous to indicate a local host.
        self.test_remote_if_possible(can_process_remote)

        if mode == "edit":
            # enter_edition_mode() displays an HTML form and is not expected to return.
            self.enter_edition_mode()
            logging.critical("Should not be here because the HTML form is displayed.")
            assert False

        # Scripts which can run as events feeders must have their name starting with "events_feeder_".
        # This allows to use CGI programs as events generators not written in Python.
        # TODO: Using the script name is enough, the module is not necessary.
        full_script_path, _, _ = self.m_calling_url.partition("?")
        script_basename = os.path.basename(full_script_path)
        daemonizable_script = os.path.basename(script_basename).startswith("events_feeder_")

        if not daemonizable_script:
            # This would be absurd to have a normal CGI script started in this mode.
            assert mode != "daemon", "Script is not an events generator:" + self.m_calling_url
            # Runs as usual as a CGI script. The script will fill the graph.
            return

        # The events graph must be specified because, from here, everything will access the events graph.
        set_events_credentials()

        # Maybe this is in the daemon.
        if mode == "daemon":
            # Just runs as usual. At the end of the script, OutCgiRdf will write the RDF graph in the events.
            # Here, this process is started by the supervisor process; It is not started by the HTTP server,
            # in CGI or WSGI.
            return

        try:
            # This may throw "[Errno 111] Connection refused"
            is_daemon_running = lib_daemon.is_events_feeder_daemon_running(self.m_url_without_mode)
        except Exception as exc:
            # Then display the content in snapshot mode, which is better than nothing.
            self.report_error_message("Cannot start daemon, caught:%s\n" % exc)
            logging.error("Cannot start daemon: When getting daemon status, caught:%s" % exc)
            return

        if not is_daemon_running:
            # This is the case of a daemonizable script, normally run.
            # TODO: Slight ambiguity here: The daemon could be intentionally stopped, and the user
            # TODO: would like to see the existing events stored in the persistent triplestore,
            # TODO: without restarting the daemon. We do not know how to do this yet.
            lib_daemon.start_events_feeder_daemon(self.m_url_without_mode)
            # After that, whether the daemon dedicated to the script and its parameters is started or not,
            # the script is then executed in normal, snapshot mode, as a CGI script.
        else:
            # Events are probably stored in the big events graph. The host and port are not used in the URL.
            lib_kbase.read_events_to_graph(self.m_url_without_mode, self.m_graph)

            # TODO: The layout parameters and any other display parameters of the calling script
            # TODO: must be in the constructor.
            # TODO: This, because the rest of the script is not executed.
            self.OutCgiRdf()

            # The rest of the script must not be executed because daemon scripts are organised so that
            # when the daemon is started, it writes all events in the database, to be read by the same script
            # run in CGI or WSGI.
            # The snapshot part of a daemon script is executed only when the daemon is not started.
            logging.info("Events are read from the events database because the deamon is running.")
            if _is_wsgi():
                logging.info("Leaving the execution of the script run in a WSGI server.")
                # This is not an error.
            else:
                logging.info("Exiting the process of the script run in snapshot mode and CGI server.")
            # This raises SystemExit which can be handled.
            exit(0)