def test_write_read_graph_to_events_two(self):
    """Stores two graphs under two distinct URLs, then reloads each one.

    Each URL acts as a separate context: reading a URL back must return
    exactly the number of triples that were written for that URL.
    """
    url_a = "http://vps516494.ovh.net/Survol/survol/sources_types/enumerate_CIM_Process.py?xid=."
    graph_in_a = rdflib.Graph().parse(
        "tests/input_test_data/test_events_enumerate_CIM_Process.xml")

    url_b = "http://vps516494.ovh.net/Survol/survol/sources_types/test_events_enumerate_python_package.py?xid=."
    graph_in_b = rdflib.Graph().parse(
        "tests/input_test_data/test_events_enumerate_python_package.xml")

    # Store both graphs, each under its own URL.
    written_a = lib_kbase.write_graph_to_events(url_a, graph_in_a)
    self.assertEqual(written_a, 682)

    written_b = lib_kbase.write_graph_to_events(url_b, graph_in_b)
    self.assertEqual(written_b, 1090)

    # Reading back by URL must return exactly what was written for that URL.
    graph_out_a = rdflib.Graph()
    read_a = lib_kbase.read_events_to_graph(url_a, graph_out_a)
    self.assertEqual(written_a, read_a)

    graph_out_b = rdflib.Graph()
    read_b = lib_kbase.read_events_to_graph(url_b, graph_out_b)
    self.assertEqual(written_b, read_b)
def test_sqlalchemy_sqlite_write_two_urls(self):
    """Writes two dummy graphs under two URLs, then drains them one by one,
    checking the global events count after each step."""
    # Start from an empty events store.
    graph_cleanup = rdflib.Graph()
    lib_kbase.retrieve_all_events_to_graph_then_clear(graph_cleanup)

    size_a = 1000
    url_a = "http://dummy.xyz/url_a"
    written_a = lib_kbase.write_graph_to_events(url_a, _create_dummy_graph(size_a))
    # The store now holds exactly the first graph.
    self.assertEqual(lib_kbase.events_count(), size_a)

    size_b = 2000
    url_b = "http://dummy.xyz/url_b"
    written_b = lib_kbase.write_graph_to_events(url_b, _create_dummy_graph(size_b))
    # Both graphs accumulate in the store.
    self.assertEqual(lib_kbase.events_count(), size_a + size_b)

    # Reading the first URL consumes its events only.
    output_a = rdflib.Graph()
    self.assertEqual(written_a, lib_kbase.read_events_to_graph(url_a, output_a))
    self.assertEqual(lib_kbase.events_count(), size_b)

    # Reading the second URL empties the store.
    output_b = rdflib.Graph()
    self.assertEqual(written_b, lib_kbase.read_events_to_graph(url_b, output_b))
    self.assertEqual(lib_kbase.events_count(), 0)
def test_read_graph_to_events_twice(self):
    """Writes then reads twice a RDF graph and its URL as a context.
    The second time, it must be empty."""
    input_url = "http://vps516494.ovh.net/Survol/survol/sources_types/enumerate_CIM_Process.py?xid=."
    # This is the format="application/rdf+xml"
    input_graph = rdflib.Graph().parse(
        "tests/input_test_data/test_events_enumerate_CIM_Process.xml")

    written_count = lib_kbase.write_graph_to_events(input_url, input_graph)
    self.assertEqual(written_count, 682)

    # The first read returns everything that was written for this URL.
    first_output = rdflib.Graph()
    first_count = lib_kbase.read_events_to_graph(input_url, first_output)
    self.assertEqual(written_count, first_count)

    # The first read consumed the events, so a second read yields nothing.
    second_output = rdflib.Graph()
    second_count = lib_kbase.read_events_to_graph(input_url, second_output)
    self.assertEqual(0, second_count)
def test_pure_memory_write_two_urls_plus_none(self):
    """Writes two graphs under two URLs plus one graph with no URL, then
    reads everything back, checking the events count at each step.

    Fixes two defects of the previous version:
    - the check after writing the URL-less graph compared
      count_events_url_z with itself, a tautology that could never fail;
      it now compares the actual store size with the returned count,
      consistently with the other write steps of this test;
    - the local name actual_events_count_3 was assigned twice, shadowing
      the earlier check's value; the counters are now numbered 1..6.
    """
    # Start from an empty events store.
    graph_cleanup = rdflib.Graph()
    lib_kbase.retrieve_all_events_to_graph_then_clear(graph_cleanup)

    # First graph, stored under its own URL.
    triples_count_a = 10
    test_graph_input_a = _create_dummy_graph(triples_count_a)
    test_url_a = "http://dummy.xyz/url_a"
    count_events_url_a = lib_kbase.write_graph_to_events(
        test_url_a, test_graph_input_a)
    self.assertEqual(count_events_url_a, triples_count_a)
    actual_events_count_1 = lib_kbase.events_count()
    self.assertEqual(actual_events_count_1, triples_count_a)

    # Second graph, stored under another URL.
    triples_count_b = 20
    test_graph_input_b = _create_dummy_graph(triples_count_b)
    test_url_b = "http://dummy.xyz/url_b"
    count_events_url_b = lib_kbase.write_graph_to_events(
        test_url_b, test_graph_input_b)
    # NOTE(review): the pure-memory backend apparently reports the largest
    # per-context count, hence max() rather than a sum — confirmed by the
    # original expected values throughout this test.
    self.assertEqual(count_events_url_b, max(triples_count_a, triples_count_b))
    actual_events_count_2 = lib_kbase.events_count()
    self.assertEqual(actual_events_count_2, count_events_url_b)

    # Third graph, stored with no URL at all.
    triples_count_z = 100
    test_graph_input_z = _create_dummy_graph(triples_count_z)
    count_events_url_z = lib_kbase.write_graph_to_events(
        None, test_graph_input_z)
    self.assertEqual(
        count_events_url_z,
        max(triples_count_a, triples_count_b, triples_count_z))
    actual_events_count_3 = lib_kbase.events_count()
    # Fixed: the original compared count_events_url_z with itself.
    self.assertEqual(actual_events_count_3, count_events_url_z)

    # Reading the first URL consumes its events only.
    test_graph_output_a = rdflib.Graph()
    count_events_output_a = lib_kbase.read_events_to_graph(
        test_url_a, test_graph_output_a)
    self.assertEqual(count_events_url_a, count_events_output_a)
    actual_events_count_4 = lib_kbase.events_count()
    self.assertEqual(actual_events_count_4,
                     max(triples_count_b, triples_count_z))

    # Reading the second URL leaves only the URL-less events.
    test_graph_output_b = rdflib.Graph()
    count_events_output_b = lib_kbase.read_events_to_graph(
        test_url_b, test_graph_output_b)
    self.assertEqual(count_events_url_b, count_events_output_b)
    actual_events_count_5 = lib_kbase.events_count()
    self.assertEqual(actual_events_count_5, triples_count_z)

    # Events stored without a URL are only returned by the global retrieval,
    # which also clears the store.
    test_graph_output_z = rdflib.Graph()
    count_events_output_z = lib_kbase.retrieve_all_events_to_graph_then_clear(
        test_graph_output_z)
    self.assertEqual(count_events_url_z, count_events_output_z)
    actual_events_count_6 = lib_kbase.events_count()
    self.assertEqual(actual_events_count_6, 0)
def test_write_read_graph_to_events_one(self):
    """Writes then reads a RDF graph and its URL as a context."""
    source_url = "http://vps516494.ovh.net/Survol/survol/sources_types/enumerate_CIM_Process.py?xid=."
    source_graph = rdflib.Graph().parse(
        "tests/input_test_data/test_events_enumerate_CIM_Process.xml")

    stored_triples = lib_kbase.write_graph_to_events(source_url, source_graph)
    self.assertEqual(stored_triples, 682)

    # Reading back with the same URL must return all stored triples.
    result_graph = rdflib.Graph()
    reloaded_triples = lib_kbase.read_events_to_graph(source_url, result_graph)
    self.assertEqual(stored_triples, reloaded_triples)
def test_write_read_graph_to_events_with_other_nodes(self):
    """Stores JSON triples as events, writes and reads back a graph keyed
    by its URL, then checks that only the JSON-based events remain."""
    # Two updates: a process attribute and a directory/file association.
    triples_data_set = [
        [("CIM_Process", {"Handle": 123}), "ParentProcessId", 1],
        [
            ("CIM_Directory", {"Name": "/tmp"}),
            # CIM_DirectoryContainsFile.GroupComponent or CIM_DirectoryContainsFile.PartComponent
            "CIM_DirectoryContainsFile",
            ("CIM_DataFile", {"Name": "/tmp/anyfile.tmp"})
        ]
    ]
    updates_total_number = lib_event.store_events_as_json_triples_list(
        triples_data_set)

    graph_url = "http://vps516494.ovh.net/Survol/survol/sources_types/enumerate_CIM_Process.py?xid=."
    parsed_graph = rdflib.Graph().parse(
        "tests/input_test_data/test_events_enumerate_CIM_Process.xml")
    stored_count = lib_kbase.write_graph_to_events(graph_url, parsed_graph)
    self.assertEqual(stored_count, 682)

    # Reading by URL only returns the triples written for that URL,
    # not the JSON-based events.
    reloaded_graph = rdflib.Graph()
    reloaded_count = lib_kbase.read_events_to_graph(graph_url, reloaded_graph)
    self.assertEqual(stored_count, reloaded_count)

    # Only the two JSON-based events must remain in the store.
    new_graph = rdflib.Graph()
    lib_kbase.retrieve_all_events_to_graph_then_clear(new_graph)
    print("files_updates_total_number=", updates_total_number)
    print("len(triples_list)=", len(new_graph))
    self.assertEqual(updates_total_number, 2)
    self.assertEqual(len(new_graph), 2)
def __init__(self, parameters=None, can_process_remote=False, layout_style="", collapsed_properties=None):
    """Initializes the output machine from the CGI environment.

    :param parameters: Optional dict of the script's CGI parameters (None means empty).
    :param can_process_remote: True if the script can process a remote entity;
        overridden to True if the global CanProcessRemote differs.
    :param layout_style: Layout hint stored as-is in m_layout_style.
    :param collapsed_properties: Properties stored as-is in m_collapsed_properties
        (None means empty list).

    Control flow depends on the display mode and on whether the script name
    starts with "events_feeder_":
    - mode "edit": enters edition mode and asserts (does not return normally);
    - non events-feeder script: returns and runs as a plain CGI script;
    - mode "daemon": returns so the script runs normally and stores its graph
      as events at the end;
    - otherwise: starts the feeder daemon if it is not running, or reads the
      stored events, renders them, and may exit(0) (raises SystemExit).
    """
    # It is possible to run these scripts as CGI scripts, so this transforms
    # command line arguments into CGI arguments. This is very helpful for debugging.

    # The HTTP server can set the logging level with the environment variable SURVOL_LOGGING_LEVEL.
    try:
        logging_level = os.environ["SURVOL_LOGGING_LEVEL"]
        logging.getLogger().setLevel(logging_level)
        logging.info("logging_level set with SURVOL_LOGGING_LEVEL=%s" % logging_level)
    except KeyError:
        logging.info("logging_level is not forced with SURVOL_LOGGING_LEVEL.")

    lib_command_line.command_line_to_cgi_args()
    # From here on, the script behaves as a CGI script even when started
    # from the command line.
    assert "QUERY_STRING" in os.environ

    # Some limitations of cgiserver.py and Python2:
    # TODO: When running from cgiserver.py, and if QUERY_STRING is finished by a dot ".", this dot
    # TODO: is removed. Workaround: Any CGI variable added after.
    # TODO: Also: Several slashes "/" are merged into one.
    # TODO: Example: "xid=http://192.168.1.83:5988/." becomes "xid=http:/192.168.1.83:5988/"
    # TODO: ... or "xx.py?xid=smbshr.Id=////WDMyCloudMirror///jsmith" ...
    # TODO: ... becomes "xx.py?xid=smbshr.Id=/WDMyCloudMirror/jsmith"
    # TODO: Replace by "xid=http:%2F%2F192.168.1.83:5988/."
    mode = lib_util.GuessDisplayMode()
    logging.debug("mode=%s" % mode)

    # Contains the optional arguments of the script, entered as CGI arguments..
    self.m_parameters = parameters if parameters else {}
    self.m_parameterized_links = dict()
    self.m_layout_style = layout_style
    self.m_collapsed_properties = collapsed_properties if collapsed_properties else []

    # When in merge mode, the display parameters must be stored in a place accessible by the graph.
    doc_modu_all = _get_calling_module_doc()

    # Take only the first non-empty line. See lib_util.FromModuleToDoc()
    self.m_page_title, self.m_page_subtitle = lib_util.SplitTextTitleRest(doc_modu_all)

    # Title page contains __doc__ plus object label.
    # Example: REQUEST_URI=/Survol/survol/print_environment_variables.py
    # This does NOT contain the host and the port, which implies a confusion if severl Survol agents
    # use the same database. It makes sense, because the result should not depend in the agent.
    self.m_calling_url = lib_util.RequestUri()
    # Same URL with the display-mode CGI argument stripped: this is the key
    # used to identify the events feeder daemon and its stored events.
    self.m_url_without_mode = lib_util.url_mode_replace(self.m_calling_url, "")

    full_title, entity_class, entity_id, entity_host = lib_naming.parse_entity_uri_with_host(
        self.m_calling_url, long_display=False, force_entity_ip_addr=None)

    # Here, the commas separating the CGI arguments are intact, but the commas in the arguments are encoded.
    entity_id_dict = lib_util.SplitMoniker(entity_id)

    self._concatenate_entity_documentation(full_title, entity_class, entity_id)

    # Global CanProcessRemote has precedence over parameter can_process_remote
    # which should probably be deprecated, although they do not have exactly the same role:
    # * Global CanProcessRemote is used by entity.py to display scripts which have this capability.
    # * Parameter can_process_remote is used to inform, at execution time, of this capability.
    # Many scripts are not enumerated by entity.py so a global CanProcessRemote is not necessary.
    # For clarity, it might be fine to replace the parameter can_process_remote by the global value.
    # There cannot be nasty consequences except that some scripts might not be displayed
    # when they should be, and vice-versa.
    try:
        globalCanProcessRemote = globals()["CanProcessRemote"]
    except KeyError:
        globalCanProcessRemote = False

    if can_process_remote != globalCanProcessRemote:
        # "INCONSISTENCY CanProcessRemote ... which is not an issue.
        can_process_remote = True

    self.m_can_process_remote = can_process_remote

    self.m_arguments = cgi.FieldStorage()

    self.m_entity_type = entity_class
    self.m_entity_id = entity_id
    self.m_entity_host = entity_host
    self.m_entity_id_dict = entity_id_dict

    self._create_or_get_graph()

    # Depending on the caller module, maybe the arguments should be 64decoded. See "sql/query".
    # As the entity type is available, it is possible to import it and check if it encodes it arguments.
    # See presence of source_types.sql.query.DecodeCgiArg(keyWord,cgiArg) for example.

    # This is probably too generous to indicate a local host.
    self.test_remote_if_possible(can_process_remote)

    if mode == "edit":
        self.enter_edition_mode()
        logging.critical("Should not be here because the HTML form is displayed.")
        assert False

    # Scripts which can run as events feeders must have their name starting with "events_feeder_".
    # This allows to use CGI programs as events genetors not written in Python.
    # TODO: Using the script name is enough, the module is not necessary.
    full_script_path, _, _ = self.m_calling_url.partition("?")
    script_basename = os.path.basename(full_script_path)
    daemonizable_script = os.path.basename(script_basename).startswith("events_feeder_")
    if not daemonizable_script:
        # This would be absurd to have a normal CGI script started in this mode.
        assert mode != "daemon", "Script is not an events generator:" + self.m_calling_url
        # Runs as usual as a CGI script. The script will fill the graph.
        return

    # The events graph must be specified because, from here, everything will access the events graph.
    set_events_credentials()
    # Maybe this is in the daemon.
    if mode == "daemon":
        # Just runs as usual. At the end of the script, OutCgiRdf will write the RDF graph in the events.
        # Here, this process is started by the supervisor process; It is not started by the HTTP server,
        # in CGI or WSGI.
        return

    try:
        # This may throw "[Errno 111] Connection refused"
        is_daemon_running = lib_daemon.is_events_feeder_daemon_running(self.m_url_without_mode)
    except Exception as exc:
        # Then display the content in snapshot mode, which is better than nothing.
        self.report_error_message("Cannot start daemon, caught:%s\n" % exc)
        logging.error("Cannot start daemon: When getting daemon status, caught:%s" % exc)
        return

    if not is_daemon_running:
        # This is the case of a daemonizable script, normally run.
        # TODO: Slight ambiguity here: The daemon could be intentionally stopped, and the user
        # TODO: would like to see the existing events stored in the persistent triplestore,
        # TODO: without restarting the daemon. We do not know how to do this yet.
        lib_daemon.start_events_feeder_daemon(self.m_url_without_mode)
        # After that, whether the daemon dedicated to the script and its parameters is started or not,
        # the script is then executed in normal, snapshot mode, as a CGI script.
    else:
        # Events are probably stored in the big events graph. The host and port are not used in the URL.
        lib_kbase.read_events_to_graph(self.m_url_without_mode, self.m_graph)

        # TODO: The layout parameters and any other display parameters of the calling script
        # TODO: must be in the constructor.
        # TODO: This, because the rest of the script is not executed.
        self.OutCgiRdf()

        # The rest of the script must not be executed because daemon scripts are organised so that
        # when the daemon is started, it writes all events in the database, to be read by the same script
        # run in CGI or WSGI.
        # The snapshot part of a daemon script is executed only when the deamon is not started.
        logging.info("Events are read from the events database because the deamon is running.")
        if _is_wsgi():
            logging.info("Leaving the execution of the script run in a WSGI server.")
            # This is not an error.
        else:
            logging.info("Exiting the process of the script run in snapshot mode and CGI server.")
            # This raises SystemExit which can be handled.
            exit(0)