def test_pure_memory_write_read_write_read(self):
    """Interleaves writes and reads of event graphs under several URL contexts."""
    # Start from an empty event store.
    lib_kbase.retrieve_all_events_to_graph_then_clear(rdflib.Graph())

    # First write: 100 dummy triples under url_a.
    size_a = 100
    graph_a = _create_dummy_graph(size_a)
    url_a = "http://dummy.xyz/url_a"
    written_a = lib_kbase.write_graph_to_events(url_a, graph_a)
    self.assertEqual(written_a, size_a)
    self.assertEqual(lib_kbase.events_count(), size_a)

    # Second write: 50 dummy triples under url_b. The store then holds
    # max(size_a, size_b) events (the dummy batches apparently overlap).
    size_b = 50
    graph_b = _create_dummy_graph(size_b)
    url_b = "http://dummy.xyz/url_b"
    written_b = lib_kbase.write_graph_to_events(url_b, graph_b)
    self.assertEqual(written_b, size_b)
    self.assertEqual(lib_kbase.events_count(), max(size_a, size_b))

    # Reading url_a consumes its events.
    output_a = rdflib.Graph()
    read_a = lib_kbase.read_events_to_graph(url_a, output_a)
    self.assertEqual(written_a, read_a)
    self.assertEqual(lib_kbase.events_count(), size_b)

    # Third write: 10 dummy triples without any URL context.
    size_z = 10
    graph_z = _create_dummy_graph(size_z)
    written_z = lib_kbase.write_graph_to_events(None, graph_z)
    self.assertEqual(written_z, size_z)
    self.assertEqual(lib_kbase.events_count(), max(size_b, size_z))

    # Reading url_b consumes its events, leaving only the url-less batch.
    output_b = rdflib.Graph()
    read_b = lib_kbase.read_events_to_graph(url_b, output_b)
    self.assertEqual(written_b, read_b)
    self.assertEqual(lib_kbase.events_count(), size_z)

    # Draining everything empties the store.
    output_z = rdflib.Graph()
    read_z = lib_kbase.retrieve_all_events_to_graph_then_clear(output_z)
    self.assertEqual(written_z, read_z)
    self.assertEqual(lib_kbase.events_count(), 0)
def test_read_graph_to_events_twice(self):
    """Writes then reads twice a RDF graph and its URL as a context. The second time, it must be empty."""
    # This is the format="application/rdf+xml"
    parsed_graph = rdflib.Graph().parse(
        "tests/input_test_data/test_events_enumerate_CIM_Process.xml")
    context_url = "http://vps516494.ovh.net/Survol/survol/sources_types/enumerate_CIM_Process.py?xid=."
    written_count = lib_kbase.write_graph_to_events(context_url, parsed_graph)
    self.assertEqual(written_count, 682)

    # The first read returns every stored triple of this context.
    first_output = rdflib.Graph()
    first_count = lib_kbase.read_events_to_graph(context_url, first_output)
    self.assertEqual(written_count, first_count)

    # The first read consumed the events, so the second read is empty.
    second_output = rdflib.Graph()
    second_count = lib_kbase.read_events_to_graph(context_url, second_output)
    self.assertEqual(0, second_count)
def test_sqlalchemy_write_nourl_read_all_thread(self):
    """Writes into a RDF graph, then reads from a Python thread."""
    expected_count = 1000
    input_graph = _create_dummy_graph(expected_count)
    written_count = lib_kbase.write_graph_to_events(None, input_graph)
    sys.stderr.write(
        "test_sqlalchemy_write_nourl_read_all_thread count_events=%d\n" % written_count)
    self.assertEqual(written_count, expected_count)
    self.assertEqual(written_count, lib_kbase.events_count())

    # The reader thread sends the events back through this shared queue.
    events_queue = queue.Queue()
    reader_thread = _create_thread(written_count, events_queue)

    # Collect the triples sent by the thread.
    received_triples = _read_triples_from_queue(written_count, events_queue)

    # The thread is no longer needed.
    reader_thread.join()

    # The two triple sets must be identical: comparison of the string representations.
    self.assertEqual(
        _graph_to_triples_set(input_graph),
        _graph_to_triples_set(received_triples))
def test_sqlalchemy_sqlite_write_nourl_read_all_subprocess(self):
    """Writes events without a URL context, then reads them back from a subprocess."""
    expected_count = 1000
    input_graph = _create_dummy_graph(expected_count)
    written_count = lib_kbase.write_graph_to_events(None, input_graph)
    self.assertEqual(written_count, expected_count)

    # The reader subprocess sends the events back through this shared queue.
    events_queue = multiprocessing.Queue()
    reader_process = _create_subprocess(written_count, events_queue, self.sqlite_path)

    # Collect the triples sent by the subprocess.
    received_triples = _read_triples_from_queue(written_count, events_queue)

    # The subprocess is no longer needed.
    reader_process.terminate()
    reader_process.join()
    print("Killing pid=", reader_process.pid)

    # The two triple sets must be identical: comparison of the string representations.
    self.assertEqual(
        _graph_to_triples_set(input_graph),
        _graph_to_triples_set(received_triples))
def test_pure_memory_put_duplicate_retrieve_events(self):
    """Stores three overlapping dummy graphs and checks the reported counts."""
    size_a, size_b, size_c = 10, 20, 30
    graph_a = _create_dummy_graph(size_a)
    graph_b = _create_dummy_graph(size_b)
    graph_c = _create_dummy_graph(size_c)

    self.assertEqual(lib_kbase.write_graph_to_events(None, graph_a), size_a)
    # Each write apparently reports the accumulated store size: the dummy
    # batches overlap, so the count is the max of the batch sizes so far.
    self.assertEqual(
        lib_kbase.write_graph_to_events(None, graph_b),
        max(size_a, size_b))
    self.assertEqual(
        lib_kbase.write_graph_to_events(None, graph_c),
        max(size_a, size_b, size_c))
def test_sqlalchemy_sqlite_write_mixed_read_all_subprocess(self):
    """Writes a RDF graph, then reads from another process."""
    # Start from an empty event store.
    lib_kbase.retrieve_all_events_to_graph_then_clear(rdflib.Graph())

    dummy_count = 1000
    graph_without_url = _create_dummy_graph(dummy_count)
    written_without_url = lib_kbase.write_graph_to_events(None, graph_without_url)

    graph_with_url = rdflib.Graph().parse(
        "tests/input_test_data/test_events_tcp_sockets.xml")
    # The URL is not important because it is not accessed.
    # However, it must be correctly handled by rdflib when it creates a UriRef
    context_url = "http://vps516494.ovh.net/Survol/survol/sources_types/Linux/tcp_sockets.py?xid=."
    written_with_url = lib_kbase.write_graph_to_events(context_url, graph_with_url)

    total_written = written_without_url + written_with_url

    # The reader subprocess sends the events back through this shared queue.
    events_queue = multiprocessing.Queue()
    reader_process = _create_subprocess(total_written, events_queue, self.sqlite_path)

    # Collect the triples sent by the subprocess.
    received_triples = _read_triples_from_queue(total_written, events_queue)

    # The subprocess is no longer needed.
    reader_process.terminate()
    reader_process.join()
    print("Killing pid=", reader_process.pid)

    # The union of both inputs must equal what the subprocess returned:
    # comparison of the string representations.
    expected_triples = (
        _graph_to_triples_set(graph_without_url)
        | _graph_to_triples_set(graph_with_url))
    self.assertEqual(expected_triples, _graph_to_triples_set(received_triples))
def test_write_graph_to_events(self):
    """Writes a RDF graph and its URL as a context."""
    parsed_graph = rdflib.Graph().parse(
        "tests/input_test_data/test_events_tcp_sockets.xml")
    # The URL is not important because it is not accessed.
    # However, it must be correctly handled by rdflib when it creates a UriRef
    context_url = "http://vps516494.ovh.net/Survol/survol/sources_types/Linux/tcp_sockets.py?xid=."
    # The fixture file is known to contain 43 triples.
    self.assertEqual(lib_kbase.write_graph_to_events(context_url, parsed_graph), 43)
def _out_cgi_mode(theCgi, top_url, mode, error_msg=None):
    """The result can be sent to the Web browser in several formats.

    :param theCgi: CGI context object carrying the RDF graph and the page parameters.
    :param top_url: URL of the top page, forwarded to the HTML and SVG outputs.
    :param mode: output format: "html", "json", "menu", "rdf", "daemon", "svg" or "" (default is SVG).
    :param error_msg: optional error message forwarded to the output formatters.
    """
    theCgi._bind_identical_nodes()
    grph = theCgi.m_graph
    page_title = theCgi.m_page_title
    parameters = theCgi.m_parameters
    parameterized_links = theCgi.m_parameterized_links
    # Now extract and remove all metadata, also the ones which were already here.
    # They are not left in the graph, because they break some tests.
    collapsed_properties, commutative_properties = lib_properties.extract_properties_metadata(grph)
    # With this, all collapsed properties are in this list.
    # It would not harm to leave them, but some tests which analyse the exact output content would break.
    # Also: the metadata would be visible.
    collapsed_properties.extend(theCgi.m_collapsed_properties)
    if mode == "html":
        # Used rarely and performance not very important. This returns a HTML page.
        lib_export_html.output_rdf_graph_as_html(theCgi, top_url, error_msg, _global_cgi_env_list)
    elif mode == "json":
        lib_export_json.output_rdf_graph_as_json_d3(page_title, error_msg, parameters, grph)
    elif mode == "menu":
        lib_export_json.output_rdf_graph_as_json_menu(page_title, error_msg, parameters, grph)
    elif mode == "rdf":
        lib_export_ontology.output_rdf_graph_as_rdf(grph)
    elif mode == "daemon":
        # Only in this output mode, all meta-data are injected in the graph, to be used at the next output.
        for one_collapsed_property in collapsed_properties:
            lib_properties.add_property_metadata_to_graph(grph, one_collapsed_property, pc.meta_property_collapsed)
        for one_commutative_property in commutative_properties:
            lib_properties.add_property_metadata_to_graph(grph, one_commutative_property, pc.meta_property_commutative)
        # This is the end of a loop, or events transaction, in the script which does not run in CGI context,
        # but in a separate daemon process. This stores the results to the persistent graph database for events.
        try:
            # NOTE(review): triples_count is never used afterwards — presumably kept for debugging; confirm.
            triples_count = lib_kbase.write_graph_to_events(theCgi.m_url_without_mode, theCgi.m_graph)
        except Exception as exc:
            # Log then re-raise: the caller decides how to report the failure.
            logging.error("_out_cgi_mode Exception exc=%s", exc)
            raise
    elif mode in ["svg", ""]:
        # Default mode, because graphviz did not like several CGI arguments in a SVG document (Bug ?),
        # probably because of the ampersand which must be escaped, or had to be in old versions.
        # This test holds because for the moment, all collapsed properties are known in advance.
        # This will be more flexible.
        lib_export_dot.output_rdf_graph_as_svg(page_title, error_msg, parameters, grph,
            parameterized_links, top_url, theCgi.m_layout_style, collapsed_properties, commutative_properties)
    else:
        # Unknown mode: log it and send an HTML error page back to the browser.
        logging.error("_out_cgi_mode invalid mode=%s", mode)
        ErrorMessageHtml("_out_cgi_mode invalid mode=%s" % mode)
def test_pure_memory_insert_write_nourl_read_all_subprocess(self):
    """Writes events without a URL context and drains them all back in one call."""
    expected_count = 100
    input_graph = _create_dummy_graph(expected_count)
    self.assertEqual(
        lib_kbase.write_graph_to_events(None, input_graph),
        expected_count)

    drained_graph = rdflib.Graph()
    lib_kbase.retrieve_all_events_to_graph_then_clear(drained_graph)

    # The two triple sets must be identical: comparison of the string representations.
    self.assertEqual(
        _graph_to_triples_set(input_graph),
        _graph_to_triples_set(drained_graph))
def test_write_read_graph_to_events_one(self):
    """Writes then reads a RDF graph and its URL as a context."""
    parsed_graph = rdflib.Graph().parse(
        "tests/input_test_data/test_events_enumerate_CIM_Process.xml")
    context_url = "http://vps516494.ovh.net/Survol/survol/sources_types/enumerate_CIM_Process.py?xid=."
    written_count = lib_kbase.write_graph_to_events(context_url, parsed_graph)
    # The fixture file is known to contain 682 triples.
    self.assertEqual(written_count, 682)

    readback_graph = rdflib.Graph()
    readback_count = lib_kbase.read_events_to_graph(context_url, readback_graph)
    self.assertEqual(written_count, readback_count)
def test_sqlalchemy_write_url_read_all_only(self):
    """Writes for an URL then reads all."""
    parsed_graph = rdflib.Graph().parse(
        "tests/input_test_data/test_events_tcp_sockets.xml")
    # The URL is not important because it is not accessed.
    # However, it must be correctly handled by rdflib when it creates a UriRef
    context_url = "http://vps516494.ovh.net/Survol/survol/sources_types/Linux/tcp_sockets.py?xid=."
    written_count = lib_kbase.write_graph_to_events(context_url, parsed_graph)
    self.assertEqual(written_count, 43)

    # Draining the whole store must return exactly the written triples.
    drained_graph = rdflib.Graph()
    lib_kbase.retrieve_all_events_to_graph_then_clear(drained_graph)
    self.assertEqual(written_count, len(drained_graph))
def test_sqlalchemy_nourl_events_count(self):
    """Just writes."""
    # Start from an empty event store.
    lib_kbase.retrieve_all_events_to_graph_then_clear(rdflib.Graph())

    parsed_graph = rdflib.Graph().parse(
        "tests/input_test_data/test_events_tcp_sockets.xml")
    written_count = lib_kbase.write_graph_to_events(None, parsed_graph)
    self.assertEqual(written_count, 43)

    stored_count = lib_kbase.events_count()
    self.assertEqual(written_count, stored_count)

    # Draining the store must return exactly as many triples as counted.
    drained_graph = rdflib.Graph()
    lib_kbase.retrieve_all_events_to_graph_then_clear(drained_graph)
    self.assertEqual(len(drained_graph), stored_count)
def Main():
    """CGI entry point: reads a serialized RDF graph from stdin and stores it as events.

    Replies with a JSON status object (timings, triple count or error message)
    written to stdout with minimal CGI headers.
    """
    logging.getLogger().setLevel(logging.DEBUG)
    lib_common.set_events_credentials()
    time_start = time.time()
    # The HTTP client posts the serialized graph in the request body.
    http_content_length = int(os.environ['CONTENT_LENGTH'])
    # https://stackoverflow.com/questions/49171591/inets-httpd-cgi-script-how-do-you-retrieve-json-data
    # The script MUST NOT attempt to read more than CONTENT_LENGTH bytes, even if more data is available.
    logging.debug("http_content_length=%d time_start=%f", http_content_length, time_start)
    extra_error_status = ""
    try:
        rdflib_graph = _get_graph_from_stdin(http_content_length)
        time_loaded = time.time()
        triples_number = len(rdflib_graph)
        # No URL context: the triples are stored without a source URL.
        files_updates_total_number = lib_kbase.write_graph_to_events(None, rdflib_graph)
        time_stored = time.time()
        logging.debug("time_stored=%f files_updates_total_number=%d", time_stored, files_updates_total_number)
        # Timings let the client distinguish parsing cost from storage cost.
        server_result = {
            'success': 'true',
            'time_start': '%f' % time_start,
            'time_loaded': '%f' % time_loaded,
            'time_stored': '%f' % time_stored,
            'triples_number': '%d' % triples_number}
    except Exception as exc:
        # Any failure is reported to the client in the JSON reply, with the traceback.
        logging.error("Exception=%s", exc)
        server_result = {
            'success': 'false',
            'time_start': '%f' % time_start,
            'error_message': '%s:%s:%s' % (str(exc), extra_error_status, traceback.format_exc())}
    json_output = json.dumps(server_result)
    # Minimal CGI response: headers, blank line, then the JSON payload.
    sys.stdout.write('Content-Type: application/json\n')
    sys.stdout.write('Content-Length: %d\n\n' % len(json_output))
    sys.stdout.write(json_output)
def test_pure_memory_read_by_entity(self):
    """Writes dummy events, then retrieves and consumes them one by one per entity node.

    Each retrieval by entity removes the matching event from the store,
    so the event count must decrease at each pass and reach zero at the end.
    """
    triples_count = 10
    triples_graph = _create_dummy_graph(triples_count)
    returned_number = lib_kbase.write_graph_to_events(None, triples_graph)
    self.assertEqual(returned_number, triples_count)
    for triple_counter, one_triple in enumerate(triples_graph):
        # One event was consumed at each previous pass.
        actual_events_count = lib_kbase.events_count()
        self.assertEqual(actual_events_count, returned_number - triple_counter)
        test_graph_output = rdflib.Graph()
        # The subject of the triple is the entity whose events are fetched.
        entity_node = one_triple[0]
        lib_kbase.retrieve_events_to_graph(test_graph_output, entity_node)
        self.assertEqual(len(test_graph_output), 1)
        # Idiomatic single-element extraction, replacing a for/break loop
        # which would leave the variable unbound on an empty graph.
        returned_triple = next(iter(test_graph_output))
        self.assertEqual(returned_triple, one_triple)
    # All events have been consumed.
    self.assertEqual(lib_kbase.events_count(), 0)
def test_write_read_graph_to_events_with_other_nodes(self):
    """Checks that JSON-stored triples survive a write/read cycle of another graph."""
    json_triples = [
        [("CIM_Process", {"Handle": 123}), "ParentProcessId", 1],
        [
            ("CIM_Directory", {"Name": "/tmp"}),
            # CIM_DirectoryContainsFile.GroupComponent or CIM_DirectoryContainsFile.PartComponent
            "CIM_DirectoryContainsFile",
            ("CIM_DataFile", {"Name": "/tmp/anyfile.tmp"})
        ]
    ]
    stored_updates = lib_event.store_events_as_json_triples_list(json_triples)

    parsed_graph = rdflib.Graph().parse(
        "tests/input_test_data/test_events_enumerate_CIM_Process.xml")
    context_url = "http://vps516494.ovh.net/Survol/survol/sources_types/enumerate_CIM_Process.py?xid=."
    written_count = lib_kbase.write_graph_to_events(context_url, parsed_graph)
    self.assertEqual(written_count, 682)

    # Reading this context back must not touch the JSON-stored triples.
    readback_graph = rdflib.Graph()
    readback_count = lib_kbase.read_events_to_graph(context_url, readback_graph)
    self.assertEqual(written_count, readback_count)

    # Only the two JSON triples must remain in the store.
    remaining_graph = rdflib.Graph()
    lib_kbase.retrieve_all_events_to_graph_then_clear(remaining_graph)
    print("files_updates_total_number=", stored_updates)
    print("len(triples_list)=", len(remaining_graph))
    self.assertEqual(stored_updates, 2)
    self.assertEqual(len(remaining_graph), 2)
def test_sqlalchemy_url_events_count(self):
    """Writes events under a URL context and checks the event counters."""
    # Start from an empty event store.
    lib_kbase.retrieve_all_events_to_graph_then_clear(rdflib.Graph())

    parsed_graph = rdflib.Graph().parse(
        "tests/input_test_data/test_events_tcp_sockets.xml")
    context_url = "http://vps516494.ovh.net/Survol/survol/sources_types/Linux/tcp_sockets.py?xid=."
    written_count = lib_kbase.write_graph_to_events(context_url, parsed_graph)
    self.assertEqual(written_count, 43)

    stored_count = lib_kbase.events_count()
    self.assertEqual(written_count, stored_count)

    # Draining the store must return exactly as many triples as counted.
    drained_graph = rdflib.Graph()
    lib_kbase.retrieve_all_events_to_graph_then_clear(drained_graph)
    self.assertEqual(len(drained_graph), stored_count)
def test_sqlalchemy_write_url_read_all_thread(self):
    """Writes a RDF graph, then reads from a Python thread."""
    parsed_graph = rdflib.Graph().parse(
        "tests/input_test_data/test_events_tcp_sockets.xml")
    # The URL is not important because it is not accessed.
    # However, it must be correctly handled by rdflib when it creates a UriRef
    context_url = "http://vps516494.ovh.net/Survol/survol/sources_types/Linux/tcp_sockets.py?xid=."
    written_count = lib_kbase.write_graph_to_events(context_url, parsed_graph)
    self.assertEqual(written_count, 43)
    self.assertEqual(written_count, lib_kbase.events_count())
    sys.stderr.write("WRITE lib_kbase.events_count()=%s\n" % lib_kbase.events_count())

    # The reader thread sends the events back through this shared queue.
    events_queue = queue.Queue()
    reader_thread = _create_thread(written_count, events_queue)

    # Collect the triples sent by the thread.
    received_triples = _read_triples_from_queue(written_count, events_queue)

    # The thread is no longer needed.
    reader_thread.join()

    # The two triple sets must be identical: comparison of the string representations.
    self.assertEqual(
        _graph_to_triples_set(parsed_graph),
        _graph_to_triples_set(received_triples))
def store_events_as_json_triples_list(json_data_list):
    """Store triples given in JSON format as events, without a URL context.

    Triples stored in JSON format are used when creating triples in dockit.
    They are also used for testing.
    """
    return lib_kbase.write_graph_to_events(None, _json_triples_to_graph(json_data_list))