def test_connect_queries_dotted(self):
    """Test queries provided at connect time with a dotted container name."""
    # Same test as above but with dotted container
    ut_provider = UTDataDriver()
    # Copy each query dict individually: list.copy() is only a shallow
    # copy, so mutating the dicts below would corrupt the shared
    # module-level _TEST_QUERIES fixture for every other test.
    dotted_container_qs = [query.copy() for query in _TEST_QUERIES]
    for query in dotted_container_qs:
        query["query_container"] = "Saved.Searches"
    ut_provider.svc_queries = dotted_container_qs
    data_provider = QueryProvider(
        data_environment="LogAnalytics", driver=ut_provider
    )
    data_provider.connect("testuri")
    # The dotted container should create nested attributes.
    self.assertTrue(hasattr(data_provider, "Saved"))
    saved_searches = getattr(data_provider, "Saved")
    saved_searches = getattr(saved_searches, "Searches")
    for attr in dotted_container_qs:
        attr = attr["name"].split(".")[0]
        self.assertTrue(hasattr(saved_searches, attr))
        # assertIsInstance reports the actual type on failure,
        # unlike assertTrue(isinstance(...)).
        self.assertIsInstance(
            getattr(saved_searches, attr), (partial, QueryContainer)
        )
    # Verify the query text is stored under the fully-dotted path.
    q_store = data_provider.query_store
    q_src = q_store.get_query("Saved.Searches.test.query3")
    self.assertEqual(q_src.query, dotted_container_qs[2]["query"])
def test_connect_queries_dotted(self):
    """Test queries provided at connect time (dotted container variant)."""
    queries = {
        "test_query1": "Select * from test",
        "test_query2": "Select * from test2",
        "test.query3": "Select * from test2",
    }
    # Same test as above but with dotted container
    ut_provider = UTDataDriver()
    ut_provider.svc_queries = (queries, "Saved.Searches")
    data_provider = QueryProvider(
        data_environment="LogAnalytics", driver=ut_provider
    )
    data_provider.connect("testuri")
    # The dotted container name should produce nested attributes.
    self.assertTrue(hasattr(data_provider, "Saved"))
    searches_container = data_provider.Saved.Searches
    for query_name in queries:
        top_attr = query_name.split(".")[0]
        self.assertTrue(hasattr(searches_container, top_attr))
        self.assertTrue(
            isinstance(
                getattr(searches_container, top_attr),
                (partial, QueryContainer),
            )
        )
    # The query text should be retrievable via the fully-dotted path.
    query_source = data_provider._query_store.get_query(
        "Saved.Searches.test.query3"
    )
    self.assertEqual(query_source.query, queries["test.query3"])
def test_create_provider(self):
    """Test creating and connecting a LocalData provider."""
    qry_prov = QueryProvider("LocalData")
    qry_prov.connect()
    self.assertTrue(qry_prov.connected)
    # The built-in LocalData environment ships with a baseline query set.
    query_names = qry_prov.list_queries()
    self.assertGreaterEqual(len(query_names), 8)
    for expected_query in (
        "SecurityAlert.list_alerts",
        "WindowsSecurity.list_host_events",
        "Network.list_azure_network_flows_by_ip",
    ):
        self.assertIn(expected_query, query_names)
def setUp(self):
    """Initialize test fixtures."""
    ut_driver = UTDataDriver()
    self.assertTrue(ut_driver.loaded)
    ut_driver.connect("testuri")
    self.assertTrue(ut_driver.connected)
    self.provider = ut_driver
    # One provider per data environment, both backed by the same driver.
    self.la_provider = QueryProvider(
        data_environment="LogAnalytics", driver=self.provider
    )
    self.query_sources = self.la_provider._query_store.data_families
    self.splunk_provider = QueryProvider(
        data_environment="Splunk", driver=self.provider
    )
    self.splunk_query_sources = (
        self.splunk_provider._query_store.data_families
    )
def test_additional_queries(self):
    """Test loading additional queries from extra query paths."""
    data_path = get_test_data_path()
    query_path = str(Path(get_test_data_path()) / "localdata")
    qry_prov = QueryProvider(
        "LocalData", data_paths=[data_path], query_paths=[query_path]
    )
    query_names = qry_prov.list_queries()
    self.assertGreaterEqual(len(query_names), 11)
    # Every listed query should be callable and return a non-empty frame.
    for query_name in query_names:
        result_df = getattr(qry_prov, query_name)()
        self.assertIsInstance(result_df, pd.DataFrame)
        self.assertGreaterEqual(len(result_df), 1)
def test_query_create_funcs(self):
    """Check that query functions are attached to the provider."""
    la_provider = QueryProvider(
        data_environment="LogAnalytics", driver=self.provider
    )

    def _public_names(container):
        # Non-dunder attributes of a query container.
        return [name for name in dir(container) if not name.startswith("__")]

    self.assertGreaterEqual(len(_public_names(la_provider.all_queries)), 14)
    self.assertGreaterEqual(
        len(_public_names(la_provider.WindowsSecurity)), 9
    )
    self.assertGreaterEqual(len(_public_names(la_provider.SecurityAlert)), 5)
    # Test that function attributes have been created properly.
    for _, query_func in la_provider.all_queries:
        self.assertIsInstance(query_func, partial)
        self.assertTrue(len(query_func.__doc__))
        self.assertIn("Parameters", query_func.__doc__)
def test_load_kql_query_defs(self):
    """Verify that KQL query definitions load with expected properties."""
    la_provider = QueryProvider(
        data_environment="LogAnalytics", driver=self.provider
    )
    # Did we read and process the query definitions OK?
    family_sources = la_provider._query_store.data_families
    self.assertGreaterEqual(len(family_sources["WindowsSecurity"]), 9)
    self.assertGreaterEqual(len(family_sources["SecurityAlert"]), 5)
    self.assertGreaterEqual(len(family_sources["LinuxSyslog"]), 5)
    # Pick one item and check its properties.
    alert_query = family_sources["SecurityAlert"]["get_alert"]
    self.assertEqual(len(alert_query.default_params), 7)
    self.assertEqual(len(alert_query.params), 8)
    self.assertEqual(len(alert_query.required_params), 1)
    self.assertEqual(len(alert_query.metadata), 6)
    self.assertIn("data_families", alert_query.metadata)
    self.assertIn("data_environments", alert_query.metadata)
    self.assertEqual(len(alert_query.data_families), 1)
    self.assertEqual(alert_query.name, "get_alert")
    self.assertIn("Retrieves", alert_query.description)
def test_queries(self):
    """Test that LocalData queries run and the schema is populated."""
    local_data_dir = Path(get_test_data_path()) / "localdata"
    qry_prov = QueryProvider("LocalData", data_paths=[str(local_data_dir)])
    # Each query should execute and return a DataFrame with >1 row.
    for query_name in qry_prov.list_queries():
        result_df = getattr(qry_prov, query_name)()
        self.assertIsInstance(result_df, pd.DataFrame)
        self.assertGreater(len(result_df), 1)
    # Each table in the schema should expose a reasonable column set.
    for table_columns in qry_prov.schema.values():
        self.assertIsInstance(table_columns, dict)
        self.assertGreater(len(table_columns), 10)
def setUp(self):
    """Initialize test fixtures."""
    ut_driver = UTDataDriver()
    self.assertTrue(ut_driver.loaded)
    ut_driver.connect("testuri")
    self.assertTrue(ut_driver.connected)
    self.provider = ut_driver
    self.la_provider = QueryProvider(
        data_environment="LogAnalytics", driver=self.provider
    )
def test_cust_formatters_splunk():
    """Test SplunkDriver formatting."""
    ut_driver = UTDataDriver()
    ut_driver.connect("testuri")
    la_provider = QueryProvider(
        data_environment="LogAnalytics", driver=ut_driver
    )
    la_sources = la_provider.query_store.data_families
    splunk_provider = QueryProvider(data_environment="Splunk", driver=ut_driver)
    splunk_sources = splunk_provider.query_store.data_families

    splunk_fmt = {
        "datetime": splunk_driver.SplunkDriver._format_datetime,
        "list": splunk_driver.SplunkDriver._format_list,
    }
    test_dt = datetime.utcnow()
    expected_dt_str = test_dt.isoformat(sep=" ")
    ip_address_list = "192.168.0.1, 192.168.0.2, 192.168.0.3"

    # Using an Azure Sentinel query here since we want something
    # that requires a list parameter
    q_src = la_sources["Azure"]["list_azure_activity_for_ip"]
    query = q_src.create_query(
        formatters=splunk_fmt, start=test_dt, ip_address_list=ip_address_list
    )
    check.is_in(expected_dt_str, query)

    # Double-quote list elements
    query = q_src.create_query(
        formatters=splunk_fmt, ip_address_list=ip_address_list
    )
    expected_ips = ",".join(
        f'"{ip.strip()}"' for ip in ip_address_list.split(",")
    )
    check.is_in(expected_ips, query)

    # Always quoted strings, even for non-string list items.
    int_list = [1, 2, 3, 4]
    query = q_src.create_query(formatters=splunk_fmt, ip_address_list=int_list)
    expected_ints = ",".join(f'"{item}"' for item in int_list)
    check.is_in(expected_ints, query)

    # Use a splunk query to verify timeformat parameter and datetime formatting
    q_src = splunk_sources["SplunkGeneral"]["get_events_parameterized"]
    query = q_src.create_query(formatters=splunk_fmt, start=test_dt, end=test_dt)
    check.is_in('timeformat="%Y-%m-%d %H:%M:%S.%6N"', query)
    check.is_in(f'earliest="{expected_dt_str}"', query)
def test_split_ranges(self):
    """Test time range split logic."""
    delta = pd.Timedelta("1H")
    start = datetime.utcnow() - pd.Timedelta("5H")
    end = datetime.utcnow() + pd.Timedelta("5min")

    ranges = QueryProvider._calc_split_ranges(start, end, delta)
    self.assertEqual(len(ranges), 5)
    self.assertEqual(ranges[0][0], start)
    self.assertEqual(ranges[-1][1], end)
    # Sub-range boundaries must not coincide: no end equals any start.
    range_starts = [time_range[0] for time_range in ranges]
    for range_end in (time_range[1] for time_range in ranges):
        self.assertNotIn(range_end, range_starts)

    # Extending the end by a partial segment still yields 5 ranges,
    # with the final range absorbing the remainder.
    end = end + pd.Timedelta("20min")
    ranges = QueryProvider._calc_split_ranges(start, end, delta)
    self.assertEqual(len(ranges), 5)
    self.assertEqual(ranges[0][0], start)
    self.assertEqual(ranges[-1][1], end)
def setUp(self):
    """Initialize test fixtures."""
    ut_driver = UTDataDriver()
    self.assertTrue(ut_driver.loaded)
    ut_driver.connect("testuri")
    self.assertTrue(ut_driver.connected)
    self.provider = ut_driver
    # Suppress UserWarnings emitted during provider construction.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        self.la_provider = QueryProvider(
            data_environment="LogAnalytics", driver=self.provider
        )
def test_graph_load_query_exec(self):
    """Test help output, parameter validation and execution for graph queries."""
    graph_provider = QueryProvider(
        data_environment="SecurityGraph", driver=self.provider
    )
    # Calling with "help" returns None rather than a DataFrame.
    self.assertIsNone(graph_provider.all_queries.get_alert("help"))
    # Omitting the required parameter raises and names the parameter.
    with self.assertRaises(ValueError) as err_ctx:
        graph_provider.all_queries.get_alert()
    self.assertIn("alert_id", str(err_ctx.exception))
    # Supplying the parameter runs the query; the value appears in the URL.
    result_df = graph_provider.all_queries.get_alert(alert_id="foo")
    self.assertEqual(len(result_df), 1)
    self.assertIn("/foo", result_df["query"].iloc[0])
def setUp(self):
    """Initialize test fixtures."""
    ut_driver = UTDataDriver()
    self.assertTrue(ut_driver.loaded)
    ut_driver.connect("testuri")
    self.assertTrue(ut_driver.connected)
    self.provider = ut_driver
    # Suppress UserWarnings emitted during provider construction.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        self.la_provider = QueryProvider(
            data_environment="LogAnalytics", driver=self.provider
        )
    self.query_sources = self.la_provider.query_store.data_families
def test_additional_queries(self):
    """Test additional queries called with dummy parameter values."""
    data_path = get_test_data_path()
    query_path = str(Path(get_test_data_path()) / "localdata")
    qry_prov = QueryProvider(
        "LocalData", data_paths=[data_path], query_paths=[query_path]
    )
    query_names = qry_prov.list_queries()
    self.assertGreaterEqual(len(query_names), 11)
    # Dummy values for every parameter any of the queries might require.
    qry_params = {
        "start": -1,
        "end": 0,
        "ip_address_list": ["test"],
        "host_name": "test",
        "account_name": "test",
    }
    for query_name in query_names:
        result_df = getattr(qry_prov, query_name)(**qry_params)
        self.assertIsInstance(result_df, pd.DataFrame)
        self.assertGreaterEqual(len(result_df), 1)
def test_connect_queries(self):
    """Test queries provided at connect time."""
    ut_provider = UTDataDriver()
    ut_provider.svc_queries = _TEST_QUERIES
    data_provider = QueryProvider(
        data_environment="LogAnalytics", driver=ut_provider
    )
    data_provider.connect("testuri")
    # Check that we have expected attributes
    self.assertTrue(hasattr(data_provider, "SavedSearches"))
    saved_searches = getattr(data_provider, "SavedSearches")
    for attr in _TEST_QUERIES:
        attr = attr["name"].split(".")[0]
        self.assertTrue(hasattr(saved_searches, attr))
        # assertIsInstance reports the actual type on failure,
        # unlike assertTrue(isinstance(...)).
        self.assertIsInstance(
            getattr(saved_searches, attr), (partial, QueryContainer)
        )
    # Check that we have expected query text
    q_store = data_provider.query_store
    q_src = q_store.get_query("SavedSearches.test.query3")
    self.assertEqual(q_src.query, _TEST_QUERIES[2]["query"])
def test_graph_query_create_funcs(self):
    """Check that SecurityGraph query functions are created correctly."""
    graph_provider = QueryProvider(
        data_environment="SecurityGraph", driver=self.provider
    )
    all_names = [
        name
        for name in dir(graph_provider.all_queries)
        if not name.startswith("__")
    ]
    alert_names = [
        name
        for name in dir(graph_provider.SecurityGraphAlert)
        if not name.startswith("__")
    ]
    self.assertGreaterEqual(len(all_names), 7)
    self.assertGreaterEqual(len(alert_names), 7)
    # Test that function attributes have been created properly
    for _, query_func in graph_provider.all_queries:
        self.assertIsInstance(query_func, partial)
        self.assertTrue(len(query_func.__doc__))
        self.assertIn("Parameters", query_func.__doc__)
def test_load_query_exec(self):
    """Test help output, parameter validation and query execution."""
    la_provider = QueryProvider(
        data_environment="LogAnalytics", driver=self.provider
    )
    # Calling with "help" returns None rather than a DataFrame.
    self.assertIsNone(la_provider.all_queries.get_alert("help"))
    # Omitting the required parameter raises and names the parameter.
    with self.assertRaises(ValueError) as err_ctx:
        la_provider.all_queries.get_alert()
    self.assertIn("system_alert_id", str(err_ctx.exception))
    # Supplying the parameter substitutes it into the query text.
    result_df = la_provider.all_queries.get_alert(system_alert_id="foo")
    self.assertEqual(len(result_df), 1)
    self.assertIn('SystemAlertId == "foo"', result_df["query"].iloc[0])
def test_load_graph_query_defs(self):
    """Verify SecurityGraph query definitions load with expected properties."""
    graph_provider = QueryProvider(
        data_environment="SecurityGraph", driver=self.provider
    )
    # Did we read and process the query definitions OK?
    family_sources = graph_provider._query_store.data_families
    self.assertGreaterEqual(len(family_sources["SecurityGraphAlert"]), 7)
    # Pick one item and check its properties.
    alert_query = family_sources["SecurityGraphAlert"]["get_alert"]
    self.assertEqual(len(alert_query.default_params), 6)
    self.assertEqual(len(alert_query.params), 7)
    self.assertEqual(len(alert_query.required_params), 1)
    self.assertEqual(len(alert_query.metadata), 6)
    self.assertIn("data_families", alert_query.metadata)
    self.assertIn("data_environments", alert_query.metadata)
    self.assertEqual(len(alert_query.data_families), 1)
    self.assertEqual(alert_query.name, "get_alert")
    self.assertIn("Retrieves", alert_query.description)
def test_split_queries(self):
    """Test queries split into time segments."""
    la_provider = self.la_provider
    start = datetime.utcnow() - pd.Timedelta("5H")
    end = datetime.utcnow() + pd.Timedelta("5min")
    expected_ranges = QueryProvider._calc_split_ranges(
        start, end, pd.Timedelta("1H")
    )
    result_queries = la_provider.all_queries.list_alerts(
        "print", start=start, end=end, split_query_by="1H"
    )
    split_queries = result_queries.split("\n\n")
    self.assertEqual(len(split_queries), 5)
    # Each split query embeds its own sub-range boundaries...
    for idx, (range_start, range_end) in enumerate(expected_ranges):
        self.assertIn(range_start.isoformat(sep="T") + "Z", split_queries[idx])
        self.assertIn(range_end.isoformat(sep="T") + "Z", split_queries[idx])
    # ...and the overall bounds appear in the first and last queries.
    self.assertIn(start.isoformat(sep="T") + "Z", split_queries[0])
    self.assertIn(end.isoformat(sep="T") + "Z", split_queries[-1])
def test_load_yaml_def(self):
    """Test importing query definitions from YAML files."""
    la_provider = QueryProvider(
        data_environment="LogAnalytics", driver=self.provider
    )
    # Each malformed file should be rejected with a descriptive message.
    failure_cases = [
        ("data_q_meta_fail.yaml", "no data families defined"),
        ("data_q_source_fail_param.yaml", "Missing parameters are"),
        ("data_q_source_fail_type.yaml", "Parameters with missing types"),
    ]
    for file_name, expected_msg in failure_cases:
        with self.assertRaises((ImportError, ValueError)) as err_ctx:
            la_provider.import_query_file(
                query_file=Path(_TEST_DATA, file_name)
            )
        self.assertIn(expected_msg, str(err_ctx.exception))
    # A valid file should add exactly three queries to the store.
    before_count = len(list(la_provider.list_queries()))
    la_provider.import_query_file(
        query_file=Path(_TEST_DATA, "data_q_success.yaml")
    )
    self.assertEqual(before_count + 3, len(list(la_provider.list_queries())))
* pip install msticpy ### Import Libraries import os import pandas as pd from msticpy.nbtools.wsconfig import WorkspaceConfig from msticpy.data.data_providers import QueryProvider os.environ["KQLMAGIC_LOAD_MODE"]="silent" ### Define Connection String We are going to authenticate to our demo workspace with an AppKey. Therefore, there is no need for you to pass an Azure account or authenticate with your credentials! This is a great demo environment to test your notebooks! connect_str = f"loganalytics://workspace='DEMO_WORKSPACE';appkey='DEMO_KEY';alias='myworkspace'" qry_prov = QueryProvider("LogAnalytics") qry_prov.connect(connect_str) ### Native Kqlmagic interface See https://github.com/Microsoft/jupyter-Kqlmagic %kql SecurityEvent | take 1 ### MSTICPy query interface alerts_df = qry_prov.exec_query(""" SecurityAlert | take 10 """) print(type(alerts_df)) alerts_df.head(5)