def testExportedRowsForValuesOfMultipleTypes(self):
  """Exporting two result types must populate both generated SQL tables."""
  zip_fd, prefix = self.ProcessValuesToZip({
      rdf_client_fs.StatEntry: [
          rdf_client_fs.StatEntry(
              pathspec=rdf_paths.PathSpec(path="/foo/bar", pathtype="OS"))
      ],
      rdf_client.Process: [rdf_client.Process(pid=42)]
  })

  with self.db_connection:
    # Load the generated per-type SQL scripts into the test database.
    for sql_name in ("ExportedFile_from_StatEntry.sql",
                     "ExportedProcess_from_Process.sql"):
      self.db_cursor.executescript(zip_fd.read("%s/%s" % (prefix, sql_name)))

  self.db_cursor.execute(
      "SELECT \"metadata.client_urn\", \"metadata.source_urn\", urn "
      "FROM \"ExportedFile.from_StatEntry\";")
  file_rows = self.db_cursor.fetchall()
  self.assertEqual(len(file_rows), 1)

  client_urn, source_urn, urn = file_rows[0]
  self.assertEqual(client_urn, str(self.client_id))
  self.assertEqual(source_urn, str(self.results_urn))
  self.assertEqual(urn, self.client_id.Add("/fs/os/foo/bar"))

  self.db_cursor.execute(
      "SELECT \"metadata.client_urn\", \"metadata.source_urn\", pid "
      "FROM \"ExportedProcess.from_Process\";")
  process_rows = self.db_cursor.fetchall()
  self.assertEqual(len(process_rows), 1)

  client_urn, source_urn, pid = process_rows[0]
  self.assertEqual(client_urn, str(self.client_id))
  self.assertEqual(source_urn, str(self.results_urn))
  self.assertEqual(pid, 42)
def _StartFlow(self, client_id, flow_cls, **kw):
  """Starts a flow and plants one pending response on its first request.

  The relational db deletes client messages as soon as a request has all of
  its responses, so this helper temporarily inflates the expected-response
  count to keep the request (and its leased client message) alive.
  """
  flow_id = flow.StartFlow(flow_cls=flow_cls, client_id=client_id, **kw)

  # Lease the client message.
  data_store.REL_DB.LeaseClientActionRequests(
      client_id, lease_time=rdfvalue.Duration.From(10000, rdfvalue.SECONDS))

  # Writing the status first marks the request as waiting for 2 responses.
  status = rdf_flow_objects.FlowStatus(
      client_id=client_id, flow_id=flow_id, request_id=1, response_id=2)
  data_store.REL_DB.WriteFlowResponses([status])

  # Re-read the request and pretend many more responses are outstanding so
  # the request is not considered complete and the queue isn't cleaned up.
  all_requests = data_store.REL_DB.ReadAllFlowRequestsAndResponses(
      client_id, flow_id)
  request = all_requests[0][0]
  request.nr_responses_expected = 99
  data_store.REL_DB.WriteFlowRequests([request])

  # With the inflated count, this response won't trigger any deletion of
  # client messages.
  response = rdf_flow_objects.FlowResponse(
      client_id=client_id,
      flow_id=flow_id,
      request_id=1,
      response_id=1,
      payload=rdf_client.Process(name="test_process"))
  data_store.REL_DB.WriteFlowResponses([response])

  # Restore the real expected count. Not strictly needed, as this
  # information isn't displayed in the UI.
  request.nr_responses_expected = 2
  data_store.REL_DB.WriteFlowRequests([request])

  return flow_id
def testFetchesAndStoresBinary(self):
  """The flow should download the executable of a listed process."""
  proc = rdf_client.Process(
      pid=2,
      ppid=1,
      cmdline=["test_img.dd"],
      exe=os.path.join(self.base_path, "test_img.dd"),
      ctime=1333718907167083)

  session_id = flow_test_lib.TestFlowHelper(
      flow_processes.ListProcesses.__name__,
      action_mocks.ListProcessesMock([proc]),
      client_id=self.SetupClient(0),
      fetch_binaries=True,
      token=self.token)

  fetched = list(flow.GRRFlow.ResultCollectionForFID(session_id))
  self.assertEqual(len(fetched), 1)
  # The single result must be the process executable itself.
  self.assertEqual(fetched[0].pathspec.path, proc.exe)
  self.assertEqual(fetched[0].st_size, os.stat(proc.exe).st_size)
def testEmailPluginStopsSendingEmailsAfterLimitIsReached(self):
  """Checks that the email plugin enforces its emails_limit.

  Sends 11 results through a plugin limited to 10 emails and verifies that
  exactly 10 emails go out, each correctly addressed and titled, and that
  only the final email warns that further emails are disabled.
  """
  responses = [rdf_client.Process(pid=i) for i in range(11)]
  self.ProcessResponses(
      plugin_args=email_plugin.EmailOutputPluginArgs(
          email_address=self.email_address, emails_limit=10),
      responses=responses,
      process_responses_separately=True)

  # Only 10 of the 11 results may produce an email.
  self.assertEqual(len(self.email_messages), 10)

  for msg in self.email_messages:
    self.assertEqual(msg["address"], self.email_address)
    self.assertIn("got a new result in %s" % self.results_urn, msg["title"])
    self.assertIn(utils.SmartStr(self.client_id), msg["message"])
    self.assertIn(utils.SmartStr(self.hostname), msg["message"])

  # The disable warning must appear only in the last (10th) email.
  # BUG FIX: the original looped over [:10] (which includes the last email)
  # and tested substring membership against the message *dict* — i.e. its
  # keys — so the check was vacuously true. Test the message body of the
  # first nine emails instead.
  for msg in self.email_messages[:9]:
    self.assertNotIn("sending of emails will be disabled now", msg["message"])
  self.assertIn("sending of emails will be disabled now",
                self.email_messages[9]["message"])
def testFetchesAndStoresBinary(self):
  """The flow should download the executable of a listed process."""
  client_id = self.SetupClient(0)
  exe_path = os.path.join(self.base_path, "test_img.dd")
  proc = rdf_client.Process(
      pid=2,
      ppid=1,
      cmdline=["test_img.dd"],
      exe=exe_path,
      ctime=1333718907167083)

  session_id = flow_test_lib.TestFlowHelper(
      flow_processes.ListProcesses.__name__,
      action_mocks.ListProcessesMock([proc]),
      client_id=client_id,
      fetch_binaries=True,
      creator=self.test_username)

  fetched = flow_test_lib.GetFlowResults(client_id, session_id)
  self.assertLen(fetched, 1)
  # The single result must be the process executable itself.
  self.assertEqual(fetched[0].pathspec.path, exe_path)
  self.assertEqual(fetched[0].st_size, os.stat(exe_path).st_size)
def testProcessListingOnly(self):
  """Test that the ListProcesses flow works."""
  client_id = self.SetupClient(0)

  cmd_proc = rdf_client.Process(
      pid=2,
      ppid=1,
      cmdline=["cmd.exe"],
      exe="c:\\windows\\cmd.exe",
      ctime=1333718907167083)
  client_mock = action_mocks.ListProcessesMock([cmd_proc])

  session_id = flow_test_lib.TestFlowHelper(
      compatibility.GetName(flow_processes.ListProcesses),
      client_mock,
      client_id=client_id,
      creator=self.test_username)

  results = flow_test_lib.GetFlowResults(client_id, session_id)
  self.assertLen(results, 1)
  # The mocked process must come back unchanged.
  self.assertEqual(results[0].ctime, 1333718907167083)
  self.assertEqual(results[0].cmdline, ["cmd.exe"])
def testReadsConfigurationValuesCorrectly(self):
  """Each Splunk.* config option must be reflected in the HTTP request."""
  overrides = {
      'Splunk.url': 'http://a',
      'Splunk.token': 'b',
      'Splunk.verify_https': False,
      'Splunk.source': 'c',
      'Splunk.sourcetype': 'd',
      'Splunk.index': 'e',
  }
  with test_lib.ConfigOverrider(overrides):
    mock_post = self._CallPlugin(
        plugin_args=splunk_plugin.SplunkOutputPluginArgs(),
        responses=[rdf_client.Process(pid=42)])

    kwargs = mock_post.call_args[KWARGS]
    self.assertEqual(kwargs['url'], 'http://a/services/collector/event')
    self.assertFalse(kwargs['verify'])
    self.assertEqual(kwargs['headers']['Authorization'], 'Splunk b')

    events = self._ParseEvents(mock_post)
    self.assertEqual(events[0]['source'], 'c')
    self.assertEqual(events[0]['sourcetype'], 'd')
    self.assertEqual(events[0]['index'], 'e')
def testCSVPluginWithValuesOfMultipleTypes(self):
  """CSV export of two result types writes a manifest plus one CSV per type."""
  zip_fd, prefix = self.ProcessValuesToZip({
      rdf_client_fs.StatEntry: [
          rdf_client_fs.StatEntry(pathspec=rdf_paths.PathSpec(
              path="/foo/bar", pathtype="OS"))
      ],
      rdf_client.Process: [rdf_client.Process(pid=42)]
  })
  self.assertEqual(
      set(zip_fd.namelist()),
      set([
          "%s/MANIFEST" % prefix,
          "%s/ExportedFile/from_StatEntry.csv" % prefix,
          "%s/ExportedProcess/from_Process.csv" % prefix
      ]))

  # FIX: use safe_load — yaml.load without an explicit Loader is deprecated
  # and can construct arbitrary Python objects from the parsed document.
  parsed_manifest = yaml.safe_load(zip_fd.read("%s/MANIFEST" % prefix))
  self.assertEqual(
      parsed_manifest, {
          "export_stats": {
              "StatEntry": {
                  "ExportedFile": 1
              },
              "Process": {
                  "ExportedProcess": 1
              }
          }
      })

  with zip_fd.open("%s/ExportedFile/from_StatEntry.csv" % prefix) as filedesc:
    content = filedesc.read().decode("utf-8")

  parsed_output = list(csv.DictReader(io.StringIO(content)))
  self.assertLen(parsed_output, 1)

  # Make sure metadata is filled in.
  self.assertEqual(parsed_output[0]["metadata.client_urn"],
                   "aff4:/%s" % self.client_id)
  self.assertEqual(parsed_output[0]["metadata.hostname"],
                   "Host-0.example.com")
  self.assertEqual(parsed_output[0]["metadata.mac_address"],
                   "aabbccddee00\nbbccddeeff00")
  self.assertEqual(parsed_output[0]["metadata.source_urn"], self.results_urn)
  self.assertEqual(parsed_output[0]["urn"],
                   "aff4:/%s/fs/os/foo/bar" % self.client_id)

  filepath = "%s/ExportedProcess/from_Process.csv" % prefix
  with zip_fd.open(filepath) as filedesc:
    content = filedesc.read().decode("utf-8")

  parsed_output = list(csv.DictReader(io.StringIO(content)))
  self.assertLen(parsed_output, 1)

  self.assertEqual(parsed_output[0]["metadata.client_urn"],
                   "aff4:/%s" % self.client_id)
  self.assertEqual(parsed_output[0]["metadata.hostname"],
                   "Host-0.example.com")
  self.assertEqual(parsed_output[0]["metadata.mac_address"],
                   "aabbccddee00\nbbccddeeff00")
  self.assertEqual(parsed_output[0]["metadata.source_urn"], self.results_urn)
  # CSV values are strings, so the pid comes back as "42".
  self.assertEqual(parsed_output[0]["pid"], "42")
def Parse(self, cmd, args, stdout, stderr, return_val, time_taken,
          knowledge_base):
  """Parse the ps output.

  Note that cmdline consumes every field up to the end of line
  and as it is a string, we can't perfectly see what the arguments
  on the command line really were. We just assume a space is the arg
  separator. It's imperfect, but it's better than nothing.
  Obviously, if cmd/cmdline is specified, it must be the last
  column of output.

  Args:
    cmd: A string containing the base command that was run.
    args: A list of strings containing the commandline args for the command.
    stdout: A string containing the stdout of the command run.
    stderr: A string containing the stderr of the command run. (Unused)
    return_val: The return code following command execution.
    time_taken: The time taken to run the process. (Unused)
    knowledge_base: An RDF KnowledgeBase. (Unused)

  Yields:
    RDF Process objects.
  """
  _ = stderr, time_taken, knowledge_base  # Unused.
  self.CheckReturn(cmd, return_val)
  if not stdout:
    # We have nothing to process so bug out. (Handles an input of None.)
    return

  # Maps a ps column name to (rdf Process attribute, converter for the raw
  # string value). FIX: the size fields previously used the Python-2-only
  # `long` builtin, which raises NameError on Python 3; `int` is
  # backward-compatible since Python 2 ints auto-promote to long.
  rdf_convert_table = {
      "pid": ("pid", int),
      "tgid": ("pid", int),
      "ppid": ("ppid", int),
      "comm": ("name", str),
      "ucomm": ("name", str),
      "ruid": ("real_uid", int),
      "uid": ("effective_uid", int),
      "euid": ("effective_uid", int),
      "suid": ("saved_uid", int),
      "svuid": ("saved_uid", int),
      "user": ("username", str),
      "euser": ("username", str),
      "uname": ("username", str),
      "rgid": ("real_gid", int),
      "gid": ("effective_gid", int),
      "egid": ("effective_gid", int),
      "sgid": ("saved_gid", int),
      "svgid": ("saved_gid", int),
      "tty": ("terminal", str),
      "tt": ("terminal", str),
      "tname": ("terminal", str),
      "stat": ("status", str),
      "nice": ("nice", int),
      "ni": ("nice", int),
      "thcount": ("num_threads", int),
      "nlwp": ("num_threads", int),
      "pcpu": ("cpu_percent", float),
      "%cpu": ("cpu_percent", float),
      "c": ("cpu_percent", float),
      "rss": ("RSS_size", int),
      "rssize": ("RSS_size", int),
      "rsz": ("RSS_size", int),
      "vsz": ("VMS_size", int),
      "vsize": ("VMS_size", int),
      "pmem": ("memory_percent", float),
      "%mem": ("memory_percent", float),
      "args": ("cmdline", self._SplitCmd),
      "command": ("cmdline", self._SplitCmd),
      "cmd": ("cmdline", self._SplitCmd)
  }

  expected_fields = self._FindPsOutputFormat(cmd, args)

  # If we made it here, we expect we can now parse the output and we know
  # its expected format.
  lines = stdout.splitlines()
  if self._HasHeaders(args):
    # Ignore the headers.
    lines = lines[1:]

  for line in lines:
    try:
      # The "len() - 1" allows us to group any extra fields into
      # the last field. e.g. cmdline.
      entries = line.split(None, len(expected_fields) - 1)
      # Create an empty process for us to fill in as best we can.
      process = rdf_client.Process()
      for name, value in zip(expected_fields, entries):
        if name not in rdf_convert_table:
          # If the field is not something we can process, skip it.
          continue
        rdf_name, method = rdf_convert_table[name]
        setattr(process, rdf_name, method(value))
      # Approximate the 'comm' from the cmdline if it wasn't detailed.
      # i.e. the basename of the first arg of the commandline.
      if not process.name and process.cmdline:
        process.name = os.path.basename(process.cmdline[0])
      yield process
    except ValueError:
      # FIX: logging.warn is a deprecated alias of logging.warning.
      logging.warning("Unparsable line found for %s %s:\n"
                      "  %s", cmd, args, line)
def testBigQueryPluginFallbackToAFF4(self):
  # Purpose: when every BigQuery upload fails, the plugin must fall back to
  # writing gzipped JSON data files plus schema files into AFF4, and must
  # stop retrying uploads once BigQuery.max_upload_failures is exceeded.
  plugin_args = bigquery_plugin.BigQueryOutputPluginArgs()
  # Three distinct export types result from these payloads:
  # StatEntry -> ExportedFile, Process -> ExportedProcess,
  # SoftwarePackage -> AutoExportedSoftwarePackage (see the loop below).
  responses = [
      rdf_client.StatEntry(
          pathspec=rdf_paths.PathSpec(path="/中国新闻网新闻中", pathtype="OS")),
      rdf_client.Process(pid=42),
      rdf_client.Process(pid=43),
      rdf_client.SoftwarePackage(name="test.deb")
  ]
  plugin = bigquery_plugin.BigQueryOutputPlugin(
      source_urn=self.results_urn,
      output_base_urn=self.base_urn,
      args=plugin_args,
      token=self.token)

  plugin.InitializeState()

  messages = []
  for response in responses:
    messages.append(
        rdf_flows.GrrMessage(source=self.client_id, payload=response))

  # Freeze the clock so the fallback output paths embed a known timestamp
  # (1445995873), matched by the aff4 Open() calls below.
  with test_lib.FakeTime(1445995873):
    with mock.patch.object(bigquery, "GetBigQueryClient") as mock_bigquery:
      # Every InsertData call raises, forcing the AFF4 fallback path.
      mock_bigquery.return_value.configure_mock(**{
          "InsertData.side_effect": bigquery.BigQueryJobUploadError()
      })
      with test_lib.ConfigOverrider({"BigQuery.max_upload_failures": 2}):
        for message in messages:
          plugin.ProcessResponses([message])
        plugin.Flush()

        # We have 3 output types but a limit of 2 upload failures, so we
        # shouldn't try the third one.
        self.assertEqual(mock_bigquery.return_value.InsertData.call_count, 2)

  # We should have written a data file and a schema file for each type.
  for output_name in [
      "ExportedFile", "ExportedProcess", "AutoExportedSoftwarePackage"
  ]:
    schema_fd = aff4.FACTORY.Open(
        self.base_urn.Add(
            "C-1000000000000000_Results_%s_1445995873.schema" % output_name),
        token=self.token)
    data_fd = aff4.FACTORY.Open(
        self.base_urn.Add(
            "C-1000000000000000_Results_%s_1445995873.data" % output_name),
        token=self.token)
    # The data file is gzip-compressed newline-delimited JSON.
    actual_fd = gzip.GzipFile(None, "r", 9, data_fd)

    if output_name == "ExportedFile":
      self.CompareSchemaToKnownGood(json.load(schema_fd))
      self.assertEqual(
          json.load(actual_fd)["urn"],
          self.client_id.Add("/fs/os/中国新闻网新闻中"))
    elif output_name == "ExportedProcess":
      self.assertEqual(json.load(schema_fd)[1]["name"], "pid")
      # Both Process payloads must appear, one JSON object per line.
      expected_pids = ["42", "43"]
      for i, line in enumerate(actual_fd):
        self.assertEqual(json.loads(line)["pid"], expected_pids[i])
    else:
      self.assertEqual(json.load(schema_fd)[1]["name"], "name")
      self.assertEqual(json.load(actual_fd)["name"], "test.deb")

  # Process the same messages to make sure we're re-using the filehandles.
  with test_lib.FakeTime(1445995878):
    with mock.patch.object(bigquery, "GetBigQueryClient") as mock_bigquery:
      mock_bigquery.return_value.configure_mock(**{
          "InsertData.side_effect": bigquery.BigQueryJobUploadError()
      })
      with test_lib.ConfigOverrider({"BigQuery.max_upload_failures": 2}):
        for message in messages:
          plugin.ProcessResponses([message])
        plugin.Flush()

        # We shouldn't call insertdata at all because we have passed max
        # failures already
        self.assertEqual(mock_bigquery.return_value.InsertData.call_count, 0)

  # Because the filehandles were re-used (still named with the first fake
  # timestamp), each data file now holds the first batch plus the re-run.
  expected_line_counts = {
      "ExportedFile": 2,
      "ExportedProcess": 4,
      "AutoExportedSoftwarePackage": 2
  }
  for output_name in [
      "ExportedFile", "ExportedProcess", "AutoExportedSoftwarePackage"
  ]:
    data_fd = aff4.FACTORY.Open(
        self.base_urn.Add(
            "C-1000000000000000_Results_%s_1445995873.data" % output_name),
        token=self.token)
    actual_fd = gzip.GzipFile(None, "r", 9, data_fd)
    self.assertEqual(
        sum(1 for line in actual_fd), expected_line_counts[output_name])
def RunOnClients(client_ids, num_processes):
  """Runs the mocked ListProcesses action on the given clients."""
  # Each client reports the same process `num_processes` times.
  mocked_results = [rdf_client.Process(pid=1, exe="a.exe")] * num_processes
  client_mock = action_mocks.ListProcessesMock(mocked_results)
  self.AssignTasksToClients(client_ids)
  hunt_test_lib.TestHuntHelper(
      client_mock, client_ids, check_flow_errors=False, token=self.token)