#!/usr/bin/env python
"""A keyword index of client machines.

An index of client machines, associating likely identifiers to client IDs.
"""


from grr.lib import aff4
from grr.lib import keyword_index
from grr.lib import rdfvalue
from grr.lib import utils
from grr.lib.rdfvalues import client as rdf_client

# The system's primary client index.
MAIN_INDEX = rdfvalue.RDFURN("aff4:/client_index")


class ClientIndex(keyword_index.AFF4KeywordIndex):
  """An index of client machines."""

  START_TIME_PREFIX = "start_date:"
  START_TIME_PREFIX_LEN = len(START_TIME_PREFIX)
  END_TIME_PREFIX = "end_date:"
  END_TIME_PREFIX_LEN = len(END_TIME_PREFIX)

  # We accept and return client URNs, but store client ids,
  # e.g. "C.00aaeccbb45f33a3".

  def _ClientIdFromURN(self, urn):
    return urn.Basename()
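# A minimal illustrative sketch of the URN-to-client-id convention described
# above; it uses plain string handling instead of the real RDFURN.Basename()
# call, and the helper name is hypothetical.
def client_id_from_urn(urn):
  """Return the trailing path component, e.g. "C.00aaeccbb45f33a3"."""
  return urn.rstrip("/").rsplit("/", 1)[-1]

assert client_id_from_urn("aff4:/C.00aaeccbb45f33a3") == "C.00aaeccbb45f33a3"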
class HashFileStore(FileStore):
  """FileStore that stores files referenced by hash."""

  PATH = rdfvalue.RDFURN("aff4:/files/hash")
  PRIORITY = 2
  EXTERNAL = False
  HASH_TYPES = {"generic": ["md5", "sha1", "sha256", "SignedData"],
                "pecoff": ["md5", "sha1"]}
  FILE_HASH_TYPE = FileStoreHash

  def CheckHashes(self, hashes):
    """Check hashes against the filestore.

    Blobs use the hash in the schema:
      aff4:/files/hash/generic/sha256/[sha256hash]

    Args:
      hashes: A list of Hash objects to check.

    Yields:
      Tuples of (RDFURN, hash object) that exist in the store.
    """
    hash_map = {}
    for hsh in hashes:
      if hsh.HasField("sha256"):
        # The canonical name of the file is where we store the file hash.
        hash_map[aff4.ROOT_URN.Add("files/hash/generic/sha256").Add(
            str(hsh.sha256))] = hsh

    for metadata in aff4.FACTORY.Stat(list(hash_map), token=self.token):
      yield metadata["urn"], hash_map[metadata["urn"]]

  def _GetHashers(self, hash_types):
    return [getattr(hashlib, hash_type) for hash_type in hash_types
            if hasattr(hashlib, hash_type)]

  def _HashFile(self, fd):
    """Look for the required hashes in the file."""
    hashes = fd.Get(fd.Schema.HASH)
    if hashes:
      found_all = True
      for fingerprint_type, hash_types in self.HASH_TYPES.iteritems():
        for hash_type in hash_types:
          if fingerprint_type == "pecoff":
            hash_type = "pecoff_%s" % hash_type
          if not hashes.HasField(hash_type):
            found_all = False
            break
        if not found_all:
          break
      if found_all:
        return hashes

    fingerprinter = fingerprint.Fingerprinter(fd)
    if "generic" in self.HASH_TYPES:
      hashers = self._GetHashers(self.HASH_TYPES["generic"])
      fingerprinter.EvalGeneric(hashers=hashers)
    if "pecoff" in self.HASH_TYPES:
      hashers = self._GetHashers(self.HASH_TYPES["pecoff"])
      if hashers:
        fingerprinter.EvalPecoff(hashers=hashers)

    if not hashes:
      hashes = fd.Schema.HASH()

    for result in fingerprinter.HashIt():
      fingerprint_type = result["name"]
      for hash_type in self.HASH_TYPES[fingerprint_type]:
        if hash_type not in result:
          continue

        if hash_type == "SignedData":
          # There can be several certs in the same file.
          for signed_data in result[hash_type]:
            hashes.signed_data.Append(revision=signed_data[0],
                                      cert_type=signed_data[1],
                                      certificate=signed_data[2])
          continue

        # Set the hashes in the original object.
        if fingerprint_type == "generic":
          hashes.Set(hash_type, result[hash_type])
        elif fingerprint_type == "pecoff":
          hashes.Set("pecoff_%s" % hash_type, result[hash_type])
        else:
          logging.error("Unknown fingerprint_type %s.", fingerprint_type)

    try:
      fd.Set(hashes)
    except IOError:
      pass
    return hashes

  def AddFile(self, fd, sync=False):
    """Hash the content of an AFF4Stream and create FileStoreImage objects.

    We take a file in the client space:
      aff4:/C.123123123/fs/os/usr/local/blah

    Hash it, update the hash in the original file if it is different from the
    one calculated on the client, and create FileStoreImages at the following
    URNs (if they don't already exist):

      aff4:/files/hash/generic/sha256/123123123 (canonical reference)
      aff4:/files/hash/generic/sha1/345345345
      aff4:/files/hash/generic/md5/456456456
      aff4:/files/hash/pecoff/md5/aaaaaaaa (only for PEs)
      aff4:/files/hash/pecoff/sha1/bbbbbbbb (only for PEs)

    When present in PE files, the signing data (revision, cert_type,
    certificate) is added to the original client-space blobimage.

    This can't be done simply in the FileStore.Write() method with fixed hash
    buffer sizes because the authenticode hashes need to track hashing of
    different-sized regions based on the signature information.

    Args:
      fd: File open for reading.
      sync: Should the file be synced immediately.

    Raises:
      IOError: If there was an error writing the file.
    """
    file_store_files = []

    hashes = self._HashFile(fd)

    # The empty file is very common, we don't keep the back references for it
    # in the DB since it just takes up too much space.
    empty_hash = ("e3b0c44298fc1c149afbf4c8996fb924"
                  "27ae41e4649b934ca495991b7852b855")
    if hashes.sha256 == empty_hash:
      return

    for hash_type, hash_digest in hashes.ListSetFields():
      # Determine fingerprint type.
      hash_digest = str(hash_digest)
      hash_type = hash_type.name

      fingerprint_type = "generic"
      if hash_type.startswith("pecoff_"):
        fingerprint_type = "pecoff"
        hash_type = hash_type[len("pecoff_"):]
      if hash_type not in self.HASH_TYPES[fingerprint_type]:
        continue

      # These files are all created through async write so they should be
      # fast.
      file_store_urn = self.PATH.Add(fingerprint_type).Add(hash_type).Add(
          hash_digest)
      file_store_fd = aff4.FACTORY.Create(file_store_urn, FileStoreImage,
                                          mode="w", token=self.token)
      file_store_fd.FromBlobImage(fd)
      file_store_fd.AddIndex(fd.urn)

      file_store_files.append(file_store_fd)

    # Write the hashes attribute to all the created files.
    for file_store_fd in file_store_files:
      file_store_fd.Set(hashes)
      file_store_fd.Close(sync=sync)

    # We do not want to be externally written here.
    return None

  def FindFile(self, fd):
    """Find an AFF4Stream in the file store.

    We take a file in the client space:
      aff4:/C.123123123/fs/os/usr/local/blah

    Hash it and then find the matching RDFURNs:

      aff4:/files/hash/generic/sha256/123123123 (canonical reference)
      aff4:/files/hash/generic/sha1/345345345
      aff4:/files/hash/generic/md5/456456456
      aff4:/files/hash/pecoff/md5/aaaaaaaa (only for PEs)
      aff4:/files/hash/pecoff/sha1/bbbbbbbb (only for PEs)

    Args:
      fd: File open for reading.

    Returns:
      A list of RDFURNs corresponding to the input file.
    """
    hashes = self._HashFile(fd)
    urns_to_check = []

    for hash_type, hash_digest in hashes.ListSetFields():
      hash_digest = str(hash_digest)
      hash_type = hash_type.name

      fingerprint_type = "generic"
      if hash_type.startswith("pecoff_"):
        fingerprint_type = "pecoff"
        hash_type = hash_type[len("pecoff_"):]
      if hash_type not in self.HASH_TYPES[fingerprint_type]:
        continue

      file_store_urn = self.PATH.Add(fingerprint_type).Add(hash_type).Add(
          hash_digest)
      urns_to_check.append(file_store_urn)

    return [data["urn"] for data in aff4.FACTORY.Stat(urns_to_check,
                                                      token=self.token)]

  @staticmethod
  def ListHashes(token=None, age=aff4.NEWEST_TIME):
    """Yields all the hashes in the file store.

    Args:
      token: Security token, instance of ACLToken.
      age: AFF4 age specification. Only get hits corresponding to the given
           age spec. Should be aff4.NEWEST_TIME or a time range given as a
           tuple (start, end) in microseconds since Jan 1st, 1970. If just a
           microseconds value is given it's treated as the higher end of the
           range, i.e. (0, age). See aff4.FACTORY.ParseAgeSpecification for
           details.

    Yields:
      FileStoreHash instances corresponding to all the hashes in the file
      store.

    Raises:
      ValueError: if age was set to aff4.ALL_TIMES.
    """
    if age == aff4.ALL_TIMES:
      raise ValueError("age==aff4.ALL_TIMES is not allowed.")

    urns = []
    for fingerprint_type, hash_types in HashFileStore.HASH_TYPES.iteritems():
      for hash_type in hash_types:
        urns.append(HashFileStore.PATH.Add(fingerprint_type).Add(hash_type))

    for _, values in aff4.FACTORY.MultiListChildren(urns, token=token,
                                                    age=age):
      for value in values:
        yield FileStoreHash(value)

  @classmethod
  def GetClientsForHash(cls, hash_obj, token=None, age=aff4.NEWEST_TIME):
    """Yields client_files for the specified file store hash.

    Args:
      hash_obj: RDFURN that we want to get hits for.
      token: Security token.
      age: AFF4 age specification. Only get hits corresponding to the given
           age spec. Should be aff4.NEWEST_TIME or a time range given as a
           tuple (start, end) in microseconds since Jan 1st, 1970. If just a
           microseconds value is given it's treated as the higher end of the
           range, i.e. (0, age). See aff4.FACTORY.ParseAgeSpecification for
           details.

    Yields:
      RDFURNs corresponding to a client file that has the hash.

    Raises:
      ValueError: if age was set to aff4.ALL_TIMES.
    """
    if age == aff4.ALL_TIMES:
      raise ValueError("age==aff4.ALL_TIMES is not supported.")

    results = cls.GetClientsForHashes([hash_obj], token=token, age=age)
    for _, client_files in results:
      for client_file in client_files:
        yield client_file

  @classmethod
  def GetClientsForHashes(cls, hashes, token=None, age=aff4.NEWEST_TIME):
    """Yields (hash, client_files) pairs for all the specified hashes.

    Args:
      hashes: List of RDFURNs.
      token: Security token.
      age: AFF4 age specification. Only get hits corresponding to the given
           age spec. Should be aff4.NEWEST_TIME or a time range given as a
           tuple (start, end) in microseconds since Jan 1st, 1970. If just a
           microseconds value is given it's treated as the higher end of the
           range, i.e. (0, age). See aff4.FACTORY.ParseAgeSpecification for
           details.

    Yields:
      (hash, client_files) tuples, where hash is a FILE_HASH_TYPE instance and
      client_files is a list of RDFURNs corresponding to client files that
      have the hash.

    Raises:
      ValueError: if age was set to aff4.ALL_TIMES.
    """
    if age == aff4.ALL_TIMES:
      raise ValueError("age==aff4.ALL_TIMES is not supported.")

    timestamp = aff4.FACTORY.ParseAgeSpecification(age)

    for hash_obj, client_files in data_store.DB.MultiResolvePrefix(
        hashes, "index:target:", token=token, timestamp=timestamp):
      yield (cls.FILE_HASH_TYPE(hash_obj),
             [file_urn for _, file_urn, _ in client_files])
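# For reference, the hash-store layout documented in AddFile()/FindFile() can
# be mirrored with plain strings. This is illustrative only, bypasses the
# real RDFURN API, and the helper name is hypothetical.
def hash_store_urn(fingerprint_type, hash_type, digest_hex):
  """Mirror HashFileStore.PATH.Add(...).Add(...).Add(...) chaining."""
  return "aff4:/files/hash/%s/%s/%s" % (fingerprint_type, hash_type,
                                        digest_hex)

# The canonical reference for any file is its generic sha256 entry, e.g. the
# well-known digest of the empty file used above:
EMPTY_SHA256 = ("e3b0c44298fc1c149afbf4c8996fb924"
                "27ae41e4649b934ca495991b7852b855")
print(hash_store_urn("generic", "sha256", EMPTY_SHA256))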
  def ProcessOneHunt(self, exceptions_by_hunt):
    """Reads results for one hunt and processes them."""
    hunt_results_urn, results = (
        hunts_results.HuntResultQueue.ClaimNotificationsForCollection(
            start_time=self.args.start_processing_time,
            token=self.token,
            lease_time=self.lifetime))
    logging.debug("Found %d results for hunt %s", len(results),
                  hunt_results_urn)
    if not results:
      return 0

    hunt_urn = rdfvalue.RDFURN(hunt_results_urn.Dirname())
    batch_size = self.args.batch_size or self.DEFAULT_BATCH_SIZE
    metadata_urn = hunt_urn.Add("ResultsMetadata")
    exceptions_by_plugin = {}
    num_processed_for_hunt = 0
    collection_obj = implementation.GRRHunt.ResultCollectionForHID(hunt_urn)
    try:
      with aff4.FACTORY.OpenWithLock(metadata_urn, lease_time=600,
                                     token=self.token) as metadata_obj:
        all_plugins, used_plugins = self.LoadPlugins(metadata_obj)
        num_processed = int(
            metadata_obj.Get(metadata_obj.Schema.NUM_PROCESSED_RESULTS))
        for batch in utils.Grouper(results, batch_size):
          results = list(
              collection_obj.MultiResolve(
                  [r.value.ResultRecord() for r in batch]))
          self.RunPlugins(hunt_urn, used_plugins, results,
                          exceptions_by_plugin)
          hunts_results.HuntResultQueue.DeleteNotifications(
              batch, token=self.token)
          num_processed += len(batch)
          num_processed_for_hunt += len(batch)
          self.HeartBeat()
          metadata_obj.Set(
              metadata_obj.Schema.NUM_PROCESSED_RESULTS(num_processed))
          metadata_obj.UpdateLease(600)
          if self.CheckIfRunningTooLong():
            logging.warning("Run too long, stopping.")
            break

        metadata_obj.Set(metadata_obj.Schema.OUTPUT_PLUGINS(all_plugins))
        metadata_obj.Set(
            metadata_obj.Schema.NUM_PROCESSED_RESULTS(num_processed))
    except aff4.LockError:
      logging.warn(
          "ProcessHuntResultCollectionsCronFlow: "
          "Could not get lock on hunt metadata %s.", metadata_urn)
      return 0

    if exceptions_by_plugin:
      for plugin, exceptions in exceptions_by_plugin.items():
        exceptions_by_hunt.setdefault(hunt_urn, {}).setdefault(
            plugin, []).extend(exceptions)

    logging.debug("Processed %d results.", num_processed_for_hunt)
    return len(results)
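# The batching above relies on utils.Grouper. A minimal stand-in with the
# assumed semantics (yield fixed-size batches, last batch possibly short) is
# sketched below for illustration; it is not GRR's implementation.
def grouper(iterable, size):
  """Yield lists of at most `size` items from `iterable`."""
  batch = []
  for item in iterable:
    batch.append(item)
    if len(batch) == size:
      yield batch
      batch = []
  if batch:
    yield batch

assert list(grouper(range(5), 2)) == [[0, 1], [2, 3], [4]]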
  def setUp(self):
    super(BasicContextTests, self).setUp()
    self.context = self.to_test_context()
    self.context.LoadCertificates()
    self.session_id = rdfvalue.RDFURN("W:1234")
  def __init__(self, hash_response, is_known=False):
    self.hash_response = hash_response
    self.is_known = is_known
    self.blob_urn = rdfvalue.RDFURN("aff4:/blobs").Add(
        hash_response.data.encode("hex"))
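# Blob URNs are just the hex-encoded hash appended to aff4:/blobs. A quick
# illustrative sketch with hashlib; the blob content is invented and
# binascii.hexlify stands in for the Python 2 .encode("hex") used above.
import binascii
import hashlib

digest = hashlib.sha256(b"example blob contents").digest()
blob_urn = "aff4:/blobs/" + binascii.hexlify(digest).decode("ascii")
# -> aff4:/blobs/<64 hex characters>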
class VFSGRRClient(standard.VFSDirectory):
  """A Remote client."""

  # URN of the index for client labels.
  labels_index_urn = rdfvalue.RDFURN("aff4:/index/labels/clients")

  class SchemaCls(standard.VFSDirectory.SchemaCls):
    """The schema for the client."""
    client_index = rdfvalue.RDFURN("aff4:/index/client")

    CERT = aff4.Attribute("metadata:cert", rdf_crypto.RDFX509Cert,
                          "The PEM encoded cert of the client.")

    FILESYSTEM = aff4.Attribute("aff4:filesystem", rdf_client.Filesystems,
                                "Filesystems on the client.")

    CLIENT_INFO = aff4.Attribute("metadata:ClientInfo",
                                 rdf_client.ClientInformation,
                                 "GRR client information", "GRR client",
                                 default="")

    LAST_BOOT_TIME = aff4.Attribute("metadata:LastBootTime",
                                    rdfvalue.RDFDatetime,
                                    "When the machine was last booted",
                                    "BootTime")

    FIRST_SEEN = aff4.Attribute("metadata:FirstSeen", rdfvalue.RDFDatetime,
                                "First time the client registered with us",
                                "FirstSeen")

    # Information about the host.
    HOSTNAME = aff4.Attribute("metadata:hostname", rdfvalue.RDFString,
                              "Hostname of the host.", "Host",
                              index=client_index)
    FQDN = aff4.Attribute("metadata:fqdn", rdfvalue.RDFString,
                          "Fully qualified hostname of the host.", "FQDN",
                          index=client_index)

    SYSTEM = aff4.Attribute("metadata:system", rdfvalue.RDFString,
                            "Operating System class.", "System")
    UNAME = aff4.Attribute("metadata:uname", rdfvalue.RDFString,
                           "Uname string.", "Uname")
    OS_RELEASE = aff4.Attribute("metadata:os_release", rdfvalue.RDFString,
                                "OS Major release number.", "Release")
    OS_VERSION = aff4.Attribute("metadata:os_version",
                                rdf_client.VersionString,
                                "OS Version number.", "Version")

    # ARCH values come from platform.uname machine value, e.g. x86_64, AMD64.
    ARCH = aff4.Attribute("metadata:architecture", rdfvalue.RDFString,
                          "Architecture.", "Architecture")
    INSTALL_DATE = aff4.Attribute("metadata:install_date",
                                  rdfvalue.RDFDatetime,
                                  "Install Date.", "Install")

    # The knowledge base is used for storing data about the host and users.
    # This is currently a slightly odd object as we only use some of the
    # fields. The proto itself is used in Artifact handling outside of GRR
    # (e.g. Plaso). Over time we will migrate fields into this proto, but for
    # now it is a mix.
    KNOWLEDGE_BASE = aff4.Attribute("metadata:knowledge_base",
                                    rdf_client.KnowledgeBase,
                                    "Artifact Knowledge Base",
                                    "KnowledgeBase")

    GRR_CONFIGURATION = aff4.Attribute(
        "aff4:client_configuration", rdf_protodict.Dict,
        "Running configuration for the GRR client.", "Config")

    LIBRARY_VERSIONS = aff4.Attribute(
        "aff4:library_versions", rdf_protodict.Dict,
        "Running library versions for the client.", "Libraries")

    USERNAMES = aff4.Attribute("aff4:user_names", SpaceSeparatedStringArray,
                               "A space separated list of system users.",
                               "Usernames", index=client_index)

    # This information is duplicated from the INTERFACES attribute but is
    # done to allow for fast searching by mac address.
    MAC_ADDRESS = aff4.Attribute("aff4:mac_addresses", rdfvalue.RDFString,
                                 "A hex encoded MAC address.", "MAC",
                                 index=client_index)

    KERNEL = aff4.Attribute("aff4:kernel_version", rdfvalue.RDFString,
                            "Kernel version string.", "KernelVersion")

    # Same for IP addresses.
    HOST_IPS = aff4.Attribute("aff4:host_ips", rdfvalue.RDFString,
                              "An IP address.", "Host_ip",
                              index=client_index)

    PING = aff4.Attribute("metadata:ping", rdfvalue.RDFDatetime,
                          "The last time the server heard from this client.",
                          "LastCheckin", versioned=False, default=0)

    CLOCK = aff4.Attribute("metadata:clock", rdfvalue.RDFDatetime,
                           "The last clock read on the client "
                           "(Can be used to estimate client clock skew).",
                           "Clock", versioned=False)

    CLIENT_IP = aff4.Attribute("metadata:client_ip", rdfvalue.RDFString,
                               "The ip address this client connected from.",
                               "Client_ip", versioned=False)

    # This is the last foreman rule that applied to us.
    LAST_FOREMAN_TIME = aff4.Attribute(
        "aff4:last_foreman_time", rdfvalue.RDFDatetime,
        "The last time the foreman checked us.", versioned=False)

    LAST_INTERFACES = aff4.Attribute(
        "aff4:last_interfaces", rdf_client.Interfaces,
        "Last seen network interfaces. Full history is maintained in the "
        "clientid/network object. Separated for performance reasons.",
        versioned=False)

    LAST_CRASH = aff4.Attribute("aff4:last_crash", rdf_client.ClientCrash,
                                "Last client crash.",
                                creates_new_object_version=False,
                                versioned=False)

    VOLUMES = aff4.Attribute("aff4:volumes", rdf_client.Volumes,
                             "Client disk volumes.")

    HARDWARE_INFO = aff4.Attribute("aff4:hardware_info",
                                   rdf_client.HardwareInfo,
                                   "Various hardware information.",
                                   default="")

    MEMORY_SIZE = aff4.Attribute(
        "aff4:memory_size", rdfvalue.ByteSize,
        "Amount of memory this client's machine has.")

  # Valid client ids
  CLIENT_ID_RE = re.compile(r"^C\.[0-9a-fA-F]{16}$")

  @property
  def age(self):
    """RDFDatetime at which the object was created."""
    # TODO(user) move up to AFF4Object after some analysis of how .age is
    # used in the codebase.
    aff4_type = self.Get(self.Schema.TYPE)
    if aff4_type:
      return aff4_type.age
    else:
      # If there is no type attribute yet, we have only just been created and
      # not flushed yet, so just set the timestamp to now.
      return rdfvalue.RDFDatetime().Now()

  def Initialize(self):
    # Our URN must be a valid client.id.
    self.client_id = rdf_client.ClientURN(self.urn)

  def Update(self, attribute=None, priority=None):
    if attribute == "CONTAINS":
      flow_id = flow.GRRFlow.StartFlow(client_id=self.client_id,
                                       flow_name="Interrogate",
                                       token=self.token, priority=priority)
      return flow_id

  def OpenMember(self, path, mode="rw"):
    return aff4.AFF4Volume.OpenMember(self, path, mode=mode)

  AFF4_PREFIXES = {
      rdf_paths.PathSpec.PathType.OS: "/fs/os",
      rdf_paths.PathSpec.PathType.TSK: "/fs/tsk",
      rdf_paths.PathSpec.PathType.REGISTRY: "/registry",
      rdf_paths.PathSpec.PathType.MEMORY: "/devices/memory",
      rdf_paths.PathSpec.PathType.TMPFILE: "/temp"
  }

  @staticmethod
  def ClientURNFromURN(urn):
    return rdf_client.ClientURN(rdfvalue.RDFURN(urn).Split()[0])

  @staticmethod
  def PathspecToURN(pathspec, client_urn):
    """Returns a mapping between a pathspec and an AFF4 URN.

    Args:
      pathspec: The PathSpec instance to convert.
      client_urn: A URN of any object within the client. We use it to find
        the client id.

    Returns:
      A urn that corresponds to this pathspec.

    Raises:
      ValueError: If pathspec is not of the correct type.
    """
    client_urn = rdf_client.ClientURN(client_urn)

    if not isinstance(pathspec, rdfvalue.RDFValue):
      raise ValueError("Pathspec should be an rdfvalue.")

    # If the first level is OS and the second level is TSK, it's probably a
    # mount point resolution. We map it into the tsk branch. For example if
    # we get:
    # path: \\\\.\\Volume{1234}\\
    # pathtype: OS
    # mount_point: /c:/
    # nested_path {
    #   path: /windows/
    #   pathtype: TSK
    # }
    # We map this to aff4://client_id/fs/tsk/\\\\.\\Volume{1234}\\/windows/
    dev = pathspec[0].path
    if pathspec[0].HasField("offset"):
      # We divide here just to get prettier numbers in the GUI.
      dev += ":" + str(pathspec[0].offset / 512)

    if (len(pathspec) > 1 and
        pathspec[0].pathtype == rdf_paths.PathSpec.PathType.OS and
        pathspec[1].pathtype == rdf_paths.PathSpec.PathType.TSK):
      result = [VFSGRRClient.AFF4_PREFIXES[rdf_paths.PathSpec.PathType.TSK],
                dev]

      # Skip the top level pathspec.
      pathspec = pathspec[1]
    else:
      # For now just map the top level prefix based on the first pathtype.
      result = [VFSGRRClient.AFF4_PREFIXES[pathspec[0].pathtype]]

    for p in pathspec:
      component = p.path

      # The following encodes different pathspec properties into the AFF4
      # path in such a way that unique files on the client are mapped to
      # unique URNs in the AFF4 space. Note that this transformation does not
      # need to be reversible since we always use the PathSpec when accessing
      # files on the client.
      if p.HasField("offset"):
        component += ":" + str(p.offset / 512)

      # Support ADS names.
      if p.HasField("stream_name"):
        component += ":" + p.stream_name

      result.append(component)

    return client_urn.Add("/".join(result))

  def GetSummary(self):
    """Gets a client summary object.

    Returns:
      rdf_client.ClientSummary
    """
    self.max_age = 0
    summary = rdf_client.ClientSummary(client_id=self.urn)
    summary.system_info.node = self.Get(self.Schema.HOSTNAME)
    summary.system_info.system = self.Get(self.Schema.SYSTEM)
    summary.system_info.release = self.Get(self.Schema.OS_RELEASE)
    summary.system_info.version = str(self.Get(self.Schema.OS_VERSION, ""))
    summary.system_info.kernel = self.Get(self.Schema.KERNEL)
    summary.system_info.fqdn = self.Get(self.Schema.FQDN)
    summary.system_info.machine = self.Get(self.Schema.ARCH)
    summary.system_info.install_date = self.Get(self.Schema.INSTALL_DATE)
    kb = self.Get(self.Schema.KNOWLEDGE_BASE)
    if kb:
      summary.users = kb.users
    summary.interfaces = self.Get(self.Schema.LAST_INTERFACES)
    summary.client_info = self.Get(self.Schema.CLIENT_INFO)
    summary.serial_number = self.Get(self.Schema.HARDWARE_INFO).serial_number
    summary.timestamp = self.age
    summary.system_manufacturer = self.Get(
        self.Schema.HARDWARE_INFO).system_manufacturer

    return summary

  def AddLabels(self, *label_names, **kwargs):
    super(VFSGRRClient, self).AddLabels(*label_names, **kwargs)
    with aff4.FACTORY.Create(standard.LabelSet.CLIENT_LABELS_URN,
                             "LabelSet", mode="w",
                             token=self.token) as client_labels_index:
      for label_name in label_names:
        client_labels_index.Add(label_name)
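# The pathspec-to-URN mapping above can be illustrated with plain strings.
# This sketch only mirrors the documented scheme (prefix chosen by pathtype,
# ":offset/512" and ":stream_name" suffixes) and is not the real API.
AFF4_PREFIXES_BY_NAME = {"OS": "/fs/os", "TSK": "/fs/tsk",
                         "REGISTRY": "/registry"}

def pathspec_component(path, offset=None, stream_name=None):
  """Encode offset and ADS stream name the way PathspecToURN does."""
  component = path
  if offset is not None:
    component += ":" + str(offset // 512)  # sector units read nicer in the GUI
  if stream_name is not None:
    component += ":" + stream_name
  return component

# A TSK path nested under an OS volume lands in the client's /fs/tsk branch.
components = [AFF4_PREFIXES_BY_NAME["TSK"], r"\\.\Volume{1234}",
              pathspec_component("/windows/")]
# RDFURN.Add() normalizes the slashes; a rough textual equivalent:
urn = "aff4:/C.00aaeccbb45f33a3/" + "/".join(c.strip("/") for c in components)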
class AbstractClientStatsCronFlow(cronjobs.SystemCronFlow):
  """A cron job which opens every client in the system.

  We feed all the client objects to the AbstractClientStatsCollector
  instances.
  """

  CLIENT_STATS_URN = rdfvalue.RDFURN("aff4:/stats/ClientFleetStats")

  def BeginProcessing(self):
    pass

  def ProcessClient(self, client):
    raise NotImplementedError()

  def FinishProcessing(self):
    pass

  def GetClientLabelsList(self, client):
    """Get set of labels applied to this client."""
    client_labels = [aff4_grr.ALL_CLIENTS_LABEL]
    label_set = client.GetLabelsNames(owner="GRR")
    client_labels.extend(label_set)
    return client_labels

  def _StatsForLabel(self, label):
    if label not in self.stats:
      self.stats[label] = aff4.FACTORY.Create(
          self.CLIENT_STATS_URN.Add(label), "ClientFleetStats",
          mode="w", token=self.token)
    return self.stats[label]

  @flow.StateHandler()
  def Start(self):
    """Retrieve all the clients for the AbstractClientStatsCollectors."""
    try:
      self.stats = {}

      self.BeginProcessing()

      root = aff4.FACTORY.Open(aff4.ROOT_URN, token=self.token)
      children_urns = list(root.ListChildren())
      logging.debug("Found %d children.", len(children_urns))

      processed_count = 0
      for child in aff4.FACTORY.MultiOpen(children_urns, mode="r",
                                          token=self.token,
                                          age=aff4.NEWEST_TIME):
        if isinstance(child, aff4.AFF4Object.VFSGRRClient):
          self.ProcessClient(child)
          processed_count += 1

        # This flow is not dead: we don't want to run out of lease time.
        self.HeartBeat()

      self.FinishProcessing()
      for fd in self.stats.values():
        fd.Close()

      logging.info("%s: processed %d clients.", self.__class__.__name__,
                   processed_count)
    except Exception as e:  # pylint: disable=broad-except
      logging.exception("Error while calculating stats: %s", e)
      raise
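# A subclass only needs the three hooks above. A hypothetical sketch (the
# class name and OS-counting logic are illustrative, not an actual GRR cron
# job):
import logging


class OSCountCronFlow(AbstractClientStatsCronFlow):
  """Hypothetical example: count clients per operating system class."""

  def BeginProcessing(self):
    self.os_counts = {}

  def ProcessClient(self, client):
    system = str(client.Get(client.Schema.SYSTEM) or "Unknown")
    self.os_counts[system] = self.os_counts.get(system, 0) + 1

  def FinishProcessing(self):
    for system, count in sorted(self.os_counts.items()):
      logging.info("Clients running %s: %d", system, count)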
class StatsStore(aff4.AFF4Volume):
  """Implementation of the long-term storage of collected stats data.

  This class allows writing current stats data to the data store, and
  reading and deleting it. StatsStore uses data_store to store the data. All
  historical stats data are stored in a single data store subject per
  process. By process we mean, for example: "admin UI", "worker #1",
  "worker #3", etc.

  Stats data are stored as subject's attributes.
  """

  DATA_STORE_ROOT = rdfvalue.RDFURN("aff4:/stats_store")

  ALL_TIMESTAMPS = data_store.DataStore.ALL_TIMESTAMPS
  NEWEST_TIMESTAMP = data_store.DataStore.NEWEST_TIMESTAMP

  def Initialize(self):
    super(StatsStore, self).Initialize()
    if self.urn is None:
      self.urn = self.DATA_STORE_ROOT

  def WriteStats(self, process_id=None, timestamp=None, sync=False):
    """Writes current stats values to the data store with a given timestamp."""
    if not process_id:
      raise ValueError("process_id can't be None")

    process_data = aff4.FACTORY.Create(self.urn.Add(process_id),
                                       "StatsStoreProcessData",
                                       mode="rw", token=self.token)
    process_data.WriteStats(timestamp=timestamp, sync=sync)

  def ListUsedProcessIds(self):
    """List process ids that were used when saving data to stats store."""
    return [urn.Basename() for urn in self.ListChildren()]

  def ReadMetadata(self, process_id=None):
    """Reads metadata of stored values for the given process."""
    if not process_id:
      raise ValueError("process_id can't be None")

    results = self.MultiReadMetadata(process_ids=[process_id])

    try:
      return results[process_id]
    except KeyError:
      return {}

  def MultiReadMetadata(self, process_ids=None):
    """Reads metadata of stored values for multiple given processes."""
    if not process_ids:
      process_ids = self.ListUsedProcessIds()

    subjects = [
        self.DATA_STORE_ROOT.Add(process_id) for process_id in process_ids
    ]
    subjects_data = aff4.FACTORY.MultiOpen(
        subjects, mode="r", token=self.token,
        aff4_type="StatsStoreProcessData")

    results = {}
    for subject_data in subjects_data:
      results[subject_data.urn.Basename()] = subject_data.Get(
          subject_data.Schema.METRICS_METADATA).AsDict()

    for process_id in process_ids:
      results.setdefault(process_id, {})

    return results

  def ReadStats(self, process_id=None, predicate_regex=".*",
                timestamp=ALL_TIMESTAMPS, limit=10000):
    """Reads stats values from the data store for the current process."""
    if not process_id:
      raise ValueError("process_id can't be None")

    results = self.MultiReadStats(process_ids=[process_id],
                                  predicate_regex=predicate_regex,
                                  timestamp=timestamp, limit=limit)
    try:
      return results[process_id]
    except KeyError:
      return {}

  def MultiReadStats(self, process_ids=None, predicate_regex=".*",
                     timestamp=ALL_TIMESTAMPS, limit=10000):
    """Reads historical data for multiple process ids at once."""
    if not process_ids:
      process_ids = self.ListUsedProcessIds()

    multi_metadata = self.MultiReadMetadata(process_ids=process_ids)

    subjects = [
        self.DATA_STORE_ROOT.Add(process_id) for process_id in process_ids
    ]

    multi_query_results = data_store.DB.MultiResolveRegex(
        subjects, StatsStoreProcessData.STATS_STORE_PREFIX + predicate_regex,
        token=self.token, timestamp=timestamp, limit=limit)

    results = {}
    for subject, subject_results in multi_query_results:
      subject = rdfvalue.RDFURN(subject)
      subject_results = sorted(subject_results, key=lambda x: x[2])
      subject_metadata = multi_metadata.get(subject.Basename(), {})

      part_results = {}
      for predicate, value_string, timestamp in subject_results:
        metric_name = predicate[
            len(StatsStoreProcessData.STATS_STORE_PREFIX):]

        try:
          metadata = subject_metadata[metric_name]
        except KeyError:
          continue

        stored_value = StatsStoreValue(value_string)

        fields_values = []
        if metadata.fields_defs:
          for stored_field_value in stored_value.fields_values:
            fields_values.append(stored_field_value.value)

          current_dict = part_results.setdefault(metric_name, {})
          for field_value in fields_values[:-1]:
            current_dict = current_dict.setdefault(field_value, {})

          result_values_list = current_dict.setdefault(fields_values[-1], [])
        else:
          result_values_list = part_results.setdefault(metric_name, [])

        result_values_list.append((stored_value.value, timestamp))

      results[subject.Basename()] = part_results

    return results

  def DeleteStats(self, process_id=None, timestamp=ALL_TIMESTAMPS,
                  sync=False):
    """Deletes all stats in the given time range."""
    if not process_id:
      raise ValueError("process_id can't be None")

    process_data = aff4.FACTORY.Create(self.urn.Add(process_id),
                                       "StatsStoreProcessData",
                                       mode="w", token=self.token)
    process_data.DeleteStats(timestamp=timestamp, sync=sync)
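# The shape of MultiReadStats() results is worth spelling out: metrics
# without fields map to flat lists of (value, timestamp) pairs, while metrics
# with fields map to nested dicts keyed by field values. The metric names,
# field values and numbers below are invented for illustration.
example_results = {
    "worker_1": {
        # Metric without fields: list of (value, timestamp) pairs.
        "outstanding_tasks": [(3, 1419334800000000), (5, 1419334860000000)],
        # Metric with one field: nested dict keyed by the field value.
        "requests_count": {
            "http": [(120, 1419334800000000)],
            "https": [(48, 1419334800000000)],
        },
    },
}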
  def FileNameToURN(self, fname):
    return rdfvalue.RDFURN(self.client_id).Add("/fs/os").Add(
        os.path.join(self.base_path, "searching", fname))
  def CheckClientAccess(self, subject, token):
    client_id, _ = rdfvalue.RDFURN(subject).Split(2)
    client_urn = rdfvalue.ClientURN(client_id)

    return self.CheckACL(token, client_urn)
  def testCronJobACLWorkflow(self):
    with self.ACLChecksDisabled():
      cronjobs.ScheduleSystemCronFlows(
          names=[cron_system.OSBreakDown.__name__], token=self.token)
      cronjobs.CRON_MANAGER.DisableJob(
          rdfvalue.RDFURN("aff4:/cron/OSBreakDown"))

    # Open up and click on Cron Job Viewer.
    self.Open("/")
    self.WaitUntil(self.IsElementPresent, "client_query")
    self.Click("css=a[grrtarget=ManageCron]")

    # Select a cron job.
    self.Click("css=td:contains('OSBreakDown')")

    # Click on Enable button and check that dialog appears.
    self.Click("css=button[name=EnableCronJob]")
    self.WaitUntil(self.IsTextPresent,
                   "Are you sure you want to ENABLE this cron job?")

    # Click on "Proceed" and wait for authorization dialog to appear.
    self.Click("css=button[name=Proceed]")
    self.WaitUntil(self.IsElementPresent,
                   "css=h3:contains('Create a new approval')")

    # This asks the user "test" (which is us) to approve the request.
    self.Type("css=input[id=acl_approver]", "test")
    self.Type("css=input[id=acl_reason]", self.reason)
    self.Click("acl_dialog_submit")

    # "Request Approval" dialog should go away.
    self.WaitUntilNot(self.IsVisible, "css=.modal-backdrop")

    self.Open("/")
    self.WaitUntilEqual("1", self.GetText, "notification_button")
    self.Click("notification_button")
    self.Click("css=td:contains('Please grant access to a cron job')")

    self.WaitUntilContains("Grant Access for GRR Use", self.GetText,
                           "css=h2:contains('Grant')")
    self.WaitUntil(self.IsTextPresent, "The user test has requested")

    # Cron job overview should be visible.
    self.WaitUntil(self.IsTextPresent, "aff4:/cron/OSBreakDown")
    self.WaitUntil(self.IsTextPresent, "CRON_ARGS")

    self.Click("css=button:contains('Approve')")
    self.WaitUntil(
        self.IsTextPresent,
        "You have granted access for aff4:/cron/OSBreakDown to test")

    # Now test starts up.
    self.Open("/")

    # We should be notified that we have an approval.
    self.WaitUntilEqual("1", self.GetText, "notification_button")
    self.Click("notification_button")
    self.WaitUntil(
        self.GetText, "css=td:contains('has granted you access to "
        "a cron job')")
    self.Click("css=tr:contains('has granted you access') a")

    # Enable OSBreakDown cron job (it should be selected by default).
    self.Click("css=td:contains('OSBreakDown')")

    # Click on Enable and wait for dialog again.
    self.Click("css=button[name=EnableCronJob]:not([disabled])")
    self.WaitUntil(self.IsTextPresent,
                   "Are you sure you want to ENABLE this cron job?")

    # Click on "Proceed" and wait for authorization dialog to appear.
    self.Click("css=button[name=Proceed]")

    # This is insufficient - we need 2 approvers.
    self.WaitUntilContains("Requires 2 approvers for access.", self.GetText,
                           "css=div#acl_form")

    # Let's add another approver.
    token = access_control.ACLToken(username="******")
    flow.GRRFlow.StartFlow(
        flow_name="GrantCronJobApprovalFlow",
        subject_urn=rdfvalue.RDFURN("aff4:/cron/OSBreakDown"),
        reason=self.reason,
        delegate="test",
        token=token)

    # Now test starts up.
    self.Open("/")

    # We should be notified that we have an approval.
    self.WaitUntilEqual("1", self.GetText, "notification_button")
    self.Click("notification_button")
    self.Click("css=tr:contains('has granted you access') a")
    # Wait for modal backdrop to go away.
    self.WaitUntilNot(self.IsVisible, "css=.modal-backdrop")

    self.WaitUntil(self.IsTextPresent, "OSBreakDown")

    # Enable OSBreakDown cron job (it should be selected by default).
    self.Click("css=button[name=EnableCronJob]:not([disabled])")
    self.WaitUntil(self.IsTextPresent,
                   "Are you sure you want to ENABLE this cron job?")

    # Click on "Proceed" and wait for authorization dialog to appear.
    self.Click("css=button[name=Proceed]")

    # This is still insufficient - one of the approvers should have the
    # "admin" label.
    self.WaitUntilContains(
        "At least 1 approver(s) should have 'admin' label.", self.GetText,
        "css=div#acl_form")

    # Let's make "approver" an admin.
    with self.ACLChecksDisabled():
      self.CreateAdminUser("approver")

    # And try again.
    self.Open("/")
    self.Click("css=a[grrtarget=ManageCron]")

    # Select and enable OSBreakDown cron job.
    self.Click("css=td:contains('OSBreakDown')")

    # Click on Enable button and check that dialog appears.
    self.Click("css=button[name=EnableCronJob]:not([disabled])")
    self.WaitUntil(self.IsTextPresent,
                   "Are you sure you want to ENABLE this cron job?")

    # Click on "Proceed" and wait for success label to appear.
    # Also check that "Proceed" button gets disabled.
    self.Click("css=button[name=Proceed]")
    self.WaitUntil(self.IsTextPresent, "Cron job was ENABLEd successfully!")
  def BuildTable(self, start_row, end_row, request):
    """Renders the table."""
    container_urn = rdfvalue.RDFURN(request.REQ["container"])
    container = aff4.FACTORY.Open(container_urn, token=request.token)
    self.AddDynamicColumns(container)

    sort_direction = request.REQ.get("sSortDir_0", "asc") == "desc"

    # Get the query from the user.
    query_expression = request.REQ.get("query")
    if not query_expression:
      query_expression = "subject matches '.'"

    limit = max(self.max_items, end_row)

    key = utils.SmartUnicode(container_urn)
    key += ":" + query_expression + ":" + str(limit)
    try:
      children = self.content_cache.Get(key)
    except KeyError:
      children = {}
      for c in sorted(container.Query(query_expression, limit=limit)):
        children[utils.SmartUnicode(c.urn)] = c

      self.content_cache.Put(key, children)

    child_names = children.keys()
    child_names.sort(reverse=sort_direction)

    if len(children) == self.max_items:
      self.columns[0].AddElement(0, rdfvalue.RDFString("nuke"))
      msg = ("This table contains more than %d entries, please use a filter "
             "string or download it as a CSV file.") % self.max_items
      self.columns[1].AddElement(0, rdfvalue.RDFString(msg))
      self.AddRow({}, row_index=0)
      return

    row_index = start_row

    # Make sure the table knows how large it is.
    self.size = len(child_names)

    for child_urn in child_names[row_index:]:
      fd = children[child_urn]
      row_attributes = dict()

      # Add the fd to all the columns.
      for column in self.columns:
        try:
          column.AddRowFromFd(row_index, fd)
        except AttributeError:
          pass

      if "Container" in fd.behaviours:
        row_attributes["Icon"] = dict(icon="directory")
      else:
        row_attributes["Icon"] = dict(icon="file")

      self.AddRow(row_attributes, row_index=row_index)
      row_index += 1

      if row_index > end_row:
        return
  def testSchedule(self):
    """Test the ability to schedule a task."""
    test_queue = rdfvalue.RDFURN("fooSchedule")
    task = rdf_flows.GrrMessage(queue=test_queue, task_ttl=5,
                                session_id="aff4:/Test",
                                generate_task_id=True)
    manager = queue_manager.QueueManager(token=self.token)
    with data_store.DB.GetMutationPool(token=self.token) as pool:
      manager.Schedule([task], pool)

    self.assertGreater(task.task_id, 0)
    self.assertGreater(task.task_id & 0xffffffff, 0)
    self.assertEqual(
        (long(self._current_mock_time * 1000) & 0xffffffff) << 32,
        task.task_id & 0x1fffffff00000000)
    self.assertEqual(task.task_ttl, 5)

    stored_tasks = data_store.DB.QueueQueryTasks(test_queue, limit=100000,
                                                 token=self.token)
    self.assertEqual(len(stored_tasks), 1)
    stored_task = stored_tasks[0]
    self.assertGreater(stored_task.eta, 0)
    stored_task.eta = None

    self.assertRDFValuesEqual(stored_task, task)

    # Get a lease on the task.
    tasks = manager.QueryAndOwn(test_queue, lease_seconds=100, limit=100)

    self.assertEqual(len(tasks), 1)
    self.assertEqual(tasks[0].task_ttl, 4)

    self.assertEqual(tasks[0].session_id, "aff4:/Test")

    # If we try to get another lease on it we should fail.
    self._current_mock_time += 10
    tasks = manager.QueryAndOwn(test_queue, lease_seconds=100, limit=100)

    self.assertEqual(len(tasks), 0)

    # However after 100 seconds this should work again.
    self._current_mock_time += 110
    tasks = manager.QueryAndOwn(test_queue, lease_seconds=100, limit=100)

    self.assertEqual(len(tasks), 1)
    self.assertEqual(tasks[0].task_ttl, 3)

    # Check now that after a few retransmits we drop the message.
    for i in range(2, 0, -1):
      self._current_mock_time += 110
      tasks = manager.QueryAndOwn(test_queue, lease_seconds=100)

      self.assertEqual(len(tasks), 1)
      self.assertEqual(tasks[0].task_ttl, i)

    # The task is now gone.
    self._current_mock_time += 110
    tasks = manager.QueryAndOwn(test_queue, lease_seconds=100)
    self.assertEqual(len(tasks), 0)
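# The assertions above imply a task id layout where the low 32 bits of the
# scheduling time in milliseconds occupy the high word and a nonzero value
# fills the low word. A plain-Python sketch of that layout; the random low
# word here is a stand-in, not GRR's actual id generator.
import random
import time


def make_task_id(now_seconds):
  """Pack scheduling time (ms) and randomness into a single 64-bit id."""
  time_part = (int(now_seconds * 1000) & 0xffffffff) << 32
  random_part = random.randint(1, 0xffffffff)
  return time_part | random_part

now = time.time()
task_id = make_task_id(now)
assert task_id & 0xffffffff > 0                        # low word is nonzero
assert task_id >> 32 == int(now * 1000) & 0xffffffff   # time in the high word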
def DownloadCollection(coll_path, target_path, token=None, overwrite=False,
                       dump_client_info=False, flatten=False,
                       max_threads=10):
  """Iterate through a Collection object downloading all files.

  Args:
    coll_path: Path to an AFF4 collection.
    target_path: Base directory to write to.
    token: Token for access.
    overwrite: If True, overwrite existing files.
    dump_client_info: If True, this will detect client paths, and dump a yaml
      version of the client object to the root path. This is useful for
      seeing the hostname/users of the machine the client id refers to.
    flatten: If True, produce a "files" flat folder with links to all the
      found files.
    max_threads: Use this many threads to do the downloads.
  """
  completed_clients = set()
  try:
    coll = aff4.FACTORY.Open(coll_path, aff4_type="RDFValueCollection",
                             token=token)
  except IOError:
    logging.error(
        "%s is not a valid collection. Typo? "
        "Are you sure something was written to it?", coll_path)
    return

  thread_pool = threadpool.ThreadPool.Factory("Downloader", max_threads)
  thread_pool.Start()

  logging.info("Expecting to download %s files", coll.size)

  # Collections can include anything they want, but we only handle RDFURN and
  # StatEntry entries in this function.
  for grr_message in coll:
    source = None

    # If a raw message, work out the type.
    if isinstance(grr_message, rdf_flows.GrrMessage):
      source = grr_message.source
      grr_message = grr_message.payload

    # Collections can contain AFF4ObjectSummary objects which encapsulate
    # RDFURNs and StatEntrys.
    if isinstance(grr_message, rdf_client.AFF4ObjectSummary):
      urn = grr_message.urn
    elif isinstance(grr_message, rdfvalue.RDFURN):
      urn = grr_message
    elif isinstance(grr_message, rdf_client.StatEntry):
      urn = rdfvalue.RDFURN(grr_message.aff4path)
    elif isinstance(grr_message, file_finder.FileFinderResult):
      urn = rdfvalue.RDFURN(grr_message.stat_entry.aff4path)
    elif isinstance(grr_message, rdfvalue.RDFBytes):
      try:
        os.makedirs(target_path)
      except OSError:
        pass
      try:
        # We just dump out bytes and carry on.
        client_id = source.Split()[0]
        with open(os.path.join(target_path, client_id), "wb") as fd:
          fd.write(str(grr_message))
      except AttributeError:
        pass
      continue
    else:
      continue

    # Handle dumping client info, but only once per client.
    client_id = urn.Split()[0]
    re_match = aff4.AFF4Object.VFSGRRClient.CLIENT_ID_RE.match(client_id)
    if dump_client_info and re_match and client_id not in completed_clients:
      args = (rdf_client.ClientURN(client_id), target_path, token, overwrite)
      thread_pool.AddTask(target=DumpClientYaml, args=args,
                          name="ClientYamlDownloader")
      completed_clients.add(client_id)

    # Now queue downloading the actual files.
    args = (urn, target_path, token, overwrite)
    if flatten:
      target = CopyAndSymlinkAFF4ToLocal
    else:
      target = CopyAFF4ToLocal
    thread_pool.AddTask(target=target, args=args, name="Downloader")

  # Join and stop the threadpool.
  thread_pool.Stop()
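# A hedged usage sketch: the hunt results path and output directory are
# hypothetical, and `token` is assumed to be a valid ACL token obtained
# elsewhere.
DownloadCollection("aff4:/hunts/H:12345678/Results", "/tmp/hunt_export",
                   token=token, overwrite=False, dump_client_info=True,
                   flatten=True, max_threads=10)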
  def GenerateSample(self, number=0):
    return rdfvalue.RDFURN("aff4:/C.12342%s/fs/os/" % number)
class CronManager(object):
  """CronManager is used to schedule/terminate cron jobs."""

  CRON_JOBS_PATH = rdfvalue.RDFURN("aff4:/cron")

  def ScheduleFlow(self, cron_args=None, job_name=None, token=None,
                   disabled=False):
    """Creates a cron job that runs the given flow with a given frequency.

    Args:
      cron_args: A protobuf of type CreateCronJobFlowArgs.
      job_name: Use this job_name instead of an autogenerated unique name
        (used for system cron jobs - we want them to have well-defined
        persistent names).
      token: Security token used for data store access.
      disabled: If True, the job object will be created, but will be
        disabled.

    Returns:
      URN of the cron job created.
    """
    if not job_name:
      uid = utils.PRNG.GetUShort()
      job_name = "%s_%s" % (cron_args.flow_runner_args.flow_name, uid)

    cron_job_urn = self.CRON_JOBS_PATH.Add(job_name)
    with aff4.FACTORY.Create(cron_job_urn, aff4_type=CronJob, mode="rw",
                             token=token,
                             force_new_version=False) as cron_job:
      # If the cronjob was already present we don't want to overwrite the
      # original start_time.
      existing_cron_args = cron_job.Get(cron_job.Schema.CRON_ARGS)
      if existing_cron_args and existing_cron_args.start_time:
        cron_args.start_time = existing_cron_args.start_time

      if cron_args != existing_cron_args:
        cron_job.Set(cron_job.Schema.CRON_ARGS(cron_args))

      if disabled != cron_job.Get(cron_job.Schema.DISABLED):
        cron_job.Set(cron_job.Schema.DISABLED(disabled))

    return cron_job_urn

  def ListJobs(self, token=None):
    """Returns a generator of URNs of all currently running cron jobs."""
    return aff4.FACTORY.Open(self.CRON_JOBS_PATH,
                             token=token).ListChildren()

  def EnableJob(self, job_urn, token=None):
    """Enable cron job with the given URN."""
    cron_job = aff4.FACTORY.Open(job_urn, mode="rw", aff4_type=CronJob,
                                 token=token)
    cron_job.Set(cron_job.Schema.DISABLED(0))
    cron_job.Close()

  def DisableJob(self, job_urn, token=None):
    """Disable cron job with the given URN."""
    cron_job = aff4.FACTORY.Open(job_urn, mode="rw", aff4_type=CronJob,
                                 token=token)
    cron_job.Set(cron_job.Schema.DISABLED(1))
    cron_job.Close()

  def DeleteJob(self, job_urn, token=None):
    """Deletes cron job with the given URN."""
    aff4.FACTORY.Delete(job_urn, token=token)

  def RunOnce(self, token=None, force=False, urns=None):
    """Tries to lock and run cron jobs.

    Args:
      token: Security token.
      force: If True, force a run.
      urns: List of URNs to run. If unset, run them all.
    """
    urns = urns or self.ListJobs(token=token)
    for cron_job_urn in urns:
      try:
        with aff4.FACTORY.OpenWithLock(cron_job_urn, blocking=False,
                                       token=token,
                                       lease_time=600) as cron_job:
          try:
            logging.info("Running cron job: %s", cron_job.urn)
            cron_job.Run(force=force)
          except Exception as e:  # pylint: disable=broad-except
            logging.exception("Error processing cron job %s: %s",
                              cron_job.urn, e)
            stats.STATS.IncrementCounter("cron_internal_error")
      except aff4.LockError:
        pass
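# A hedged usage sketch. Only ScheduleFlow/EnableJob/RunOnce come from the
# code above; the CreateCronJobFlowArgs constructor, its import location and
# `token` are assumptions made for illustration.
manager = CronManager()
cron_args = CreateCronJobFlowArgs()
cron_args.flow_runner_args.flow_name = "OSBreakDown"

# Create the job disabled, then enable it and force a single run.
job_urn = manager.ScheduleFlow(cron_args=cron_args, job_name="OSBreakDown",
                               token=token, disabled=True)
manager.EnableJob(job_urn, token=token)
manager.RunOnce(urns=[job_urn], token=token, force=True)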
  def __init__(self, urn, **kwargs):
    if urn is None:
      urn = rdfvalue.RDFURN("aff4:/tmp").Add("%X" % utils.PRNG.GetULong())

    super(TempImageFile, self).__init__(urn, **kwargs)
raise ValueError("AFF4 path must start with %s.", python_hack_root_urn) context = ["Platform:%s" % platform.title(), "Client Context"] maintenance_utils.UploadSignedConfigBlob(content, aff4_path=aff4_path, client_context=context, token=token) elif flags.FLAGS.subparser_name == "upload_exe": content = open(flags.FLAGS.file, "rb").read(1024 * 1024 * 30) context = [ "Platform:%s" % flags.FLAGS.platform.title(), "Client Context" ] if flags.FLAGS.dest_path: dest_path = rdfvalue.RDFURN(flags.FLAGS.dest_path) else: dest_path = grr_config.CONFIG.Get( "Executables.aff4_path", context=context).Add(os.path.basename(flags.FLAGS.file)) # Now upload to the destination. maintenance_utils.UploadSignedConfigBlob(content, aff4_path=dest_path, client_context=context, token=token) print "Uploaded to %s" % dest_path elif flags.FLAGS.subparser_name == "sign_component": maintenance_utils.SignComponentContent(flags.FLAGS.component_filename,
def CreateClientIndex(token=None):
  return aff4.FACTORY.Create(rdfvalue.RDFURN("aff4:/client_index"),
                             aff4_type=AFF4ClientIndex,
                             mode="rw",
                             object_exists=True,
                             token=token)
  def setUp(self):
    super(BigQueryOutputPluginTest, self).setUp()
    self.client_id = self.SetupClients(1)[0]
    self.results_urn = self.client_id.Add("Results")
    self.base_urn = rdfvalue.RDFURN("aff4:/foo/bar")
  def ToHuntURN(self):
    return rdfvalue.RDFURN("aff4:/hunts").Add(self.hunt_id)
class FileStore(aff4.AFF4Volume):
  """Filestore for files downloaded from clients.

  Modules can register for file content by creating paths under
  "aff4:/files". By default files created in this namespace can be read by
  users that have the URN (hash). See lib/aff4_objects/user_managers.py.

  Filestores are operated on according to their PRIORITY value, lowest
  first.
  """

  PATH = rdfvalue.RDFURN("aff4:/files")
  CHUNK_SIZE = 5 * 512 * 1024
  PRIORITY = 99  # Default low priority for subclasses.
  EXTERNAL = False

  def GetChildrenByPriority(self, allow_external=True):
    """Generator that yields active filestore children in priority order."""
    for child in sorted(self.OpenChildren(), key=lambda x: x.PRIORITY):
      if not allow_external and child.EXTERNAL:
        continue
      if child.Get(child.Schema.ACTIVE):
        yield child

  def CheckHashes(self, hashes, external=True):
    """Checks a list of hashes for presence in the store.

    Sub stores need to pass back the original HashDigest objects since they
    carry state about the original file source.

    Only unique hashes are checked; if there is duplication in the hashes
    input it is the caller's responsibility to maintain any necessary
    mappings.

    Args:
      hashes: A list of Hash objects to check.
      external: If true, attempt to check stores defined as EXTERNAL.

    Yields:
      Tuples of (RDFURN, hash object) that exist in the store.
    """
    hashes = set(hashes)
    for child in self.GetChildrenByPriority(allow_external=external):
      for urn, hash_obj in child.CheckHashes(hashes):
        yield urn, hash_obj

        hashes.discard(hash_obj)

      # Nothing to search for, we are done.
      if not hashes:
        break

  def AddFile(self, fd, sync=False, external=True):
    """Create a new file in the file store.

    We delegate the actual file addition to our contained implementations.
    Implementations can either implement the AddFile() method, returning a
    file like object which will be written on, or directly support the
    AddBlobToStore() method which can copy the VFSBlobImage efficiently.

    Args:
      fd: An AFF4 object open for read/write.
      sync: Should the file be synced immediately.
      external: If true, attempt to add files to stores defined as EXTERNAL.
    """
    files_for_write = []
    for sub_store in self.GetChildrenByPriority(allow_external=external):
      new_file = sub_store.AddFile(fd, sync=sync)
      if new_file:
        files_for_write.append(new_file)

    fd.Seek(0)
    while files_for_write:
      # If we got filehandles back, send them the data.
      data = fd.Read(self.CHUNK_SIZE)
      if not data:
        break

      for child in files_for_write:
        child.Write(data)

    for child in files_for_write:
      child.Close(sync=sync)

  def FindFile(self, fd, external=True):
    """Find an AFF4Stream in the file store.

    We delegate the actual file search to our contained implementations.
    Implementations need to implement the FindFile() method, which will
    return either a list of RDFURNs or a single RDFURN.

    Args:
      fd: File open for reading.
      external: If true, attempt to check stores defined as EXTERNAL.

    Returns:
      A list of RDFURNs returned by the contained implementations.
    """
    return_list = []
    for sub_store in self.GetChildrenByPriority(allow_external=external):
      found = sub_store.FindFile(fd)
      if found:
        if isinstance(found, list):
          return_list.extend(found)
        else:
          return_list.append(found)

    return return_list

  class SchemaCls(aff4.AFF4Volume.SchemaCls):
    ACTIVE = aff4.Attribute("aff4:filestore_active", rdfvalue.RDFBool,
                            "If true this filestore is active.",
                            default=True)
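# AddFile() above streams the source once, fanning each CHUNK_SIZE read out
# to every sub-store handle that asked for data. A self-contained sketch of
# that fan-out loop using ordinary Python file objects instead of AFF4
# streams:
CHUNK_SIZE = 5 * 512 * 1024  # same chunking as FileStore.CHUNK_SIZE


def fan_out_copy(src, sinks, chunk_size=CHUNK_SIZE):
  """Write src's content to every sink, one chunk at a time."""
  src.seek(0)
  while sinks:
    data = src.read(chunk_size)
    if not data:
      break
    for sink in sinks:
      sink.write(data)
  for sink in sinks:
    sink.close()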
  def ToFlowURN(self):
    return rdfvalue.RDFURN(self.client_id).Add("flows").Add(self.flow_id)
class NSRLFileStore(HashFileStore):
  """FileStore with NSRL hashes."""

  PATH = rdfvalue.RDFURN("aff4:/files/nsrl")
  PRIORITY = 1
  EXTERNAL = False
  FILE_HASH_TYPE = NSRLFileStoreHash

  FILE_TYPES = {"M": rdf_nsrl.NSRLInformation.FileType.MALICIOUS_FILE,
                "S": rdf_nsrl.NSRLInformation.FileType.SPECIAL_FILE,
                "": rdf_nsrl.NSRLInformation.FileType.NORMAL_FILE}

  def GetChildrenByPriority(self, allow_external=True):
    return

  @staticmethod
  def ListHashes(token=None, age=aff4.NEWEST_TIME):
    return

  def CheckHashes(self, hashes, unused_external=True):
    """Checks a list of hashes for presence in the store.

    Only unique sha1 hashes are checked; if there is duplication in the
    hashes input it is the caller's responsibility to maintain any necessary
    mappings.

    Args:
      hashes: A list of Hash objects to check.
      unused_external: Ignored.

    Yields:
      Tuples of (RDFURN, hash object) that exist in the store.
    """
    hash_map = {}
    for hsh in hashes:
      if hsh.HasField("sha1"):
        hash_urn = self.PATH.Add(str(hsh.sha1))
        logging.info("Checking URN %s", str(hash_urn))
        hash_map[hash_urn] = hsh

    for metadata in aff4.FACTORY.Stat(list(hash_map), token=self.token):
      yield metadata["urn"], hash_map[metadata["urn"]]

  def AddHash(self, sha1, md5, crc, file_name, file_size, product_code_list,
              op_system_code_list, special_code):
    """Adds a new file from the NSRL hash database.

    We create a new subject in:
      aff4:/files/nsrl/<sha1>
    with all the other arguments as attributes.

    Args:
      sha1: SHA1 digest as a hex encoded string.
      md5: MD5 digest as a hex encoded string.
      crc: File CRC as an integer.
      file_name: Filename.
      file_size: Size of file.
      product_code_list: List of products this file is part of.
      op_system_code_list: List of operating systems this file is part of.
      special_code: Special code (malicious/special/normal file).
    """
    file_store_urn = self.PATH.Add(sha1)

    special_code = self.FILE_TYPES.get(special_code, self.FILE_TYPES[""])

    with aff4.FACTORY.Create(file_store_urn, "NSRLFile", mode="w",
                             token=self.token) as fd:
      fd.Set(fd.Schema.NSRL(sha1=sha1.decode("hex"),
                            md5=md5.decode("hex"),
                            crc32=crc,
                            file_name=file_name,
                            file_size=file_size,
                            product_code=product_code_list,
                            op_system_code=op_system_code_list,
                            file_type=special_code))

  def FindFile(self, fd):
    """Hash an AFF4Stream and find the RDFURN with the same hash.

    Args:
      fd: File open for reading.

    Returns:
      A RDFURN to the file in the file store or False if not found.
    """
    hashes = self._HashFile(fd)
    if not hashes:
      return False

    hash_urn = self.PATH.Add(str(hashes.sha1))

    for data in aff4.FACTORY.Stat([hash_urn], token=self.token):
      return data["urn"]

    return False

  def AddFile(self, fd, sync=False):
    """Hash the AFF4Stream and add it to the NSRLFile's index.

    We take a file in the client space:
      aff4:/C.123123123/fs/os/usr/local/blah

    Hash it and check if there is a corresponding NSRLFile at the following
    URN:
      aff4:/files/nsrl/123123123

    Next, we add the file to the NSRL index, so we know which clients have
    the file.

    Args:
      fd: File open for reading.
      sync: Should the file be synced immediately.

    Returns:
      The URN of the NSRL file if it was found in the store.
    """
    hash_urn = self.FindFile(fd)
    if not hash_urn:
      return False

    # Open the file and add 'fd' to the index.
    try:
      with aff4.FACTORY.Open(hash_urn, "NSRLFile", mode="w",
                             token=self.token) as hash_fd:
        hash_fd.AddIndex(fd.urn)
        return hash_urn
    except aff4.InstantiationError:
      pass

    return False
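# A hedged usage sketch of AddHash(); `nsrl_store` is assumed to be an opened
# NSRLFileStore and the NSRL row values are invented for illustration.
nsrl_store.AddHash(
    sha1="0000000000000000000000000000000000000000",
    md5="00000000000000000000000000000000",
    crc=12345,
    file_name="notepad.exe",
    file_size=193536,
    product_code_list=[2345],
    op_system_code_list=["WIN7"],
    special_code="")  # "" -> NORMAL_FILE, "M" -> MALICIOUS, "S" -> SPECIAL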
class GRRUser(aff4.AFF4Object):
  """An AFF4 object modeling a GRR User."""

  # URN of the index for labels for users.
  labels_index_urn = rdfvalue.RDFURN("aff4:/index/labels/users")

  SYSTEM_USERS = set(["GRRWorker", "GRREnroller", "GRRCron", "test"])

  class SchemaCls(aff4.AFF4Object.SchemaCls):
    """Schema for GRRUser."""
    PENDING_NOTIFICATIONS = aff4.Attribute(
        "aff4:notification/pending", rdfvalue.NotificationList,
        "The notifications pending for the user.", default="",
        versioned=False)

    SHOWN_NOTIFICATIONS = aff4.Attribute(
        "aff4:notifications/shown", rdfvalue.NotificationList,
        "Notifications already shown to the user.", default="",
        versioned=False)

    SHOWN_GLOBAL_NOTIFICATIONS = aff4.Attribute(
        "aff4:global_notification/timestamp_list", GlobalNotificationSet,
        "Global notifications shown to this user.",
        default=GlobalNotificationSet(), versioned=False)

    GUI_SETTINGS = aff4.Attribute("aff4:gui/settings", rdfvalue.GUISettings,
                                  "GUI Settings", default="")

    PASSWORD = aff4.Attribute("aff4:user/password", CryptedPassword,
                              "Encrypted Password for the user")

  def Notify(self, message_type, subject, msg, source):
    """Send a notification to the user in the UI.

    Args:
      message_type: One of aff4_grr.Notification.notification_types e.g.
        "ViewObject", "HostInformation", "GrantAccess".
      subject: The subject to use, normally a URN.
      msg: The message to display.
      source: The class doing the notification.

    Raises:
      TypeError: On invalid message_type.
    """
    pending = self.Get(self.Schema.PENDING_NOTIFICATIONS)
    if pending is None:
      pending = self.Schema.PENDING_NOTIFICATIONS()

    if message_type not in rdfvalue.Notification.notification_types:
      raise TypeError("Invalid notification type %s" % message_type)

    pending.Append(type=message_type, subject=subject, message=msg,
                   source=source, timestamp=long(time.time() * 1e6))

    # Limit the pending notifications to 50, expiring older notifications.
    while len(pending) > 50:
      pending.Pop(0)

    self.Set(self.Schema.PENDING_NOTIFICATIONS, pending)

  def ShowNotifications(self, reset=True):
    """A generator of current notifications."""
    shown_notifications = self.Schema.SHOWN_NOTIFICATIONS()

    # Pending notifications first.
    pending = self.Get(self.Schema.PENDING_NOTIFICATIONS)
    for notification in pending:
      shown_notifications.Append(notification)

    notifications = self.Get(self.Schema.SHOWN_NOTIFICATIONS)
    for notification in notifications:
      shown_notifications.Append(notification)

    # Shall we reset the pending notification state?
    if reset:
      self.Set(shown_notifications)
      self.Set(self.Schema.PENDING_NOTIFICATIONS())
      self.Flush()

    return shown_notifications

  def Describe(self):
    """Return a description of this user."""
    result = ["\nUsername: %s" % self.urn.Basename()]
    fd = aff4.FACTORY.Open(self.urn.Add("labels"), token=self.token)
    labels = [str(x) for x in fd.Get(fd.Schema.LABEL, [])]
    result.append("Labels: %s" % ",".join(labels))

    if self.Get(self.Schema.PASSWORD) is None:
      result.append("Password: not set")
    else:
      result.append("Password: set")

    return "\n".join(result)

  def SetPassword(self, password):
    self.Set(self.Schema.PASSWORD().SetPassword(password))

  def CheckPassword(self, password):
    password_obj = self.Get(self.Schema.PASSWORD)
    return password_obj and password_obj.CheckPassword(password)

  def GetPendingGlobalNotifications(self):
    storage = aff4.FACTORY.Create(GlobalNotificationStorage.DEFAULT_PATH,
                                  aff4_type="GlobalNotificationStorage",
                                  mode="r", token=self.token)
    current_notifications = storage.GetNotifications()

    shown_notifications = self.Get(self.Schema.SHOWN_GLOBAL_NOTIFICATIONS,
                                   default=GlobalNotificationSet())

    result = []
    for notification in current_notifications:
      if notification in shown_notifications:
        continue

      current_time = rdfvalue.RDFDatetime().Now()
      if (notification.show_from + notification.duration >= current_time and
          current_time >= notification.show_from):
        result.append(notification)

    return result

  def MarkGlobalNotificationAsShown(self, notification):
    shown_notifications = self.Get(self.Schema.SHOWN_GLOBAL_NOTIFICATIONS)
    shown_notifications.AddNotification(notification)
    self.Set(self.Schema.SHOWN_GLOBAL_NOTIFICATIONS, shown_notifications)
  def setUp(self):
    super(CheckAccessHelperTest, self).setUp()
    self.helper = user_managers.CheckAccessHelper("test")
    self.subject = rdfvalue.RDFURN("aff4:/some/path")
from grr.lib import access_control
from grr.lib import aff4
from grr.lib import rdfvalue
from grr.lib import registry
from grr.lib.aff4_objects import queue as aff4_queue
from grr.lib.aff4_objects import sequential_collection
from grr.lib.rdfvalues import structs as rdf_structs
from grr.proto import jobs_pb2


class HuntResultNotification(rdf_structs.RDFProtoStruct):
  protobuf = jobs_pb2.HuntResultNotification


RESULT_NOTIFICATION_QUEUE = rdfvalue.RDFURN("aff4:/hunt_results_queue")


class HuntResultQueue(aff4_queue.Queue):
  """A global queue of hunt results which need to be processed."""
  rdf_type = HuntResultNotification

  @classmethod
  def ClaimNotificationsForCollection(cls, token=None, start_time=None,
                                      lease_time=200, collection=None):
    """Return unclaimed hunt result notifications for collection.

    Args: