Example #1
def _GetHWInfos(client_list, batch_size=10000, token=None):
    """Opens the given clients in batches and returns hardware information."""

    # This function returns a dict mapping each client_id to the set of
    # hardware serial numbers reported by that client.
    hw_infos = {}

    logging.info("%d clients to process.", len(client_list))

    c = 0

    for batch in utils.Grouper(client_list, batch_size):
        logging.info("Processing batch: %d-%d", c, c + batch_size)
        c += len(batch)

        client_objs = aff4.FACTORY.MultiOpen(batch,
                                             age=aff4.ALL_TIMES,
                                             token=token)

        for client in client_objs:
            hwi = client.GetValuesForAttribute(client.Schema.HARDWARE_INFO)

            hw_infos[client.urn] = set(["%s" % x.serial_number for x in hwi])

    return hw_infos
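
The examples on this page all feed an iterable into utils.Grouper and consume it in fixed-size batches. As a rough sketch only (an assumption about the helper's behaviour, not GRR's actual implementation), Grouper can be thought of as a batching generator:

def grouper(iterable, batch_size):
    """Yields lists of at most batch_size items from iterable (illustration only)."""
    batch = []
    for item in iterable:
        batch.append(item)
        if len(batch) == batch_size:
            yield batch
            batch = []
    if batch:
        yield batch

# Example: list(grouper(range(7), 3)) -> [[0, 1, 2], [3, 4, 5], [6]]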
Example #2
    def _GenerateConvertedValues(self, converter, grr_messages):
        """Generates converted values using given converter from given messages.

        Groups values into batches of BATCH_SIZE and applies the converter
        to each batch.

        Args:
          converter: ExportConverter instance.
          grr_messages: An iterable (a generator is assumed) with GrrMessage values.

        Yields:
          Values generated by the converter.

        Raises:
          ValueError: if any of the GrrMessage objects doesn't have "source" set.
        """
        for batch in utils.Grouper(grr_messages, self.BATCH_SIZE):
            metadata_items = self._GetMetadataForClients(
                [gm.source for gm in batch])
            batch_with_metadata = zip(metadata_items,
                                      [gm.payload for gm in batch])

            for result in converter.BatchConvert(batch_with_metadata,
                                                 token=self.token):
                yield result
Example #3
    def Start(self):
        """Retrieve all the clients for the AbstractClientStatsCollectors."""
        self.stats = aff4.FACTORY.Create(self.FILESTORE_STATS_URN,
                                         aff4_stats.FilestoreStats,
                                         mode="w",
                                         token=self.token)

        self._CreateConsumers()
        hashes = aff4.FACTORY.Open(self.HASH_PATH,
                                   token=self.token).ListChildren(limit=10**8)

        try:
            for urns in utils.Grouper(hashes, self.OPEN_FILES_LIMIT):
                for fd in aff4.FACTORY.MultiOpen(urns,
                                                 mode="r",
                                                 token=self.token,
                                                 age=aff4.NEWEST_TIME):

                    for consumer in self.consumers:
                        consumer.ProcessFile(fd)
                self.HeartBeat()

        finally:
            for consumer in self.consumers:
                consumer.Save(self.stats)
            self.stats.Close()
Example #4
  def Execute(self, thread_count):
    """Runs the migration with a given thread count."""

    blob_urns = list(aff4.FACTORY.ListChildren("aff4:/blobs"))
    sys.stdout.write("Blobs to migrate: {}\n".format(len(blob_urns)))
    sys.stdout.write("Threads to use: {}\n".format(thread_count))

    self._total_count = len(blob_urns)
    self._migrated_count = 0
    self._start_time = rdfvalue.RDFDatetime.Now()

    batches = utils.Grouper(blob_urns, _BLOB_BATCH_SIZE)

    self._Progress()
    tp = pool.ThreadPool(processes=thread_count)
    tp.map(self._MigrateBatch, list(batches))
    self._Progress()

    if self._migrated_count == self._total_count:
      message = "\nMigration has been finished (migrated {} blobs).\n".format(
          self._migrated_count)
      sys.stdout.write(message)
    else:
      message = "Not all blobs have been migrated ({}/{})".format(
          self._migrated_count, self._total_count)
      raise AssertionError(message)
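
The batching-plus-thread-pool pattern in Execute can be reproduced in isolation with the standard library. The sketch below is self-contained and hypothetical (migrate_batch is a placeholder worker, not GRR's _MigrateBatch):

from multiprocessing import pool

def migrate_batch(batch):
    # Placeholder worker; a real migration would copy each blob to the new store.
    return len(batch)

items = list(range(25))
batch_size = 10
batches = [items[i:i + batch_size] for i in range(0, len(items), batch_size)]

tp = pool.ThreadPool(processes=4)
print(tp.map(migrate_batch, batches))  # [10, 10, 5]
tp.close()
tp.join()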
Example #5
def AddFileWithUnknownHash(blob_ids):
    """Add a new file consisting of given blob IDs."""

    blob_refs = []
    offset = 0
    sha256 = hashlib.sha256()
    for blob_ids_batch in utils.Grouper(blob_ids, _BLOBS_READ_BATCH_SIZE):
        unique_ids = set(blob_ids_batch)
        data = data_store.REL_DB.ReadBlobs(unique_ids)
        for k, v in iteritems(data):
            if v is None:
                raise BlobNotFound(
                    "Couldn't find one of referenced blobs: %s" % k)

        for blob_id in blob_ids_batch:
            blob_data = data[blob_id]
            blob_refs.append(
                rdf_objects.BlobReference(
                    offset=offset,
                    size=len(blob_data),
                    blob_id=blob_id,
                ))
            offset += len(blob_data)

            sha256.update(blob_data)

    hash_id = rdf_objects.SHA256HashID.FromBytes(sha256.digest())
    data_store.REL_DB.WriteHashBlobReferences({hash_id: blob_refs})

    return hash_id
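
The offset and hash bookkeeping above can be seen in isolation with plain byte chunks. This is a standalone sketch using only the standard library (no GRR data store calls): each reference records where a chunk starts and how long it is, while a single SHA-256 is updated across all chunks.

import hashlib

chunks = [b"hello ", b"world", b"!"]

refs = []
offset = 0
sha256 = hashlib.sha256()
for chunk in chunks:
    refs.append({"offset": offset, "size": len(chunk)})
    offset += len(chunk)
    sha256.update(chunk)

# Streaming the chunks yields the same digest as hashing the whole file at once.
assert sha256.digest() == hashlib.sha256(b"".join(chunks)).digest()
print(refs)  # [{'offset': 0, 'size': 6}, {'offset': 6, 'size': 5}, {'offset': 11, 'size': 1}]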
Example #6
  def MigrateClients(self, client_urns):
    """Migrates entire VFS of given client list to the relational data store."""
    self._start_time = rdfvalue.RDFDatetime.Now()

    self._client_urns_to_migrate = client_urns
    self._client_urns_migrated = []
    self._client_urns_failed = []

    to_migrate_count = len(self._client_urns_to_migrate)
    sys.stdout.write("Clients to migrate: {}\n".format(to_migrate_count))

    batches = utils.Grouper(client_urns, self.client_batch_size)

    tp = pool.ThreadPool(processes=self.thread_count)
    tp.map(self.MigrateClientBatch, list(batches))

    migrated_count = len(self._client_urns_migrated)
    sys.stdout.write("Migrated clients: {}\n".format(migrated_count))

    if to_migrate_count == migrated_count:
      sys.stdout.write("All clients migrated successfully!\n")
    else:
      message = "Not all clients have been migrated ({}/{})".format(
          migrated_count, to_migrate_count)
      raise RuntimeError(message)
Example #7
    def Stop(self, reason=None):
        super(GenericHunt, self).Stop(reason=reason)

        started_flows = grr_collections.RDFUrnCollection(
            self.started_flows_collection_urn)

        num_terminated_flows = 0
        self.Log("Hunt stop. Terminating all the started flows.")

        # Delete hunt flow states.
        for flows_batch in utils.Grouper(started_flows,
                                         self.__class__.STOP_BATCH_SIZE):
            with queue_manager.QueueManager(token=self.token) as manager:
                manager.MultiDestroyFlowStates(flows_batch)

            with data_store.DB.GetMutationPool() as mutation_pool:
                for f in flows_batch:
                    flow.GRRFlow.MarkForTermination(
                        f,
                        reason="Parent hunt stopped.",
                        mutation_pool=mutation_pool)

            num_terminated_flows += len(flows_batch)

        # Delete hunt's requests and responses to ensure no more
        # processing is going to occur.
        with queue_manager.QueueManager(token=self.token) as manager:
            manager.DestroyFlowStates(self.session_id)

        self.Log("%d flows terminated.", num_terminated_flows)
Example #8
    def ProcessSingleTypeExportedValues(self, original_value_type,
                                        exported_values):
        first_value = next(exported_values, None)
        if not first_value:
            return

        yield self.archive_generator.WriteFileHeader(
            "%s/%s/from_%s.yaml" %
            (self.path_prefix, first_value.__class__.__name__,
             original_value_type.__name__))
        yield self.archive_generator.WriteFileChunk(
            _SerializeToYaml(first_value))
        counter = 1
        for batch in utils.Grouper(exported_values, self.ROW_BATCH):
            counter += len(batch)
            # TODO(hanuszczak): YAML is supposed to be a unicode file format so we
            # should use `StringIO` here instead. However, because PyYAML dumps to
            # `bytes` instead of `unicode` we have to use `BytesIO`. It should be
            # investigated whether there is a way to adjust behaviour of PyYAML.
            buf = io.BytesIO()
            for value in batch:
                buf.write(b"\n")
                buf.write(_SerializeToYaml(value))

            yield self.archive_generator.WriteFileChunk(buf.getvalue())
        yield self.archive_generator.WriteFileFooter()

        counts_for_original_type = self.export_counts.setdefault(
            original_value_type.__name__, dict())
        counts_for_original_type[first_value.__class__.__name__] = counter
Example #9
  def ProcessOneHunt(self, exceptions_by_hunt):
    """Reads results for one hunt and process them."""
    hunt_results_urn, results = (
        hunts_results.HuntResultQueue.ClaimNotificationsForCollection(
            token=self.token, lease_time=self.lifetime))
    logging.debug("Found %d results for hunt %s", len(results),
                  hunt_results_urn)
    if not results:
      return 0

    hunt_urn = rdfvalue.RDFURN(hunt_results_urn.Dirname())
    batch_size = self.BATCH_SIZE
    metadata_urn = hunt_urn.Add("ResultsMetadata")
    exceptions_by_plugin = {}
    num_processed_for_hunt = 0
    collection_obj = implementation.GRRHunt.ResultCollectionForHID(hunt_urn)
    try:
      with aff4.FACTORY.OpenWithLock(
          metadata_urn, lease_time=600, token=self.token) as metadata_obj:
        all_plugins, used_plugins = self.LoadPlugins(metadata_obj)
        num_processed = int(
            metadata_obj.Get(metadata_obj.Schema.NUM_PROCESSED_RESULTS))
        for batch in utils.Grouper(results, batch_size):
          results = list(
              collection_obj.MultiResolve(
                  [r.value.ResultRecord() for r in batch]))
          self.RunPlugins(hunt_urn, used_plugins, results, exceptions_by_plugin)

          hunts_results.HuntResultQueue.DeleteNotifications(
              batch, token=self.token)
          num_processed += len(batch)
          num_processed_for_hunt += len(batch)
          self.HeartBeat()
          metadata_obj.Set(
              metadata_obj.Schema.NUM_PROCESSED_RESULTS(num_processed))
          metadata_obj.UpdateLease(600)
          if self.CheckIfRunningTooLong():
            logging.warning("Run too long, stopping.")
            break

        metadata_obj.Set(metadata_obj.Schema.OUTPUT_PLUGINS(all_plugins))
        metadata_obj.Set(
            metadata_obj.Schema.NUM_PROCESSED_RESULTS(num_processed))
    except aff4.LockError:
      logging.warn(
          "ProcessHuntResultCollectionsCronFlow: "
          "Could not get lock on hunt metadata %s.", metadata_urn)
      return 0

    if exceptions_by_plugin:
      for plugin, exceptions in iteritems(exceptions_by_plugin):
        exceptions_by_hunt.setdefault(hunt_urn, {}).setdefault(
            plugin, []).extend(exceptions)

    logging.debug("Processed %d results.", num_processed_for_hunt)
    return len(results)
Example #10
    def ProcessSingleTypeExportedValues(self, original_value_type,
                                        exported_values):
        first_value = next(exported_values, None)
        if not first_value:
            return

        if not isinstance(first_value, rdf_structs.RDFProtoStruct):
            raise ValueError("The SQLite plugin only supports export-protos")
        yield self.archive_generator.WriteFileHeader(
            "%s/%s_from_%s.sql" %
            (self.path_prefix, first_value.__class__.__name__,
             original_value_type.__name__))
        table_name = "%s.from_%s" % (first_value.__class__.__name__,
                                     original_value_type.__name__)
        schema = self._GetSqliteSchema(first_value.__class__)

        # We will buffer the sql statements into an in-memory sql database before
        # dumping them to the zip archive. We rely on the PySQLite library for
        # string escaping.
        db_connection = sqlite3.connect(":memory:")
        db_cursor = db_connection.cursor()

        yield self.archive_generator.WriteFileChunk("BEGIN TRANSACTION;\n")
        with db_connection:
            buf = io.StringIO()
            buf.write(u"CREATE TABLE \"%s\" (\n  " % table_name)
            column_types = [(k, v.sqlite_type) for k, v in iteritems(schema)]
            buf.write(u",\n  ".join(
                [u"\"%s\" %s" % (k, v) for k, v in column_types]))
            buf.write(u"\n);")
            db_cursor.execute(buf.getvalue())
            yield self.archive_generator.WriteFileChunk(buf.getvalue() + u"\n")
            self._InsertValueIntoDb(table_name, schema, first_value, db_cursor)

        for sql in self._FlushAllRows(db_connection, table_name):
            yield sql
        counter = 1
        for batch in utils.Grouper(exported_values, self.ROW_BATCH):
            counter += len(batch)
            with db_connection:
                for value in batch:
                    self._InsertValueIntoDb(table_name, schema, value,
                                            db_cursor)
            for sql in self._FlushAllRows(db_connection, table_name):
                yield sql

        db_connection.close()
        yield self.archive_generator.WriteFileChunk("COMMIT;\n")
        yield self.archive_generator.WriteFileFooter()

        counts_for_original_type = self.export_counts.setdefault(
            original_value_type.__name__, dict())
        counts_for_original_type[first_value.__class__.__name__] = counter
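
The plugin buffers rows in an in-memory SQLite database so that PySQLite handles value escaping, and flushes the accumulated rows as SQL text via _FlushAllRows (not shown here). For a rough idea of how such a flush can be done with the standard library alone, sqlite3.Connection.iterdump() yields equivalent SQL statements for the current contents:

import sqlite3

conn = sqlite3.connect(":memory:")
cur = conn.cursor()
cur.execute('CREATE TABLE "demo" ("name" TEXT, "size" INTEGER)')
cur.executemany('INSERT INTO "demo" VALUES (?, ?)', [("a", 1), ("b", 2)])
conn.commit()

# iterdump() emits BEGIN/CREATE/INSERT/COMMIT statements that recreate the database.
for statement in conn.iterdump():
    print(statement)
conn.close()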
Example #11
def CleanVacuousVersions(clients=None, dry_run=True):
    """A script to remove no-op client versions.

  This script removes versions of a client when it is identical to the previous,
  in the sense that no versioned attributes were changed since the previous
  client version.

  Args:
    clients: A list of ClientURN, if empty cleans all clients.
    dry_run: whether this is a dry run
  """

    if not clients:
        index = client_index.CreateClientIndex()
        clients = index.LookupClients(["."])
    clients.sort()
    with data_store.DB.GetMutationPool() as pool:

        logging.info("checking %d clients", len(clients))
        for batch in utils.Grouper(clients, 10000):
            # TODO(amoser): This only works on datastores that use the Bigtable
            # scheme.
            client_infos = data_store.DB.MultiResolvePrefix(
                batch, ["aff4:", "aff4:"], data_store.DB.ALL_TIMESTAMPS)

            for client, type_list in client_infos:
                cleared = 0
                kept = 0
                updates = []
                for a, _, ts in type_list:
                    if ts != 0:
                        updates.append((ts, a))
                updates = sorted(updates)
                dirty = True
                for ts, a in updates:
                    if a == "aff4:type":
                        if dirty:
                            kept += 1
                            dirty = False
                        else:
                            cleared += 1
                            if not dry_run:
                                pool.DeleteAttributes(client, ["aff4:type"],
                                                      start=ts,
                                                      end=ts)
                                if pool.Size() > 1000:
                                    pool.Flush()
                    else:
                        dirty = True
                logging.info("%s: kept %d and cleared %d", client, kept,
                             cleared)
Example #12
    def GetInput(self):
        """Yield client urns."""
        client_list = GetAllClients(token=self.token)
        logging.debug("Got %d clients", len(client_list))
        for client_group in utils.Grouper(client_list, self.client_chunksize):
            for fd in aff4.FACTORY.MultiOpen(client_group,
                                             mode="r",
                                             aff4_type=aff4_grr.VFSGRRClient,
                                             token=self.token):
                if isinstance(fd, aff4_grr.VFSGRRClient):
                    # Skip clients whose last ping is older than max_age.
                    oldest_time = (time.time() - self.max_age) * 1e6
                    if fd.Get(aff4_grr.VFSGRRClient.SchemaCls.PING) >= oldest_time:
                        yield fd
Example #13
    def Run(self):
        self.start = 0
        self.end = int(1e6 * (time.time() - self.MAX_AGE))

        client_urns = export_utils.GetAllClients(token=self.token)

        for batch in utils.Grouper(client_urns, 10000):
            with data_store.DB.GetMutationPool() as mutation_pool:
                for client_urn in batch:
                    mutation_pool.DeleteAttributes(client_urn.Add("stats"),
                                                   [u"aff4:stats"],
                                                   start=self.start,
                                                   end=self.end)
            self.HeartBeat()
Example #14
def _IterateAllClients():
    """Fetches client data from the relational db."""
    all_client_ids = data_store.REL_DB.ReadAllClientIDs()
    for batch in utils.Grouper(all_client_ids, CLIENT_READ_BATCH_SIZE):
        client_map = data_store.REL_DB.MultiReadClientFullInfo(batch)
        fs_client_ids = [
            cid for (cid, client) in iteritems(client_map)
            if client.metadata.fleetspeak_enabled
        ]
        last_contact_times = _GetLastContactFromFleetspeak(fs_client_ids)
        for cid, last_contact in iteritems(last_contact_times):
            client_map[cid].metadata.ping = last_contact
        for client in itervalues(client_map):
            yield client
Example #15
    def _MultiStream(cls, fds):
        """Effectively streams data from multiple opened BlobImage objects.

    Args:
      fds: A list of opened AFF4Stream (or AFF4Stream descendants) objects.

    Yields:
      Tuples (chunk, fd, exception) where chunk is a binary blob of data and fd
      is an object from the fds argument.

      If one or more chunks are missing, exception is a MissingBlobsError object
      and chunk is None. _MultiStream does its best to skip the file entirely if
      one of its chunks is missing, but in case of very large files it's still
      possible to yield a truncated file.
    """

        broken_fds = set()
        missing_blobs_fd_pairs = []
        for chunk_fd_pairs in utils.Grouper(
                cls._GenerateChunkIds(fds),
                cls.MULTI_STREAM_CHUNKS_READ_AHEAD):
            chunk_fds = list(map(operator.itemgetter(0), chunk_fd_pairs))
            results_map = data_store.DB.ReadBlobs(chunk_fds,
                                                  token=fds[0].token)

            for chunk_id, fd in chunk_fd_pairs:
                if chunk_id not in results_map or results_map[chunk_id] is None:
                    missing_blobs_fd_pairs.append((chunk_id, fd))
                    broken_fds.add(fd)

            for chunk, fd in chunk_fd_pairs:
                if fd in broken_fds:
                    continue

                yield fd, results_map[chunk], None

        if missing_blobs_fd_pairs:
            missing_blobs_by_fd = {}
            for chunk_id, fd in missing_blobs_fd_pairs:
                missing_blobs_by_fd.setdefault(fd, []).append(chunk_id)

            for fd, missing_blobs in iteritems(missing_blobs_by_fd):
                e = MissingBlobsError("%d missing blobs (multi-stream)" %
                                      len(missing_blobs),
                                      missing_chunks=missing_blobs)
                yield fd, None, e
Example #16
def _IterateAllLegacyClients(token):
  """Fetches client data from the legacy db."""
  root_children = aff4.FACTORY.Open(
      aff4.ROOT_URN, token=token).OpenChildren(mode="r")
  for batch in utils.Grouper(root_children, CLIENT_READ_BATCH_SIZE):
    fs_client_map = {}
    non_fs_clients = []
    for child in batch:
      if not isinstance(child, aff4_grr.VFSGRRClient):
        continue
      if child.Get(child.Schema.FLEETSPEAK_ENABLED):
        fs_client_map[child.urn.Basename()] = child
      else:
        non_fs_clients.append(child)
    last_contact_times = _GetLastContactFromFleetspeak(viewkeys(fs_client_map))
    for client in non_fs_clients:
      yield client.Get(client.Schema.PING), client
    for cid, client in iteritems(fs_client_map):
      last_contact = last_contact_times.get(cid, client.Get(client.Schema.PING))
      yield last_contact, client
Example #17
  def Convert(self, values, start_index=0, end_index=None):
    """Converts given collection to exported values.

    This method uses a threadpool to do the conversion in parallel. It
    blocks until everything is converted.

    Args:
      values: Iterable object with values to convert.
      start_index: Start from this index in the collection.
      end_index: Finish processing on the (index - 1) element of the
                 collection. If None, work till the end of the collection.

    Returns:
      Nothing. ConvertedBatch() should handle the results.
    """
    if not values:
      return

    try:
      total_batch_count = len(values) // self.batch_size
    except TypeError:
      total_batch_count = -1

    pool = ThreadPool.Factory(self.threadpool_prefix, self.threadpool_size)
    val_iterator = itertools.islice(values, start_index, end_index)

    pool.Start()
    try:
      for batch_index, batch in enumerate(
          utils.Grouper(val_iterator, self.batch_size)):
        logging.debug("Processing batch %d out of %d", batch_index,
                      total_batch_count)

        pool.AddTask(
            target=self.ConvertBatch,
            args=(batch,),
            name="batch_%d" % batch_index,
            inline=False)

    finally:
      pool.Stop()
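
The try/except around len(values) exists because values may be a generator, which has no length; in that case the batch count is reported as -1. A minimal standalone illustration of that idiom:

def total_batches(values, batch_size):
    """Returns the number of full batches, or -1 if values has no len() (e.g. a generator)."""
    try:
        return len(values) // batch_size
    except TypeError:
        return -1

print(total_batches([1, 2, 3, 4, 5], 2))        # 2
print(total_batches((x for x in range(5)), 2))  # -1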
Example #18
    def MigrateClient(self, client_urn):
        """Migrates entire VFS of given client to the relational data store."""
        vfs = ListVfs(client_urn)

        path_infos = []

        for vfs_urn in vfs:
            _, vfs_path = vfs_urn.Split(2)
            path_type, components = rdf_objects.ParseCategorizedPath(vfs_path)

            path_info = rdf_objects.PathInfo(path_type=path_type,
                                             components=components)
            path_infos.append(path_info)

        data_store.REL_DB.InitPathInfos(client_urn.Basename(), path_infos)

        for vfs_group in utils.Grouper(vfs, self.vfs_group_size):
            stat_entries = dict()
            hash_entries = dict()

            for fd in aff4.FACTORY.MultiOpen(vfs_group, age=aff4.ALL_TIMES):
                _, vfs_path = fd.urn.Split(2)
                path_type, components = rdf_objects.ParseCategorizedPath(
                    vfs_path)
                path_info = rdf_objects.PathInfo(path_type=path_type,
                                                 components=components)

                for stat_entry in fd.GetValuesForAttribute(fd.Schema.STAT):
                    stat_path_info = path_info.Copy()
                    stat_path_info.timestamp = stat_entry.age
                    stat_entries[stat_path_info] = stat_entry

                for hash_entry in fd.GetValuesForAttribute(fd.Schema.HASH):
                    hash_path_info = path_info.Copy()
                    hash_path_info.timestamp = hash_entry.age
                    hash_entries[hash_path_info] = hash_entry

            data_store.REL_DB.MultiWritePathHistory(client_urn.Basename(),
                                                    stat_entries, hash_entries)
Example #19
    def Execute(self, thread_count):
        """Runs the migration procedure.

    Args:
      thread_count: A number of threads to execute the migration with.

    Raises:
      AssertionError: If not all clients have been migrated.
      ValueError: If the relational database backend is not available.
    """
        if not data_store.RelationalDBWriteEnabled():
            raise ValueError("No relational database available.")

        sys.stdout.write("Collecting clients...\n")
        client_urns = _GetClientUrns()

        sys.stdout.write("Clients to migrate: {}\n".format(len(client_urns)))
        sys.stdout.write("Threads to use: {}\n".format(thread_count))

        self._total_count = len(client_urns)
        self._migrated_count = 0
        self._start_time = rdfvalue.RDFDatetime.Now()

        batches = utils.Grouper(client_urns, _CLIENT_BATCH_SIZE)

        self._Progress()
        tp = pool.ThreadPool(processes=thread_count)
        tp.map(self._MigrateBatch, list(batches))
        self._Progress()

        if self._migrated_count == self._total_count:
            message = "\nMigration has been finished (migrated {} clients).\n".format(
                self._migrated_count)
            sys.stdout.write(message)
        else:
            message = "Not all clients have been migrated ({}/{})".format(
                self._migrated_count, self._total_count)
            raise AssertionError(message)
Example #20
    def ProcessSingleTypeExportedValues(self, original_value_type,
                                        exported_values):
        first_value = next(exported_values, None)
        if not first_value:
            return

        yield self.archive_generator.WriteFileHeader(
            "%s/%s/from_%s.csv" %
            (self.path_prefix, first_value.__class__.__name__,
             original_value_type.__name__))

        writer = utils.CsvWriter()
        # Write the CSV header based on first value class and write
        # the first value itself. All other values are guaranteed
        # to have the same class (see ProcessSingleTypeExportedValues definition).
        writer.WriteRow(self._GetCSVHeader(first_value.__class__))
        writer.WriteRow(self._GetCSVRow(first_value))

        chunk = writer.Content().encode("utf-8")
        yield self.archive_generator.WriteFileChunk(chunk)

        # Counter starts from 1, as 1 value has already been written.
        counter = 1
        for batch in utils.Grouper(exported_values, self.ROW_BATCH):
            counter += len(batch)

            writer = utils.CsvWriter()
            for value in batch:
                writer.WriteRow(self._GetCSVRow(value))

            chunk = writer.Content().encode("utf-8")
            yield self.archive_generator.WriteFileChunk(chunk)

        yield self.archive_generator.WriteFileFooter()

        self.export_counts.setdefault(
            original_value_type.__name__,
            dict())[first_value.__class__.__name__] = counter
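
utils.CsvWriter is a GRR helper; the batch-to-chunk flow can be sketched with the standard csv module instead (illustration only, not the plugin's actual writer):

import csv
import io

def rows_to_chunk(rows):
    """Serializes a batch of rows into one UTF-8 encoded CSV chunk."""
    buf = io.StringIO()
    writer = csv.writer(buf)
    writer.writerows(rows)
    return buf.getvalue().encode("utf-8")

print(rows_to_chunk([["client_id", "hostname"], ["C.1", "host-1"]]))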
Example #21
    def CleanAff4Clients(self):
        """Cleans up old client data from aff4."""

        inactive_client_ttl = config.CONFIG[
            "DataRetention.inactive_client_ttl"]
        if not inactive_client_ttl:
            self.Log("TTL not set - nothing to do...")
            return

        exception_label = config.CONFIG[
            "DataRetention.inactive_client_ttl_exception_label"]

        index = client_index.CreateClientIndex(token=self.token)

        client_urns = index.LookupClients(["."])

        deadline = rdfvalue.RDFDatetime.Now() - inactive_client_ttl
        deletion_count = 0

        for client_group in utils.Grouper(client_urns, 1000):
            inactive_client_urns = []
            for client in aff4.FACTORY.MultiOpen(
                    client_group,
                    mode="r",
                    aff4_type=aff4_grr.VFSGRRClient,
                    token=self.token):
                if exception_label in client.GetLabelsNames():
                    continue

                if client.Get(client.Schema.LAST) < deadline:
                    inactive_client_urns.append(client.urn)

            aff4.FACTORY.MultiDelete(inactive_client_urns, token=self.token)
            deletion_count += len(inactive_client_urns)
            self.HeartBeat()

        self.Log("Deleted %d inactive clients." % deletion_count)
Example #22
    def Handle(self, args, token=None):
        if args.count:
            end = args.offset + args.count
            # Read <count> clients ahead in case some of them fail to open / verify.
            batch_size = end + args.count
        else:
            end = sys.maxsize
            batch_size = end

        keywords = shlex.split(args.query)
        api_clients = []

        if data_store.RelationalDBReadEnabled():
            index = client_index.ClientIndex()

            # TODO(amoser): We could move the label verification into the
            # database making this method more efficient. Label restrictions
            # should be on small subsets though so this might not be worth
            # it.
            all_client_ids = set()
            for label in self.labels_whitelist:
                label_filter = ["label:" + label] + keywords
                all_client_ids.update(index.LookupClients(label_filter))

            index = 0
            for cid_batch in utils.Grouper(sorted(all_client_ids), batch_size):
                client_infos = data_store.REL_DB.MultiReadClientFullInfo(
                    cid_batch)

                for _, client_info in sorted(iteritems(client_infos)):
                    if not self._VerifyLabels(client_info.labels):
                        continue
                    if index >= args.offset and index < end:
                        api_clients.append(
                            ApiClient().InitFromClientInfo(client_info))
                    index += 1
                    if index >= end:
                        UpdateClientsFromFleetspeak(api_clients)
                        return ApiSearchClientsResult(items=api_clients)

        else:
            index = client_index.CreateClientIndex(token=token)
            all_urns = set()
            for label in self.labels_whitelist:
                label_filter = ["label:" + label] + keywords
                all_urns.update(index.LookupClients(label_filter))

            all_objs = aff4.FACTORY.MultiOpen(all_urns,
                                              aff4_type=aff4_grr.VFSGRRClient,
                                              token=token)

            index = 0
            for client_obj in sorted(all_objs):
                if not self._CheckClientLabels(client_obj):
                    continue
                if index >= args.offset and index < end:
                    api_clients.append(
                        ApiClient().InitFromAff4Object(client_obj))

                index += 1
                if index >= end:
                    break

        UpdateClientsFromFleetspeak(api_clients)
        return ApiSearchClientsResult(items=api_clients)
Example #23
    def Generate(self, collection, token=None):
        """Generates archive from a given collection.

    Iterates the collection and generates an archive by yielding contents
    of every referenced AFF4Stream.

    Args:
      collection: Iterable with items that point to aff4 paths.
      token: User's ACLToken.

    Yields:
      Binary chunks comprising the generated archive.
    """
        clients = set()
        for fd_urn_batch in utils.Grouper(self._ItemsToUrns(collection),
                                          self.BATCH_SIZE):

            fds_to_write = {}
            for fd in aff4.FACTORY.MultiOpen(fd_urn_batch, token=token):
                self.total_files += 1

                if not self.predicate(fd):
                    self.ignored_files.append(utils.SmartUnicode(fd.urn))
                    continue

                # Any file-like object with data in AFF4 should inherit AFF4Stream.
                if isinstance(fd, aff4.AFF4Stream):
                    urn_components = fd.urn.Split()
                    clients.add(rdf_client.ClientURN(urn_components[0]))

                    content_path = os.path.join(self.prefix, *urn_components)
                    self.archived_files += 1

                    # Make sure size of the original file is passed. It's required
                    # when output_writer is StreamingTarWriter.
                    st = os.stat_result(
                        (0o644, 0, 0, 0, 0, 0, fd.size, 0, 0, 0))
                    fds_to_write[fd] = (content_path, st)

            if fds_to_write:
                prev_fd = None
                for fd, chunk, exception in aff4.AFF4Stream.MultiStream(
                        fds_to_write):
                    if exception:
                        logging.exception(exception)

                        self.archived_files -= 1
                        self.failed_files.append(utils.SmartUnicode(fd.urn))
                        continue

                    if prev_fd != fd:
                        if prev_fd:
                            yield self.archive_generator.WriteFileFooter()
                        prev_fd = fd

                        content_path, st = fds_to_write[fd]
                        yield self.archive_generator.WriteFileHeader(
                            content_path, st=st)

                    yield self.archive_generator.WriteFileChunk(chunk)

                if self.archive_generator.is_file_write_in_progress:
                    yield self.archive_generator.WriteFileFooter()

        if clients:
            for client_urn_batch in utils.Grouper(clients, self.BATCH_SIZE):
                for fd in aff4.FACTORY.MultiOpen(
                        client_urn_batch,
                        aff4_type=aff4_grr.VFSGRRClient,
                        token=token):
                    for chunk in self._GenerateClientInfo(fd):
                        yield chunk

        for chunk in self._GenerateDescription():
            yield chunk

        yield self.archive_generator.Close()
Example #24
    def _MigrateVfsUrns(self, vfs_urns):
        """Migrates history of given list of VFS URNs."""
        for group in utils.Grouper(vfs_urns, self.history_vfs_group_size):
            self._MigrateVfsUrnGroup(group)
Example #25
    def MigrateClients(self, client_urns):
        """Migrates entire VFS of given client list to the relational data store."""
        batches = utils.Grouper(client_urns, self.client_batch_size)

        tp = pool.ThreadPool(processes=self.thread_count)
        tp.map(self.MigrateClientBatch, list(batches))