Example #1
  def Run(self, force=False):
    """Do the actual work of the Cron. Will first check if DueToRun is True.

    CronJob object must be locked (i.e. opened via OpenWithLock) for Run() to be
    called.

    Args:
      force: If True, the job will run no matter what (i.e. even if DueToRun()
             returns False).

    Raises:
      LockError: if the object is not locked.
    """
    if not self.locked:
      raise aff4.LockError("CronJob must be locked for Run() to be called.")

    if self.KillOldFlows():
      return

    # If the currently running flow has finished, update our state.
    current_flow_urn = self.Get(self.Schema.CURRENT_FLOW_URN)
    if current_flow_urn:
      current_flow = aff4.FACTORY.Open(current_flow_urn, token=self.token)
      runner = current_flow.GetRunner()
      if not runner.IsRunning():
        if runner.context.state == rdfvalue.Flow.State.ERROR:
          self.Set(self.Schema.LAST_RUN_STATUS,
                   rdfvalue.CronJobRunStatus(
                       status=rdfvalue.CronJobRunStatus.Status.ERROR))
          stats.STATS.IncrementCounter("cron_job_failure",
                                       fields=[self.urn.Basename()])
        else:
          self.Set(self.Schema.LAST_RUN_STATUS,
                   rdfvalue.CronJobRunStatus(
                       status=rdfvalue.CronJobRunStatus.Status.OK))

          start_time = self.Get(self.Schema.LAST_RUN_TIME)
          elapsed = time.time() - start_time.AsSecondsFromEpoch()
          stats.STATS.RecordEvent("cron_job_latency", elapsed,
                                  fields=[self.urn.Basename()])

        self.DeleteAttribute(self.Schema.CURRENT_FLOW_URN)
        self.Flush()

    if not force and not self.DueToRun():
      return

    cron_args = self.Get(self.Schema.CRON_ARGS)
    flow_urn = flow.GRRFlow.StartFlow(
        runner_args=cron_args.flow_runner_args,
        args=cron_args.flow_args, token=self.token, sync=False)

    self.Set(self.Schema.CURRENT_FLOW_URN, flow_urn)
    self.Set(self.Schema.LAST_RUN_TIME, rdfvalue.RDFDatetime().Now())
    self.Flush()

    flow_link = aff4.FACTORY.Create(self.urn.Add(flow_urn.Basename()),
                                    "AFF4Symlink", token=self.token)
    flow_link.Set(flow_link.Schema.SYMLINK_TARGET(flow_urn))
    flow_link.Close()
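A minimal caller sketch for Run(): per the docstring above, the CronJob must be opened via OpenWithLock. The URN and token below are placeholders, and OpenWithLock's keyword arguments may vary between GRR versions.

# Sketch: open the cron job under a lock, then force a run.
with aff4.FACTORY.OpenWithLock("aff4:/cron/SomeCronJob",  # placeholder URN
                               token=token) as cron_job:
  # Run() raises aff4.LockError if the object is not locked.
  cron_job.Run(force=True)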
Example #2
  def DeleteRecords(self, ids):
    """Delete records identified by ids.

    Args:
      ids: A list of ids provided by ClaimRecords.

    Raises:
      LockError: If the queue is not locked.
    """
    if not self.locked:
      raise aff4.LockError("Queue must be locked to delete records.")

    data_store.DB.MultiDeleteAttributes(
        ids, [self.LOCK_ATTRIBUTE, self.VALUE_ATTRIBUTE], token=self.token)
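A usage sketch for DeleteRecords(), combined with ClaimRecords() from Examples #5/#6: records are claimed, processed, then deleted so they cannot be claimed again. The queue URN, token, and Process() handler are placeholders.

with aff4.FACTORY.OpenWithLock("aff4:/queues/my_queue",  # placeholder URN
                               token=token) as queue:
  claimed = queue.ClaimRecords(limit=100)
  for record_id, record in claimed:
    Process(record)  # hypothetical application-level handler
  # Delete the processed records so they are not claimed again later.
  queue.DeleteRecords([record_id for record_id, _ in claimed])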
Example #3
  def RefreshClaims(self, ids, timeout="30m"):
    """Refreshes claims on records identified by ids.

    Args:
      ids: A list of ids provided by ClaimRecords.
      timeout: The new timeout for these claims.

    Raises:
      LockError: If the queue is not locked.
    """
    if not self.locked:
      raise aff4.LockError("Queue must be locked to refresh claims.")

    expiration = rdfvalue.RDFDatetime().Now() + rdfvalue.Duration(timeout)
    for subject in ids:
      data_store.DB.Set(subject,
                        self.LOCK_ATTRIBUTE,
                        expiration,
                        token=self.token,
                        sync=False)
    data_store.DB.Flush()
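A sketch for RefreshClaims(): when processing outlasts the original claim, the caller pushes the claim expiration forward for the ids returned by ClaimRecords(). The URN and token are placeholders, as above.

with aff4.FACTORY.OpenWithLock("aff4:/queues/my_queue",  # placeholder URN
                               token=token) as queue:
  claimed = queue.ClaimRecords(limit=100, timeout="30m")
  ids = [record_id for record_id, _ in claimed]
  # Processing is taking longer than expected: extend the claims.
  queue.RefreshClaims(ids, timeout="1h")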
Example #4
  def Compact(self, callback=None, timestamp=None):
    """Compacts versioned attributes into the collection stream.

    Versioned attributes come from the datastore sorted by timestamp in
    decreasing order. This is the opposite of what we want in the collection
    (items in the collection should be in chronological order).

    Compact's implementation can handle very large collections that can't
    be reversed in memory. It reads them in batches, reverses every batch
    individually, then reads the batches back in reverse order and writes
    their contents to the collection stream.

    Args:
      callback: An optional function without arguments that gets called
                periodically during processing. Useful in flows that have
                to heartbeat.
      timestamp: Only items added before this timestamp will be compacted.

    Raises:
      RuntimeError: if problems are encountered when reading back temporarily
                    saved data.

    Returns:
      Number of compacted results.
    """
    if not self.locked:
      raise aff4.LockError("Collection must be locked before compaction.")

    compacted_count = 0

    batches_urns = []
    current_batch = []

    # This timestamp will be used to delete attributes. We don't want
    # to delete anything that was added after we started the compaction.
    freeze_timestamp = timestamp or rdfvalue.RDFDatetime().Now()

    def UpdateIndex():
      seek_index = self.Get(self.Schema.SEEK_INDEX, SeekIndex())

      prev_index_pair = seek_index.checkpoints and seek_index.checkpoints[-1]
      if (not prev_index_pair or
          self.size - prev_index_pair.index_offset >= self.INDEX_INTERVAL):
        new_index_pair = SeekIndexPair(index_offset=self.size,
                                       byte_offset=self.fd.Tell())
        seek_index.checkpoints.Append(new_index_pair)
        self.Set(self.Schema.SEEK_INDEX, seek_index)

    def DeleteVersionedDataAndFlush():
      """Removes versioned attributes and flushes the stream."""
      data_store.DB.DeleteAttributes(self.urn, [self.Schema.DATA.predicate],
                                     end=freeze_timestamp,
                                     token=self.token,
                                     sync=True)
      if self.IsJournalingEnabled():
        journal_entry = self.Schema.COMPACTION_JOURNAL(compacted_count,
                                                       age=freeze_timestamp)
        attrs_to_set = {self.Schema.COMPACTION_JOURNAL: [journal_entry]}
        aff4.FACTORY.SetAttributes(self.urn,
                                   attrs_to_set,
                                   set(),
                                   add_child_index=False,
                                   sync=True,
                                   token=self.token)

      if self.Schema.DATA in self.synced_attributes:
        del self.synced_attributes[self.Schema.DATA]

      self.Flush(sync=True)

    def HeartBeat():
      """Update the lock lease if needed and call the callback."""
      lease_time = config_lib.CONFIG["Worker.compaction_lease_time"]
      if self.CheckLease() < lease_time / 2:
        logging.info("%s: Extending compaction lease.", self.urn)
        self.UpdateLease(lease_time)
        stats.STATS.IncrementCounter("packed_collection_lease_extended")

      if callback:
        callback()

    HeartBeat()

    # We iterate over all versioned attributes. Whenever we accumulate
    # self.COMPACTION_BATCH_SIZE of them, we write the batch to a temporary
    # stream in reversed order.
    for _, value, _ in data_store.DB.ResolvePrefix(
        self.urn,
        self.Schema.DATA.predicate,
        token=self.token,
        timestamp=(0, freeze_timestamp)):

      HeartBeat()

      current_batch.append(value)
      compacted_count += 1

      if len(current_batch) >= self.COMPACTION_BATCH_SIZE:
        batch_urn = rdfvalue.RDFURN("aff4:/tmp").Add("%X" %
                                                     utils.PRNG.GetULong())
        batches_urns.append(batch_urn)

        buf = cStringIO.StringIO()
        for data in reversed(current_batch):
          buf.write(struct.pack("<i", len(data)))
          buf.write(data)

        # We use AFF4Image to avoid serializing/deserializing data stored
        # in versioned attributes.
        with aff4.FACTORY.Create(batch_urn,
                                 aff4.AFF4Image,
                                 mode="w",
                                 token=self.token) as batch_stream:
          batch_stream.Write(buf.getvalue())

        current_batch = []

    # If there are no versioned attributes, we have nothing to do.
    if not current_batch and not batches_urns:
      return 0

    # The last (partial) batch holds the chronologically oldest data, since
    # results arrive newest first, so it can be written to our collection's
    # stream immediately.
    if current_batch:
      buf = cStringIO.StringIO()
      for data in reversed(current_batch):
        buf.write(struct.pack("<i", len(data)))
        buf.write(data)

      self.fd.Seek(0, 2)
      self.fd.Write(buf.getvalue())
      self.stream_dirty = True
      self.size += len(current_batch)
      UpdateIndex()

      # If current_batch was the only available batch, just write everything
      # and return.
      if not batches_urns:
        DeleteVersionedDataAndFlush()
        return compacted_count

    batches = {}
    for batch in aff4.FACTORY.MultiOpen(batches_urns,
                                        aff4_type=aff4.AFF4Image,
                                        token=self.token):
      batches[batch.urn] = batch

    if len(batches_urns) != len(batches):
      raise RuntimeError("Internal inconsistency can't read back all the "
                         "temporary batches.")

    # We read the temporary batches in reverse order (each batch's contents
    # were already reversed when it was written).
    self.fd.Seek(0, 2)
    for batch_urn in reversed(batches_urns):
      batch = batches[batch_urn]

      HeartBeat()

      data = batch.Read(len(batch))
      self.fd.Write(data)
      self.stream_dirty = True
      self.size += self.COMPACTION_BATCH_SIZE
      UpdateIndex()

      aff4.FACTORY.Delete(batch_urn, token=self.token)

    DeleteVersionedDataAndFlush()

    # Update system-wide stats.
    stats.STATS.IncrementCounter("packed_collection_compacted",
                                 delta=compacted_count)

    return compacted_count
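A caller sketch for Compact(): the collection must be locked, and the optional callback lets a long-running flow heartbeat while compaction proceeds. The collection URN, token, and heartbeat callable are placeholders.

with aff4.FACTORY.OpenWithLock("aff4:/some/packed_collection",  # placeholder
                               token=token) as collection:
  # The flow's heartbeat keeps its own lease alive during long compactions.
  compacted = collection.Compact(callback=my_flow.HeartBeat)
  logging.info("Compacted %d results.", compacted)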
Example #5
  def ClaimRecords(self,
                   limit=None,
                   timeout="30m",
                   record_filter=lambda x: False):
    """Returns and claims up to limit unclaimed records for timeout seconds.

    Returns a list of records which are now "claimed". A claimed record will
    generally be unavailable to be claimed until the claim times out. Note,
    however, that in case of an unexpected timeout or other error a record
    might be claimed twice at the same time. For this reason it should be
    considered weaker than a true lock.

    Args:
      limit: The number of records to claim.
      timeout: The duration of the claim.
      record_filter: A filter method to determine if the record should be
        returned. It will be called serially on each record and the record
        will be filtered (not returned or locked) if it returns True.

    Returns:
      A list of (id, record) pairs, where record is a self.rdf_type and id is
      a record identifier which can be used to delete or release the record.

    Raises:
      LockError: If the queue is not locked.
    """
    if not self.locked:
      raise aff4.LockError("Queue must be locked to claim records.")

    now = rdfvalue.RDFDatetime().Now()

    results = []

    for subject, values in data_store.DB.ScanAttributes(
        self.urn.Add("Records"),
        [self.VALUE_ATTRIBUTE, self.LOCK_ATTRIBUTE],
        token=self.token):
      if self.VALUE_ATTRIBUTE not in values:
        # Unlikely case, but could happen if, say, a thread called
        # RefreshClaims so late that another thread already deleted the
        # record.
        continue
      if self.LOCK_ATTRIBUTE in values and rdfvalue.RDFDatetime(
          values[self.LOCK_ATTRIBUTE][1]) > now:
        continue
      rdf_value = self.rdf_type(values[  # pylint: disable=not-callable
          self.VALUE_ATTRIBUTE][1])
      if record_filter(rdf_value):
        continue
      results.append((subject, rdf_value))
      if limit is not None and len(results) == limit:
        break

    expiration = rdfvalue.RDFDatetime().Now() + rdfvalue.Duration(timeout)

    # TODO(user): Add bulk set method to datastore.
    for subject, _ in results:
      data_store.DB.Set(subject,
                        self.LOCK_ATTRIBUTE,
                        expiration,
                        token=self.token,
                        sync=False)
    data_store.DB.Flush()
    return results
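A sketch showing record_filter with this version of ClaimRecords(): the filter is called on each candidate record, and returning True skips (does not claim) it. The queue URN, token, and the record's priority field are hypothetical.

def SkipLowPriority(record):
  # Hypothetical filter: leave low-priority records unclaimed.
  return record.priority < 5

with aff4.FACTORY.OpenWithLock("aff4:/queues/my_queue",  # placeholder URN
                               token=token) as queue:
  claimed = queue.ClaimRecords(limit=50,
                               timeout="30m",
                               record_filter=SkipLowPriority)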
Example #6
File: queue.py Project: ytisf/grr
  def ClaimRecords(self,
                   limit=10000,
                   timeout="30m",
                   start_time=None,
                   record_filter=lambda x: False,
                   max_filtered=1000):
    """Returns and claims up to limit unclaimed records for timeout seconds.

    Returns a list of records which are now "claimed". A claimed record will
    generally be unavailable to be claimed until the claim times out. Note,
    however, that in case of an unexpected timeout or other error a record
    might be claimed twice at the same time. For this reason it should be
    considered weaker than a true lock.

    Args:
      limit: The number of records to claim.
      timeout: The duration of the claim.
      start_time: The time to start claiming records at. Only records with a
        timestamp after this point will be claimed.
      record_filter: A filter method to determine if the record should be
        returned. It will be called serially on each record and the record
        will be filtered (not returned or locked) if it returns True.
      max_filtered: If non-zero, limits the number of results read when
        filtered. Specifically, if max_filtered filtered results are read
        sequentially without any unfiltered results, we stop looking for
        results.

    Returns:
      A list of (id, record) pairs, where record is a self.rdf_type and id is
      a record identifier which can be used to delete or release the record.

    Raises:
      LockError: If the queue is not locked.
    """
    if not self.locked:
      raise aff4.LockError("Queue must be locked to claim records.")

    now = rdfvalue.RDFDatetime.Now()

    after_urn = None
    if start_time:
      after_urn = self._MakeURN(self.urn,
                                start_time.AsMicroSecondsFromEpoch(), 0)
    results = []

    filtered_count = 0

    for subject, values in data_store.DB.ScanAttributes(
        self.urn.Add("Records"),
        [self.VALUE_ATTRIBUTE, self.LOCK_ATTRIBUTE],
        max_records=4 * limit,
        after_urn=after_urn,
        token=self.token):
      if self.VALUE_ATTRIBUTE not in values:
        # Unlikely case, but could happen if, say, a thread called
        # RefreshClaims so late that another thread already deleted the
        # record. Go ahead and clean this up.
        data_store.DB.DeleteAttributes(subject, [self.LOCK_ATTRIBUTE],
                                       token=self.token)
        continue
      if self.LOCK_ATTRIBUTE in values:
        timestamp = rdfvalue.RDFDatetime.FromSerializedString(
            values[self.LOCK_ATTRIBUTE][1])
        if timestamp > now:
          continue
      rdf_value = self.rdf_type.FromSerializedString(
          values[self.VALUE_ATTRIBUTE][1])
      if record_filter(rdf_value):
        filtered_count += 1
        if max_filtered and filtered_count >= max_filtered:
          break
        continue
      results.append((subject, rdf_value))
      filtered_count = 0
      if len(results) >= limit:
        break

    expiration = rdfvalue.RDFDatetime.Now() + rdfvalue.Duration(timeout)

    with data_store.DB.GetMutationPool(token=self.token) as mutation_pool:
      for subject, _ in results:
        mutation_pool.Set(subject, self.LOCK_ATTRIBUTE, expiration)
    return results
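A sketch for the newer ClaimRecords() signature, exercising start_time and max_filtered; the claim locks are taken through the mutation pool internally, so the caller only supplies parameters. The URN and token are placeholders, and the RDFDatetime/Duration arithmetic is an assumption based on the code above.

# Assumes RDFDatetime - Duration is supported (Duration addition is used above).
start = rdfvalue.RDFDatetime.Now() - rdfvalue.Duration("1h")
with aff4.FACTORY.OpenWithLock("aff4:/queues/my_queue",  # placeholder URN
                               token=token) as queue:
  claimed = queue.ClaimRecords(limit=10000,
                               timeout="30m",
                               start_time=start,
                               max_filtered=1000)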