Example #1
0
  def ClaimRecords(self,
                   limit=10000,
                   timeout="30m",
                   start_time=None,
                   record_filter=lambda x: False,
                   max_filtered=1000):
    """Claims a batch of unclaimed records from this queue and returns them.

    A claimed record is normally not handed out again until its claim has
    timed out. This is weaker than a real lock: after an unexpected timeout or
    other error the same record may end up claimed by two callers at once.

    The work is delegated to QueueClaimRecords() on a data store mutation
    pool, called with this queue's urn and rdf_type.

    Args:
      limit: Upper bound on the number of records to claim.

      timeout: How long the claim lasts.

      start_time: If set, only records with a timestamp after this point are
        candidates for claiming.

      record_filter: Callable invoked serially on each record. A record for
        which it returns True is filtered out, i.e. neither returned nor
        locked. The default filters nothing.

      max_filtered: If non-zero, bounds the number of results read while
        filtering: once max_filtered filtered results have been read in a row
        with no unfiltered result in between, the search stops.

    Returns:
      A list of (id, record) pairs. Each record is a self.rdf_type and each id
      identifies the record for a later delete or release.

    Raises:
      LockError: If the queue is not locked.

    """
    if not self.locked:
      raise aff4.LockError("Queue must be locked to claim records.")

    # Gather the pass-through options once; they are forwarded unchanged.
    claim_options = {
        "limit": limit,
        "timeout": timeout,
        "start_time": start_time,
        "record_filter": record_filter,
        "max_filtered": max_filtered,
    }

    with data_store.DB.GetMutationPool() as pool:
      return pool.QueueClaimRecords(self.urn, self.rdf_type, **claim_options)
Example #2
0
  def Run(self, force=False):
    """Runs the cron job: settles the previous flow, then starts a new one.

    The CronJob object must be locked (i.e. opened via OpenWithLock) before
    Run() is called.

    Steps, in order:
      1. KillOldFlows() is invoked.
      2. If a current flow is recorded and it is no longer running, the last
         run status is stored (ERROR or OK), the matching stats entry is
         written, and the current flow reference is deleted and flushed.
      3. Unless force is set or DueToRun() is True, the method returns.
      4. A new flow is started with this job's urn as base session id, and the
         current flow urn and last run time are stored and flushed.

    Args:
      force: If True, the job is started regardless of what DueToRun()
             returns.

    Raises:
      LockError: if the object is not locked.
    """
    if not self.locked:
      raise aff4.LockError("CronJob must be locked for Run() to be called.")

    self.KillOldFlows()

    # Settle the bookkeeping for the previously started flow, if it is done.
    previous_flow_urn = self.Get(self.Schema.CURRENT_FLOW_URN)
    if previous_flow_urn:
      previous_flow = aff4.FACTORY.Open(previous_flow_urn, token=self.token)
      flow_runner = previous_flow.GetRunner()
      if not flow_runner.IsRunning():
        failed = (
            flow_runner.context.state == rdf_flow_runner.FlowContext.State.ERROR)
        if failed:
          outcome = rdf_cronjobs.CronJobRunStatus.Status.ERROR
        else:
          outcome = rdf_cronjobs.CronJobRunStatus.Status.OK

        # The status is always stored before any stats are touched.
        self.Set(self.Schema.LAST_RUN_STATUS,
                 rdf_cronjobs.CronJobRunStatus(status=outcome))

        if failed:
          stats.STATS.IncrementCounter(
              "cron_job_failure", fields=[self.urn.Basename()])
        else:
          # Latency is measured from the stored last run time until now.
          last_run = self.Get(self.Schema.LAST_RUN_TIME)
          now_secs = time.time()
          started_secs = last_run.AsSecondsSinceEpoch()
          stats.STATS.RecordEvent(
              "cron_job_latency",
              now_secs - started_secs,
              fields=[self.urn.Basename()])

        self.DeleteAttribute(self.Schema.CURRENT_FLOW_URN)
        self.Flush()

    # Nothing to start unless forced or the schedule says it is time.
    if not (force or self.DueToRun()):
      return

    # Make sure the flow is created with cron job as a parent folder.
    job_args = self.Get(self.Schema.CRON_ARGS)
    job_args.flow_runner_args.base_session_id = self.urn

    started_flow_urn = flow.StartFlow(
        runner_args=job_args.flow_runner_args,
        args=job_args.flow_args,
        token=self.token,
        sync=False)

    self.Set(self.Schema.CURRENT_FLOW_URN, started_flow_urn)
    self.Set(self.Schema.LAST_RUN_TIME, rdfvalue.RDFDatetime.Now())
    self.Flush()