Example #1
    def testStuckNotificationGetsDeletedAfterTheFlowIsTerminated(self):
        worker_obj = self._TestWorker()
        initial_time = rdfvalue.RDFDatetime.FromSecondsSinceEpoch(100)
        stuck_flows_timeout = flow_runner.FlowRunner.stuck_flows_timeout

        try:
            with test_lib.FakeTime(initial_time.AsSecondsSinceEpoch()):
                session_id = flow.StartAFF4Flow(
                    flow_name=WorkerStuckableTestFlow.__name__,
                    client_id=self.client_id,
                    token=self.token,
                    sync=False)

                # Process all messages
                worker_obj.RunOnce()
                # Wait until worker thread starts processing the flow.
                WorkerStuckableTestFlow.WaitUntilWorkerStartsProcessing()

            # Set the time to the stuck-flows timeout plus one minute. The
            # flow is currently blocked because of the way the semaphores are
            # set up, so the worker should consider it stuck and terminate it.
            future_time = (initial_time + rdfvalue.Duration("1m") +
                           stuck_flows_timeout)
            with test_lib.FakeTime(future_time.AsSecondsSinceEpoch()):
                worker_obj.RunOnce()

            killed_flow = aff4.FACTORY.Open(session_id, token=self.token)
            self.assertEqual(killed_flow.context.state,
                             rdf_flow_runner.FlowContext.State.ERROR)
            self.assertEqual(
                killed_flow.context.status,
                "Terminated by user test. Reason: Stuck in the worker")

            # Check that the stuck notification has been removed.
            qm = queue_manager.QueueManager(token=self.token)
            notifications = qm.GetNotifications(queues.FLOWS)
            for n in notifications:
                self.assertFalse(n.in_progress)
        finally:
            # Release the semaphore so that worker thread unblocks and finishes
            # processing the flow.
            WorkerStuckableTestFlow.StopFlow()
            WorkerStuckableTestFlow.LetWorkerFinishProcessing()
            worker_obj.thread_pool.Join()
Example #2
    def Fn(*args, **kwargs):
      """Wrapper around the decorated function."""

      if WITH_LIMITED_CALL_FREQUENCY_PASS_THROUGH:
        # This effectively turns off the caching.
        min_time = rdfvalue.Duration(0)
      else:
        min_time = min_time_between_calls

      key = (args, tuple(sorted(kwargs.items())))
      now = rdfvalue.RDFDatetime.Now()

      with lock:
        for k, prev_time in list(prev_times.items()):
          if now - prev_time >= min_time:
            prev_times.pop(k)
            prev_results.pop(k, None)
            result_locks.pop(k, None)

        try:
          # We eliminated all the old entries, so if the key is present
          # in the cache, it means that the data is fresh enough to be used.
          return prev_results[key]
        except KeyError:
          prev_time = None

        try:
          result_lock = result_locks[key]
        except KeyError:
          result_lock = threading.RLock()
          result_locks[key] = result_lock

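      # Compute (or wait) under a per-key lock so that concurrent calls with
      # the same arguments do not recompute the result in parallel, while
      # calls with different arguments proceed independently.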
      with result_lock:
        t = prev_times.get(key)

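        # prev_time is always None here (we only get this far on a cache
        # miss), so t == prev_time means no other thread stored a fresh
        # result while we waited for the lock; otherwise reuse that result.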
        if t == prev_time:
          result = f(*args, **kwargs)
          with lock:
            prev_times[key] = rdfvalue.RDFDatetime.Now()
            prev_results[key] = result

          return result
        else:
          return prev_results[key]
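
A minimal usage sketch (hedged: the decorator factory name
WithLimitedCallFrequency is an assumption about the function that returns
the Fn wrapper above; it is not shown in this example):

# Hypothetical usage of the wrapper above.
@cache.WithLimitedCallFrequency(rdfvalue.Duration("30s"))
def GetClientSnapshot(client_id):
  ...  # Expensive call; recomputed at most once every 30s per argument set.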
Example #3
  def RunOnce(self, names=None, token=None):
    """Tries to lock and run cron jobs.

    Args:
      names: List of cron jobs to run.  If unset, run them all.
      token: Security token (unused).
    """
    del token

    leased_jobs = data_store.REL_DB.LeaseCronJobs(
        cronjob_ids=names, lease_time=rdfvalue.Duration("10m"))
    if not leased_jobs:
      return

    for job in leased_jobs:
      logging.info("Running cron job: %s", job.cron_job_id)
      self.RunJob(job)

    data_store.REL_DB.ReturnLeasedCronJobs(leased_jobs)
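
A brief usage sketch (hedged: the CronManager class name is hypothetical;
only the method itself is shown above):

# Hypothetical caller for the method above.
manager = CronManager()
manager.RunOnce(names=["OSBreakDown"])  # Lease and run one specific job.
manager.RunOnce()                       # Lease and run all registered jobs.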
Example #4
    def QueryAndOwn(self, queue, lease_seconds=10, limit=1):
        """Returns a list of Tasks leased for a certain time.

    Args:
      queue: The queue to query from.
      lease_seconds: The tasks will be leased for this long.
      limit: Number of values to fetch.

    Returns:
        A list of GrrMessage() objects leased.
    """
        if data_store.RelationalDBReadEnabled(category="client_messages"):
            return data_store.REL_DB.LeaseClientMessages(
                queue.Split()[0],
                lease_time=rdfvalue.Duration("%ds" % lease_seconds),
                limit=limit)
        with self.data_store.GetMutationPool() as mutation_pool:
            return mutation_pool.QueueQueryAndOwn(queue, lease_seconds, limit,
                                                  self.frozen_timestamp)
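
A usage sketch (hedged: scheduler and client_urn are placeholders for an
instance of the enclosing class and a client URN):

# Hypothetically lease up to 5 messages from a client queue for 60 seconds;
# other workers will not receive them until the lease expires.
tasks = scheduler.QueryAndOwn(client_urn.Queue(), lease_seconds=60, limit=5)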
Example #5
    def ProcessFlow(self, flow_processing_request):
        """The callback for the flow processing queue."""

        client_id = flow_processing_request.client_id
        flow_id = flow_processing_request.flow_id

        data_store.REL_DB.AckFlowProcessingRequests([flow_processing_request])

        try:
            rdf_flow = data_store.REL_DB.LeaseFlowForProcessing(
                client_id, flow_id, processing_time=rdfvalue.Duration("6h"))
        except db.ParentHuntIsNotRunningError:
            flow_base.TerminateFlow(client_id, flow_id, "Parent hunt stopped.")
            return

        logging.info("Processing Flow %s/%s/%d (%s).", client_id, flow_id,
                     rdf_flow.next_request_to_process,
                     rdf_flow.flow_class_name)

        flow_cls = registry.FlowRegistry.FlowClassByName(
            rdf_flow.flow_class_name)
        flow_obj = flow_cls(rdf_flow)

        if not flow_obj.IsRunning():
            logging.info(
                "Received a request to process flow %s on client %s that is not "
                "running.", flow_id, client_id)
            return

        processed = flow_obj.ProcessAllReadyRequests()
        if processed == 0:
            raise ValueError(
                "Unable to process any requests for flow %s on client %s." %
                (flow_id, client_id))

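        # _ReleaseProcessedFlow fails when new requests became ready while we
        # were processing; keep processing until the flow can be released.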
        while not self._ReleaseProcessedFlow(flow_obj):
            processed = flow_obj.ProcessAllReadyRequests()
            if processed == 0:
                raise ValueError(
                    "%s/%s: ReleaseProcessedFlow returned false but no "
                    "request could be processed (next req: %d)." %
                    (client_id, flow_id,
                     flow_obj.rdf_flow.next_request_to_process))
Example #6
class OSBreakDown(AbstractClientStatsCronFlow):
  """Records relative ratios of OS versions in 7 day actives."""

  recency_window = rdfvalue.Duration("30d")

  def BeginProcessing(self):
    self.counters = [
        _ActiveCounter(rdf_stats.ClientGraphSeries.ReportType.OS_TYPE),
        _ActiveCounter(rdf_stats.ClientGraphSeries.ReportType.OS_RELEASE),
    ]

  def FinishProcessing(self):
    # Write all the counter attributes.
    for counter in self.counters:
      counter.Save(self.token)

  def _Process(self, labels, ping, system, uname):
    if not ping:
      return

    for label in labels:
      # Windows, Linux, Darwin
      self.counters[0].Add(system, label, ping)

      # Windows-2008ServerR2-6.1.7601SP1, Linux-Ubuntu-12.04,
      # Darwin-OSX-10.9.3
      self.counters[1].Add(uname, label, ping)

  def ProcessLegacyClient(self, ping, client):
    """Update counters for system, version and release attributes."""
    labels = self._GetClientLabelsList(client)
    system = client.Get(client.Schema.SYSTEM, "Unknown")
    uname = client.Get(client.Schema.UNAME, "Unknown")

    self._Process(labels, ping, system, uname)

  def ProcessClientFullInfo(self, client_full_info):
    labels = self._GetClientLabelsList(client_full_info)
    ping = client_full_info.metadata.ping
    system = client_full_info.last_snapshot.knowledge_base.os
    uname = client_full_info.last_snapshot.Uname()

    self._Process(labels, ping, system, uname)
Example #7
  def testCronApprovalsReportPluginWithNoActivityToReport(self):
    report = report_plugins.GetReportByName(
        server_report_plugins.CronApprovalsReportPlugin.__name__)

    now = rdfvalue.RDFDatetime.Now()
    month_duration = rdfvalue.Duration("30d")

    api_report_data = report.GetReportData(
        stats_api.ApiGetReportArgs(
            name=report.__class__.__name__,
            start_time=now - month_duration,
            duration=month_duration),
        token=self.token)

    self.assertEqual(api_report_data.representation_type,
                     RepresentationType.AUDIT_CHART)
    self.assertCountEqual(api_report_data.audit_chart.used_fields,
                          ["action", "timestamp", "user", "urn"])
    self.assertEmpty(api_report_data.audit_chart.rows)
Example #8
  def testUserFlowsReportPluginWithNoActivityToReport(self):
    report = report_plugins.GetReportByName(
        server_report_plugins.UserFlowsReportPlugin.__name__)

    now = rdfvalue.RDFDatetime.Now()
    month_duration = rdfvalue.Duration("30d")

    api_report_data = report.GetReportData(
        stats_api.ApiGetReportArgs(
            name=report.__class__.__name__,
            start_time=now - month_duration,
            duration=month_duration),
        token=self.token)

    self.assertEqual(
        api_report_data,
        rdf_report_plugins.ApiReportData(
            representation_type=RepresentationType.STACK_CHART,
            stack_chart=rdf_report_plugins.ApiStackChartReportData(x_ticks=[])))
Example #9
    def testMostActiveUsersReportPluginWithNoActivityToReport(self):
        report = report_plugins.GetReportByName(
            server_report_plugins.MostActiveUsersReportPlugin.__name__)

        now = rdfvalue.RDFDatetime.Now()
        month_duration = rdfvalue.Duration("30d")

        api_report_data = report.GetReportData(
            stats_api.ApiGetReportArgs(
                name=report.__class__.__name__,
                start_time=now - month_duration,
                duration=month_duration),
            token=self.token)

        self.assertEqual(
            api_report_data,
            rdf_report_plugins.ApiReportData(
                representation_type=rdf_report_plugins.ApiReportData.
                RepresentationType.PIE_CHART,
                pie_chart=rdf_report_plugins.ApiPieChartReportData(data=[])))
Example #10
    def _AnalyzeKeywords(self, keywords):
        """Extracts a start time from a list of keywords if present."""
        start_time = rdfvalue.RDFDatetime.Now() - rdfvalue.Duration("180d")
        filtered_keywords = []

        for k in keywords:
            if k.startswith(self.START_TIME_PREFIX):
                try:
                    start_time = rdfvalue.RDFDatetime.FromHumanReadable(
                        k[self.START_TIME_PREFIX_LEN:])
                except ValueError:
                    pass
            else:
                filtered_keywords.append(k)

        if not filtered_keywords:
            filtered_keywords.append(".")

        return start_time, filtered_keywords
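
A sketch of the expected behavior (hedged: the concrete START_TIME_PREFIX
value "start_date:" is an assumption made for illustration):

# Hypothetical illustration of _AnalyzeKeywords.
start_time, remaining = index._AnalyzeKeywords(
    ["start_date:2018-12-01", "windows"])
# start_time == rdfvalue.RDFDatetime.FromHumanReadable("2018-12-01")
# remaining == ["windows"]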
Example #11
    def testKeepsHuntsWithRetainLabel(self):
        exception_label_name = config.CONFIG[
            "DataRetention.hunts_ttl_exception_label"]

        for hunt_urn in self.hunts_urns[:3]:
            with aff4.FACTORY.Open(hunt_urn, mode="rw",
                                   token=self.token) as fd:
                fd.AddLabel(exception_label_name)

        with test_lib.ConfigOverrider(
            {"DataRetention.hunts_ttl": rdfvalue.Duration("10s")}):

            with test_lib.FakeTime(40 + 60 * self.NUM_HUNTS):
                self._RunCleanup()

            hunts_urns = list(
                aff4.FACTORY.Open("aff4:/hunts",
                                  token=self.token).ListChildren())
            self.assertEqual(len(hunts_urns), 3)
Example #12
  def testHuntTermination(self):
    """This tests that hunts with a client limit terminate correctly."""
    with test_lib.FakeTime(1000, increment=1e-6):
      with implementation.StartHunt(
          hunt_name=standard.GenericHunt.__name__,
          flow_runner_args=rdf_flow_runner.FlowRunnerArgs(
              flow_name=transfer.GetFile.__name__),
          flow_args=transfer.GetFileArgs(
              pathspec=rdf_paths.PathSpec(
                  path="/tmp/evil.txt",
                  pathtype=rdf_paths.PathSpec.PathType.OS)),
          client_rule_set=self._CreateForemanClientRuleSet(),
          client_limit=5,
          client_rate=0,
          expiry_time=rdfvalue.Duration("1000s"),
          token=self.token) as hunt:
        hunt.Run()

      # Pretend to be the foreman now and dish out hunting jobs to all the
      # clients (note that we have 10 clients here).
      self.AssignTasksToClients()

      # Run the hunt.
      client_mock = hunt_test_lib.SampleHuntMock()
      hunt_test_lib.TestHuntHelper(
          client_mock,
          self.client_ids,
          check_flow_errors=False,
          token=self.token)

      hunt_obj = aff4.FACTORY.Open(
          hunt.session_id, age=aff4.ALL_TIMES, token=self.token)

      started, finished, errors = hunt_obj.GetClientsCounts()
      self.assertEqual(started, 5)
      self.assertEqual(finished, 5)
      self.assertEqual(errors, 2)

      hunt_obj = aff4.FACTORY.Open(
          hunt.session_id, age=aff4.ALL_TIMES, token=self.token)

      # Hunts are automatically paused when they reach the client limit.
      self.assertEqual(hunt_obj.Get(hunt_obj.Schema.STATE), "PAUSED")
Example #13
    def testHuntActionsReportPluginWithNoActivityToReport(self):
        report = report_plugins.GetReportByName(
            server_report_plugins.HuntActionsReportPlugin.__name__)

        now = rdfvalue.RDFDatetime.Now()
        month_duration = rdfvalue.Duration("30d")

        api_report_data = report.GetReportData(
            stats_api.ApiGetReportArgs(
                name=report.__class__.__name__,
                start_time=now - month_duration,
                duration=month_duration),
            token=self.token)

        self.assertEqual(
            api_report_data,
            rdf_report_plugins.ApiReportData(
                representation_type=RepresentationType.AUDIT_CHART,
                audit_chart=rdf_report_plugins.ApiAuditChartReportData(
                    used_fields=["action", "timestamp", "user"], rows=[])))
Example #14
    def setUp(self):
        super(SystemCronTestMixin, self).setUp()

        # This is not optimal: we create clients 0-19 with Linux, then
        # overwrite clients 0-9 with Windows, leaving 10-19 on Linux.
        client_ping_time = rdfvalue.RDFDatetime.Now() - rdfvalue.Duration("8d")
        self.SetupClients(20, system="Linux", ping=client_ping_time)
        self.SetupClients(10, system="Windows", ping=client_ping_time)

        for i in range(0, 10):
            client_id = "C.1%015x" % i
            with aff4.FACTORY.Open(client_id, mode="rw",
                                   token=self.token) as client:
                client.AddLabels([u"Label1", u"Label2"], owner="GRR")
                client.AddLabel(u"UserLabel", owner="jim")

            data_store.REL_DB.AddClientLabels(client_id, "GRR",
                                              [u"Label1", u"Label2"])
            data_store.REL_DB.AddClientLabels(client_id, "jim", [u"UserLabel"])
Example #15
class ClientStats(rdf_structs.RDFProtoStruct):
    """A client stat object."""
    protobuf = jobs_pb2.ClientStats
    rdf_deps = [
        CpuSample,
        IOSample,
        rdfvalue.RDFDatetime,
    ]

    DEFAULT_SAMPLING_INTERVAL = rdfvalue.Duration("60s")

    @classmethod
    def Downsampled(cls, stats, interval=None):
        """Constructs a copy of given stats but downsampled to given interval.

    Args:
      stats: A `ClientStats` instance.
      interval: A downsampling interval.

    Returns:
      A downsampled `ClientStats` instance.
    """
        interval = interval or cls.DEFAULT_SAMPLING_INTERVAL

        result = cls(stats)
        result.cpu_samples = cls._Downsample(kind=CpuSample,
                                             samples=stats.cpu_samples,
                                             interval=interval)
        result.io_samples = cls._Downsample(kind=IOSample,
                                            samples=stats.io_samples,
                                            interval=interval)
        return result

    @classmethod
    def _Downsample(cls, kind, samples, interval):
        buckets = {}
        for sample in samples:
            bucket = buckets.setdefault(sample.timestamp.Floor(interval), [])
            bucket.append(sample)

        for bucket in itervalues(buckets):
            yield kind.FromMany(bucket)
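
A short usage sketch of the downsampling helper above:

# Merge all samples that fall into the same 10-minute bucket into one
# sample each via CpuSample.FromMany / IOSample.FromMany.
downsampled = ClientStats.Downsampled(stats, interval=rdfvalue.Duration("10m"))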
Example #16
    def Next(self):
        """Grab tasks for us from the server's queue."""
        with queue_manager.QueueManager(token=self.token) as manager:
            if data_store.RelationalDBEnabled():
                request_tasks = data_store.REL_DB.LeaseClientActionRequests(
                    self.client_id.Basename(),
                    lease_time=rdfvalue.Duration("10000s"),
                    limit=1)
                request_tasks = [
                    rdf_flow_objects.GRRMessageFromClientActionRequest(r)
                    for r in request_tasks
                ]
            else:
                request_tasks = manager.QueryAndOwn(self.client_id.Queue(),
                                                    limit=1,
                                                    lease_seconds=10000)

            request_tasks.extend(self._mock_task_queue)
            self._mock_task_queue[:] = []  # Clear the referenced list.

            for message in request_tasks:
                try:
                    responses = self.client_mock.HandleMessage(message)
                    logging.info(
                        "Called client action %s generating %s responses",
                        message.name,
                        len(responses) + 1)
                except Exception as e:  # pylint: disable=broad-except
                    logging.exception("Error %s occurred in client", e)
                    responses = [
                        self.client_mock.GenerateStatusMessage(
                            message, 1, status="GENERIC_ERROR")
                    ]

                # Now insert those on the flow state queue
                for response in responses:
                    self.PushToStateQueue(manager, response)

                # Additionally schedule a task for the worker
                manager.QueueNotification(session_id=message.session_id)

            return len(request_tasks)
Example #17
  def testFlowStateUpdateUsingReturnProcessedFlow(self):
    hunt_obj = rdf_hunt_objects.Hunt(description="foo")
    self.db.WriteHuntObject(hunt_obj)
    hunt_id = hunt_obj.hunt_id

    client_id, flow_id = self._SetupHuntClientAndFlow(hunt_id=hunt_id)

    flow_obj = self.db.ReadFlowForProcessing(client_id, flow_id,
                                             rdfvalue.Duration("1m"))
    self.assertEqual(flow_obj.flow_state, rdf_flow_objects.Flow.FlowState.UNSET)

    flow_obj.flow_state = rdf_flow_objects.Flow.FlowState.ERROR
    self.db.ReturnProcessedFlow(flow_obj)

    results = self.db.ReadHuntFlows(
        hunt_id,
        0,
        10,
        filter_condition=db.HuntFlowsCondition.FAILED_FLOWS_ONLY)
    self.assertLen(results, 1)
Example #18
  def ProcessHuntOutputPlugins(self):
    if data_store.RelationalDBReadEnabled("hunts"):
      # No processing is needed for new-style hunts.
      return

    if data_store.RelationalDBFlowsEnabled():
      job = rdf_cronjobs.CronJob(
          cron_job_id="some/id", lifetime=rdfvalue.Duration("1h"))
      run_state = rdf_cronjobs.CronJobRun(
          cron_job_id="some/id",
          status="RUNNING",
          started_at=rdfvalue.RDFDatetime.Now())
      process_results.ProcessHuntResultCollectionsCronJob(run_state, job).Run()
    else:
      flow_urn = flow.StartAFF4Flow(
          flow_name=process_results.ProcessHuntResultCollectionsCronFlow
          .__name__,
          token=self.token)
      flow_test_lib.TestFlowHelper(flow_urn, token=self.token)
      return flow_urn
Example #19
    def GetReportData(self, get_report_args, token):
        """Filter the last week of user actions."""
        ret = rdf_report_plugins.ApiReportData(
            representation_type=RepresentationType.STACK_CHART)

        week_duration = rdfvalue.Duration.From(7, rdfvalue.DAYS)
        num_weeks = int(
            math.ceil(
                rdfvalue.Duration(get_report_args.duration).ToFractional(
                    rdfvalue.SECONDS) /
                week_duration.ToFractional(rdfvalue.SECONDS)))
        weeks = range(0, num_weeks)
        start_time = get_report_args.start_time
        end_time = start_time + num_weeks * week_duration
        user_activity = collections.defaultdict(
            lambda: {week: 0 for week in weeks})

        entries = self._LoadUserActivity(start_time=get_report_args.start_time,
                                         end_time=end_time,
                                         token=token)

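        # Bucket each entry into a zero-based week index relative to the
        # report's start time, e.g. an entry ten days in lands in week 1.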
        for username, timestamp, count in entries:
            week = (timestamp - start_time).ToInt(
                rdfvalue.SECONDS) // week_duration.ToInt(rdfvalue.SECONDS)
            if week in user_activity[username]:
                user_activity[username][week] += count

        user_activity = sorted(iteritems(user_activity))
        user_activity = [(user, data) for user, data in user_activity
                         if user not in access_control.SYSTEM_USERS]

        ret.stack_chart.data = [
            rdf_report_plugins.ApiReportDataSeries2D(
                label=user,
                points=(rdf_report_plugins.ApiReportDataPoint2D(x=x, y=y)
                        for x, y in sorted(data.items())))
            for user, data in user_activity
        ]

        return ret
Example #20
  def testIndexedReads(self):
    spacing = 10
    with utils.Stubber(sequential_collection.IndexedSequentialCollection,
                       "INDEX_SPACING", spacing):
      urn = "aff4:/sequential_collection/testIndexedReads"
      collection = self._TestCollection(urn)
      data_size = 4 * spacing
      # TODO(amoser): Without using a mutation pool, this test is really
      # slow on MySQL data store.
      with data_store.DB.GetMutationPool() as pool:
        for i in range(data_size):
          collection.StaticAdd(
              rdfvalue.RDFURN(urn), rdfvalue.RDFInteger(i), mutation_pool=pool)
      with test_lib.FakeTime(rdfvalue.RDFDatetime.Now() +
                             rdfvalue.Duration("10m")):
        for i in range(data_size - 1, data_size - 20, -1):
          self.assertEqual(collection[i], i)
        for i in [spacing - 1, spacing, spacing + 1]:
          self.assertEqual(collection[i], i)
        for i in range(data_size - spacing + 5, data_size - spacing - 5, -1):
          self.assertEqual(collection[i], i)
Example #21
    def testReturnProcessedFlow(self):
        client_id, flow_id = self._SetupClientAndFlow(
            next_request_to_process=1)

        processing_time = rdfvalue.Duration("60s")

        processed_flow = self.db.ReadFlowForProcessing(client_id, flow_id,
                                                       processing_time)

        # Let's say we processed one request on this flow.
        processed_flow.next_request_to_process = 2

        # There are some requests ready for processing but not #2.
        self.db.WriteFlowRequests([
            rdf_flow_objects.FlowRequest(client_id=client_id,
                                         flow_id=flow_id,
                                         request_id=1,
                                         needs_processing=True),
            rdf_flow_objects.FlowRequest(client_id=client_id,
                                         flow_id=flow_id,
                                         request_id=4,
                                         needs_processing=True)
        ])

        self.assertTrue(self.db.ReturnProcessedFlow(processed_flow))

        processed_flow = self.db.ReadFlowForProcessing(client_id, flow_id,
                                                       processing_time)
        # And another one.
        processed_flow.next_request_to_process = 3

        # But in the meantime, request 3 is ready for processing.
        self.db.WriteFlowRequests([
            rdf_flow_objects.FlowRequest(client_id=client_id,
                                         flow_id=flow_id,
                                         request_id=3,
                                         needs_processing=True)
        ])

        self.assertFalse(self.db.ReturnProcessedFlow(processed_flow))
Example #22
class PurgeClientStatsCronJob(cronjobs.CronJobBase):
    """Deletes outdated client statistics."""

    frequency = rdfvalue.Duration("1w")

    # Keep stats for one month.
    MAX_AGE = 31 * 24 * 3600

    def Run(self):
        self.start = 0
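        # The data store keys timestamps in microseconds since the epoch,
        # hence the 1e6 factor; everything older than MAX_AGE is deleted.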
        self.end = int(1e6 * (time.time() - self.MAX_AGE))

        client_urns = export_utils.GetAllClients(token=self.token)

        for batch in utils.Grouper(client_urns, 10000):
            with data_store.DB.GetMutationPool() as mutation_pool:
                for client_urn in batch:
                    mutation_pool.DeleteAttributes(client_urn.Add("stats"),
                                                   [u"aff4:stats"],
                                                   start=self.start,
                                                   end=self.end)
            self.HeartBeat()