Example 1
    def wasDataLostSinceLastHarvest(self, namespace, slot,
                                    reset_test_counters=False):
        # One canary key per expected memcache server; if a server restarts
        # or evicts data, its canary disappears.
        lost_data_check_keys = ["last_slot_%d" % x
                                for x in range(config.EXPECTED_MEMCACHE_SERVERS)]
        lost_data_check = memcache.get_multi(lost_data_check_keys,
                                             namespace=namespace)

        if reset_test_counters:
            # Re-arm the canaries for the next harvest: clear them, then
            # recreate each with value 1 (initial_value=0 creates missing keys).
            memcache.delete_multi(lost_data_check_keys, namespace=namespace)
            next_lost_data = dict((key, 1) for key in lost_data_check_keys)
            memcache.offset_multi(next_lost_data, namespace=namespace,
                                  initial_value=0)

        # Fewer values returned than keys requested means at least one
        # memcache server lost its counters.
        if len(lost_data_check) != len(lost_data_check_keys):
            logging.warning("ProdEagle counters lost before %d" % slot)
            return True
        return False
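
The method treats one memcache key per server as a canary: if get_multi returns fewer entries than keys requested, some server lost data. A minimal self-contained sketch of the same detection idea, using the App Engine SDK testbed (key names are illustrative, and flush_all stands in for a real eviction):

from google.appengine.api import memcache
from google.appengine.ext import testbed

tb = testbed.Testbed()
tb.activate()
tb.init_memcache_stub()

canaries = ["last_slot_%d" % i for i in range(4)]
memcache.offset_multi(dict((k, 1) for k in canaries), initial_value=0)

# All canaries present: no data lost since they were armed.
assert len(memcache.get_multi(canaries)) == len(canaries)

# Simulate an eviction or server restart.
memcache.flush_all()
assert len(memcache.get_multi(canaries)) < len(canaries)

tb.deactivate()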
Example 2
    def process_and_store(self, results):
        """Process the batch of results into new reports.

This function is executed transactionally, meaning either all reports are
processed and stored or none are. This ensures integrity in the number of
reports that are stored even when the DeadlineExceededError is raised.
"""

        # Batch-process the reports
        reports_to_store, keys_to_delete, counts = self.process_events(results)

        # Batch-store the new Reports
        ndb.put_multi(reports_to_store)

        # Update count in period
        memcache.offset_multi(counts, key_prefix="usagestats_parser_")

        # Batch-delete the ReportsToProcess entities
        ndb.delete_multi(keys_to_delete)
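
The docstring promises transactional execution; a minimal sketch of how a caller might provide that, assuming ndb is available and the batch fits in one cross-group transaction (the store_batch wrapper and parser argument are hypothetical):

from google.appengine.ext import ndb

def store_batch(parser, results):
    # Hypothetical wrapper: runs the whole batch as one cross-group
    # transaction, so a failure mid-way rolls back every datastore write.
    ndb.transaction(lambda: parser.process_and_store(results), xg=True)

Note that only the ndb operations participate in the transaction; the memcache.offset_multi counts are applied immediately and are not rolled back on failure.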
Example 3
  def testOffsetMulti(self):
    """Tests offset_multi behavior when the memcache service is failing."""
    # These assertions only hold against a memcache stub configured to
    # return errors: set() reports failure, and offset_multi() then maps
    # every key to None instead of an incremented value.
    self.assertFalse(memcache.set(self.key1, 5))
    self.assertFalse(memcache.set(self.key2, 'blue'))
    offsets = {self.key1: 4, self.key2: 10, self.key3: -2}
    result = memcache.offset_multi(offsets, initial_value=0)
    self.assertEqual({
        self.key1: None,
        self.key2: None,
        self.key3: None
    }, result)
    self.assertEqual(None, memcache.get(self.key3))
Example 4
def incrBatch(counters, save_stats=config.SAVE_PRODEAGLE_STATS):
    try:
        cnm = counter_names.getDefaultCounterNamesManager()
        slot = counter_names.getEpochRounded()
        # Prefix every counter name with the current time slot so each slot
        # accumulates independently.
        prefixed_counters = {}
        for name in counters:
            prefixed_counters[str(slot) + name] = counters[name]
        save_in_between_name = None
        if config.SAVE_IN_BETWEEN:
            save_in_between_name = (
                "save_in_between_%d" % counter_names.getEpochRounded(
                    utc_datetime=None, slot_size=config.SAVE_IN_BETWEEN))
            prefixed_counters[save_in_between_name] = 1
        existing = memcache.offset_multi(prefixed_counters,
                                         namespace=cnm.namespace,
                                         initial_value=0)
        # A counter whose post-offset value equals the delta we just added
        # was created by this call, i.e. its name is new.
        new_counter_names = []
        for name in counters:
            if counters[name] == existing[str(slot) + name]:
                new_counter_names.append(name)
        (data_store_access, n_added_names) = cnm.addIfNew(new_counter_names)
        if config.SAVE_IN_BETWEEN and existing[save_in_between_name] == 1:
            try:
                taskqueue.Task(url=config.PRODEAGLE_HARVEST_URL,
                               params={"save_in_between": "1"},
                               countdown=config.SAVE_IN_BETWEEN,
                               name="prodeagle-" + save_in_between_name).add()
            except Exception:
                # A task with this name may already exist for the current
                # slot; duplicate-name errors are expected and harmless.
                pass
        if save_stats:
            # Track bookkeeping stats in a separate Batch so the `counters`
            # argument stays available for the error log below.
            stats = Batch()
            if data_store_access:
                stats.incr("ProdEagle.Datastore.ReadAccess")
            if n_added_names:
                stats.incr("ProdEagle.NewNames", n_added_names)
                stats.incr("ProdEagle.Datastore.WriteAccess")
            if config.SAVE_IN_BETWEEN and existing[save_in_between_name] == 1:
                stats.incr("ProdEagle.SaveInBetween")
            stats.commit(save_stats=False)
    except Exception:
        logging.warning("Couldn't increase the following counters: %s" %
                        ", ".join(counters.keys()))
Example 5
    def testBatchIncrement(self):
        """Tests incrementing multiple keys with integer values."""

        memcache.set('low', 0)
        memcache.set('high', 100)

        memcache.offset_multi({'low': 1, 'high': -50})

        self.assertEqual(1, memcache.get('low'))
        self.assertEqual(50, memcache.get('high'))

        memcache.offset_multi({'low': 9, 'high': 0})

        self.assertEqual(10, memcache.get('low'))
        self.assertEqual(50, memcache.get('high'))

        memcache.offset_multi({'max': 5, 'min': -5}, initial_value=10)

        self.assertEqual(15, memcache.get('max'))
        self.assertEqual(5, memcache.get('min'))
Example 6
  def sample(self,
             reporter,
             getrandom=random.random,
             randrange=random.randrange):
    """Samples a set of reported key/values.

    Args:
      reporter: Reporter instance containing key/values to sample.
      getrandom: Used for testing.
      randrange: Used for testing.
    """
    # Update period start times if they're expired or non-existent.
    now = int(self.gettime())
    start_times = memcache.get_multi([c.start_key for c in self.configs])
    config_sets = {}
    for config in self.configs:
      start = start_times.get(config.start_key)
      if start is None or config.is_expired(start, now):
        config_sets[config.start_key] = now
        config_sets[config.counter_key] = 0
    if config_sets:
      memcache.set_multi(config_sets)

    # Flip coin for sample rate of all Keys on all configs.
    for key in reporter.all_keys():
      coin_flip = getrandom()
      for config in self.configs:
        if not config.should_sample(key, coin_flip):
          reporter.remove(key, config)

    # Increment counters for affected configs.
    counter_offsets = {}
    for config in self.configs:
      matching = reporter.get_keys(config)
      if matching:
        counter_offsets[config.counter_key] = len(matching)
    if not counter_offsets:
      return
    counter_results = memcache.offset_multi(counter_offsets, initial_value=0)

    # Apply the reservoir algorithm.
    value_sets = {}
    now_encoded = struct.pack('!l', now)
    for config in self.configs:
      matching = list(reporter.get_keys(config))
      counter = counter_results.get(config.counter_key)
      if counter is None:
        # Incrementing the config failed, so give up on these Key samples.
        continue
      counter = int(counter)  # Deal with wonky serialization types.
      for (value_index, sample_number) in zip(
          xrange(len(matching)), xrange(counter - len(matching), counter)):
        insert_index = None
        if sample_number < config.samples:
          insert_index = sample_number
        else:
          random_index = randrange(sample_number)
          if random_index < config.samples:
            insert_index = random_index
        if insert_index is not None:
          key = matching[value_index]
          value_key = config.position_key(insert_index)
          value = reporter.get(key, config)
          if value is not None:
            # Value may be None if this key was removed from the samples
            # list due to not passing the coin flip.
            value_encoded = struct.pack('!l', value)
            sample = '%s:%s:%s' % (
                config.adjust_value(key), now_encoded, value_encoded)
            value_sets[value_key] = sample
    memcache.set_multi(value_sets)
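
The "reservoir algorithm" comment refers to reservoir sampling: the first config.samples items fill the reservoir directly, and the n-th item thereafter replaces a random slot with probability samples/n, keeping a bounded, roughly uniform sample of an unbounded stream. A standalone sketch mirroring the index logic used above (the function name is illustrative):

import random

def reservoir_insert_index(sample_number, reservoir_size,
                           randrange=random.randrange):
    # Returns the reservoir slot the sample_number-th item (0-based)
    # should occupy, or None if the item is dropped.
    if sample_number < reservoir_size:
        return sample_number  # Still filling empty slots.
    random_index = randrange(sample_number)
    if random_index < reservoir_size:
        return random_index  # Replace a random existing slot.
    return None  # Drop this item.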
Example 7
    def pop_counters(keys):
        """Return all counters in provided combinations and reset their counts.

        This will return a dict mapping the provided key values to a list of
        each of their current counter values.
        Example return value: {
            "MonkeyCombination": [1, 5, 0, 12],
            "GorillaCombination": [0, 0, 0, 9],
        }

        This will also clear out the current counts for all combinations listed
        in keys, so after calling this the counts for each specified
        combination's counter should be 0.

        Note: while pop_counters tries to do the get and pop as atomically as
        possible, it is not truly atomic. This means there are rare edge cases
        during which problematic memcache evictions and incr()s can happen
        between the results being retrieved and the counters being popped. When
        this happens, we detect the situation and pretend like this combination
        of counters has simply been evicted from memcache (by deleting the
        combination of counters). This situation should hopefully be very rare.

        Args:
            keys: list of names of counter combinations
        """
        results = {k: [0] * COUNTERS_PER_COMBINATION for k in keys}

        # Grab all accumulating counters...
        combined_counters = memcache.get_multi(keys)

        # ...and immediately offset them by the inverse of their current counts
        # as quickly as possible.
        negative_offsets = {k: -1 * count
                for k, count in combined_counters.iteritems()}
        offset_results = memcache.offset_multi(negative_offsets)

        # Now that we've tried to pop the counter values from the accumulators,
        # make sure that none of the pops caused an overflow rollover due to
        # the race condition described in the above docstring.
        for key in offset_results:
            offset_counter = offset_results[key]
            for i in range(COUNTERS_PER_COMBINATION):
                count = SynchronizedCounter._single_counter_value(
                        offset_counter, i)
                if count > WARNING_HIGH_COUNTER_VALUE:
                    # We must've rolled a counter over backwards due to the
                    # memcache race condition described above. Warn and clear
                    # this counter.
                    #
                    # We don't expect this to happen, but if it does we should
                    # know about it without crashing on the user. See above
                    # explanation.
                    #
                    # TODO(kamens): find a nicer way to protect this scenario
                    logging.error("SynchronizedCounter %s rolled over on pop" %
                            key)
                    SynchronizedCounter.delete_multi([key])

        # Prepare popped results in form {
        #   "counter combination A": [<counter 1>, ..., <counter 4>],
        #   "counter combination B": [<counter 1>, ..., <counter 4>],
        # }
        for key in combined_counters:
            combined_counter = combined_counters[key]

            for i in range(COUNTERS_PER_COMBINATION):
                results[key][i] = SynchronizedCounter._single_counter_value(
                        combined_counter, i)

        return results
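
pop_counters reads and offsets a single memcache value per combination, which implies each combination packs COUNTERS_PER_COMBINATION small counters into one integer, with _single_counter_value extracting one field. A hypothetical sketch of such a packing, assuming fixed-width bit fields (the 16-bit width and helper names are assumptions, not the class's actual layout):

COUNTERS_PER_COMBINATION = 4
BITS_PER_COUNTER = 16  # Assumed field width; the real class may differ.

def pack_offset(index, delta):
    # Build a combined offset that increments only counter `index`.
    return delta << (index * BITS_PER_COUNTER)

def single_counter_value(combined, index):
    # Extract counter `index` from a packed value (cf. _single_counter_value).
    return (combined >> (index * BITS_PER_COUNTER)) & ((1 << BITS_PER_COUNTER) - 1)

combined = pack_offset(2, 5)
assert single_counter_value(combined, 2) == 5
assert single_counter_value(combined, 0) == 0

A packed layout would also explain the rollover check: a racing incr landing between the get and the negative offset can drive a field below zero and borrow from its neighbor, which shows up as an implausibly large count, hence the WARNING_HIGH_COUNTER_VALUE guard.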