예제 #1
0
        def process(self, query, *args, **kwargs):
            """Split ``query`` into shardable sub-queries.

            Returns a list of ``(key, query)`` pairs, where each key is a
            distinct integer starting at 1 used to group the splits
            downstream.  A query carrying a user-set limit is returned
            unsplit.
            """
            # A query with an explicit limit must be executed as-is.
            if query.HasField('limit'):
                return [(1, query)]

            # Fall back to an estimate when the user did not request a
            # specific number of splits.
            # NOTE(review): estimation uses self._query while splitting uses
            # the incoming query — presumably intentional; confirm upstream.
            if self._num_splits == 0:
                num_splits = ReadFromDatastore.get_estimated_num_splits(
                    self._project, self._datastore_namespace, self._query,
                    self._datastore)
            else:
                num_splits = self._num_splits

            logging.info("Splitting the query into %d splits", num_splits)
            try:
                splits = query_splitter.get_splits(
                    self._datastore, query, num_splits,
                    helper.make_partition(self._project,
                                          self._datastore_namespace))
            except Exception:
                # Best effort: if splitting fails, run the query whole.
                logging.warning("Unable to parallelize the given query: %s",
                                query,
                                exc_info=True)
                splits = [query]

            # Tag each split with a distinct key, starting from 1.
            return list(enumerate(splits, start=1))
예제 #2
0
    def check_get_splits(self, query, num_splits, num_entities, batch_size):
        """Exercise ``query_splitter.get_splits`` against a fake datastore.

        Args:
          query: the query to be split
          num_splits: number of splits
          num_entities: number of scatter entities contained in the fake
            datastore.
          batch_size: the number of entities returned by fake datastore in
            one req.
        """
        mock_datastore = MagicMock()
        # Serve scatter entities through a fake run_query implementation.
        mock_datastore.run_query.side_effect = fake_datastore.create_run_query(
            fake_datastore.create_entities(num_entities), batch_size)

        split_queries = query_splitter.get_splits(
            mock_datastore, query, num_splits)

        # With more requested splits than scatter entities, the best the
        # splitter can do is one entity per split.
        self.assertEqual(len(split_queries), min(num_splits, num_entities + 1))

        expected_calls = [
            call(req)
            for req in QuerySplitterTest.create_scatter_requests(
                query, num_splits, batch_size, num_entities)
        ]
        self.assertEqual(expected_calls,
                         mock_datastore.run_query.call_args_list)
예제 #3
0
    def process(self, query, *args, **kwargs):
      """Return ``(key, query)`` pairs for the splits of ``query``.

      Keys are distinct integers beginning at 1 so downstream stages can
      group the splits.  A query with a user-supplied limit cannot be
      split and is returned whole under key 1.
      """
      first_key = 1
      if query.HasField('limit'):
        # A user-set limit forbids splitting.
        return [(first_key, query)]

      if self._num_splits == 0:
        # No explicit split count requested; estimate one.
        # NOTE(review): the estimate is computed from self._query, not the
        # incoming query — presumably intentional; confirm upstream.
        target_splits = ReadFromDatastore.get_estimated_num_splits(
            self._project, self._datastore_namespace, self._query,
            self._datastore)
      else:
        target_splits = self._num_splits

      logging.info("Splitting the query into %d splits", target_splits)
      try:
        split_queries = query_splitter.get_splits(
            self._datastore, query, target_splits,
            helper.make_partition(self._project, self._datastore_namespace))
      except Exception:
        # Fall back to executing the original query as a single shard.
        logging.warning("Unable to parallelize the given query: %s", query,
                        exc_info=True)
        split_queries = [query]

      return list(enumerate(split_queries, start=first_key))
  def check_get_splits(self, query, num_splits, num_entities, batch_size):
    """Verify query_splitter.get_splits against a mocked datastore.

    Args:
      query: the query to be split
      num_splits: number of splits
      num_entities: number of scatter entities contained in the fake datastore.
      batch_size: the number of entities returned by fake datastore in one req.
    """
    scatter_entities = fake_datastore.create_entities(num_entities)
    mock_datastore = MagicMock()
    # Route run_query calls through the fake datastore implementation.
    run_query_fake = fake_datastore.create_run_query(scatter_entities,
                                                    batch_size)
    mock_datastore.run_query.side_effect = run_query_fake

    split_queries = query_splitter.get_splits(mock_datastore, query,
                                              num_splits)

    # If more splits are requested than there are scatter entities, the
    # achievable split count is capped at num_entities + 1.
    expected_num_splits = min(num_splits, num_entities + 1)
    self.assertEqual(len(split_queries), expected_num_splits)

    expected_requests = QuerySplitterTest.create_scatter_requests(
        query, num_splits, batch_size, num_entities)
    expected_calls = [call(req) for req in expected_requests]

    self.assertEqual(expected_calls, mock_datastore.run_query.call_args_list)