コード例 #1
0
        def process(self, query, *args, **kwargs):
            """Pair ``query``, or the sub-queries it is split into, with keys.

            Args:
                query: the query to split; it must expose ``HasField``.
                *args: accepted but not used by this method.
                **kwargs: accepted but not used by this method.

            Returns:
                A list of ``(key, query)`` tuples whose integer keys count up
                from 1. When ``query`` has its ``limit`` field set, or when
                splitting raises, the list is ``[(1, query)]``.
            """
            # Keys are handed out consecutively starting from this value.
            first_key = 1

            # A user-set limit means the query is passed through unsplit.
            if query.HasField('limit'):
                return [(first_key, query)]

            # An explicit split count wins; zero means "estimate it".
            if self._num_splits != 0:
                estimated_num_splits = self._num_splits
            else:
                # NOTE(review): the estimate is computed from self._query,
                # not from the ``query`` argument -- confirm they always match.
                estimated_num_splits = ReadFromDatastore.get_estimated_num_splits(
                    self._project, self._datastore_namespace, self._query,
                    self._datastore)

            logging.info("Splitting the query into %d splits",
                         estimated_num_splits)

            # Best effort: any failure while building the partition or while
            # splitting falls back to running the query as a single split.
            try:
                partition = helper.make_partition(self._project,
                                                  self._datastore_namespace)
                query_splits = query_splitter.get_splits(
                    self._datastore, query, estimated_num_splits, partition)
            except Exception:
                logging.warning("Unable to parallelize the given query: %s",
                                query,
                                exc_info=True)
                query_splits = [query]

            # enumerate() yields the (key, split) tuples in split order.
            return list(enumerate(query_splits, first_key))
コード例 #2
0
    def process(self, query, *args, **kwargs):
      """Return ``(key, query)`` pairs for ``query`` or for its splits.

      Args:
        query: the query to split; it must expose ``HasField``.
        *args: accepted but not used by this method.
        **kwargs: accepted but not used by this method.

      Returns:
        A list of ``(key, query)`` tuples with integer keys counting up from
        1. A query whose ``limit`` field is set, or one for which splitting
        raises, comes back as the single pair ``[(1, query)]``.
      """
      # A query carrying a user-set limit is never split.
      if query.HasField('limit'):
        return [(1, query)]

      # Use the configured split count unless it is 0, in which case the
      # count is estimated.
      # NOTE(review): the estimate reads self._query rather than the ``query``
      # argument -- confirm the two always refer to the same query.
      split_count = (
          self._num_splits if self._num_splits != 0 else
          ReadFromDatastore.get_estimated_num_splits(
              self._project, self._datastore_namespace, self._query,
              self._datastore))
      logging.info("Splitting the query into %d splits", split_count)

      # Best effort: on any failure, fall back to the unsplit query.
      try:
        pieces = query_splitter.get_splits(
            self._datastore, query, split_count,
            helper.make_partition(self._project, self._datastore_namespace))
      except Exception:
        logging.warning("Unable to parallelize the given query: %s", query,
                        exc_info=True)
        pieces = [query]

      # Number the splits 1, 2, 3, ... in the order they were produced.
      return [(key, piece) for key, piece in enumerate(pieces, start=1)]