Ejemplo n.º 1
0
  def _to_map_job_config(cls,
                         mr_spec,
                         queue_name):
    """Rebuilds a JobConfig from a model.MapreduceSpec.

    Lets internal code work with JobConfig directly and lets JobConfig be
    exposed as an API during execution, even though it is not itself saved
    into the datastore.

    Args:
      mr_spec: model.MapreduceSpec.
      queue_name: queue name.

    Returns:
      The JobConfig object for this job.
    """
    mapper_spec = mr_spec.mapper
    params = mr_spec.params

    # A spec without an "api_version" entry is read as version 0, which
    # turns lenient mode on.
    api_version = params.get("api_version", 0)

    return cls(
        _lenient=(api_version == 0),
        job_name=mr_spec.name,
        job_id=mr_spec.mapreduce_id,
        mapper=util.for_name(mapper_spec.handler_spec),
        input_reader_cls=mapper_spec.input_reader_class(),
        input_reader_params=input_readers._get_params(mapper_spec),
        output_writer_cls=mapper_spec.output_writer_class(),
        output_writer_params=output_writers._get_params(mapper_spec),
        shard_count=mapper_spec.shard_count,
        queue_name=queue_name,
        user_params=params.get("user_params"),
        shard_max_attempts=params.get("shard_max_attempts"),
        done_callback_url=params.get("done_callback"),
        _force_writes=params.get("force_writes"),
        # "base_path" is the only entry read without a fallback: a spec
        # lacking it raises KeyError.
        _base_path=params["base_path"],
        _task_max_attempts=params.get("task_max_attempts"),
        _task_max_data_processing_attempts=params.get(
            "task_max_data_processing_attempts"),
        _hooks_cls=util.for_name(mr_spec.hooks_class_name),
        _app=params.get("app_id"),
        _api_version=api_version)
Ejemplo n.º 2
0
  def __iter__(self):
    """Generate model instances for the entities in the key ranges.

    Each yielded entity first advances the current key range past its key,
    so iteration moves the query range past the consumed entities.

    Yields:
      next model instance.
    """
    # Runs until there is no current key range left to read.
    while self._current_key_range is not None:
      while True:
        query = self._current_key_range.make_ascending_query(
            util.for_name(self._entity_kind))
        batch = query.fetch(limit=self._batch_size)

        if not batch:
          # Current range is drained; move on and re-check the outer loop.
          self._advance_key_range()
          break

        for entity in batch:
          self._current_key_range.advance(entity.key())
          yield entity
Ejemplo n.º 3
0
  def _get_params(self, validator_parameter, name_prefix):
    """Collects prefixed request arguments and optionally validates them.

    Every request argument whose name starts with name_prefix is copied into
    the result under its name with the prefix removed. An argument with
    exactly one value is stored as that value; otherwise the whole list of
    values is stored.

    Args:
      validator_parameter: name of the request parameter which supplies
        the validator (resolved through util.for_name) for this parameter set.
      name_prefix: common prefix for all parameter names in the request.

    Returns:
      A dict mapping un-prefixed parameter names to their value(s).

    Raises:
      Any exception raised by the resolved validator if the params fail to
      validate.
    """
    validator_name = self.request.get(validator_parameter)
    prefix_length = len(name_prefix)

    user_params = {}
    for argument in self.request.arguments():
      if not argument.startswith(name_prefix):
        continue
      values = self.request.get_all(argument)
      user_params[argument[prefix_length:]] = (
          values[0] if len(values) == 1 else values)

    # Validation is skipped when the request names no validator.
    if validator_name:
      util.for_name(validator_name)(user_params)

    return user_params
Ejemplo n.º 4
0
    def input_reader_class(self):
        """Resolve the input reader class for this spec.

        Returns:
          whatever util.for_name resolves self.input_reader_spec to.
        """
        reader_spec = self.input_reader_spec
        return util.for_name(reader_spec)
Ejemplo n.º 5
0
  def input_reader_class(self):
    """Look up the input reader class named by input_reader_spec.

    Returns:
      the object util.for_name resolves self.input_reader_spec to.
    """
    resolved_reader = util.for_name(self.input_reader_spec)
    return resolved_reader
Ejemplo n.º 6
0
  def output_writer_class(self):
    """Resolve the output writer class, if one is configured.

    Returns:
      the object util.for_name resolves self.output_writer_spec to, or the
      falsy output_writer_spec value itself when no writer is configured.
    """
    writer_spec = self.output_writer_spec
    if not writer_spec:
      # Hand back the falsy spec unchanged (e.g. None) rather than resolving.
      return writer_spec
    return util.for_name(writer_spec)
Ejemplo n.º 7
0
  def output_writer_class(self):
    """Look up the output writer class named by output_writer_spec.

    Returns:
      the falsy output_writer_spec value unchanged when it is unset,
      otherwise the object util.for_name resolves it to.
    """
    if self.output_writer_spec:
      return util.for_name(self.output_writer_spec)
    # Nothing configured: return the falsy spec itself (e.g. None).
    return self.output_writer_spec
Ejemplo n.º 8
0
  def _get_params(self, validator_parameter, name_prefix):
    """Gathers the job's user-supplied params from the request.

    Request arguments whose names begin with name_prefix are collected under
    their names with the prefix stripped. A single-valued argument maps to
    that value; any other count maps to the list of values.

    Args:
      validator_parameter: name of the request parameter which supplies
        the validator (resolved through util.for_name) for this parameter set.
      name_prefix: common prefix for all parameter names in the request.

    Returns:
      A dict of the collected params keyed by un-prefixed name.

    Raises:
      Any exception raised by the resolved validator if the params fail to
      validate.
    """
    validator_path = self.request.get(validator_parameter)

    collected = {}
    for name in self.request.arguments():
      if name.startswith(name_prefix):
        found = self.request.get_all(name)
        if len(found) == 1:
          # Unwrap single values so callers do not get a one-element list.
          found = found[0]
        collected[name[len(name_prefix):]] = found

    if validator_path:
      validate_params = util.for_name(validator_path)
      validate_params(collected)

    return collected
    def _get_raw_entity_kind(cls, model_classpath):
        """Returns the entity kind for the model named by model_classpath.

        Args:
          model_classpath: path of the model, resolved through util.for_name.

        Returns:
          kind() for a db.Model match, _get_kind() for an ndb.Model or
          ndb.MetaModel match, otherwise util.get_short_name of the path.
        """
        resolved = util.for_name(model_classpath)
        # NOTE(review): if util.for_name returns a class here, this
        # isinstance check against db.Model would not match it -- confirm
        # whether issubclass was intended.
        if isinstance(resolved, db.Model):
            return resolved.kind()
        if isinstance(resolved, (ndb.Model, ndb.MetaModel)):
            return resolved._get_kind()
        return util.get_short_name(model_classpath)
  def _get_raw_entity_kind(cls, model_classpath):
    """Determines the entity kind for the model at model_classpath.

    Args:
      model_classpath: path of the model, resolved through util.for_name.

    Returns:
      kind() for a db.Model match, _get_kind() for an ndb.Model or
      ndb.MetaModel match, otherwise util.get_short_name of the path.
    """
    model = util.for_name(model_classpath)
    ndb_types = (ndb.Model, ndb.MetaModel)
    # NOTE(review): if util.for_name returns a class here, the db.Model
    # isinstance check would not match it -- confirm whether issubclass was
    # intended.
    if isinstance(model, db.Model):
      raw_kind = model.kind()
    elif isinstance(model, ndb_types):
      raw_kind = model._get_kind()
    else:
      raw_kind = util.get_short_name(model_classpath)
    return raw_kind
Ejemplo n.º 11
0
  def validate(cls, job_config):
    """Validates the job config and that the entity kind can be resolved.

    Runs the parent class validation first, then resolves the configured
    entity kind through util.for_name.

    Args:
      job_config: job config whose input_reader_params are checked.

    Raises:
      errors.BadReaderParamsError: the entity kind could not be imported.
      KeyError: input_reader_params has no cls.ENTITY_KIND_PARAM entry.
    """
    super(ModelDatastoreInputReader, cls).validate(job_config)
    params = job_config.input_reader_params
    entity_kind = params[cls.ENTITY_KIND_PARAM]

    # Only the import attempt matters here; the resolved class is not used.
    # "except ... as" parses on Python 2.6+ and Python 3, unlike the
    # comma form.
    try:
      util.for_name(entity_kind)
    except ImportError as e:
      raise errors.BadReaderParamsError("Bad entity kind: %s" % e)
Ejemplo n.º 12
0
    def validate(cls, mapper_spec):
        """Validates mapper spec and all mapper parameters.

        Runs the parent class validation, rejects the obsolete keys_only
        parameter, and checks that the entity kind can be resolved through
        util.for_name.

        Args:
          mapper_spec: The MapperSpec for this InputReader.

        Raises:
          BadReaderParamsError: required parameters are missing or invalid.
        """
        super(DatastoreInputReader, cls).validate(mapper_spec)
        params = mapper_spec.params
        keys_only = util.parse_bool(params.get(cls.KEYS_ONLY_PARAM, False))
        if keys_only:
            raise BadReaderParamsError(
                "The keys_only parameter is obsolete. "
                "Use DatastoreKeyInputReader instead.")

        entity_kind_name = params[cls.ENTITY_KIND_PARAM]
        # "except ... as" parses on Python 2.6+ and Python 3, unlike the
        # comma form.
        try:
            util.for_name(entity_kind_name)
        except ImportError as e:
            raise BadReaderParamsError("Bad entity kind: %s" % e)
Ejemplo n.º 13
0
    def validate(cls, mapper_spec):
        """Validates mapper spec and all mapper parameters.

        After the parent class validation, a truthy keys_only parameter is
        rejected as obsolete and the entity kind is resolved through
        util.for_name to confirm it can be imported.

        Args:
          mapper_spec: The MapperSpec for this InputReader.

        Raises:
          BadReaderParamsError: required parameters are missing or invalid.
        """
        super(DatastoreInputReader, cls).validate(mapper_spec)
        params = mapper_spec.params
        keys_only = util.parse_bool(params.get(cls.KEYS_ONLY_PARAM, False))
        if keys_only:
            raise BadReaderParamsError("The keys_only parameter is obsolete. "
                                       "Use DatastoreKeyInputReader instead.")

        entity_kind_name = params[cls.ENTITY_KIND_PARAM]
        # The comma form of "except" is Python-2-only; "as" works on
        # Python 2.6+ and Python 3.
        try:
            util.for_name(entity_kind_name)
        except ImportError as e:
            raise BadReaderParamsError("Bad entity kind: %s" % e)
Ejemplo n.º 14
0
  def get_hooks(self):
    """Returns the cached hooks.Hooks instance, creating it on first use.

    Returns:
      The hooks instance, or None if no hooks class name has been set.

    Raises:
      ValueError: hooks_class_name does not resolve to a class, or the class
        is not a hooks.Hooks subclass.
    """
    # Nothing to build: either already cached or no hooks class configured.
    if self.__hooks is not None or self.hooks_class_name is None:
      return self.__hooks

    hooks_class = util.for_name(self.hooks_class_name)
    if not isinstance(hooks_class, type):
      resolved_type_name = type(hooks_class).__name__
      raise ValueError("hooks_class_name must refer to a class, got %s" %
                       resolved_type_name)
    if not issubclass(hooks_class, hooks.Hooks):
      raise ValueError(
          "hooks_class_name must refer to a hooks.Hooks subclass")

    self.__hooks = hooks_class()
    return self.__hooks
Ejemplo n.º 15
0
    def __init__(self, filters, model_class_path):
        """Init.

        Stores the filters and model path, resolves the model class through
        util.for_name, and derives the property range from the filters.

        Args:
          filters: user supplied filters. Each filter should be a list or
            tuple of format (<property_name_as_str>, <query_operator_as_str>,
            <value_of_certain_type>). Value type should satisfy the
            property's type.
          model_class_path: full path to the model class in str.
        """
        self.filters = filters
        self.model_class_path = model_class_path
        self.model_class = util.for_name(self.model_class_path)

        property_range = self._get_range_from_filters(
            self.filters, self.model_class)
        self.prop, self.start, self.end = property_range
Ejemplo n.º 16
0
  def get_hooks(self):
    """Returns the hooks.Hooks instance for this object, built lazily.

    The hooks class is constructed with this object as its only argument.

    Returns:
      The cached hooks instance, or None if no hooks class has been set.

    Raises:
      ValueError: hooks_class_name does not resolve to a class, or the class
        is not a hooks.Hooks subclass.
    """
    needs_hooks = self.__hooks is None and self.hooks_class_name is not None
    if needs_hooks:
      hooks_class = util.for_name(self.hooks_class_name)

      # Reject non-classes first: issubclass below requires a class.
      if not isinstance(hooks_class, type):
        raise ValueError("hooks_class_name must refer to a class, got %s" %
                         type(hooks_class).__name__)
      if not issubclass(hooks_class, hooks.Hooks):
        raise ValueError(
            "hooks_class_name must refer to a hooks.Hooks subclass")

      self.__hooks = hooks_class(self)

    return self.__hooks
Ejemplo n.º 17
0
    def __init__(self, filters, model_class_path):
        """Init.

        Records the filters and model path, resolves the model class through
        util.for_name, and unpacks the (prop, start, end) range computed
        from the filters.

        Args:
          filters: user supplied filters. Each filter should be a list or
            tuple of format (<property_name_as_str>, <query_operator_as_str>,
            <value_of_certain_type>). Value type should satisfy the
            property's type.
          model_class_path: full path to the model class in str.
        """
        self.filters = filters
        self.model_class_path = model_class_path
        self.model_class = util.for_name(self.model_class_path)
        (self.prop,
         self.start,
         self.end) = self._get_range_from_filters(self.filters,
                                                  self.model_class)
Ejemplo n.º 18
0
    def _iter_key_range(self, k_range):
        """Yields (key, model instance) pairs for the entities in k_range.

        Entities are fetched in batches of self._batch_size; after each
        batch the query cursor is carried over to the next query.

        Args:
          k_range: key range object used to build the ascending query.

        Yields:
          (key, model_instance) tuples.
        """
        resume_cursor = None
        while True:
            query = k_range.make_ascending_query(
                util.for_name(self._entity_kind))
            if resume_cursor:
                query.with_cursor(resume_cursor)

            batch = query.fetch(limit=self._batch_size)
            if not batch:
                # An empty batch means the range is exhausted.
                return

            for entity in batch:
                yield entity.key(), entity
            resume_cursor = query.cursor()
  def get_handler(self):
    """Get mapper handler instance.

    On first use handler_spec is resolved through util.for_name. A class is
    instantiated; a method is re-fetched by name from a fresh instance of
    its class; anything else is used as is.

    Returns:
      cached handler instance as callable.
    """
    if self.__handler is not None:
      return self.__handler

    target = util.for_name(self.handler_spec)
    if isinstance(target, type):
      handler = target()
    elif isinstance(target, types.MethodType):
      # Bind the method to a newly created instance of its class.
      owner = target.im_class()
      handler = getattr(owner, target.__name__)
    else:
      handler = target

    self.__handler = handler
    return self.__handler
Ejemplo n.º 20
0
    def get_handler(self):
        """Get mapper handler instance.

        The handler is resolved from handler_spec via util.for_name the
        first time it is needed and then cached on the instance.

        Returns:
          cached handler instance as callable.
        """
        if self.__handler is None:
            spec_target = util.for_name(self.handler_spec)
            is_class = isinstance(spec_target, type)
            if is_class:
                # A class spec is instantiated with no arguments.
                self.__handler = spec_target()
            elif isinstance(spec_target, types.MethodType):
                # A method spec is looked up by name on a fresh instance of
                # its class.
                instance = spec_target.im_class()
                self.__handler = getattr(instance, spec_target.__name__)
            else:
                self.__handler = spec_target
        return self.__handler
Ejemplo n.º 21
0
    def _iter_key_range(self, k_range):
        """Iterates over the entities of k_range in batches.

        A fresh ascending query is built for every batch and, after the
        first batch, positioned with the previous query's cursor.

        Args:
          k_range: key range object used to build the ascending query.

        Yields:
          (key, model_instance) tuples.
        """
        cursor = None
        has_more = True
        while has_more:
            entity_class = util.for_name(self._entity_kind)
            query = k_range.make_ascending_query(entity_class)
            if cursor:
                query.with_cursor(cursor)

            results = query.fetch(limit=self._batch_size)
            if results:
                for model_instance in results:
                    yield model_instance.key(), model_instance
                cursor = query.cursor()
            else:
                # An empty fetch ends the iteration.
                has_more = False
  def validate(cls, job_config):
    """Inherit docs.

    In addition to the parent validation, resolves the entity kind through
    util.for_name and, when filters are supplied, validates them against
    the model class and builds a PropertyRange from them.
    """
    super(ModelDatastoreInputReader, cls).validate(job_config)
    reader_params = job_config.input_reader_params
    entity_kind = reader_params[cls.ENTITY_KIND_PARAM]

    try:
      model_class = util.for_name(entity_kind)
    except ImportError as e:
      raise errors.BadReaderParamsError("Bad entity kind: %s" % e)

    if cls.FILTERS_PARAM not in reader_params:
      return

    filters = reader_params[cls.FILTERS_PARAM]
    # db models and everything else go through separate filter validators.
    if issubclass(model_class, db.Model):
      cls._validate_filters(filters, model_class)
    else:
      cls._validate_filters_ndb(filters, model_class)
    # The constructed PropertyRange is discarded; it is built only for
    # whatever checks its constructor performs.
    property_range.PropertyRange(filters, entity_kind)
    def validate(cls, job_config):
        """Inherit docs.

        Beyond the parent validation, this checks that the entity kind
        resolves through util.for_name and, if filters are present,
        validates them and constructs a PropertyRange from them.
        """
        super(ModelDatastoreInputReader, cls).validate(job_config)
        params = job_config.input_reader_params
        kind_path = params[cls.ENTITY_KIND_PARAM]

        try:
            model_class = util.for_name(kind_path)
        except ImportError as e:
            raise errors.BadReaderParamsError("Bad entity kind: %s" % e)

        has_filters = cls.FILTERS_PARAM in params
        if has_filters:
            user_filters = params[cls.FILTERS_PARAM]
            # Choose the validator by whether the model is a db.Model.
            if issubclass(model_class, db.Model):
                check_filters = cls._validate_filters
            else:
                check_filters = cls._validate_filters_ndb
            check_filters(user_filters, model_class)
            # Built and discarded: only the constructor's checks are wanted.
            property_range.PropertyRange(user_filters, kind_path)
Ejemplo n.º 24
0
  def __iter__(self):
    """Yields the results of the ascending query over the key range.

    The query is stored on self._query. A db.Query is resumed from
    self._cursor when one is set and then run; any other query object is
    replaced on self._query by its iter(...) result before being consumed.

    Yields:
      each result produced by the query.
    """
    query_spec = self._query_spec
    self._query = self._key_range.make_ascending_query(
        util.for_name(query_spec.model_class_path),
        filters=query_spec.filters)

    if isinstance(self._query, db.Query):
      if self._cursor:
        self._query.with_cursor(self._cursor)
      results = self._query.run(batch_size=query_spec.batch_size,
                                keys_only=query_spec.keys_only)
    else:
      # Keep the iterator itself on self._query, as the query object is
      # replaced here.
      self._query = self._query.iter(batch_size=query_spec.batch_size,
                                     keys_only=query_spec.keys_only,
                                     start_cursor=self._cursor,
                                     produce_cursors=True)
      results = self._query

    for model_instance in results:
      yield model_instance
Ejemplo n.º 25
0
  def __iter__(self):
    """Iterates over the entities matched by the key range query.

    Builds an ascending query for the configured model class and filters
    and stores it on self._query. For a db.Query, the saved cursor (if any)
    is applied before running it; otherwise self._query is replaced by the
    query's iter(...) result, started at the saved cursor.

    Yields:
      each result produced by the query.
    """
    spec = self._query_spec
    model_class = util.for_name(spec.model_class_path)
    self._query = self._key_range.make_ascending_query(
        model_class, filters=spec.filters)

    if not isinstance(self._query, db.Query):
      # Non-db query: iterate through its iterator, which also replaces
      # self._query.
      self._query = self._query.iter(batch_size=spec.batch_size,
                                     keys_only=spec.keys_only,
                                     start_cursor=self._cursor,
                                     produce_cursors=True)
      for model_instance in self._query:
        yield model_instance
      return

    if self._cursor:
      self._query.with_cursor(self._cursor)
    for model_instance in self._query.run(batch_size=spec.batch_size,
                                          keys_only=spec.keys_only):
      yield model_instance
Ejemplo n.º 26
0
 def tx():
   """Records completion of mapreduce_id on its DatastoreAdminOperation.

   Moves the job from active to completed on the operation. Once no active
   jobs remain, an operation still in STATUS_ACTIVE is marked
   STATUS_COMPLETED and its DatastoreAdminOperationJob descendants are
   deleted. The operation is then saved and the optional
   'done_callback_handler' from mapreduce_params is invoked.

   NOTE(review): presumably run as a datastore transaction by the enclosing
   function (suggested by the name) -- confirm against the caller.
   """
   operation = DatastoreAdminOperation.get(operation_key)
   # Only count the job once: ids already removed are not decremented again.
   if mapreduce_id in operation.active_job_ids:
     operation.active_jobs -= 1
     operation.completed_jobs += 1
     operation.active_job_ids.remove(mapreduce_id)
   if not operation.active_jobs:
     if operation.status == DatastoreAdminOperation.STATUS_ACTIVE:
       operation.status = DatastoreAdminOperation.STATUS_COMPLETED
     db.delete(DatastoreAdminOperationJob.all().ancestor(operation),
               config=db_config)
   operation.put(config=db_config)
   if 'done_callback_handler' in mapreduce_params:
     done_callback_handler = util.for_name(
         mapreduce_params['done_callback_handler'])
     if done_callback_handler:
       done_callback_handler(operation, mapreduce_id, mapreduce_state)
     else:
       # The message names the actual parameter key so the log line can be
       # matched to the configuration.
       logging.error('done_callback_handler %s was not found',
                     mapreduce_params['done_callback_handler'])
Ejemplo n.º 27
0
 def tx():
   """Updates the DatastoreAdminOperation after mapreduce_id finishes.

   If the job is still listed as active it is moved to the completed count.
   When no active jobs are left, a STATUS_ACTIVE operation becomes
   STATUS_COMPLETED and the operation's DatastoreAdminOperationJob
   descendants are deleted. The operation is saved, and then the handler
   named by mapreduce_params['done_callback_handler'], if present, is
   called with (operation, mapreduce_id, mapreduce_state).

   NOTE(review): presumably executed inside a datastore transaction by the
   enclosing function (suggested by the name) -- confirm against the caller.
   """
   operation = DatastoreAdminOperation.get(operation_key)
   # Guard against double-counting a job that was already removed.
   if mapreduce_id in operation.active_job_ids:
     operation.active_jobs -= 1
     operation.completed_jobs += 1
     operation.active_job_ids.remove(mapreduce_id)
   if not operation.active_jobs:
     if operation.status == DatastoreAdminOperation.STATUS_ACTIVE:
       operation.status = DatastoreAdminOperation.STATUS_COMPLETED
     db.delete(DatastoreAdminOperationJob.all().ancestor(operation),
               config=db_config)
   operation.put(config=db_config)
   if 'done_callback_handler' in mapreduce_params:
     done_callback_handler = util.for_name(
         mapreduce_params['done_callback_handler'])
     if done_callback_handler:
       done_callback_handler(operation, mapreduce_id, mapreduce_state)
     else:
       # Log under the real parameter name ('done_callback_handler') so the
       # message matches the configuration key.
       logging.error('done_callback_handler %s was not found',
                     mapreduce_params['done_callback_handler'])