コード例 #1
0
  def __init__(self, model, description = ''):
    '''Create a predictive object backed by a GraphLab model.

      model -- either a graphlab.Model instance or a path to a model
      description -- forwarded unchanged to the parent class initializer

      Raises TypeError when model is neither a graphlab.Model nor a path.
    '''
    # Accept the model as soon as either check passes; the path check only
    # runs when the value is not already a model instance.
    model_is_acceptable = isinstance(model, graphlab.Model) or is_path(model)
    if not model_is_acceptable:
      raise TypeError('Model must be a GraphLab model or a path to a model.')

    super(ModelPredictiveObject, self).__init__(description)
    self._set_model(model)
コード例 #2
0
  def _set_model(self, model):
    '''Record the model as a dependency and cache its queryable methods.

      When model is a path, nothing is loaded: the dependency is recorded and
      self._model_methods is left as an empty dictionary. Otherwise the result
      of model._get_queryable_methods() is stored in self._model_methods and
      validated.

      model -- a model object exposing _get_queryable_methods(), or a path

      Raises RuntimeError when the queryable-method table is not a dictionary,
      when a method description is not a dictionary, or when a parameter type
      is anything other than 'sframe' or 'sarray'.
    '''
    self.dependencies = {'model': model}
    if is_path(model):
      # This is a path, can't do anything till we load it.
      self._model_methods = {}
      return

    self._model_methods = model._get_queryable_methods()
    if not isinstance(self._model_methods, dict):
      raise RuntimeError(
        "_get_queryable_methods for model %s should return a dictionary"
        % (model.__class__,))

    # values()/items() (not itervalues()/iteritems()) so the validation runs
    # on both Python 2 and Python 3.
    for description in self._model_methods.values():
      if not isinstance(description, dict):
        raise RuntimeError(
          "model %s _get_queryable_methods should use dict as method "
          "description." % (model.__class__,))

      for (param_name, param_type) in description.items():
        if param_type not in ('sframe', 'sarray'):
          raise RuntimeError(
            "model %s _get_queryable_methods should only use "
            "'sframe' or 'sarray' type. %s is not supported"
            % (model.__class__, param_type))
コード例 #3
0
  def _save_imp(self, po_path, dependency_path, aws_credentials):
    '''Save the predictive object to a directory

      The files for a predictive object are laid out the following way:

        po_path/definition/meta -- serialized json file about the predictive
          object, including: description, dependencies, etc.
        po_path/definition/definition -- cloudpickle-serialized PredictiveObject
        dependency_path -- all dependent GraphLab objects, each in its
          own directory:
          dependency_path/uri1/ -- serialized GraphLab object with uri1
          dependency_path/uri2/ -- serialized GraphLab object with uri2

      po_path -- directory for the predictive object itself; it is created
        before anything is written
      dependency_path -- parent location under which each dependency is
        copied, one sub-directory per dependency uri
      aws_credentials -- passed through to the S3 copy helpers

      Raises RuntimeError when a dependency cannot be copied because the
      source/destination combination is not one of: S3 to S3, local to S3,
      local to local. Any error raised while writing the definition file is
      logged and re-raised.
    '''
    fu.create_directory(po_path)

    describe = {
      'description': self.description,
      'dependencies': {},
      'schema_version' : self.schema_version
    }

    # items() rather than iteritems() so this runs on Python 2 and Python 3.
    for (uri, gl_obj) in self.dependencies.items():

      # If it isn't already saved, save it.
      temp_path = None
      try:
        if not fu.is_path(gl_obj):
          obj_type = self._get_graphlab_object_type(gl_obj)
          temp_path = tempfile.mkdtemp()

          __logger__.info("Saving dependent GraphLab %s (%s) locally to '%s' " % (obj_type, uri, temp_path))
          gl_obj.save(temp_path)
          # From here on the dependency is handled as a saved local path.
          gl_obj = temp_path
        else:
          # NOTE(review): this branch calls a module-level
          # get_graphlab_object_type while the branch above calls the
          # self._get_graphlab_object_type method -- confirm both exist and
          # that the asymmetry (path vs. in-memory object) is intended.
          obj_type = get_graphlab_object_type(gl_obj)

        # Copy the saved object without loading it.
        save_path = os.path.join(dependency_path, uri)

        __logger__.info("Copying dependent GraphLab %s(%s) from '%s' to '%s' " % (obj_type, uri, gl_obj, save_path))

        if fu.is_s3_path(gl_obj) and fu.is_s3_path(save_path):
          fu.intra_s3_copy_model(gl_obj, save_path, aws_credentials)
        elif fu.is_local_path(gl_obj) and fu.is_s3_path(save_path):
          fu.s3_copy_model(gl_obj, save_path, aws_credentials)
        elif fu.is_local_path(gl_obj) and fu.is_local_path(save_path):
          # Useful for unit tests
          shutil.copytree(gl_obj, save_path)
        else:
          raise RuntimeError("Copy GraphLab object from S3 to local path is not supported. GraphLab object path: %s, save path: %s" % (gl_obj, save_path))
      finally:
        # The temporary copy is only a staging area; remove it whether or not
        # the copy above succeeded.
        if temp_path:
          shutil.rmtree(temp_path)

      # add to the global describe dictionary
      describe['dependencies'][uri] = {
        'path': save_path,
        'type': obj_type
      }

    # persist the global description
    describe_path = self._get_describe_path(po_path)
    self._save_object(describe_path, describe)

    # persist the definition of myself
    definition_path = self._get_definition_path(po_path)
    try:
      with open(definition_path, 'wb') as f:
        _cloudpickle.dump(self, f)
    except Exception as e:
      # Format the exception itself rather than e.message: .message is empty
      # for multi-argument exceptions such as the IOError raised by open(),
      # and the attribute does not exist on Python 3.
      __logger__.error('Unable to save object: %s' % (e,))
      # Bare raise keeps the original traceback intact.
      raise