Code example #1
  def save(self, path, dependency_path, aws_credentials={}):
    """ Save the predictive object to the given path.

    Parameters
    ----------
    path : str
      The location to save the predictive object to; either a local
      path or an S3 path.
    dependency_path : str
      The location to save dependent GraphLab objects to.
    aws_credentials : dict, optional
      AWS credentials, used when saving to S3.
    """
    # only support saving to local or S3 for now
    if not fu.is_s3_path(path) and not fu.is_local_path(path):
      raise RuntimeError("Only saving to a local or S3 path is supported; "
        "cannot save predictive object to path %s." % path)

    if fu.is_s3_path(path):
      self._save_s3(path, dependency_path, aws_credentials)
    else:
      if os.path.exists(path):
        if os.path.isfile(path):
          __logger__.warning("Overwriting existing file '%s' when saving predictive object" % path)
        else:
          raise RuntimeError("Path %s already exists, please remove that and save again" % path)

      self._save_local(path, dependency_path, aws_credentials)

      tracker = _mt._get_metric_tracker()
      tracker.track('deploy.predictive_service.predictive_object',
          value=1,
          properties={
            'type': self.__class__.__name__,
            'dependencies': len(self.dependencies) if self.dependencies else 0
          })
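
A minimal usage sketch of the save() dispatch above. DummyPredictiveObject, the paths, and the credential key names are hypothetical stand-ins rather than GraphLab Create API; only the local-versus-S3 branching mirrors the code:

  # Hypothetical subclass instance; any PredictiveObject works the same way.
  po = DummyPredictiveObject(description='example')

  # Local save: an existing file at the target is overwritten with a warning,
  # while an existing directory raises a RuntimeError.
  po.save('/tmp/my_po', dependency_path='/tmp/my_po_deps')

  # S3 save: credentials are forwarded to the S3 helpers (key names assumed).
  po.save('s3://my-bucket/pos/my_po',
          dependency_path='s3://my-bucket/pos/deps',
          aws_credentials={'aws_access_key_id': 'AKIA...',
                           'aws_secret_access_key': 'secret'})
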
Code example #2
  @classmethod
  def _load_local(cls, path):
    path = fu.expand_full_path(path)

    if not os.path.isfile(path):
      raise RuntimeError("Path %s is not a file; a tar.gz file is expected." % path)

    rm_tmp_tree = True
    # Create the temp directory outside the try block so the finally clause
    # can always reference po_path.
    po_path = tempfile.mkdtemp()
    try:
      fu.unzip_directory(path, po_path)

      po_object_path = os.path.join(po_path, 'definition')

      # load the 'describe' metadata dictionary
      describe = cls._load_object(cls._get_describe_path(po_object_path))

      po_schema_version = describe.get('schema_version', PREDICTIVE_OBJECT_SCHEMA_VERSION)
      if po_schema_version > PREDICTIVE_OBJECT_SCHEMA_VERSION:
        raise RuntimeError("This version of GraphLab Create only supports "
            "Predictive Objects with schema version up to '%s', but the "
            "Predictive Object you are trying to load has schema version '%s'. "
            "Please upgrade GraphLab Create to the most recent version." %
            (PREDICTIVE_OBJECT_SCHEMA_VERSION, po_schema_version))

      po_obj = None

      # load the instance
      try:
        with open(cls._get_definition_path(po_object_path), 'rb') as f:
          po_obj = _pickle.load(f)
      except Exception as e:
        __logger__.error('Unable to load object: %s' % e)
        raise

      po_obj.description = str(describe['description'])
      # use the defaulted value from above; 'schema_version' may be absent in old files
      po_obj.schema_version = int(po_schema_version)

      __logger__.info('Loaded predictive object of type: %s' % (type(po_obj)))

      # load all dependencies
      dependencies = describe['dependencies']
      gl_objects = {}
      for (uri, gl_obj_info) in dependencies.items():
        gl_obj_type = gl_obj_info['type']
        gl_obj_path = gl_obj_info['path']

        # The v1.0 format differs from v1.1:
        # - v1.0 saves all dependent objects together with the PO definition
        #   and records each dependent object's relative path
        # - v1.1 saves the absolute path of each GraphLab object, so the
        #   objects are loaded directly from S3
        if fu.is_local_path(gl_obj_path) and not os.path.isabs(gl_obj_path):
          rm_tmp_tree = False
          gl_obj_path = os.path.join(po_object_path, 'GraphLabObjects', uri)
          __logger__.info("Loading old-format Predictive Object dependencies; "
              "changed path from '%s' to '%s'" % (gl_obj_info['path'], gl_obj_path))

        gl_objects[uri] = cls._load_graphlab_object(gl_obj_type, gl_obj_path)

      po_obj.dependencies = gl_objects

    finally:
      # For v1.0, since we download all GraphLab objects and load them from
      # this folder, we cannot remove it here. For v1.1 this is fine because
      # the GraphLab server loads all GraphLab objects from S3 and manages
      # the lifetime of its temporary objects.
      if rm_tmp_tree:
        shutil.rmtree(po_path)

    return po_obj
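
The schema-version guard is the core compatibility check here. Below is a self-contained sketch of the same logic with an illustrative constant (the real value is defined by the library, and check_schema_version is a hypothetical helper name):

  PREDICTIVE_OBJECT_SCHEMA_VERSION = 2  # illustrative value, not the library's

  def check_schema_version(describe):
      # A missing key defaults to the current version, as in _load_local above.
      version = describe.get('schema_version', PREDICTIVE_OBJECT_SCHEMA_VERSION)
      if version > PREDICTIVE_OBJECT_SCHEMA_VERSION:
          raise RuntimeError(
              "Loader supports schema versions up to %s, but got %s; "
              "please upgrade." % (PREDICTIVE_OBJECT_SCHEMA_VERSION, version))
      return version

  assert check_schema_version({'schema_version': 1}) == 1
  assert check_schema_version({}) == 2  # default applies when the key is absent
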
Code example #3
  def _save_imp(self, po_path, dependency_path, aws_credentials):
    '''Save the predictive object to a directory

      The files for a predictive object are laid out as follows:

        po_path/definition/meta -- serialized json file about the predictive
          object, including: description, dependencies, etc.
        po_path/definition/definition -- cloudpickle-serialized PredictiveObject
        dependency_path -- all dependent GraphLab objects, each in its
          own directory:
          dependency_path/uri1/ -- serialized GraphLab object with uri1
          dependency_path/uri2/ -- serialized GraphLab object with uri2
    '''
    fu.create_directory(po_path)

    describe = {
      'description': self.description,
      'dependencies': {},
      'schema_version' : self.schema_version
    }

    for (uri, gl_obj) in self.dependencies.items():
      # If it isn't already saved, save it.
      temp_path = None
      try:
        if not fu.is_path(gl_obj):
          obj_type = self._get_graphlab_object_type(gl_obj)
          temp_path = tempfile.mkdtemp()

          __logger__.info("Saving dependent GraphLab %s (%s) locally to '%s' " % (obj_type, uri, temp_path))
          gl_obj.save(temp_path)
          gl_obj = temp_path
        else:
          obj_type = self._get_graphlab_object_type(gl_obj)

        # Copy the saved object without loading it.
        save_path = os.path.join(dependency_path, uri)

        __logger__.info("Copying dependent GraphLab %s(%s) from '%s' to '%s' " % (obj_type, uri, gl_obj, save_path))

        if fu.is_s3_path(gl_obj) and fu.is_s3_path(save_path):
          fu.intra_s3_copy_model(gl_obj, save_path, aws_credentials)
        elif fu.is_local_path(gl_obj) and fu.is_s3_path(save_path):
          fu.s3_copy_model(gl_obj, save_path, aws_credentials)
        elif fu.is_local_path(gl_obj) and fu.is_local_path(save_path):
          # Useful for unit tests
          shutil.copytree(gl_obj, save_path)
        else:
          raise RuntimeError("Copy GraphLab object from S3 to local path is not supported. GraphLab object path: %s, save path: %s" % (gl_obj, save_path))
      finally:
        if temp_path:
          shutil.rmtree(temp_path)

      # add to the global describe dictionary
      describe['dependencies'][uri] = {
        'path': save_path,
        'type': obj_type
      }

    # persist the global description
    describe_path = self._get_describe_path(po_path)
    self._save_object(describe_path, describe)

    # persist this object's own definition
    definition_path = self._get_definition_path(po_path)
    try:
      with open(definition_path, 'wb') as f:
        _cloudpickle.dump(self, f)
    except Exception as e:
      __logger__.error('Unable to save object: %s' % e)
      raise  # re-raise with the original traceback intact
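
For reference, the on-disk layout described in the docstring of _save_imp can be reproduced with a short sketch. The 'meta' and 'definition' file names mirror the code, while the payload contents and the use of json/pickle here are assumptions (the real code serializes through _save_object and _cloudpickle):

  import json
  import os
  import pickle
  import tempfile

  po_path = tempfile.mkdtemp()
  dependency_path = tempfile.mkdtemp()

  # po_path/definition/ holds the metadata ('meta') and the serialized
  # object ('definition') side by side.
  definition_dir = os.path.join(po_path, 'definition')
  os.makedirs(definition_dir)

  describe = {
      'description': 'example',
      'schema_version': 2,  # illustrative value
      'dependencies': {
          'uri1': {'path': os.path.join(dependency_path, 'uri1'),
                   'type': 'model'},
      },
  }
  with open(os.path.join(definition_dir, 'meta'), 'w') as f:
      json.dump(describe, f)
  with open(os.path.join(definition_dir, 'definition'), 'wb') as f:
      pickle.dump({'placeholder': 'predictive object'}, f)

  # Each dependency lives in its own directory under dependency_path.
  os.makedirs(describe['dependencies']['uri1']['path'])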