def save(self, path, dependency_path, aws_credentials={}):
    """ Save predictive object to the given path

    Parameters
    ----------
    path : str
        The location to save the predictive object to
    dependency_path : str
        The location to save dependent GraphLab objects to
    aws_credentials : dict, optional
        AWS credentials, used when saving to an S3 path
    """
    # only support saving to local or S3 for now
    if (not fu.is_s3_path(path)) and (not fu.is_local_path(path)):
        raise RuntimeError("Only saving to a local or S3 path is supported; cannot "
                           "save predictive object to path %s." % path)

    if fu.is_s3_path(path):
        self._save_s3(path, dependency_path, aws_credentials)
    else:
        if os.path.exists(path):
            if os.path.isfile(path):
                __logger__.warning("Overwriting existing file '%s' when saving predictive object" % path)
            else:
                raise RuntimeError("Path %s already exists; please remove it and save again" % path)
        self._save_local(path, dependency_path, aws_credentials)

    tracker = _mt._get_metric_tracker()
    tracker.track('deploy.predictive_service.predictive_object',
                  value=1,
                  properties={
                      'type': self.__class__.__name__,
                      'dependencies': len(self.dependencies) if self.dependencies else 0}
                  )
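# Usage sketch (illustrative only, not part of the original source): assuming
# `po` is a predictive object instance, and that the paths and credentials
# below are hypothetical placeholders, saving locally and to S3 might look like:
#
#   po.save('/tmp/my_po', '/tmp/my_po_dependencies')
#   po.save('s3://my-bucket/pos/my_po', 's3://my-bucket/pos/my_po_dependencies',
#           aws_credentials={'aws_access_key_id': '...',
#                            'aws_secret_access_key': '...'})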
@classmethod
def _load_local(cls, path):
    path = fu.expand_full_path(path)
    if not os.path.isfile(path):
        raise RuntimeError("Path %s is not a file; a tar.gz file is expected." % path)

    rm_tmp_tree = True
    try:
        po_path = tempfile.mkdtemp()
        fu.unzip_directory(path, po_path)

        po_object_path = os.path.join(po_path, 'definition')

        # load the describe
        describe = cls._load_object(cls._get_describe_path(po_object_path))
        po_schema_version = describe.get('schema_version', PREDICTIVE_OBJECT_SCHEMA_VERSION)
        if po_schema_version > PREDICTIVE_OBJECT_SCHEMA_VERSION:
            raise RuntimeError("Your GraphLab Create only supports Predictive "
                "Objects with schema version up to '%s'; the Predictive Object "
                "you are trying to load has schema version '%s'. Please upgrade your "
                "GraphLab Create to the most up-to-date version." %
                (PREDICTIVE_OBJECT_SCHEMA_VERSION, po_schema_version))

        po_obj = None

        # load the instance
        try:
            with open(cls._get_definition_path(po_object_path), 'rb') as f:
                po_obj = _pickle.load(f)
        except Exception as e:
            __logger__.error('Unable to load object: %s' % (e.message))
            raise

        po_obj.description = str(describe['description'])
        po_obj.schema_version = int(describe['schema_version'])
        __logger__.info('Loaded predictive object of type: %s' % (type(po_obj)))

        # load all dependencies
        dependencies = describe['dependencies']
        gl_objects = {}
        for (uri, gl_obj_info) in dependencies.iteritems():
            gl_obj_type = gl_obj_info['type']
            gl_obj_path = gl_obj_info['path']

            # The v1.0 format is different from v1.1:
            #   v1.0 saves all dependent objects together with the PO definition and
            #   remembers the relative path of each dependent object.
            #   v1.1 saves the absolute path of the GraphLab objects, so the objects
            #   are loaded directly from S3.
            if fu.is_local_path(gl_obj_path) and not os.path.isabs(gl_obj_path):
                rm_tmp_tree = False
                gl_obj_path = os.path.join(po_object_path, 'GraphLabObjects', uri)
                __logger__.info("Loading old format Predictive Object dependencies, changed "
                                "path from '%s' to '%s'" % (gl_obj_info['path'], gl_obj_path))

            gl_objects[uri] = cls._load_graphlab_object(gl_obj_type, gl_obj_path)

        po_obj.dependencies = gl_objects

    finally:
        # For v1.0, since we download all GraphLab objects and load them from the
        # temp folder, we cannot remove the folder. For v1.1 it is fine because the
        # GraphLab server loads all GraphLab objects from S3 and manages the
        # lifetime of temp objects.
        if rm_tmp_tree:
            shutil.rmtree(po_path)

    return po_obj
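# Usage sketch (illustrative only): `_load_local` takes `cls`, so assuming the
# enclosing class is named PredictiveObject and the archive path below is a
# hypothetical placeholder, loading a locally saved predictive object might
# look like:
#
#   po = PredictiveObject._load_local('/tmp/my_po.tar.gz')
#   print(po.description)
#   print(po.dependencies.keys())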
def _save_imp(self, po_path, dependency_path, aws_credentials):
    '''Save the predictive object to a directory

    The files for a predictive object are laid out the following way:

        po_path/definition/meta -- serialized json file about the predictive
            object, including: description, dependencies, etc.
        po_path/definition/definition -- cloudpickle-serialized PredictiveObject
        dependency_path -- all dependent GraphLab objects, each in its own
            directory:
            dependency_path/uri1/ -- serialized GraphLab object with uri1
            dependency_path/uri2/ -- serialized GraphLab object with uri2
    '''
    fu.create_directory(po_path)

    describe = {
        'description': self.description,
        'dependencies': {},
        'schema_version': self.schema_version
    }

    for (uri, gl_obj) in self.dependencies.iteritems():
        # If it isn't already saved, save it.
        temp_path = None
        try:
            if not fu.is_path(gl_obj):
                obj_type = self._get_graphlab_object_type(gl_obj)
                temp_path = tempfile.mkdtemp()
                __logger__.info("Saving dependent GraphLab %s (%s) locally to '%s'" % (obj_type, uri, temp_path))
                gl_obj.save(temp_path)
                gl_obj = temp_path
            else:
                # object is already saved; determine its type from the saved location
                obj_type = fu.get_graphlab_object_type(gl_obj)

            # Copy the saved object without loading it.
            save_path = os.path.join(dependency_path, uri)
            __logger__.info("Copying dependent GraphLab %s (%s) from '%s' to '%s'" % (obj_type, uri, gl_obj, save_path))
            if fu.is_s3_path(gl_obj) and fu.is_s3_path(save_path):
                fu.intra_s3_copy_model(gl_obj, save_path, aws_credentials)
            elif fu.is_local_path(gl_obj) and fu.is_s3_path(save_path):
                fu.s3_copy_model(gl_obj, save_path, aws_credentials)
            elif fu.is_local_path(gl_obj) and fu.is_local_path(save_path):
                # Useful for unit tests
                shutil.copytree(gl_obj, save_path)
            else:
                raise RuntimeError("Copying a GraphLab object from S3 to a local path is not "
                                   "supported. GraphLab object path: %s, save path: %s"
                                   % (gl_obj, save_path))
        finally:
            if temp_path:
                shutil.rmtree(temp_path)

        # add to the global describe dictionary
        describe['dependencies'][uri] = {
            'path': save_path,
            'type': obj_type
        }

    # persist the global description
    describe_path = self._get_describe_path(po_path)
    self._save_object(describe_path, describe)

    # persist the definition of myself
    definition_path = self._get_definition_path(po_path)
    try:
        with open(definition_path, 'wb') as f:
            _cloudpickle.dump(self, f)
    except Exception as e:
        __logger__.error('Unable to save object: %s' % (e.message))
        raise
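# Illustrative example (hypothetical values, not taken from the source): for a
# single dependency saved under uri 'my_model' to an S3 dependency_path, the
# 'describe' metadata persisted by _save_imp would look roughly like:
#
#   {
#       'description': 'scores user/item pairs',
#       'schema_version': PREDICTIVE_OBJECT_SCHEMA_VERSION,  # value of self.schema_version
#       'dependencies': {
#           'my_model': {
#               'path': 's3://my-bucket/pos/my_po_dependencies/my_model',
#               'type': 'model'  # example type string
#           }
#       }
#   }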