def _deserialize_output(self, task):
    """
    Deserialize the output from a task.

    Parameters
    ----------
    task
        Task definition of interest. It is used as the key into
        ``self._task_output_paths`` to find where the output was written.

    Returns
    -------
    The output of the run-time task associated with the task definition,
    i.e. the first object read from the pickled output at that path.
    """
    output_path = self._task_output_paths[task]

    if _file_util.is_hdfs_path(output_path):
        # The unpickler cannot be handed an additional HADOOP_CONF_DIR, so
        # the HDFS folder is downloaded to a local temporary directory
        # before unpickling.
        local_path = _make_temp_directory("job_output_")
        _file_util.download_from_hdfs(
            output_path,
            local_path,
            hadoop_conf_dir=self.environment.hadoop_conf_dir,
            is_dir=True)
    else:
        local_path = output_path

    # The temporary path is intentionally not deleted here because SFrame
    # lazily loads its content from disk. The temporary folder is expected
    # to be removed eventually when the Python session goes away.
    return gl_pickle.GLUnpickler(local_path).load()
def load_artifact(filename):
    """
    Load an artifact object.

    The archive is read sequentially: first a version marker, then a class
    object whose ``__module__`` and ``__name__`` identify the constructor,
    and finally whatever ``cls._load_version`` reads from the unpickler.

    Parameters
    ----------
    filename
        A GLPickle archive filename.

    Returns
    -------
    The object produced by ``cls._load_version(unpickler, version)``.
    """
    import importlib

    unpickler = gl_pickle.GLUnpickler(filename)
    try:
        # Get the version
        version = unpickler.load()

        # Load the constructor that knows how to load an empty object
        post_import_cls = unpickler.load()

        # Resolve the class by following its module path. This used to be
        # ``eval(post_import_cls.__module__)``, which evaluated a string read
        # from the archive as code and only worked when the top-level package
        # was already bound in this module's globals. Importing by name
        # yields the same module object without evaluating arbitrary text.
        # NOTE(review): the archive itself is still unpickled, so it should
        # only ever come from a trusted source.
        module = importlib.import_module(post_import_cls.__module__)
        cls = getattr(module, post_import_cls.__name__)

        # Load the object with the right version.
        return cls._load_version(unpickler, version)
    finally:
        # Close the unpickler even when one of the load steps raises.
        unpickler.close()
def _deserialize(file_path):
    """
    Takes a path to a serialized job file. Returns the deserialized job
    object (the first object read from the archive).
    """
    unpickler = gl_pickle.GLUnpickler(file_path)
    try:
        return unpickler.load()
    finally:
        # Close the unpickler even if load() raises.
        unpickler.close()
def _load_local(cls, path):
    """
    Load a predictive object from a local path.

    The archive is read in order: the schema version, the required-files
    entry, and then the predictive object itself.

    Parameters
    ----------
    path
        Path to the serialized predictive object. It is expanded with
        ``fu.expand_full_path`` before use.

    Returns
    -------
    The predictive object read from the archive.

    Raises
    ------
    RuntimeError
        If the path does not exist, if anything fails while reading the
        archive (the original traceback is embedded in the message), or if
        the archive's schema version is newer than
        ``_PREDICTIVE_OBJECT_SCHEMA_VERSION``.
    """
    path = fu.expand_full_path(path)
    if not os.path.exists(path):
        raise RuntimeError("Path %s does not exist." % path)

    try:
        unpickler = _gl_pickle.GLUnpickler(path)
        try:
            po_schema_version = unpickler.load()

            # Always read this entry so the stream is positioned at the
            # predictive object, even when the value itself is not used.
            required_files = unpickler.load()

            # Lay out the required files before loading the function.
            # Required files are managed separately from the Predictive
            # Object since schema version 4 (jumped to 6 to be in line with
            # the PS version), so only older archives need them
            # deserialized here; for newer ones the loading is handled at a
            # higher level by Predictive Service.
            if po_schema_version <= 3:
                cls._deserialize_required_files(required_files)

            po_obj = unpickler.load()
        finally:
            # Close the unpickler on failure as well as on success.
            unpickler.close()
    except Exception as e:
        import traceback
        trace = traceback.format_exc()
        err_msg = "Traceback\n %s\n" % trace
        err_msg += "Error type    : %s\n" % e.__class__.__name__
        err_msg += "Error message : %s\n" % str(e)

        raise RuntimeError('Unable to load predictive object. Error: %s' % err_msg)

    if po_schema_version > _PREDICTIVE_OBJECT_SCHEMA_VERSION:
        raise RuntimeError("Your GraphLab Create only supports Predictive "
            "Objects with schema version up to '%s', the Predictive Object "
            "you are trying to load has schema version '%s'. Please upgrade your "
            "GraphLab Create version to most up-to-date one." % \
            (_PREDICTIVE_OBJECT_SCHEMA_VERSION, po_schema_version))

    return po_obj
def model_wrapper(unity_proxy):
    """
    Build a model object from a unity proxy.

    The proxy is first wrapped with ``proxy_wrapper``, which returns a proxy
    object whose ``temp_file`` attribute is set to where the object is
    pickled. That file is then read in order: a version, a class name, and
    whatever ``cls._load_version`` reads from the unpickler.

    Parameters
    ----------
    unity_proxy
        The proxy handed to ``proxy_wrapper``.

    Returns
    -------
    The object produced by ``cls._load_version(unpickler, version)``.
    """
    # Load the proxy object. This returns a proxy object with
    # 'temp_file' set to where the object is pickled.
    model_proxy = proxy_wrapper(unity_proxy)
    temp_file = model_proxy.temp_file

    # Setup the unpickler.
    unpickler = gl_pickle.GLUnpickler(temp_file)
    try:
        # Get the version
        version = unpickler.load()

        # Load the class name.
        cls_name = unpickler.load()
        cls = _get_class_from_name(cls_name)

        # Load the object with the right version.
        return cls._load_version(unpickler, version)
    finally:
        # Release the unpickler once loading finishes or fails.
        # NOTE(review): assumes close() leaves model_proxy.temp_file usable
        # by the returned object - confirm against GLUnpickler.close().
        unpickler.close()
def _get_map_job_results(self, _silent=True):
    '''
    Get results of all map jobs.

    Parameters
    ----------
    _silent : bool
        When False, a warning is logged for every task whose result could
        not be loaded. When True such failures are ignored quietly.

    Returns
    --------
    job outputs : list
        One entry per task in ``self._stages[0]``, in order. If a certain
        job failed to load, its entry is None.
    '''
    output_dir = self.get_path_join_method()(self._exec_dir, 'output')
    __LOGGER__.info("Retrieving job results from %s..." % output_dir)

    # Remote result folders are fetched to a local folder first.
    local_dir = (
        output_dir
        if _file_util.is_local_path(output_dir)
        else self._download_remote_folder_to_local(output_dir, silent=True))

    results = []
    for task in self._stages[0]:
        try:
            remote_path = self._task_output_paths[task]
            join_path = self.get_path_join_method()
            # Only the file name is kept; it is looked up in the local folder.
            local_path = join_path(local_dir, _os.path.split(remote_path)[1])
            results.append(gl_pickle.GLUnpickler(local_path).load())
        except Exception as err:
            # Best effort: a failed task contributes None instead of
            # aborting the whole retrieval.
            if not _silent:
                __LOGGER__.warning("Ignored exception when retrieving result for task %s, error: %s" % (task.name, err))
            results.append(None)

    # Alert -- the temp result folder cannot be removed because the result
    # SFrame may depend on the files existing on disk.
    return results
def load_model(location):
    """
    Load any GraphLab Create model that was previously saved.

    This function assumes the model (can be any model) was previously saved in
    GraphLab Create model format with model.save(filename).

    Parameters
    ----------
    location : string
        Location of the model to load. Can be a local path or a remote URL.
        Because models are saved as directories, there is no file extension.

    Returns
    -------
    The loaded model. If ``location`` holds a ``dir_archive.ini`` (or is an
    http/https URL) the model is loaded through the unity server; otherwise
    it is unpickled with GLUnpickler as a pure Python model.

    Examples
    ----------
    >>> model.save('my_model_file')
    >>> loaded_model = gl.load_model('my_model_file')
    """
    _mt._get_metric_tracker().track('toolkit.model.load_model')

    # Check if the location is a dir_archive; if not, use GLUnpickler to load
    # it as a pure Python model.
    #
    # If the location is an http(s) location, skip the check and directly
    # proceed to load the model as a dir_archive. This is because
    # 1) exists() does not work with the http protocol, and
    # 2) GLUnpickler does not support http.
    protocol = file_util.get_protocol(location)
    if protocol == '':
        model_path = file_util.expand_full_path(location)
        dir_archive_exists = file_util.exists(
            os.path.join(model_path, 'dir_archive.ini'))
    elif protocol in ['http', 'https']:
        dir_archive_exists = True
    else:
        # Other remote protocols: join with posixpath rather than os.path.
        import posixpath
        dir_archive_exists = file_util.exists(
            posixpath.join(location, 'dir_archive.ini'))

    if dir_archive_exists:
        _internal_url = _make_internal_url(location)
        return glconnect.get_unity().load_model(_internal_url)

    # Not a dir_archive, so try unpickling the model.
    unpickler = gl_pickle.GLUnpickler(location)
    try:
        # Get the version
        version = unpickler.load()

        # Load the class name.
        cls_name = unpickler.load()
        cls = _get_class_from_name(cls_name)

        # Load the object with the right version.
        return cls._load_version(unpickler, version)
    finally:
        # Close the unpickler even when loading fails part-way.
        unpickler.close()