def load(cls, path, dst, *args, **kwargs):
    # assume read mode, but also check args and kwargs
    mode = "r"
    if args:
        mode = args[0]
        args = args[1:]
    elif "mode" in kwargs:
        mode = kwargs.pop("mode")

    # open zip file and extract to dst
    with zipfile.ZipFile(get_path(path), mode, *args, **kwargs) as f:
        f.extractall(get_path(dst))
def load(cls, path, dst, *args, **kwargs):
    # assume read mode with inferred compression, but also check args and kwargs
    compression = cls.infer_compression(path)
    mode = "r" if not compression else "r:" + compression
    if args:
        mode = args[0]
        args = args[1:]
    elif "mode" in kwargs:
        mode = kwargs.pop("mode")

    # open tar file and extract to dst
    with tarfile.open(get_path(path), mode, *args, **kwargs) as f:
        f.extractall(get_path(dst))
def load(cls, path, dst, *args, **kwargs):
    # get the mode from args and kwargs, default to read mode with inferred compression
    if args:
        mode = args[0]
        args = args[1:]
    elif "mode" in kwargs:
        mode = kwargs.pop("mode")
    else:
        compression = cls.infer_compression(path)
        mode = "r" if not compression else "r:" + compression

    # open tar file and extract to dst
    with tarfile.open(get_path(path), mode, *args, **kwargs) as f:
        f.extractall(get_path(dst))
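# Hedged usage sketch for the tar loader above. The enclosing formatter class
# name "TarFormatter" is an assumption, not taken from this section:
# TarFormatter.load("data.tar.gz", "unpacked")         # mode inferred as "r:gz"
# TarFormatter.load("data.tar", "unpacked", mode="r")  # mode passed explicitly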
def __init__(self, path=None, fs=LocalFileSystem.default_instance, is_tmp=False, tmp_dir=None,
        **kwargs):
    if isinstance(fs, six.string_types):
        fs = LocalFileSystem(fs)

    # handle tmp paths manually since luigi uses the env tmp dir
    if not path:
        if not is_tmp:
            raise Exception("either path or is_tmp must be set")

        # if not set, get the tmp dir from the config and ensure that it exists
        cfg = Config.instance()
        if tmp_dir:
            tmp_dir = get_path(tmp_dir)
        else:
            tmp_dir = os.path.realpath(cfg.get_expanded("target", "tmp_dir"))
        if not fs.exists(tmp_dir):
            perm = cfg.get_expanded_int("target", "tmp_dir_perm")
            fs.mkdir(tmp_dir, perm=perm)

        # create a random path
        while True:
            basename = "luigi-tmp-{:09d}".format(random.randint(0, 999999999))
            path = os.path.join(tmp_dir, basename)
            if not fs.exists(path):
                break

        # is_tmp might be a file extension
        if isinstance(is_tmp, six.string_types):
            if is_tmp[0] != ".":
                is_tmp = "." + is_tmp
            path += is_tmp
    else:
        # ensure path is not a target and does not contain a scheme
        path = fs._unscheme(get_path(path))

        # make absolute when not starting with a variable
        if not path.startswith(("$", "~")):
            path = os.path.abspath(path)

    super(LocalTarget, self).__init__(path=path, is_tmp=is_tmp, fs=fs, **kwargs)
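# Hedged usage sketch for the LocalTarget constructor above; the argument
# values are illustrative:
# LocalTarget("data/output.json")  # relative path, made absolute
# LocalTarget(is_tmp=True)         # random path inside the configured tmp dir
# LocalTarget(is_tmp="json")       # same, with a ".json" extension appended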
def move_from_local(self, src=None, perm=None, dir_perm=None, **kwargs):
    src = add_scheme(self.fs.local_fs.abspath(get_path(src)), "file")
    return FileSystemFileTarget.move_from(self, src, perm=perm, dir_perm=dir_perm, **kwargs)
def load(cls, path, *args, **kwargs):
    import ROOT
    ROOT.PyConfig.IgnoreCommandLineOptions = True
    ROOT.gROOT.SetBatch()

    import root_numpy
    return root_numpy.root2array(get_path(path), *args, **kwargs)
def move_to_local(self, dst=None, dir_perm=None, **kwargs):
    if dst:
        dst = add_scheme(_local_fs.abspath(get_path(dst)), "file")
    return FileSystemFileTarget.move_to(self, dst, dir_perm=dir_perm, **kwargs)
def dump(cls, path, session, output_names, *args, **kwargs): """ Takes a TensorFlow *session* object (compatible with the v1 API), converts its contained graph into a simpler version with variables translated into constant tensors, and saves it to a protobuf file at *path*. *output_numes* must be a list of names of output tensors to save. In turn, TensorFlow internally determines which subgraph(s) to convert and save. All *args* and *kwargs* are forwarded to :py:func:`tf.compat.v1.train.write_graph`. .. note:: When used with TensorFlow v2, this function requires the v1 API compatibility layer. When :py:attr:`tf.compat.v1` is not available, a *NotImplementedError* is raised. """ _, tf1 = cls.import_tf() # complain when the v1 compatibility layer is not existing if not tf1: raise NotImplementedError( "the v1 compatibility layer of TensorFlow v2 is missing, but " "required") # convert the graph constant_graph = tf1.graph_util.convert_variables_to_constants( session, session.graph.as_graph_def(), output_names) # default as_text value kwargs.setdefault("as_text", path.endswith((".pbtxt", ".pb.txt"))) # write the graph graph_dir, graph_name = os.path.split(get_path(path)) return tf1.train.write_graph(constant_graph, graph_dir, graph_name, *args, **kwargs)
def dump(cls, path, model, *args, **kwargs):
    # deprecation warning until v0.1
    logger.warning(
        "law.contrib.keras.TFKerasModelFormatter is deprecated, please use "
        "law.contrib.tensorflow.TFKerasModelFormatter (named 'tf_keras_model') instead")

    model.save(get_path(path), *args, **kwargs)
def dump(cls, path, session, output_names, *args, **kwargs):
    import tensorflow as tf

    graph_dir, graph_name = os.path.split(get_path(path))

    const_graph = tf.graph_util.convert_variables_to_constants(
        session, session.graph.as_graph_def(), output_names)

    tf.train.write_graph(const_graph, graph_dir, graph_name, *args, **kwargs)
def dump(cls, path, src, *args, **kwargs):
    # assume write mode, but also check args and kwargs
    mode = "w"
    if args:
        mode = args[0]
        args = args[1:]
    elif "mode" in kwargs:
        mode = kwargs.pop("mode")

    # open a new zip file and add all files in src
    with zipfile.ZipFile(get_path(path), mode, *args, **kwargs) as f:
        src = get_path(src)
        if os.path.isfile(src):
            f.write(src, os.path.basename(src))
        else:
            for elem in os.listdir(src):
                f.write(os.path.join(src, elem), elem)
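# Hedged usage sketch for the zip dump above; the formatter class name
# "ZipFormatter" is an assumption:
# ZipFormatter.dump("bundle.zip", "somedir")             # archives all entries in somedir
# ZipFormatter.dump("single.zip", "file.txt", mode="w")  # archives a single file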
def infer_compression(cls, path): path = get_path(path) if path.endswith(".tar.gz") or path.endswith(".tgz"): return "gz" elif path.endswith(".tbz2") or path.endswith(".bz2"): return "bz2" else: return None
def bundle(self, dst_path): cmd = [rel_path(__file__, "bundle_repository.sh"), self.repo_path, get_path(dst_path)] cmd += [" ".join(self.exclude_files)] cmd += [" ".join(self.include_files)] code = interruptable_popen(cmd)[0] if code != 0: raise Exception("repository bundling failed")
def load(cls, path, create_session=None, as_text=None):
    """
    Reads a saved TensorFlow graph from *path* and returns it. When *create_session* is *True*,
    a session object (compatible with the v1 API) is created and returned as the second value
    of a 2-tuple. The default value of *create_session* is *True* when TensorFlow v1 is
    detected, and *False* otherwise. When *as_text* is either *True*, or *None* and the file
    extension is ``".pbtxt"`` or ``".pb.txt"``, the content of the file at *path* is expected
    to be a human-readable text file. Otherwise, it is read as a binary protobuf file. Example:

    .. code-block:: python

        graph = TFConstantGraphFormatter.load("path/to/model.pb", create_session=False)

        graph, session = TFConstantGraphFormatter.load("path/to/model.pb", create_session=True)
    """
    tf, tf1, tf_version = cls.import_tf()
    path = get_path(path)

    # default create_session value
    if create_session is None:
        create_session = tf_version[0] == "1"
    if create_session and not tf1:
        raise NotImplementedError(
            "the v1 compatibility layer of TensorFlow v2 is missing, but required when "
            "create_session is True")

    # default as_text value
    if as_text is None:
        as_text = path.endswith((".pbtxt", ".pb.txt"))

    graph = tf.Graph()
    with graph.as_default():
        graph_def = graph.as_graph_def()

        if as_text:
            # use a simple pb reader to load the file into graph_def
            from google.protobuf import text_format
            with open(path, "rb") as f:
                text_format.Merge(f.read(), graph_def)
        else:
            # use the gfile api depending on the TF version
            if tf_version[0] == "1":
                from tensorflow.python.platform import gfile
                with gfile.FastGFile(path, "rb") as f:
                    graph_def.ParseFromString(f.read())
            else:
                with tf.io.gfile.GFile(path, "rb") as f:
                    graph_def.ParseFromString(f.read())

        # import the graph_def (pb object) into the actual graph
        tf.import_graph_def(graph_def, name="")

    if create_session:
        session = tf1.Session(graph=graph)
        return graph, session
    else:
        return graph
def load(cls, path, *args, **kwargs): path = get_path(path) if path.endswith(".parquet"): import awkward as ak return ak.from_parquet(path, *args, **kwargs) # .pickle, .pkl return PickleFormatter.load(path, *args, **kwargs)
def bundle(self, dst_path): bundle_script = rel_path(__file__, "scripts", "bundle_repository.sh") cmd = [bundle_script, self.get_repo_path(), get_path(dst_path)] cmd += [" ".join(self.exclude_files)] cmd += [" ".join(self.include_files)] code = interruptable_popen(cmd, executable="/bin/bash")[0] if code != 0: raise Exception("repository bundling failed")
def dump(cls, path, obj, *args, **kwargs):
    path = get_path(path)

    if path.endswith(".parquet"):
        import awkward as ak
        return ak.to_parquet(obj, path, *args, **kwargs)

    # .pickle, .pkl
    return PickleFormatter.dump(path, obj, *args, **kwargs)
def load(cls, path, *args, **kwargs):
    # deprecation warning until v0.1
    logger.warning(
        "law.contrib.keras.TFKerasModelFormatter is deprecated, please use "
        "law.contrib.tensorflow.TFKerasModelFormatter (named 'tf_keras_model') instead")

    import tensorflow as tf
    return tf.keras.models.load_model(get_path(path), *args, **kwargs)
def copy_to_local(self, dst=None, perm=None, dir_perm=None, **kwargs):
    if dst:
        dst = add_scheme(self.fs.local_fs.abspath(get_path(dst)), "file")
    dst = FileSystemFileTarget.copy_to(self, dst, perm=perm, dir_perm=dir_perm, **kwargs)
    return remove_scheme(dst)
def load(cls, path, *args, **kwargs):
    import ROOT

    # open the file and yield it, closing it again when the context exits
    tfile = ROOT.TFile.Open(get_path(path), *args, **kwargs)
    try:
        yield tfile
    finally:
        if tfile.IsOpen():
            tfile.Close()
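# Hedged usage sketch, assuming the generator above is wrapped in
# contextlib.contextmanager and bound to a formatter class whose name
# "ROOTFormatter" is illustrative:
# with ROOTFormatter.load("histos.root") as tfile:
#     hist = tfile.Get("my_hist")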
def bundle(self, dst_path): bundle_script = rel_path(__file__, "scripts", "bundle_cmssw.sh") cmd = [bundle_script, self.cmssw_path, get_path(dst_path)] if self.exclude != NO_STR: cmd += [self.exclude] code = interruptable_popen(cmd)[0] if code != 0: raise Exception("cmssw bundling failed")
def infer_compression(cls, path): path = get_path(path) if path.endswith((".tar.gz", ".tgz")): return "gz" elif path.endswith((".tar.bz2", ".tbz2", ".bz2")): return "bz2" elif path.endswith((".tar.xz", ".txz", ".lzma")): return "xz" else: return None
def load(cls, path, graph_def=None):
    import tensorflow as tf
    from tensorflow.python.platform import gfile

    if not graph_def:
        graph_def = tf.GraphDef()
    with gfile.FastGFile(get_path(path), "rb") as f:
        graph_def.ParseFromString(f.read())

    return graph_def
def dump(cls, path, src, *args, **kwargs):
    # assume write mode with inferred compression, but also check args and kwargs
    compression = cls.infer_compression(path)
    mode = "w" if not compression else "w:" + compression
    if args:
        mode = args[0]
        args = args[1:]
    elif "mode" in kwargs:
        mode = kwargs.pop("mode")

    # get the filter callback that is forwarded to add()
    _filter = kwargs.pop("filter", None)

    # open a new tar file and add all files in src
    with tarfile.open(get_path(path), mode, *args, **kwargs) as f:
        src = get_path(src)
        if os.path.isfile(src):
            f.add(src, os.path.basename(src), filter=_filter)
        else:
            for elem in os.listdir(src):
                f.add(os.path.join(src, elem), elem, filter=_filter)
def dump(cls, path, model, *args, **kwargs):
    path = get_path(path)

    # the method for saving the model depends on the file extension
    if path.endswith(".json"):
        with open(path, "w") as f:
            f.write(model.to_json())
    elif path.endswith((".yml", ".yaml")):
        with open(path, "w") as f:
            f.write(model.to_yaml())
    else:  # .hdf5, .h5, bundle
        return model.save(path, *args, **kwargs)
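# Hedged examples of the extension dispatch above; "model" stands for any
# Keras model and the formatter class name is illustrative:
# KerasModelFormatter.dump("arch.json", model)  # architecture only, as JSON
# KerasModelFormatter.dump("arch.yaml", model)  # architecture only, as YAML
# KerasModelFormatter.dump("model.h5", model)   # full model via model.save()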
def bundle(self, dst_path): cmd = "{} \"{}\" \"{}\" \"{}\" \"{}\"".format( rel_path(__file__, "scripts", "bundle_repository.sh"), self.get_repo_path(), get_path(dst_path), " ".join(self.exclude_files), " ".join(self.include_files), ) code = interruptable_popen(cmd, shell=True, executable="/bin/bash")[0] if code != 0: raise Exception("repository bundling failed")
def bundle(self, dst_path): cmd = [ rel_path(__file__, "scripts", "bundle_cmssw.sh"), self.get_cmssw_path(), get_path(dst_path), ] if self.exclude != NO_STR: cmd += [self.exclude] cmd = quote_cmd(cmd) code = interruptable_popen(cmd, shell=True, executable="/bin/bash")[0] if code != 0: raise Exception("cmssw bundling failed")
def dump(cls, path, src, *args, **kwargs):
    # get the mode from args and kwargs, default to write mode with inferred compression
    if args:
        mode = args[0]
        args = args[1:]
    elif "mode" in kwargs:
        mode = kwargs.pop("mode")
    else:
        compression = cls.infer_compression(path)
        mode = "w" if not compression else "w:" + compression

    # get the filter callback that is forwarded to add()
    _filter = kwargs.pop("filter", None)

    # open a new tar file and add all files in src
    with tarfile.open(get_path(path), mode, *args, **kwargs) as f:
        srcs = [os.path.abspath(get_path(src)) for src in make_list(src)]
        common_prefix = os.path.commonprefix(srcs)
        for src in srcs:
            f.add(src, arcname=os.path.relpath(src, common_prefix), filter=_filter)
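# Hedged usage sketch for the tar dump above; the formatter class name is an
# assumption. Multiple sources are archived relative to their common prefix,
# and a tarfile filter callback can be forwarded:
# TarFormatter.dump("bundle.tar.gz", ["dir/a.txt", "dir/b.txt"])
# TarFormatter.dump("bundle.tgz", "dir",
#     filter=lambda ti: None if ti.name.endswith(".pyc") else ti)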
def load(cls, path, *args, **kwargs): path = get_path(path) if path.endswith(".root"): from coffea.nanoevents import NanoEventsFactory return NanoEventsFactory.from_root(path, *args, **kwargs) if path.endswith(".parquet"): from coffea.nanoevents import NanoEventsFactory return NanoEventsFactory.from_parquet(path, *args, **kwargs) # .coffea from coffea.util import load return load(path, *args, **kwargs)
def load(cls, path, *args, **kwargs):
    import tensorflow as tf

    path = get_path(path)

    # the method for loading the model depends on the file extension
    if path.endswith(".json"):
        with open(path, "r") as f:
            return tf.keras.models.model_from_json(f.read(), *args, **kwargs)
    elif path.endswith((".yml", ".yaml")):
        with open(path, "r") as f:
            return tf.keras.models.model_from_yaml(f.read(), *args, **kwargs)
    else:  # .hdf5, .h5, bundle
        return tf.keras.models.load_model(path, *args, **kwargs)
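# Hedged counterpart examples for the loader above; names are illustrative:
# model = KerasModelFormatter.load("arch.json")  # rebuild architecture from JSON
# model = KerasModelFormatter.load("model.h5")   # full model via load_model()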