Example #1
    def load(cls, path, dst, *args, **kwargs):
        # assume read mode, but also check args and kwargs
        mode = "r"
        if args:
            mode = args[0]
            args = args[1:]
        elif "mode" in kwargs:
            mode = kwargs.pop("mode")

        # open zip file and extract to dst
        with zipfile.ZipFile(get_path(path), mode, *args, **kwargs) as f:
            f.extractall(get_path(dst))
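
A minimal usage sketch for the zip variant above; the class name ZipFormatter and the paths are hypothetical, assuming load() is bound as a classmethod:

    # extract an archive with the default read mode
    ZipFormatter.load("data/archive.zip", "data/extracted")

    # an explicit mode, passed positionally or as a keyword, overrides "r"
    ZipFormatter.load("data/archive.zip", "data/extracted", mode="r")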
Example #2
    def load(cls, path, dst, *args, **kwargs):
        # assume read mode with inferred compression, but also check args and kwargs
        compression = cls.infer_compression(path)
        mode = "r" if not compression else "r:" + compression
        if args:
            mode = args[0]
            args = args[1:]
        elif "mode" in kwargs:
            mode = kwargs.pop("mode")

        # open tar file and extract to dst
        with tarfile.open(get_path(path), mode, *args, **kwargs) as f:
            f.extractall(get_path(dst))
Example #3
    def load(cls, path, dst, *args, **kwargs):
        # get the mode from args and kwargs, default to read mode with inferred compression
        if args:
            mode = args[0]
            args = args[1:]
        elif "mode" in kwargs:
            mode = kwargs.pop("mode")
        else:
            compression = cls.infer_compression(path)
            mode = "r" if not compression else "r:" + compression

        # open tar file and extract to dst
        with tarfile.open(get_path(path), mode, *args, **kwargs) as f:
            f.extractall(get_path(dst))
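
The tar variants above resolve the open mode in this order: an explicit positional argument, then a "mode" keyword, then a mode inferred from the file extension. A sketch with a hypothetical TarFormatter class and paths:

    # mode "r:gz" inferred from the extension
    TarFormatter.load("data/archive.tar.gz", "data/extracted")

    # explicit modes take precedence over inference
    TarFormatter.load("data/archive.tar.gz", "data/extracted", "r:*")
    TarFormatter.load("data/archive.tar.gz", "data/extracted", mode="r:gz")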
Example #4
    def __init__(self,
                 path=None,
                 fs=LocalFileSystem.default_instance,
                 is_tmp=False,
                 tmp_dir=None,
                 **kwargs):
        if isinstance(fs, six.string_types):
            fs = LocalFileSystem(fs)

        # handle tmp paths manually since luigi uses the env tmp dir
        if not path:
            if not is_tmp:
                raise Exception("either path or is_tmp must be set")

            # if not set, get the tmp dir from the config and ensure that it exists
            cfg = Config.instance()
            if tmp_dir:
                tmp_dir = get_path(tmp_dir)
            else:
                tmp_dir = os.path.realpath(
                    cfg.get_expanded("target", "tmp_dir"))
            if not fs.exists(tmp_dir):
                perm = cfg.get_expanded_int("target", "tmp_dir_perm")
                fs.mkdir(tmp_dir, perm=perm)

            # create a random path
            while True:
                basename = "luigi-tmp-{:09d}".format(
                    random.randint(0, 999999999))
                path = os.path.join(tmp_dir, basename)
                if not fs.exists(path):
                    break

            # is_tmp might be a file extension
            if isinstance(is_tmp, six.string_types):
                if is_tmp[0] != ".":
                    is_tmp = "." + is_tmp
                path += is_tmp
        else:
            # ensure path is not a target and does not contain a scheme
            path = fs._unscheme(get_path(path))
            # make absolute when not starting with a variable
            if not path.startswith(("$", "~")):
                path = os.path.abspath(path)

        super(LocalTarget, self).__init__(path=path,
                                          is_tmp=is_tmp,
                                          fs=fs,
                                          **kwargs)
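
A short usage sketch for the constructor above, assuming the surrounding LocalTarget class; the paths are hypothetical:

    # explicit path: made absolute unless it starts with "$" or "~"
    target = LocalTarget("data/output.json")

    # no path: a random path is created in the configured tmp_dir; a string
    # passed as is_tmp is appended as a file extension
    tmp_target = LocalTarget(is_tmp="json")  # e.g. ".../luigi-tmp-012345678.json"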
Example #5
    def move_from_local(self, src=None, perm=None, dir_perm=None, **kwargs):
        src = add_scheme(self.fs.local_fs.abspath(get_path(src)), "file")
        return FileSystemFileTarget.move_from(self,
                                              src,
                                              perm=perm,
                                              dir_perm=dir_perm,
                                              **kwargs)
Example #6
    def load(cls, path, *args, **kwargs):
        import ROOT
        ROOT.PyConfig.IgnoreCommandLineOptions = True
        ROOT.gROOT.SetBatch()
        import root_numpy

        return root_numpy.root2array(get_path(path), *args, **kwargs)
Example #7
    def move_to_local(self, dst=None, dir_perm=None, **kwargs):
        if dst:
            dst = add_scheme(_local_fs.abspath(get_path(dst)), "file")
        return FileSystemFileTarget.move_to(self,
                                            dst,
                                            dir_perm=dir_perm,
                                            **kwargs)
Example #8
    def dump(cls, path, session, output_names, *args, **kwargs):
        """
        Takes a TensorFlow *session* object (compatible with the v1 API), converts its contained
        graph into a simpler version with variables translated into constant tensors, and saves it
        to a protobuf file at *path*. *output_names* must be a list of names of output tensors to
        save. In turn, TensorFlow internally determines which subgraph(s) to convert and save. All
        *args* and *kwargs* are forwarded to :py:func:`tf.compat.v1.train.write_graph`.

        .. note::

            When used with TensorFlow v2, this function requires the v1 API compatibility layer.
            When :py:attr:`tf.compat.v1` is not available, a *NotImplementedError* is raised.
        """
        _, tf1 = cls.import_tf()

        # complain when the v1 compatibility layer does not exist
        if not tf1:
            raise NotImplementedError(
                "the v1 compatibility layer of TensorFlow v2 is missing, but "
                "required")

        # convert the graph
        constant_graph = tf1.graph_util.convert_variables_to_constants(
            session, session.graph.as_graph_def(), output_names)

        # default as_text value
        kwargs.setdefault("as_text", path.endswith((".pbtxt", ".pb.txt")))

        # write the graph
        graph_dir, graph_name = os.path.split(get_path(path))
        return tf1.train.write_graph(constant_graph, graph_dir, graph_name,
                                     *args, **kwargs)
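
A hedged usage sketch for the dump above, assuming it belongs to the same TFConstantGraphFormatter class whose load method appears in a later example; the session and tensor name are hypothetical:

    # as_text defaults to False here since the path does not end in
    # ".pbtxt" or ".pb.txt"
    TFConstantGraphFormatter.dump("model.pb", session, ["output/Softmax"])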
Example #9
    def dump(cls, path, model, *args, **kwargs):
        # deprecation warning until v0.1
        logger.warning(
            "law.contrib.keras.TFKerasModelFormatter is deprecated, please use "
            "law.contrib.tensorflow.TFKerasModelFormatter (named 'tf_keras_model') instead"
        )

        model.save(get_path(path), *args, **kwargs)
Example #10
    def dump(cls, path, session, output_names, *args, **kwargs):
        import tensorflow as tf

        graph_dir, graph_name = os.path.split(get_path(path))

        const_graph = tf.graph_util.convert_variables_to_constants(
            session, session.graph.as_graph_def(), output_names)
        tf.train.write_graph(const_graph, graph_dir, graph_name, *args, **kwargs)
Example #11
    def dump(cls, path, src, *args, **kwargs):
        # assume write mode, but also check args and kwargs
        mode = "w"
        if args:
            mode = args[0]
            args = args[1:]
        elif "mode" in kwargs:
            mode = kwargs.pop("mode")

        # open a new zip file and add all files in src
        with zipfile.ZipFile(get_path(path), mode, *args, **kwargs) as f:
            src = get_path(src)
            if os.path.isfile(src):
                f.write(src, os.path.basename(src))
            else:
                for elem in os.listdir(src):
                    f.write(os.path.join(src, elem), elem)
Example #12
    def infer_compression(cls, path):
        path = get_path(path)
        if path.endswith(".tar.gz") or path.endswith(".tgz"):
            return "gz"
        elif path.endswith(".tbz2") or path.endswith(".bz2"):
            return "bz2"
        else:
            return None
Example #13
    def bundle(self, dst_path):
        cmd = [rel_path(__file__, "bundle_repository.sh"), self.repo_path, get_path(dst_path)]
        cmd += [" ".join(self.exclude_files)]
        cmd += [" ".join(self.include_files)]

        code = interruptable_popen(cmd)[0]
        if code != 0:
            raise Exception("repository bundling failed")
Example #14
    def load(cls, path, create_session=None, as_text=None):
        """
        Reads a saved TensorFlow graph from *path* and returns it. When *create_session* is *True*,
        a session object (compatible with the v1 API) is created and returned as the second value of
        a 2-tuple. The default value of *create_session* is *True* when TensorFlow v1 is detected,
        and *False* otherwise. When *as_text* is either *True*, or *None* and the file extension is
        ``".pbtxt"`` or ``".pb.txt"``, the content of the file at *path* is expected to be a
        human-readable text file. Otherwise, it is read as a binary protobuf file. Example:

        .. code-block:: python

            graph = TFConstantGraphFormatter.load("path/to/model.pb", create_session=False)

            graph, session = TFConstantGraphFormatter.load("path/to/model.pb", create_session=True)
        """
        tf, tf1, tf_version = cls.import_tf()
        path = get_path(path)

        # default create_session value
        if create_session is None:
            create_session = tf_version[0] == "1"
        if create_session and not tf1:
            raise NotImplementedError(
                "the v1 compatibility layer of TensorFlow v2 is missing, "
                "but required by when create_session is True")

        # default as_text value
        if as_text is None:
            as_text = path.endswith((".pbtxt", ".pb.txt"))

        graph = tf.Graph()
        with graph.as_default():
            graph_def = graph.as_graph_def()

            if as_text:
                # use a simple pb reader to load the file into graph_def
                from google.protobuf import text_format
                with open(path, "rb") as f:
                    text_format.Merge(f.read(), graph_def)

            else:
                # use the gfile api depending on the TF version
                if tf_version[0] == "1":
                    from tensorflow.python.platform import gfile
                    with gfile.FastGFile(path, "rb") as f:
                        graph_def.ParseFromString(f.read())
                else:
                    with tf.io.gfile.GFile(path, "rb") as f:
                        graph_def.ParseFromString(f.read())

            # import the graph_def (pb object) into the actual graph
            tf.import_graph_def(graph_def, name="")

        if create_session:
            session = tf1.Session(graph=graph)
            return graph, session
        else:
            return graph
Example #15
    def load(cls, path, *args, **kwargs):
        path = get_path(path)

        if path.endswith(".parquet"):
            import awkward as ak
            return ak.from_parquet(path, *args, **kwargs)

        # .pickle, .pkl
        return PickleFormatter.load(path, *args, **kwargs)
Example #16
    def bundle(self, dst_path):
        bundle_script = rel_path(__file__, "scripts", "bundle_repository.sh")
        cmd = [bundle_script, self.get_repo_path(), get_path(dst_path)]
        cmd += [" ".join(self.exclude_files)]
        cmd += [" ".join(self.include_files)]

        code = interruptable_popen(cmd, executable="/bin/bash")[0]
        if code != 0:
            raise Exception("repository bundling failed")
Example #17
    def dump(cls, path, obj, *args, **kwargs):
        path = get_path(path)

        if path.endswith(".parquet"):
            import awkward as ak
            return ak.to_parquet(obj, path, *args, **kwargs)

        # .pickle, .pkl
        return PickleFormatter.dump(path, obj, *args, **kwargs)
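
A round-trip sketch combining the parquet-aware load and dump above; the formatter name AwkwardFormatter, the array, and the path are hypothetical (non-parquet extensions fall back to PickleFormatter):

    import awkward as ak

    arr = ak.Array([{"x": 1.0}, {"x": 2.0}])
    AwkwardFormatter.dump("events.parquet", arr)
    arr2 = AwkwardFormatter.load("events.parquet")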
Example #18
    def load(cls, path, *args, **kwargs):
        # deprecation warning until v0.1
        logger.warning(
            "law.contrib.keras.TFKerasModelFormatter is deprecated, please use "
            "law.contrib.tensorflow.TFKerasModelFormatter (named 'tf_keras_model') instead"
        )

        import tensorflow as tf
        return tf.keras.models.load_model(get_path(path), *args, **kwargs)
Example #19
    def copy_to_local(self, dst=None, perm=None, dir_perm=None, **kwargs):
        if dst:
            dst = add_scheme(self.fs.local_fs.abspath(get_path(dst)), "file")
        dst = FileSystemFileTarget.copy_to(self,
                                           dst,
                                           perm=perm,
                                           dir_perm=dir_perm,
                                           **kwargs)
        return remove_scheme(dst)
Example #20
    def load(cls, path, *args, **kwargs):
        import ROOT

        tfile = ROOT.TFile.Open(get_path(path), *args, **kwargs)
        try:
            yield tfile
        finally:
            if tfile.IsOpen():
                tfile.Close()
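
Since the method above yields the open file and closes it in the finally block, it is presumably registered as a context manager (e.g. via contextlib.contextmanager). A sketch under that assumption, with hypothetical formatter and object names:

    with ROOTFormatter.load("histograms.root") as tfile:
        hist = tfile.Get("h_pt")
    # the TFile is closed again once the with-block exits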
Example #21
    def bundle(self, dst_path):
        bundle_script = rel_path(__file__, "scripts", "bundle_cmssw.sh")
        cmd = [bundle_script, self.cmssw_path, get_path(dst_path)]
        if self.exclude != NO_STR:
            cmd += [self.exclude]

        code = interruptable_popen(cmd)[0]
        if code != 0:
            raise Exception("cmssw bundling failed")
Example #22
    def infer_compression(cls, path):
        path = get_path(path)
        if path.endswith((".tar.gz", ".tgz")):
            return "gz"
        elif path.endswith((".tar.bz2", ".tbz2", ".bz2")):
            return "bz2"
        elif path.endswith((".tar.xz", ".txz", ".lzma")):
            return "xz"
        else:
            return None
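
For illustration, the extension mapping above behaves as follows; the class name TarFormatter is hypothetical:

    TarFormatter.infer_compression("files.tar.gz")  # "gz"
    TarFormatter.infer_compression("files.tbz2")    # "bz2"
    TarFormatter.infer_compression("files.tar.xz")  # "xz"
    TarFormatter.infer_compression("files.tar")     # None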
Example #23
    def load(cls, path, graph_def=None):
        import tensorflow as tf
        from tensorflow.python.platform import gfile

        if not graph_def:
            graph_def = tf.GraphDef()

        with gfile.FastGFile(get_path(path), "rb") as f:
            graph_def.ParseFromString(f.read())

        return graph_def
Example #24
    def dump(cls, path, src, *args, **kwargs):
        # assume write mode with inferred compression, but also check args and kwargs
        compression = cls.infer_compression(path)
        mode = "w" if not compression else "w:" + compression
        if args:
            mode = args[0]
            args = args[1:]
        elif "mode" in kwargs:
            mode = kwargs.pop("mode")

        # get the filter callback that is forwarded to add()
        _filter = kwargs.pop("filter", None)

        # open a new tar archive and add all files in src
        with tarfile.open(get_path(path), mode, *args, **kwargs) as f:
            src = get_path(src)
            if os.path.isfile(src):
                f.add(src, os.path.basename(src), filter=_filter)
            else:
                for elem in os.listdir(src):
                    f.add(os.path.join(src, elem), elem, filter=_filter)
Example #25
    def dump(cls, path, model, *args, **kwargs):
        path = get_path(path)

        # the method for saving the model depends on the file extension
        if path.endswith(".json"):
            with open(path, "w") as f:
                f.write(model.to_json())
        elif path.endswith((".yml", ".yaml")):
            with open(path, "w") as f:
                f.write(model.to_yaml())
        else:  # .hdf5, .h5, bundle
            return model.save(path, *args, **kwargs)
Example #26
    def bundle(self, dst_path):
        cmd = "{} \"{}\" \"{}\" \"{}\" \"{}\"".format(
            rel_path(__file__, "scripts", "bundle_repository.sh"),
            self.get_repo_path(),
            get_path(dst_path),
            " ".join(self.exclude_files),
            " ".join(self.include_files),
        )

        code = interruptable_popen(cmd, shell=True, executable="/bin/bash")[0]
        if code != 0:
            raise Exception("repository bundling failed")
Example #27
    def bundle(self, dst_path):
        cmd = [
            rel_path(__file__, "scripts", "bundle_cmssw.sh"),
            self.get_cmssw_path(),
            get_path(dst_path),
        ]
        if self.exclude != NO_STR:
            cmd += [self.exclude]
        cmd = quote_cmd(cmd)

        code = interruptable_popen(cmd, shell=True, executable="/bin/bash")[0]
        if code != 0:
            raise Exception("cmssw bundling failed")
Example #28
    def dump(cls, path, src, *args, **kwargs):
        # get the mode from args and kwargs, default to write mode with inferred compression
        if args:
            mode = args[0]
            args = args[1:]
        elif "mode" in kwargs:
            mode = kwargs.pop("mode")
        else:
            compression = cls.infer_compression(path)
            mode = "w" if not compression else "w:" + compression

        # get the filter callback that is forwarded to add()
        _filter = kwargs.pop("filter", None)

        # open a new tar archive and add all given sources
        with tarfile.open(get_path(path), mode, *args, **kwargs) as f:
            srcs = [os.path.abspath(get_path(src)) for src in make_list(src)]
            common_prefix = os.path.commonprefix(srcs)
            for src in srcs:
                f.add(src,
                      arcname=os.path.relpath(src, common_prefix),
                      filter=_filter)
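
A usage sketch for the multi-source variant above, with hypothetical paths and formatter name; archive names are made relative to the common prefix of all sources, and the filter callback is forwarded to TarFile.add():

    # bundle several sources into one compressed archive
    TarFormatter.dump("bundle.tar.gz", ["pkg/module.py", "pkg/data"])

    # returning None from the filter skips an entry
    TarFormatter.dump(
        "bundle.tar.gz",
        "pkg",
        filter=lambda info: None if info.name.endswith(".pyc") else info,
    )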
Example #29
    def load(cls, path, *args, **kwargs):
        path = get_path(path)

        if path.endswith(".root"):
            from coffea.nanoevents import NanoEventsFactory
            return NanoEventsFactory.from_root(path, *args, **kwargs)

        if path.endswith(".parquet"):
            from coffea.nanoevents import NanoEventsFactory
            return NanoEventsFactory.from_parquet(path, *args, **kwargs)

        # .coffea
        from coffea.util import load
        return load(path, *args, **kwargs)
Example #30
    def load(cls, path, *args, **kwargs):
        import tensorflow as tf

        path = get_path(path)

        # the method for loading the model depends on the file extension
        if path.endswith(".json"):
            with open(path, "r") as f:
                return tf.keras.models.model_from_json(f.read(), *args, **kwargs)
        elif path.endswith((".yml", ".yaml")):
            with open(path, "r") as f:
                return tf.keras.models.model_from_yaml(f.read(), *args, **kwargs)
        else:  # .hdf5, .h5, bundle
            return tf.keras.models.load_model(path, *args, **kwargs)
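
A sketch of the extension-driven dispatch shared by the Keras load and dump methods above; the formatter name KerasModelFormatter and the model are hypothetical:

    # .json / .yml / .yaml store the architecture only, without weights
    KerasModelFormatter.dump("model.json", model)
    model2 = KerasModelFormatter.load("model.json")

    # other extensions (e.g. .h5) save and restore the full model
    KerasModelFormatter.dump("model.h5", model)
    model3 = KerasModelFormatter.load("model.h5")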