Example #1
    def upload_dir(
        self,
        dirname,
        blob,
        container_name=None,
        use_basename=True,
        workers=0,
        last_time=None,
        exclude: Optional[List[str]] = None,
    ):
        """
        Uploads a local directory to the Azure Blob service.

        Args:
            dirname: `str`. name of the directory to upload.
            blob: `str`. blob to upload to.
            container_name: `str`. the name of the container.
            use_basename: `bool`. whether or not to use the basename of the directory.
            workers: `int`. number of workers to use for parallel uploads.
            last_time: `datetime`. if provided, it will only upload the file if changed after last_time.
            exclude: `list`. list of paths to exclude.
        """
        if not container_name:
            container_name, _, blob = self.parse_wasbs_url(blob)

        if use_basename:
            blob = append_basename(blob, dirname)

        pool, future_results = self.init_pool(workers)

        # Turn the path into an absolute path
        dirname = os.path.abspath(dirname)
        with get_files_in_path_context(dirname, exclude=exclude) as files:
            for f in files:

                # If last_time is provided, we check whether we should re-upload the file
                if last_time and not file_modified_since(
                    filepath=f, last_time=last_time
                ):
                    continue

                file_blob = os.path.join(blob, os.path.relpath(f, dirname))
                future_results = self.submit_pool(
                    workers=workers,
                    pool=pool,
                    future_results=future_results,
                    fn=self.upload_file,
                    filename=f,
                    blob=file_blob,
                    container_name=container_name,
                    use_basename=False,
                )

        if workers:
            futures.wait(future_results)
            self.close_pool(pool=pool)
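For orientation, a hypothetical call might look like the following; the store class name and the wasbs URL are illustrative assumptions, not taken from the example above.

# Hypothetical usage sketch; the store class and URL are assumptions.
store = AzureBlobStoreService()  # assumed: a client wrapper exposing upload_dir
store.upload_dir(
    dirname="./outputs",
    blob="wasbs://container@account.blob.core.windows.net/runs/42",
    workers=4,          # upload files in parallel through the worker pool
    exclude=[".git"],   # skip paths by name
)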
Example #2
    def test_get_files_in_path_context(self):
        dirname = tempfile.mkdtemp()
        fpath1 = dirname + "/test1.txt"
        with open(fpath1, "w") as f:
            f.write("data1")

        fpath2 = dirname + "/test2.txt"
        with open(fpath2, "w") as f:
            f.write("data2")

        dirname2 = tempfile.mkdtemp(prefix=dirname + "/")
        fpath3 = dirname2 + "/test3.txt"
        with open(fpath3, "w") as f:
            f.write("data3")

        dirname3 = tempfile.mkdtemp(prefix=dirname + "/")
        fpath4 = dirname3 + "/test4.txt"
        with open(fpath4, "w") as f:
            f.write("data1")

        fpath5 = dirname3 + "/test5.txt"
        with open(fpath5, "w") as f:
            f.write("data2")

        dirname4 = tempfile.mkdtemp(prefix=dirname3 + "/")
        fpath6 = dirname4 + "/test6.txt"
        with open(fpath6, "w") as f:
            f.write("data3")

        with get_files_in_path_context(dirname) as files:
            assert len(files) == 6
            assert set(files) == {
                fpath1, fpath2, fpath3, fpath4, fpath5, fpath6
            }

        with get_files_in_path_context(
            dirname, exclude=[dirname3.split("/")[-1]]
        ) as files:
            assert len(files) == 3
            assert set(files) == {fpath1, fpath2, fpath3}
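The implementation of `get_files_in_path_context` is not shown in these examples. Judging by the behavior this test exercises, a minimal sketch could look like the following; the real implementation may differ, e.g. in how `exclude` entries are matched.

import os
from contextlib import contextmanager
from typing import List, Optional

@contextmanager
def get_files_in_path_context(path: str, exclude: Optional[List[str]] = None):
    """Yields the list of file paths found recursively under `path`,
    skipping any directory whose basename appears in `exclude`."""
    excluded = set(exclude or [])
    results = []
    for root, dirs, files in os.walk(path):
        # Prune excluded directories in place so os.walk does not descend into them.
        dirs[:] = [d for d in dirs if d not in excluded]
        results.extend(os.path.join(root, f) for f in files)
    yield results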
Example #3
    def upload_dir(
        self,
        dirname,
        blob,
        bucket_name=None,
        use_basename=True,
        workers=0,
        last_time=None,
    ):
        """
        Uploads a local directory to Google Cloud Storage.

        Args:
            dirname: `str`. name of the directory to upload.
            blob: `str`. blob to upload to.
            bucket_name: `str`. the name of the bucket.
            use_basename: `bool`. whether or not to use the basename of the directory.
            workers: `int`. number of workers to use for parallel uploads.
            last_time: `datetime`. if provided, it will only upload the file if changed after last_time.
        """
        if not bucket_name:
            bucket_name, blob = self.parse_gcs_url(blob)

        if use_basename:
            blob = append_basename(blob, dirname)

        pool, future_results = self.init_pool(workers)

        # Turn the path into an absolute path
        dirname = os.path.abspath(dirname)
        with get_files_in_path_context(dirname) as files:
            for f in files:

                # If last_time is provided, we check whether we should re-upload the file
                if last_time and not file_modified_since(filepath=f,
                                                         last_time=last_time):
                    continue

                file_blob = os.path.join(blob, os.path.relpath(f, dirname))
                future_results = self.submit_pool(
                    workers=workers,
                    pool=pool,
                    future_results=future_results,
                    fn=self.upload_file,
                    filename=f,
                    blob=file_blob,
                    bucket_name=bucket_name,
                    use_basename=False,
                )

        if workers:
            futures.wait(future_results)
            self.close_pool(pool=pool)
Example #4
    def upload_dir(
        self,
        dirname,
        path_to,
        use_basename=True,
        workers=0,
        last_time=None,
        exclude: Optional[List[str]] = None,
    ):
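        """
        Uploads a local directory to the provided local path.

        Args:
            dirname: `str`. name of the directory to upload.
            path_to: `str`. path to upload the directory to.
            use_basename: `bool`. whether or not to use the basename of the directory.
            workers: `int`. number of workers to use for parallel uploads.
            last_time: `datetime`. if provided, it will only upload the file if changed after last_time.
            exclude: `list`. list of paths to exclude.
        """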
        if use_basename:
            path_to = append_basename(path_to, dirname)

        if dirname == path_to:
            return

        check_or_create_path(path_to, is_dir=True)
        pool, future_results = self.init_pool(workers)

        # Turn the path into an absolute path
        dirname = os.path.abspath(dirname)
        with get_files_in_path_context(dirname, exclude=exclude) as files:
            for f in files:

                # If last_time is provided, we check whether we should re-upload the file
                if last_time and not file_modified_since(
                    filepath=f, last_time=last_time
                ):
                    continue

                file_path_to = os.path.join(path_to, os.path.relpath(f, dirname))
                future_results = self.submit_pool(
                    workers=workers,
                    pool=pool,
                    future_results=future_results,
                    fn=self.upload_file,
                    filename=f,
                    path_to=file_path_to,
                    use_basename=False,
                )

        if workers:
            futures.wait(future_results)
            self.close_pool(pool=pool)
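As with the cloud variants, a hypothetical call might look like this; the store class name is an illustrative assumption.

# Hypothetical usage sketch; the store class is an assumption.
store = LocalStoreService()  # assumed: a client wrapper exposing upload_dir
store.upload_dir(
    dirname="./outputs",
    path_to="/artifacts/runs/42",  # with use_basename=True, files land under /artifacts/runs/42/outputs
    workers=2,
)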
Example #5
def sync_events_summaries(
    events_path: str,
    events_kind: str,
    last_check: Optional[datetime],
    connection_name: Optional[str] = None,
) -> Tuple[List, Dict]:
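    """Scans event files of `events_kind` under `events_path`, builds a
    `V1RunArtifact` summary for every file changed since `last_check`, and
    collects the last value of each metric event.
    """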
    current_events_path = get_path(events_path, events_kind)

    summaries = []
    last_values = {}
    with get_files_in_path_context(current_events_path) as files:
        for f in files:
            if last_check and not file_modified_since(filepath=f,
                                                      last_time=last_check):
                continue

            event_name = os.path.basename(f).split(".plx")[0]
            event = V1Events.read(kind=events_kind, name=event_name, data=f)
            if event.df.empty:
                continue

            # Get only the relpath from run uuid
            event_rel_path = os.path.relpath(f, CONTEXT_MOUNT_ARTIFACTS)
            summary = event.get_summary()
            run_artifact = V1RunArtifact(
                name=event_name,
                kind=events_kind,
                connection=connection_name,
                summary=summary,
                path=event_rel_path,
                is_input=False,
            )
            summaries.append(run_artifact)
            if events_kind == V1ArtifactKind.METRIC:
                last_values[event_name] = summary[V1ArtifactKind.METRIC]["last"]

    return summaries, last_values
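A hypothetical invocation could look like the following; the paths and the connection name are illustrative assumptions.

# Hypothetical usage sketch; paths and names below are assumptions.
summaries, last_values = sync_events_summaries(
    events_path="/plx-context/artifacts/uuid/events",  # assumed layout
    events_kind=V1ArtifactKind.METRIC,
    last_check=None,  # None syncs every event file
    connection_name="artifacts-store",  # assumed connection name
)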
Example #6
    def upload_dir(
        self,
        dirname,
        key,
        bucket_name=None,
        overwrite=False,
        encrypt=False,
        acl=None,
        use_basename=True,
        workers=0,
        last_time=None,
    ):
        """
        Uploads a local directory to S3.

        Args:
            dirname: `str`. name of the directory to upload.
            key: `str`. S3 key that will point to the file.
            bucket_name: `str`. Name of the bucket in which to store the file.
            overwrite: `bool`. A flag to decide whether or not to overwrite the key
                if it already exists. If overwrite is False and the key exists, an
                error will be raised.
            encrypt: `bool`. If True, the file will be encrypted on the server-side
                by S3 and will be stored in an encrypted form while at rest in S3.
            acl: `str`. ACL to use for uploading, e.g. "public-read".
            use_basename: `bool`. whether or not to use the basename of the directory.
            workers: `int`. number of workers to use for parallel uploads.
            last_time: `datetime`. if provided, it will only upload the file if changed after last_time.
        """
        if not bucket_name:
            bucket_name, key = self.parse_s3_url(key)

        if use_basename:
            key = append_basename(key, dirname)

        pool, future_results = self.init_pool(workers)

        # Turn the path into an absolute path
        dirname = os.path.abspath(dirname)
        with get_files_in_path_context(dirname) as files:
            for f in files:

                # If last_time is provided, we check whether we should re-upload the file
                if last_time and not file_modified_since(
                    filepath=f, last_time=last_time
                ):
                    continue

                file_key = os.path.join(key, os.path.relpath(f, dirname))
                future_results = self.submit_pool(
                    workers=workers,
                    pool=pool,
                    future_results=future_results,
                    fn=self.upload_file,
                    filename=f,
                    key=file_key,
                    bucket_name=bucket_name,
                    overwrite=overwrite,
                    encrypt=encrypt,
                    acl=acl,
                    use_basename=False,
                )

        if workers:
            futures.wait(future_results)
            self.close_pool(pool=pool)
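Again for orientation, a hypothetical call might look like this; the store class name and the S3 URL are illustrative assumptions.

# Hypothetical usage sketch; the store class and S3 URL are assumptions.
store = S3StoreService()  # assumed: a client wrapper exposing upload_dir
store.upload_dir(
    dirname="./outputs",
    key="s3://my-bucket/runs/42",
    overwrite=True,  # replace existing keys instead of raising an error
    acl="private",
    workers=4,
)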