Example #1
    def test_local_download(self, container: AttributeContainer):
        first, second = self.gen_key(), self.gen_key()
        filename, filepath = fake.unique.file_name(), fake.unique.file_path(
            depth=3).lstrip("/")

        with tmp_context() as tmp:
            with open(filename, "w", encoding="utf-8") as handler:
                handler.write(fake.paragraph(nb_sentences=5))

            os.makedirs(Path(filepath).parent, exist_ok=True)
            with open(filepath, "w", encoding="utf-8") as handler:
                handler.write(fake.paragraph(nb_sentences=5))

            # Relative path
            container[first].track_files(filename)
            # Absolute path
            container[second].track_files(tmp)

            container.sync()

            with tmp_context():
                with with_check_if_file_appears(f"artifacts/{filename}"):
                    container[first].download("artifacts/")

                with with_check_if_file_appears(filepath):
                    container[second].download()
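
These tests lean on two helpers that are not part of the excerpt. Below is a minimal sketch of plausible implementations, assuming tmp_context switches into a fresh temporary directory and with_check_if_file_appears asserts that the wrapped block creates the given file; the names come from the tests above, but the bodies are assumptions, not Neptune's actual test utilities.

    import os
    import tempfile
    from contextlib import contextmanager

    @contextmanager
    def tmp_context():
        # hypothetical helper: run the block inside a fresh temporary
        # directory and restore the previous working directory afterwards
        previous_cwd = os.getcwd()
        with tempfile.TemporaryDirectory() as tmp:
            os.chdir(tmp)
            try:
                yield tmp
            finally:
                os.chdir(previous_cwd)

    @contextmanager
    def with_check_if_file_appears(path):
        # hypothetical helper: assert that the wrapped block creates `path`
        assert not os.path.exists(path)
        yield
        assert os.path.exists(path), f"{path} did not appear"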
Example #2
    def test_s3_download(self, container: AttributeContainer, bucket,
                         environment):
        first = self.gen_key()
        prefix = f"{environment.project}/{self.gen_key()}/{type(container).__name__}"
        filename, filepath = fake.unique.file_name(), fake.unique.file_path(
            depth=3).lstrip("/")

        bucket_name, s3_client = bucket

        with tmp_context():
            with open(filename, "w", encoding="utf-8") as handler:
                handler.write(fake.paragraph(nb_sentences=5))

            os.makedirs(Path(filepath).parent, exist_ok=True)
            with open(filepath, "w", encoding="utf-8") as handler:
                handler.write(fake.paragraph(nb_sentences=5))

            s3_client.meta.client.upload_file(filename, bucket_name,
                                              f"{prefix}/{filename}")
            s3_client.meta.client.upload_file(filepath, bucket_name,
                                              f"{prefix}/{filepath}")

        container[first].track_files(f"s3://{bucket_name}/{prefix}")

        container.sync()

        with tempfile.TemporaryDirectory() as tmp:
            with with_check_if_file_appears(f"{tmp}/{filename}"):
                container[first].download(tmp)

        with tmp_context():
            with with_check_if_file_appears(filename):
                container[first].download()
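
The bucket fixture is not shown either. Judging by the calls above, it yields a (bucket_name, s3_resource) pair, where the second element is a boto3 resource rather than a low-level client, since the tests reach the client through .meta.client. A hypothetical fixture along those lines:

    import uuid

    import boto3
    import pytest

    @pytest.fixture
    def bucket():
        # hypothetical fixture: create a throwaway bucket and yield its name
        # together with a boto3 *resource* (hence `.meta.client` in the tests)
        s3 = boto3.resource("s3")
        bucket_name = f"e2e-{uuid.uuid4()}"
        s3.create_bucket(Bucket=bucket_name)  # assumes the us-east-1 region
        yield bucket_name, s3
        s3.Bucket(bucket_name).objects.all().delete()
        s3.Bucket(bucket_name).delete()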
Example #3
    def test_sync_run(self, environment):
        custom_run_id = "-".join(fake.word() for _ in range(3))

        with tmp_context() as tmp:
            # prepare test values
            key = self.gen_key()
            original_value = fake.word()
            updated_value = fake.word()

            # init run
            run = neptune.init(
                custom_run_id=custom_run_id,
                project=environment.project,
                **DISABLE_SYSLOG_KWARGS,
            )

            def get_next_run():
                return neptune.init(
                    custom_run_id=custom_run_id,
                    project=environment.project,
                    **DISABLE_SYSLOG_KWARGS,
                )

            self._test_sync(
                exp=run,
                get_next_exp=get_next_run,
                path=tmp,
                key=key,
                original_value=original_value,
                updated_value=updated_value,
            )
Example #4
    def test_offline_sync(self, environment):
        with tmp_context() as tmp:
            # create run in offline mode
            run = neptune.init(
                mode="offline",
                project=environment.project,
                **DISABLE_SYSLOG_KWARGS,
            )
            # assign some values
            key = self.gen_key()
            val = fake.word()
            run[key] = val

            # and stop it
            run.stop()

            # run the sync CLI command against the offline data
            result = runner.invoke(sync, ["--path", tmp, "-p", environment.project])
            assert result.exit_code == 0

            # offline mode doesn't support custom_run_id, so we have to parse
            # the sync output to determine the sys_id
            sys_id_found = re.search(self.SYNCHRONIZED_SYSID_RE, result.stdout)
            assert len(sys_id_found.groups()) == 1
            sys_id = sys_id_found.group(1)

            run2 = neptune.init(run=sys_id, project=environment.project)
            assert run2[key].fetch() == val
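
self.SYNCHRONIZED_SYSID_RE is referenced but never defined in this excerpt. Whatever its exact form, the assertions above require it to capture exactly one group: the sys/id the CLI prints for the synchronized run. A hypothetical stand-in, assuming output along the lines of "Offline run ... synced as SAN-123":

    import re

    # hypothetical pattern - the real constant is not in the excerpt; the
    # test only needs a single capture group holding the run's sys/id
    SYNCHRONIZED_SYSID_RE = r"synced as ([A-Z]+-\d+)"

    match = re.search(SYNCHRONIZED_SYSID_RE, "Offline run abc synced as SAN-123")
    assert match is not None and match.group(1) == "SAN-123"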
Example #5
    def test_s3_creation(self, container: AttributeContainer, bucket,
                         environment):
        first, second, prefix = (
            self.gen_key(),
            self.gen_key(),
            f"{environment.project}/{self.gen_key()}/{type(container).__name__}",
        )
        filename = fake.unique.file_name()

        bucket_name, s3_client = bucket

        with tmp_context():
            with open(filename, "w", encoding="utf-8") as handler:
                handler.write(fake.paragraph(nb_sentences=5))

            s3_client.meta.client.upload_file(filename, bucket_name,
                                              f"{prefix}/{filename}")

        container[first].track_files(f"s3://{bucket_name}/{prefix}/{filename}")
        container[second].track_files(f"s3://{bucket_name}/{prefix}")

        container.sync()

        assert container[first].fetch_hash() == container[second].fetch_hash()
        assert (container[first].fetch_files_list() ==
                container[second].fetch_files_list())
Example #6
    def test_hash_cache(self, container: AttributeContainer):
        key = self.gen_key()
        filename = fake.file_name()

        with tmp_context():
            # create 2GB file
            with open(filename, "wb") as handler:
                handler.write(b"\0" * 2 * 2**30)

            # track it
            start = time.time()
            container[key].track_files(".", wait=True)
            initial_duration = time.time() - start

            # and track it again
            start = time.time()
            container[key].track_files(".", wait=True)
            retry_duration = time.time() - start

            assert (retry_duration * 2 < initial_duration
                    ), "Tracking again should be significantly faster"

            # append additional byte to file
            with open(filename, "ab") as handler:
                handler.write(b"\0")

            # and track updated file
            start = time.time()
            container[key].track_files(".", wait=True)
            updated_duration = time.time() - start

            assert (retry_duration * 2 < updated_duration
                    ), "Tracking updated file should take more time - no cache"
Example #7
    def test_local_existing(self, container: AttributeContainer):
        first, second = self.gen_key(), self.gen_key()
        filename, filepath = fake.file_name(), fake.file_path(
            depth=3).lstrip("/")

        with tmp_context() as tmp:
            with open(filename, "w", encoding="utf-8") as handler:
                handler.write(fake.paragraph(nb_sentences=5))

            os.makedirs(Path(filepath).parent, exist_ok=True)
            with open(filepath, "w", encoding="utf-8") as handler:
                handler.write(fake.paragraph(nb_sentences=5))

            # Track both files (filename and filepath) in the first artifact
            container[first].track_files(".")

            # Track only filename in the second artifact
            container[second].track_files(f"file://{tmp}/{filename}")
            container.sync()

            # Add "b" file to existing second artifact
            # so it should be now identical as first
            container[second].track_files(filepath,
                                          destination=str(
                                              Path(filepath).parent))
            container.sync()

        assert container[first].fetch_hash() == container[second].fetch_hash()
        assert (container[first].fetch_files_list() ==
                container[second].fetch_files_list())
Example #8
    def test_assignment(self, container: AttributeContainer):
        first, second = self.gen_key(), self.gen_key()
        filename = fake.unique.file_name()

        with tmp_context():
            with open(filename, "w", encoding="utf-8") as handler:
                handler.write(fake.paragraph(nb_sentences=5))

            container[first].track_files(filename)
            container.wait()
            container[second] = container[first].fetch()
            container.sync()

        assert container[first].fetch_hash() == container[second].fetch_hash()
        assert (container[first].fetch_files_list() ==
                container[second].fetch_files_list())
Example #9
    def test_s3_existing(self, container: AttributeContainer, bucket,
                         environment):
        first, second, prefix = (
            self.gen_key(),
            self.gen_key(),
            f"{environment.project}/{self.gen_key()}/{type(container).__name__}",
        )
        filename, filepath = fake.file_name(), fake.file_path(
            depth=3).lstrip("/")

        bucket_name, s3_client = bucket

        with tmp_context():
            with open(filename, "w", encoding="utf-8") as handler:
                handler.write(fake.paragraph(nb_sentences=5))

            os.makedirs(Path(filepath).parent, exist_ok=True)
            with open(filepath, "w", encoding="utf-8") as handler:
                handler.write(fake.paragraph(nb_sentences=5))

            s3_client.meta.client.upload_file(filename, bucket_name,
                                              f"{prefix}/{filename}")
            s3_client.meta.client.upload_file(filepath, bucket_name,
                                              f"{prefix}/{filepath}")

        # Track both files (filename and filepath) in the first artifact
        container[first].track_files(f"s3://{bucket_name}/{prefix}/")

        # Track only filename in the second artifact
        container[second].track_files(
            f"s3://{bucket_name}/{prefix}/{filename}")
        container.sync()

        # Add "b" file to existing second artifact
        # so it should be now identical as first
        container[second].track_files(
            f"s3://{bucket_name}/{prefix}/{filepath}",
            destination=str(Path(filepath).parent),
        )
        container.sync()

        assert container[first].fetch_hash() == container[second].fetch_hash()
        assert (container[first].fetch_files_list() ==
                container[second].fetch_files_list())
Example #10
    def test_log_images(self, container: AttributeContainer):
        key = self.gen_key()
        # images with sizes between 200KB and 12MB
        images = [generate_image(size=2**n) for n in range(8, 12)]

        container[key].log(images[0])
        container[key].log(images[1:])
        container.sync()

        with tmp_context():
            container[key].download_last("last")
            container[key].download("all")

            with Image.open("last/3.png") as img:
                assert img == image_to_png(image=images[-1])

            for i in range(4):
                with Image.open(f"all/{i}.png") as img:
                    assert img == image_to_png(image=images[i])
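
generate_image and image_to_png are project helpers not included here. Plausible stand-ins, assuming size is the image's side length in pixels and image_to_png round-trips a PIL image through PNG encoding; both bodies are guesses:

    import io

    import numpy
    from PIL import Image

    def generate_image(size):
        # hypothetical helper: a random RGB image, `size` pixels per side
        data = numpy.random.randint(0, 256, (size, size, 3), dtype=numpy.uint8)
        return Image.fromarray(data)

    def image_to_png(image):
        # hypothetical helper: re-encode a PIL image as PNG in memory
        buffer = io.BytesIO()
        image.save(buffer, format="PNG")
        buffer.seek(0)
        return Image.open(buffer)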
Example #11
    def test_single_file(self, container: AttributeContainer, file_size: int):
        key = self.gen_key()
        filename = fake.file_name()
        downloaded_filename = fake.file_name()

        with tmp_context():
            # create a file of the parametrized size (file_size)
            with open(filename, "wb") as file:
                file.write(b"\0" * file_size)
            container[key].upload(filename)

            container.sync()
            container[key].download(downloaded_filename)

            assert os.path.getsize(downloaded_filename) == file_size
            with open(downloaded_filename, "rb") as file:
                content = file.read()
                assert len(content) == file_size
                assert content == b"\0" * file_size
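
file_size arrives as a test argument, which in pytest usually means a parametrized fixture. A hypothetical parametrization covering both a small file and one large enough to exercise multipart upload; the exact values are assumptions:

    import pytest

    @pytest.fixture(params=[100 * 2**10, 10 * 2**20])  # 100KB and 10MB
    def file_size(request):
        return request.param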
Example #12
    def test_sync_project(self, environment):
        with tmp_context() as tmp:
            # with test values
            key = f"{self.gen_key()}-" + "-".join((fake.word() for _ in range(3)))
            original_value = fake.word()
            updated_value = fake.word()

            # init project
            project = neptune.init_project(name=environment.project)

            def get_next_project():
                return neptune.init_project(name=environment.project)

            self._test_sync(
                exp=project,
                get_next_exp=get_next_project,
                path=tmp,
                key=key,
                original_value=original_value,
                updated_value=updated_value,
            )
Example #13
    def test_fileset(self, container: AttributeContainer):
        key = self.gen_key()
        large_filesize = 10 * 2 ** 20
        large_filename = fake.file_name()
        small_files = [
            (f"{uuid.uuid4()}.{fake.file_extension()}", fake.sentence().encode("utf-8"))
            for _ in range(100)
        ]

        with tmp_context():
            # create a single large file (to exercise multipart upload) and many very small files
            with open(large_filename, "wb") as file:
                file.write(b"\0" * large_filesize)
            for filename, contents in small_files:
                with open(filename, "wb") as file:
                    file.write(contents)

            small_filenames = [filename for filename, _ in small_files]
            # make sure there are no duplicates
            assert len({large_filename, *small_filenames}) == len(small_files) + 1

            # when a single file is uploaded as a fileset
            container[key].upload_files([large_filename])

            # then check that it can be downloaded
            container.sync()
            container[key].download("downloaded1.zip")

            with ZipFile("downloaded1.zip") as zipped:
                assert set(zipped.namelist()) == {large_filename, "/"}
                with zipped.open(large_filename, "r") as file:
                    content = file.read()
                    assert len(content) == large_filesize
                    assert content == b"\0" * large_filesize

            # when the small files are uploaded to the fileset
            container[key].upload_files(small_filenames)

            # then check that everything is downloaded
            container.sync()
            container[key].download("downloaded2.zip")

            with ZipFile("downloaded2.zip") as zipped:
                assert set(zipped.namelist()) == {large_filename, "/", *small_filenames}
                with zipped.open(large_filename, "r") as file:
                    content = file.read()
                    assert len(content) == large_filesize
                    assert content == b"\0" * large_filesize
                for filename, expected_content in small_files:
                    with zipped.open(filename, "r") as file:
                        content = file.read()
                        assert len(content) == len(expected_content)
                        assert content == expected_content

            # when the first file is removed
            container[key].delete_files([large_filename])

            # then check that the rest is downloaded
            container.sync()
            container[key].download("downloaded3.zip")

            with ZipFile("downloaded3.zip") as zipped:
                assert set(zipped.namelist()) == {"/", *small_filenames}
                for filename, expected_content in small_files:
                    with zipped.open(filename, "r") as file:
                        content = file.read()
                        assert len(content) == len(expected_content)
                        assert content == expected_content