Example 1
    def test_write_read_records(self, get_tmpdir):
        tmpdir = get_tmpdir
        r = LocalFeedDataRepo(metadata=LocalFeedDataRepoMetadata(
            data_write_dir=tmpdir))
        r.initialize()
        ts = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc)
        r.metadata.download_result = DownloadOperationResult(
            started=ts,
            status=FeedDownloader.State.in_progress.value,
            results=[])

        r.write_data(
            "feed1",
            "group1",
            chunk_id=0,
            data=b'{"next_token": "something", "data": [{"somekey": "somevalue"}]}',
        )
        with timer("Read single record group", log_level="info"):
            found_count = 0
            for i in r.read("feed1", "group1", start_index=0):
                logger.info("Got record {}".format(i))
                found_count += 1
        logger.info("Repo metadata: {}".format(r.metadata))
        assert found_count > 0
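
The timer context manager used in this test is not part of the snippet. A minimal sketch of such a logging timer, assuming only the signature visible at the call site (the body below is hypothetical):

    import contextlib
    import logging
    import time

    logger = logging.getLogger(__name__)

    @contextlib.contextmanager
    def timer(label, log_level="debug"):
        # Hypothetical helper: log the wall-clock duration of the wrapped
        # block under the given label; only the signature is taken from
        # the call site above.
        start = time.monotonic()
        try:
            yield
        finally:
            getattr(logger, log_level)(
                "%s took %.3f seconds", label, time.monotonic() - start
            )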
Example 2
    def __init__(
        self,
        download_root_dir: str,
        config: DownloadOperationConfiguration,
        client: IFeedSource,
        fetch_all: bool = False,
    ):
        """
        :param config: configuration for doing the fetch
        :param client: the client to use to pull data
        :param force_full_flush: if true, ignore last sync timestamps and fetch all data from source
        """
        if not config:
            raise ValueError("Must have non-None config")
        if not download_root_dir:
            raise ValueError("Must have non-None download root directory path")

        self.config = config
        op_dir = os.path.join(download_root_dir, self.config.uuid)
        logger.debug(
            "Initializing downloader for operation {}. Will write to path: {}".format(
                config.uuid, op_dir
            )
        )
        repo_meta = LocalFeedDataRepoMetadata(download_configuration=config,
                                              data_write_dir=op_dir)
        self.local_repo = LocalFeedDataRepo(metadata=repo_meta)

        self.service_client = client
        self.fetch_all = fetch_all
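
The per-operation directory layout built in this constructor is simple: each download operation writes under a subdirectory of download_root_dir named after the configuration's uuid. A small illustration of the same computation (paths and uuid values are hypothetical):

    import os
    import uuid

    # Mirrors the op_dir computation in __init__ above.
    download_root_dir = "/tmp/feed_data"
    operation_uuid = str(uuid.uuid4())
    op_dir = os.path.join(download_root_dir, operation_uuid)
    print(op_dir)  # e.g. /tmp/feed_data/1b9d6bcd-bbfd-4b2d-9b5d-ab8dfbbd4bed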
Example 3
 def test_initialize(self, get_tmpdir):
     tmpdir = get_tmpdir
     r = LocalFeedDataRepo(metadata=LocalFeedDataRepoMetadata(
         data_write_dir=tmpdir))
     assert os.listdir(tmpdir) == []
     r.initialize()
     assert os.listdir(tmpdir) == ["metadata.json"]
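
The test pins down initialize()'s contract: after the call, data_write_dir exists and contains exactly metadata.json. A sketch consistent with that behavior, assuming LocalFeedDataRepoMetadata has a to_json() counterpart to the from_json() shown in Example 4 (not the actual implementation):

    import json
    import os

    def initialize(self):
        # Inferred from the test: create the write directory and flush an
        # initial metadata.json into it.
        os.makedirs(self.metadata.data_write_dir, exist_ok=True)
        with open(os.path.join(self.metadata.data_write_dir, "metadata.json"), "w") as f:
            json.dump(self.metadata.to_json(), f)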
Example 4
    def reload_metadata(self):
        """
        Re-loads the metadata from the disk to the local instance, overwriting any local value in memory

        :return:
        """
        with open(self.metadata_file_path) as f:
            self.metadata = LocalFeedDataRepoMetadata.from_json(json.load(f))
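
The tests also call flush_metadata(), the inverse of this method, which is not shown in the snippet. A sketch, assuming to_json() is symmetric with the from_json() used here:

    import json

    def flush_metadata(self):
        # Hypothetical inverse of reload_metadata(): serialize the in-memory
        # metadata back to metadata_file_path.
        with open(self.metadata_file_path, "w") as f:
            json.dump(self.metadata.to_json(), f)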
Example 5
 def test_teardown(self, get_tmpdir):
     tmpdir = get_tmpdir
     r = LocalFeedDataRepo(metadata=LocalFeedDataRepoMetadata(
         data_write_dir=tmpdir))
     r.initialize()
     assert os.path.isdir(tmpdir)
     r.teardown()
     assert not os.path.isdir(tmpdir)
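
The assertions imply that teardown() removes the whole write directory, metadata and chunk files included. A one-method sketch of that contract (assumed, not the actual source):

    import shutil

    def teardown(self):
        # Inferred from the test: recursively delete data_write_dir.
        shutil.rmtree(self.metadata.data_write_dir)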
Example 6
 def test_write_read_files(self, get_tmpdir, get_file):
     """
     Test writing chunks of binary data to LocalFeedDataRepo and reading using LocalFeedDataRepo.read_files()
     """
     tmpdir = get_tmpdir
     r = LocalFeedDataRepo(metadata=LocalFeedDataRepoMetadata(
         data_write_dir=tmpdir))
     r.initialize()
     ts = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc)
     meta = GroupDownloadResult(
         feed="feed1",
         group="group1",
         total_records=0,
         status=FeedDownloader.State.in_progress.value,
         started=datetime.datetime.utcnow(),
         group_metadata={},
     )
     r.metadata.download_result = DownloadOperationResult(
         started=ts,
         status=FeedDownloader.State.in_progress.value,
         results=[meta],
     )
     expected_output = []
     for chunk_number in range(2):
         with open(get_file(f"{chunk_number}.data"), "rb") as f:
             binary_content = f.read()
             expected_output.append(binary_content)
         group_metadata = {str(chunk_number): {"test_value": str(chunk_number)}}
         r.write_data(
             "feed1",
             "group1",
             chunk_id=chunk_number,
             data=binary_content,
         )
         meta.total_records += 1
         meta.group_metadata.update(group_metadata)
     meta.status = FeedDownloader.State.complete.value
     meta.ended = datetime.datetime.utcnow()
     r.flush_metadata()
     r.reload_metadata()
     assert r.metadata.download_result.results[0].total_records == 2
     assert all(
         x in r.metadata.download_result.results[0].group_metadata
         for x in ["0", "1"]
     )
     found_count = 0
     for idx, file_data in enumerate(r.read_files("feed1", "group1")):
         found_count += 1
         assert isinstance(file_data, FileData)
         assert file_data.data == expected_output[idx]
         assert str(idx) in meta.group_metadata
         assert file_data.metadata["test_value"] == str(idx)
     assert found_count == 2
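
The test only touches two attributes of FileData: the raw bytes in data and the per-chunk metadata dict. A plausible minimal shape, limited to what the assertions exercise (the real class may carry more fields):

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class FileData:
        # Hypothetical minimal shape covering only what the test asserts on.
        data: bytes
        metadata: Optional[dict] = None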
Example 7
 @classmethod
 def from_disk(cls, path):
     """
     Create a new repo instance from an existing repo on disk, loading its metadata.

     :param path: path to the repo directory on disk
     :return: a LocalFeedDataRepo with metadata loaded from disk
     """
     r = LocalFeedDataRepo(metadata=LocalFeedDataRepoMetadata(
         data_write_dir=path))
     r.reload_metadata()
     return r
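
A short usage example for from_disk, assuming the path was previously populated by initialize() and flush_metadata() (the path itself is hypothetical):

    # Reopen a repo written by an earlier download operation; the directory
    # must already contain a metadata.json for reload_metadata() to load.
    repo = LocalFeedDataRepo.from_disk("/tmp/feed_data/some-operation-uuid")
    print(repo.metadata.download_result)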
Example 8
 def test_metadata_flush_reload(self, get_tmpdir):
     tmpdir = get_tmpdir
     r = LocalFeedDataRepo(metadata=LocalFeedDataRepoMetadata(
         data_write_dir=tmpdir))
     r.initialize()
     ts = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc)
     r.metadata.download_result = DownloadOperationResult(
         started=ts,
         status=FeedDownloader.State.in_progress.value,
         results=[])
     r.flush_metadata()
     r.metadata = None
     r.reload_metadata()
     assert r.metadata.download_result.started == ts
     assert (r.metadata.download_result.status ==
             FeedDownloader.State.in_progress.value)