Ejemplo n.º 1
0
    def __init__(self,
                 download_root_dir: str,
                 config: DownloadOperationConfiguration,
                 client: IFeedSource,
                 fetch_all: bool = False):
        """
        Initialize a downloader that writes feed data under a per-operation directory.

        :param download_root_dir: base directory; a per-operation subdirectory named by config.uuid is created under it
        :param config: configuration for doing the fetch
        :param client: the client to use to pull data
        :param fetch_all: if True, ignore last sync timestamps and fetch all data from source
        :raises ValueError: if config or download_root_dir is falsy
        """
        if not config:
            raise ValueError('Must have non-None config')
        if not download_root_dir:
            raise ValueError('Must have non-None download root directory path')

        self.config = config
        # Each download operation gets its own uuid-named subdirectory
        op_dir = os.path.join(download_root_dir, self.config.uuid)
        # Lazy %-style args: message is only formatted if debug logging is enabled
        logger.debug(
            'Initializing downloader for operation %s. Will write to path: %s',
            config.uuid, op_dir)
        repo_meta = LocalFeedDataRepoMetadata(download_configuration=config,
                                              data_write_dir=op_dir)
        self.local_repo = LocalFeedDataRepo(metadata=repo_meta)

        self.service_client = client
        self.fetch_all = fetch_all
Ejemplo n.º 2
0
    def reload_metadata(self):
        """
        Re-loads the metadata from the disk to the local instance, overwriting any local value in memory

        :return:
        """
        with open(self.metadata_file_path) as meta_file:
            parsed = json.load(meta_file)
        self.metadata = LocalFeedDataRepoMetadata.from_json(parsed)
Ejemplo n.º 3
0
 def from_disk(cls, path):
     """
     Create a new repo instance from an existing one on disk, loading metadata

     :param path: directory containing an initialized repo (where metadata can be reloaded from)
     :return: an instance of cls with metadata loaded from disk
     """
     # Construct via cls rather than hard-coding LocalFeedDataRepo so that
     # subclasses calling from_disk() get an instance of the subclass.
     repo = cls(metadata=LocalFeedDataRepoMetadata(data_write_dir=path))
     repo.reload_metadata()
     return repo
Ejemplo n.º 4
0
def test_LocalFeedDataRepo():
    """Exercise LocalFeedDataRepo: init, metadata flush/reload, and a write/read round-trip."""
    repo_dir = tempfile.mkdtemp(prefix="anchoretest_repo-")
    repo = LocalFeedDataRepo(metadata=LocalFeedDataRepoMetadata(data_write_dir=repo_dir))
    try:
        # Directory stays empty until the repo is initialized
        assert os.listdir(repo_dir) == []

        repo.initialize()
        assert os.listdir(repo_dir) == ["metadata.json"]

        # Record an in-progress download result, persist it, drop the in-memory
        # copy, then reload and confirm the values survived the round-trip.
        started_at = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc)
        repo.metadata.download_result = DownloadOperationResult(
            started=started_at,
            status=FeedDownloader.State.in_progress.value,
            results=[],
        )
        repo.flush_metadata()
        repo.metadata = None
        repo.reload_metadata()
        assert repo.metadata.download_result.started == started_at
        assert (
            repo.metadata.download_result.status
            == FeedDownloader.State.in_progress.value
        )

        repo.write_data(
            "feed1",
            "group1",
            chunk_id=0,
            data=b'{"next_token": "something", "data": [{"somekey": "somevalue"}]}',
        )

        # Reading the group back should produce at least one record
        with timer("Read single record group", log_level="info"):
            records_seen = 0
            for record in repo.read("feed1", "group1", start_index=0):
                logger.info("Got record {}".format(record))
                records_seen += 1

        logger.info("Repo metadata: {}".format(repo.metadata))

        assert records_seen > 0

    finally:
        logger.info("Done with repo test")
        repo.teardown()