def __init__(self, download_root_dir: str, config: DownloadOperationConfiguration, client: IFeedSource, fetch_all: bool = False):
    """
    Initialize a downloader that writes feed data for one download operation
    into a per-operation subdirectory of the given root directory.

    :param download_root_dir: base directory under which a per-operation
        directory (named by the config's uuid) will be created for writes
    :param config: configuration for doing the fetch
    :param client: the client to use to pull data
    :param fetch_all: if true, ignore last sync timestamps and fetch all data from source
    """
    if not config:
        raise ValueError('Must have non-None config')
    if not download_root_dir:
        raise ValueError('Must have non-None download root directory path')
    self.config = config
    # Each operation gets its own directory keyed by the operation uuid
    op_dir = os.path.join(download_root_dir, self.config.uuid)
    logger.debug(
        'Initializing downloader for operation {}. Will write to path: {}'.
        format(config.uuid, op_dir))
    repo_meta = LocalFeedDataRepoMetadata(download_configuration=config,
                                          data_write_dir=op_dir)
    self.local_repo = LocalFeedDataRepo(metadata=repo_meta)
    self.service_client = client
    self.fetch_all = fetch_all
def reload_metadata(self):
    """
    Refresh this instance's metadata from its on-disk copy.

    Any metadata currently held in memory is discarded and replaced with
    the deserialized contents of the metadata file.

    :return:
    """
    with open(self.metadata_file_path) as metadata_file:
        raw = json.load(metadata_file)
    self.metadata = LocalFeedDataRepoMetadata.from_json(raw)
def from_disk(cls, path):
    """
    Create a new repo instance from an existing one on disk, loading metadata.

    Uses ``cls`` (rather than a hard-coded class) so subclasses calling this
    alternate constructor get an instance of the subclass.

    :param path: path to the existing repo's data directory on disk
    :return: a repo instance with metadata loaded from the metadata file
    """
    # Instantiate via cls so subclass callers are not silently downcast
    r = cls(metadata=LocalFeedDataRepoMetadata(data_write_dir=path))
    r.reload_metadata()
    return r
def test_LocalFeedDataRepo():
    """Exercise the LocalFeedDataRepo lifecycle end-to-end.

    Covers: initialize (metadata file creation), metadata flush/reload
    round-trip, chunked data write, and reading records back out.
    """
    workdir = tempfile.mkdtemp(prefix="anchoretest_repo-")
    repo = LocalFeedDataRepo(metadata=LocalFeedDataRepoMetadata(data_write_dir=workdir))
    try:
        # Fresh directory: nothing on disk until initialize() runs
        assert os.listdir(workdir) == []

        repo.initialize()
        assert os.listdir(workdir) == ["metadata.json"]

        # Record an in-progress download, persist it, then round-trip it
        started_at = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc)
        repo.metadata.download_result = DownloadOperationResult(
            started=started_at,
            status=FeedDownloader.State.in_progress.value,
            results=[],
        )
        repo.flush_metadata()
        repo.metadata = None
        repo.reload_metadata()
        assert repo.metadata.download_result.started == started_at
        assert (
            repo.metadata.download_result.status
            == FeedDownloader.State.in_progress.value
        )

        # Write one chunk, then verify at least one record reads back
        repo.write_data(
            "feed1",
            "group1",
            chunk_id=0,
            data=b'{"next_token": "something", "data": [{"somekey": "somevalue"}]}',
        )
        with timer("Read single record group", log_level="info"):
            records_seen = 0
            for record in repo.read("feed1", "group1", start_index=0):
                logger.info("Got record {}".format(record))
                records_seen += 1
        logger.info("Repo metadata: {}".format(repo.metadata))
        assert records_seen > 0
    finally:
        logger.info("Done with repo test")
        repo.teardown()