def upload_and_download():
    filepath = "testdata/alertfiles/ztf_public_20210120.tar.gz"
    bucket = "ztf-alert-archive-prototyping-tmp"

    n_alerts = 500
    n_uploaders = 6
    n_downloaders = 6
    max_s3_concurrency = 20

    bs = Blobstore(bucket, max_concurrency=max_s3_concurrency)
    with mp.Manager() as manager:
        alerts_from_disk = CloseableQueue(manager.Event(), mp.Queue(100))
        uploaded_alert_urls = CloseableQueue(manager.Event(), mp.Queue(100))

        # Read files from the tarball
        reader_proc = mp.Process(
            target=read_alerts,
            name="DiskRead",
            args=(filepath, alerts_from_disk, n_alerts),
        )

        # Upload them to S3
        uploader_procs = []
        for i in range(n_uploaders):
            upload_proc = mp.Process(
                target=upload_alerts,
                name=f"Upload-{i}",
                args=(bs, alerts_from_disk, uploaded_alert_urls),
            )
            upload_proc.start()
            uploader_procs.append(upload_proc)

        # Download them back down
        downloader_procs = []
        for i in range(n_downloaders):
            download_proc = mp.Process(
                target=redownload_alerts,
                name=f"Download-{i}",
                args=(bs, uploaded_alert_urls),
            )
            download_proc.start()
            downloader_procs.append(download_proc)

        reader_proc.start()

        # Wait for everything to be read
        reader_proc.join()
        logging.debug("done reading, waiting for uploads")
        logging.debug("waiting for upload processes to exit")
        for p in uploader_procs:
            p.join()
        logging.debug("waiting for download processes to exit")
        for p in downloader_procs:
            p.join()
        logging.debug("done with shutdown")
def upload_and_download():
    filepath = "testdata/alertfiles/ztf_public_20210120.tar.gz"
    bucket = "ztf-alert-archive-prototyping-tmp"

    n_alerts = 500
    n_uploaders = 12
    n_downloaders = 12
    max_s3_concurrency = 20

    bs = Blobstore(bucket, max_concurrency=max_s3_concurrency)

    alerts_from_disk = CloseableQueue(1000)
    uploaded_alert_urls = CloseableQueue(1000)

    # Read files from the tarball
    reader_thread = threading.Thread(
        target=read_alerts,
        name="DiskRead",
        args=(filepath, alerts_from_disk, n_alerts),
    )

    # Upload them to S3
    uploader_threads = []
    for i in range(n_uploaders):
        upload_thread = threading.Thread(
            target=upload_alerts,
            name=f"Upload-{i}",
            args=(bs, alerts_from_disk, uploaded_alert_urls),
        )
        upload_thread.start()
        uploader_threads.append(upload_thread)

    # Download them back down
    downloader_threads = []
    for i in range(n_downloaders):
        download_thread = threading.Thread(
            target=redownload_alerts,
            name=f"Download-{i}",
            args=(bs, uploaded_alert_urls),
        )
        download_thread.start()
        downloader_threads.append(download_thread)

    reader_thread.start()

    # Wait for everything to be read
    reader_thread.join()
    logging.debug("done reading, waiting for uploads")
    logging.debug("waiting for upload threads to exit")
    for thread in uploader_threads:
        thread.join()
    logging.debug("waiting for download threads to exit")
    for thread in downloader_threads:
        thread.join()
    logging.debug("done with shutdown")
def redownload_alerts(bs: Blobstore, urls: queue.Queue):
    while True:
        try:
            url = urls.get()
        except QueueClosed:
            logging.debug(f"download queue complete, exiting")
            return
        logging.debug(f"download {url} start")
        start = time.monotonic()
        alert = bs.download_alert(url)
        logging.debug(f"got alert back out: {alert.candidate_id} - took {time.monotonic() - start}")
Example #4
async def upload_and_download():
    filepath = "testdata/alertfiles/ztf_public_20210120.tar.gz"
    bucket = "ztf-alert-archive-prototyping-tmp"
    bs = Blobstore(bucket)
    iterator = iterate_tarfile(filepath)

    tasks = []
    for alert in iterator:
        print(f"file read: {alert.candidate_id}")
        tasks.append(bs.upload_alert_async(alert))

    urls = await asyncio.gather(*tasks)
    tasks = []
    for url in urls:
        print(f"uploaded to: {url}")
        tasks.append(bs.download_alert_async(url))

    downloads = await asyncio.gather(*tasks)
    for download in downloads:
        print(f"downloaded: {download.candidate_id}")
def upload_alerts(bs: Blobstore, alerts: queue.Queue, urls: queue.Queue):
    while True:
        try:
            alert = alerts.get()
        except QueueClosed as e:
            # QueueClosed (as opposed to a merely empty queue) means the
            # producer is done, not just slow, so we can exit.
            logging.debug(f"upload queue complete, exiting: {e}")
            break
        start = time.monotonic()
        url = bs.upload_alert(alert)
        logging.debug(f"upload {url} done - took {time.monotonic() - start}")
        urls.put(url)
    urls.close()
async def upload_and_download():
    filepath = "testdata/alertfiles/ztf_public_20210120.tar.gz"
    bucket = "ztf-alert-archive-prototyping-tmp"

    n_alerts = 500
    n_uploaders = 8
    n_downloaders = 8
    max_s3_concurrency = 16

    bs = Blobstore(bucket, max_concurrency=max_s3_concurrency)

    alerts_from_disk = asyncio.Queue(1000)
    uploaded_alert_urls = asyncio.Queue(1000)
    all_alerts_read = asyncio.Event()

    # Read files from the tarball
    reader_task = asyncio.create_task(
        read_alerts(filepath, alerts_from_disk, all_alerts_read, n_alerts))

    # Upload them to S3
    uploader_tasks = []
    for i in range(n_uploaders):
        upload_coroutine = upload_alerts(bs, alerts_from_disk,
                                         uploaded_alert_urls)
        uploader_tasks.append(asyncio.create_task(upload_coroutine))

    # Download them back down
    downloader_tasks = []
    for i in range(n_downloaders):
        download_coroutine = redownload_alerts(bs, uploaded_alert_urls)
        downloader_tasks.append(asyncio.create_task(download_coroutine))

    # Wait for everything to be read
    await all_alerts_read.wait()
    print("done reading, waiting for uploads")
    # Wait for everything to be uploaded
    await alerts_from_disk.join()
    print("done uploading, waiting for downloads")
    # Wait for everything to be redownloaded
    await uploaded_alert_urls.join()
    print("done downloading")
    # Shut down running tasks
    print("canceling uploads")
    for task in uploader_tasks:
        task.cancel()
    print("canceling downloads")
    for task in downloader_tasks:
        task.cancel()
    print("done with shutdown")
Example #7
async def test_concurrency_limit_exceeded(alert_record):
    """
    Try to create 3 sessions with a concurrency limit of 2.
    """
    bs = Blobstore("region", "bucket", 2)
    session1 = await bs.session()
    await session1.__aenter__()

    session2 = await bs.session()
    await session2.__aenter__()

    session3 = await bs.session()
    with pytest.raises(asyncio.TimeoutError):
        await asyncio.wait_for(session3.__aenter__(), 0.1)

    await session2.__aexit__(None, None, None)
    await session3.__aenter__()
    await session3.__aexit__(None, None, None)
    await session1.__aexit__(None, None, None)
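The behaviour this test expects (a third session blocks until one of the first two exits) is what you would get from guarding each session with an asyncio.Semaphore sized by the concurrency limit. A sketch of that idea, using a hypothetical LimitedSession class rather than the library's actual implementation:

import asyncio


class LimitedSession:
    """Async context manager holding one slot of a shared semaphore."""

    def __init__(self, semaphore: asyncio.Semaphore):
        self._semaphore = semaphore

    async def __aenter__(self):
        await self._semaphore.acquire()
        return self

    async def __aexit__(self, exc_type, exc, tb):
        self._semaphore.release()
        return False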
Example #8
    def __init__(
        self,
        s3_region: str,
        bucket: str,
        db_path: Union[pathlib.Path, str],
        create_if_missing: bool = False,
    ):
        """Legacy constructor."""
        self.db_path = pathlib.Path(db_path)
        self.index = IndexDB(db_path, create_if_missing)
        self.blobstore = Blobstore(s3_region, bucket)

        meta_path = Database._meta_path(db_path)
        if meta_path.exists():
            # Reuse metadata persisted by a previous run.
            with open(meta_path, "r") as f:
                self.meta = DBMeta.read_from_file(f)
        else:
            # First run against this index: build the metadata from scratch.
            self.meta = DBMeta(bucket, s3_region)
            self.meta.compute_keyranges(self.index)
        # Write the (possibly freshly computed) metadata back to disk.
        with open(meta_path, "w") as f:
            self.meta.write_to_file(f)
Example #9
async def test_session_urls(alert_record):
    bs = Blobstore("region", "bucket", 2)
    async with await bs.session() as session:
        url = session.url_for(alert_record)
        assert url == "s3://bucket/alerts/v2/1/cid"
Example #10
def test_create_blobstore():
    Blobstore("region", "bucket", 2)
Example #11
def blobstore(s3_server, s3_bucket):
    # Create a blobstore backed by bucket and server.
    bs = Blobstore("us-west-2", s3_bucket, 1)
    bs._endpoint = s3_server
    return bs
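blobstore above looks like a pytest fixture (its decorator is not visible in this snippet). A hypothetical round-trip test using it together with the alert_record fixture from the earlier examples might look like:

def test_upload_then_download(blobstore, alert_record):
    # Round-trip a single alert through the bucket behind the fixture.
    url = blobstore.upload_alert(alert_record)
    alert = blobstore.download_alert(url)
    assert alert.candidate_id == alert_record.candidate_id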