Ejemplo n.º 1
0
def main():
    """Merge the COCO "train" and "val" splits and store them as one dataset.

    Command-line arguments (all positional, all optional):
        P: path to the COCO dataset (default ``./data/COCOdataset2017``).
        N: output path for the stored dataset (default ``COCOdataset2017``).
        Y: dataset year (default ``2017``).

    The per-split sample counts are printed, the splits are concatenated in
    the order train, val, and the total wall-clock time is logged in minutes.
    """
    t1 = time.time()

    # Parse arguments before starting any workers so that ``--help`` and
    # malformed command lines exit without paying the hub start-up cost.
    parser = argparse.ArgumentParser()
    # nargs="?" is required for ``default`` to take effect on a positional
    # argument; without it argparse treats the argument as mandatory and
    # the default is never used.
    parser.add_argument(
        "dataset_path",
        metavar="P",
        type=str,
        nargs="?",
        help="Path to coco2017 dataset",
        default="./data/COCOdataset2017",
    )
    parser.add_argument(
        "output_path",
        metavar="N",
        type=str,
        nargs="?",
        help="Dataset output path",
        default="COCOdataset2017",
    )
    parser.add_argument("year", metavar="Y", type=str, nargs="?", default="2017")
    args = parser.parse_args()

    hub.init(processes=True, n_workers=psutil.cpu_count(), memory_limit=55e9)

    tags = ["train", "val"]
    splits = {tag: load_dataset(args, tag) for tag in tags}
    for tag, split in splits.items():
        print(f"{tag}: {len(split)} samples")

    merged = dataset.concat([splits[tag] for tag in tags])
    merged.store(args.output_path)

    t2 = time.time()
    logger.info(f"Pipeline took {(t2 - t1) / 60} minutes")
Ejemplo n.º 2
0
def main():
    """Initialise the hub from a named configuration and run it.

    Accepts an optional ``-c`` / ``--config`` command-line option (default:
    empty string) which is passed to ``hub.init`` as ``configName``. The
    listeners configured under the ``hub`` / ``listeners`` setting are then
    started before control is handed to ``hub.run()``.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "-c",
        "--config",
        action="store",
        default='',
        help="configuration name, used to find subdirectory in $root/config/",
    )
    options = arg_parser.parse_args()

    hub.init(configName=options.config)

    # The listener list is read only after hub.init, as in the original flow.
    listeners = CPL.cfg.get('hub', 'listeners', doFlush=True)
    startAllListeners(listeners)
    hub.run()
Ejemplo n.º 3
0
        "download": t3 - t2,
        "write_to_fs": t1 - t0,
    }


def upload_and_download(samples=30, chunksize=None, name="hub"):
    """
    Uploads a generated dataset to the bucket and then downloads it using
    the hub package, timing both transfers.

    Args:
        samples: leading dimension of each of the two generated
            ``(samples, 256, 256)`` arrays.
        chunksize: chunk size forwarded to ``generate_dataset``. ``None``
            falls back to 1, the value that was previously hard-coded.
        name: label stored under the ``"name"`` key of the result.

    Returns:
        dict with keys ``"name"``, ``"upload"`` and ``"download"``; the two
        timings are wall-clock seconds measured with ``time.time()``.
    """
    # Previously the ``chunksize`` argument was accepted but ignored; honor
    # it while keeping the old behavior for callers that pass nothing.
    effective_chunksize = 1 if chunksize is None else chunksize
    ds = generate_dataset([(samples, 256, 256), (samples, 256, 256)],
                          chunksize=effective_chunksize)
    t1 = time.time()
    # store() returns the stored dataset, which is then stored again to the
    # local filesystem to measure the download direction.
    ds = ds.store(f"{BUCKET}/transfer/upload")
    t2 = time.time()
    ds.store("/tmp/download")
    t3 = time.time()
    return {"name": name, "upload": t2 - t1, "download": t3 - t2}


if __name__ == "__main__":
    # Benchmark entry point: run the hub transfer and the AWS CLI copy with
    # the same parameters, then report both results together.
    import hub

    samples = 64
    chunksize = None

    hub.init(processes=True, n_workers=8, threads_per_worker=1)

    # List elements are evaluated left to right, so the hub transfer still
    # runs before the AWS CLI copy.
    results = [
        upload_and_download(samples, chunksize=chunksize),
        aws_cli_copy(samples, chunksize=chunksize),
    ]
    report(results)