예제 #1
0
def register():
    """Register raw single-phase ELVO NPY scans stored in GCS.

    Creates the target dataset on the platform, then registers every
    ``.npy`` blob under ``elvo/raw/numpy/`` in the ``elvo-platform``
    bucket as a 3D sample (metadata registration only — no upload).
    """
    platform_client = PlatformClient(API_SERVER, EMAIL, PASSWORD)
    platform_client.create_dataset(
        DATASET_NAME, description="Raw singlephase ELVO scans in NPY form.")

    bucket = storage.Client().get_bucket('elvo-platform')
    for blob in bucket.list_blobs(prefix='elvo/raw/numpy/'):
        # Skip anything that is not an NPY array (e.g. directory markers).
        if not blob.name.endswith('.npy'):
            continue

        # Sample name is the file's base name without the .npy suffix.
        sample_name = blob.name.rsplit('/', 1)[-1][:-len('.npy')]
        gcs_url = f'gs://{bucket.name}/{blob.name}'
        print(f"Registering sample={sample_name} with url={gcs_url}",
              flush=True)

        start = time.time()
        registered = platform_client.register_sample(sample_name,
                                                     DATASET_NAME,
                                                     gcs_url,
                                                     image_type='3D')
        elapsed = time.time() - start
        if registered:
            print(f"Registered {sample_name} in {elapsed} seconds")
        else:
            print(f"Found {sample_name} exists in {elapsed} seconds")
예제 #2
0
def load_individual():
    """Load the CIFAR-10 batch files as individual numpy-array samples.

    Creates the dataset, then spawns one worker thread per batch file
    (five training batches followed by the single test batch), each
    running ``_load_individual``, and waits for all of them to finish.
    """
    client = PlatformClient(API_SERVER, EMAIL, PASSWORD)
    client.create_dataset(
        DATASET_INDIVIDUAL,
        description="Individual CIFAR-10 numpy arrays",
    )

    # (filename, split) pairs: training batches first, then the test batch.
    jobs = [(f'data_batch_{i}', 'training') for i in range(1, 6)]
    jobs.append(('test_batch', 'test'))

    workers = []
    for filename, split in jobs:
        worker = threading.Thread(target=_load_individual,
                                  args=(client, filename, split))
        worker.start()
        workers.append(worker)

    for worker in workers:
        worker.join()
예제 #3
0
def load():
    """Register raw multiphase ELVO NPZ scans from the research share.

    Walks the v1.0 dataset directory and registers each ``.npz`` file as
    a 3D sample.  The label is derived from the filename prefix: files
    starting with ``'P'`` are 'positive', everything else 'negative'.
    Registration is metadata-only; the data stays at ``DATA_PREFIX``.
    """
    client = PlatformClient(API_SERVER, EMAIL, PASSWORD)

    client.create_dataset(DATASET_NAME,
                          description="Raw multiphase ELVO scans in NPZ form.")

    # Renamed from 'dir' to avoid shadowing the builtin.
    dirpath: str
    files: List[str]
    for dirpath, _, files in os.walk(
            '/research/rih-cs/datasets/elvo-multiphase/v1.0'):
        for file in files:
            # Guard clause instead of nesting the whole body in an if.
            if not file.endswith('.npz'):
                continue
            # NOTE(review): split('.')[0] truncates at the FIRST dot, so a
            # name like 'P1.2.npz' would lose '.2' — confirm names are
            # single-dotted.
            sample_name = file.split('.')[0]
            label = 'positive' if file.startswith('P') else 'negative'
            data_url = f'{DATA_PREFIX}/{file}'
            print(f"Registering sample={sample_name} with"
                  f" label={label} and url={data_url}", flush=True)
            start = time.time()
            success = client.register_sample(
                sample_name,
                DATASET_NAME,
                data_url=data_url,
                image_type='3D',
                label=label,
            )
            end = time.time()
            if success:
                print(f"Registered {file} in {end - start} seconds")
            else:
                print(f"Found {file} exists in {end - start} seconds")
예제 #4
0
def load_batches():
    """Upload the pickled CIFAR-10 batch files to the platform.

    Creates the dataset, then uploads the five training batches and the
    single test batch from the local ``cifar-10-batches-py/`` directory,
    tagging each sample with its split.
    """
    client = PlatformClient(API_SERVER, EMAIL, PASSWORD)
    client.create_dataset(
        DATASET_BATCHES,
        description="CIFAR-10 batches from"
        " http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz")

    training_files = [
        'data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4',
        'data_batch_5'
    ]
    test_files = ['test_batch']

    # The two original loops were identical except for the split name;
    # the shared body now lives in _upload_batch.
    for filename in training_files:
        _upload_batch(client, filename, 'training')
    for filename in test_files:
        _upload_batch(client, filename, 'test')


def _upload_batch(client, filename, split_class):
    """Upload one local CIFAR-10 batch file under the given split."""
    filepath = os.path.join('cifar-10-batches-py/', filename)
    print(f'Loading {split_class} file', filepath)
    start = time.time()
    with open(filepath, 'rb') as f:
        client.create_sample(
            filename,
            DATASET_BATCHES,
            # NOTE(review): '(unknown).pkl' looks like a redacted
            # placeholder — likely meant the batch filename; confirm.
            data_url=f'gs://elvo-platform/test/platform/data'
            f'/{DATASET_BATCHES}/(unknown).pkl',
            data_content=f,
            split=split_class)
    end = time.time()
    print(f'Took {end - start} seconds', flush=True)
예제 #5
0
def register_mnist_az():
    """Register MNIST PNG samples stored on Azure with the platform.

    Walks the local ``mnist_png`` tree (which mirrors the Azure blob
    layout) and registers every ``.png`` file.  The label comes from the
    innermost directory name and the split from the first-level
    directory.  Registration is metadata-only (``validate=False``).
    """
    client = PlatformClient(API_SERVER, EMAIL, PASSWORD)
    dataset_name = 'mnist-az'  # was an f-string with no placeholders (F541)

    client.create_dataset(dataset_name,
                          description="MNIST on Azure in PNG form.")

    # Renamed from 'dir' to avoid shadowing the builtin.
    dirname: str
    files: List[str]
    for dirname, _, files in os.walk('mnist_png'):
        for file in files:
            # Guard clause instead of nesting the whole body in an if.
            if not file.endswith('.png'):
                continue
            # NOTE(review): splitting on '/' assumes POSIX path separators
            # from os.walk — confirm this only runs on Unix.
            label = dirname.split('/')[-1]
            # Split name is the second path component — presumably
            # 'training'/'testing'; verify against the directory layout.
            split = dirname.split('/')[1]
            start = time.time()
            sample_name = f"{file.split('.')[0]}-{split}"
            new_dir = dirname.replace('mnist_png', 'data')
            data_url = f'az://ml-platform/{new_dir}/{file}'
            print(
                f"Registering {sample_name} with label {label},"
                f" split {split}, and data_url {data_url}",
                flush=True)
            ret = client.register_sample(
                sample_name,
                dataset_name,
                data_url=data_url,
                validate=False,
                label=label,
                split=split,
            )
            end = time.time()
            if ret:
                print(f"REGISTERED: processed {file}"
                      f" in {end - start} seconds")
            else:
                print(f"ALREADY EXISTS: processed {file}"
                      f" in {end - start} seconds")
예제 #6
0
import pathlib
from blueno import PlatformClient

API_SERVER = ''
EMAIL = ''
PASSWORD = ''
DATASET = ''

if __name__ == '__main__':
    client = PlatformClient(API_SERVER, EMAIL, PASSWORD)
    print(f'Creating dataset: {DATASET}')
    client.create_dataset(DATASET,
                          description='First version of the multiphase'
                          ' segmentation data')

    root_dir = pathlib.Path(
        '/research/rih-cs/datasets/elvo-multiphase/segmentation_data')
    for dirpath in root_dir.iterdir():
        for filepath in dirpath.iterdir():
            if filepath.name.endswith('.jpg'):
                sample_name = filepath.name[:-len('.jpg')]
                label = sample_name[0]  # either 'P' or 'N'
                url = f'gs://elvo-platform/multiphase/processed' \
                    f'/{DATASET}/{filepath.name}'
                print(f'Uploading sample {sample_name} with label {label} to'
                      f' {url} from {str(filepath)}')
                with open(filepath, 'rb') as f:
                    client.create_sample(sample_name,
                                         DATASET,
                                         data_url=url,
                                         data_content=f,
예제 #7
0
def client():
    """Build a PlatformClient with retries disabled (fail fast).

    Presumably used as a test fixture — confirm against the callers.
    """
    # Disable retrying on the class before constructing the instance.
    PlatformClient.retry_limit = 0
    instance = PlatformClient(API_SERVER, EMAIL, PASSWORD)
    return instance