def register():
    """Register every raw single-phase ELVO ``.npy`` scan stored in GCS.

    Creates (or reuses) ``DATASET_NAME`` on the platform, then walks the
    ``elvo/raw/numpy/`` prefix of the ``elvo-platform`` bucket and
    registers each ``.npy`` blob as a 3D sample pointing at its gs:// URL.
    """
    client = PlatformClient(API_SERVER, EMAIL, PASSWORD)
    client.create_dataset(
        DATASET_NAME,
        description="Raw singlephase ELVO scans in NPY form.")

    storage_client = storage.Client()
    bucket = storage_client.get_bucket('elvo-platform')

    blob: storage.Blob
    for blob in bucket.list_blobs(prefix='elvo/raw/numpy/'):
        # Only NumPy array files are samples; skip anything else.
        if not blob.name.endswith('.npy'):
            continue
        blob_filename = blob.name.split('/')[-1]
        sample_name = blob_filename[:-len('.npy')]
        gcs_url = f'gs://{bucket.name}/{blob.name}'
        print(f"Registering sample={sample_name} with url={gcs_url}",
              flush=True)
        start = time.time()
        success = client.register_sample(sample_name, DATASET_NAME, gcs_url,
                                         image_type='3D')
        end = time.time()
        # register_sample reports False when the sample already exists.
        if success:
            print(f"Registered {sample_name} in {end - start} seconds")
        else:
            print(f"Found {sample_name} exists in {end - start} seconds")
def load_individual():
    """Load individual CIFAR-10 numpy arrays, one uploader thread per file.

    Creates ``DATASET_INDIVIDUAL`` and then fans ``_load_individual`` out
    across the five training batch files ('training' split) and the single
    test batch file ('test' split), joining all threads before returning.
    """
    client = PlatformClient(API_SERVER, EMAIL, PASSWORD)
    client.create_dataset(
        DATASET_INDIVIDUAL,
        description="Individual CIFAR-10 numpy arrays",
    )
    training_files = [
        'data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4',
        'data_batch_5'
    ]
    test_files = ['test_batch']

    threads = []
    # The original had two loops identical except for the split name;
    # iterate over (file list, split) pairs instead.
    for filenames, split in ((training_files, 'training'),
                             (test_files, 'test')):
        for filename in filenames:
            t = threading.Thread(target=_load_individual,
                                 args=(client, filename, split))
            t.start()
            threads.append(t)

    # Block until every uploader thread has finished.
    for t in threads:
        t.join()
def load():
    """Register all raw multiphase ELVO ``.npz`` scans found on local disk.

    Walks the v1.0 multiphase dataset directory and registers every
    ``.npz`` file as a 3D sample. Files whose name starts with 'P' are
    labeled 'positive'; everything else is 'negative'.
    """
    client = PlatformClient(API_SERVER, EMAIL, PASSWORD)
    client.create_dataset(DATASET_NAME,
                          description="Raw multiphase ELVO scans in NPZ form.")
    # Renamed from `dir` to avoid shadowing the builtin.
    dirpath: str
    files: List[str]
    for dirpath, _, files in os.walk(
            '/research/rih-cs/datasets/elvo-multiphase/v1.0'):
        for file in files:
            if not file.endswith('.npz'):
                continue
            sample_name = file.split('.')[0]
            # Filename prefix 'P' marks a positive scan.
            label = 'positive' if file.startswith('P') else 'negative'
            data_url = f'{DATA_PREFIX}/{file}'
            print(f"Registering sample={sample_name} with"
                  f" label={label} and url={data_url}",
                  flush=True)
            start = time.time()
            success = client.register_sample(
                sample_name,
                DATASET_NAME,
                data_url=data_url,
                image_type='3D',
                label=label,
            )
            end = time.time()
            # register_sample reports False when the sample already exists.
            if success:
                print(f"Registered {file} in {end - start} seconds")
            else:
                print(f"Found {file} exists in {end - start} seconds")
def _upload_batch(client, filename, split_class):
    """Upload a single CIFAR-10 pickle batch file to DATASET_BATCHES."""
    filepath = os.path.join('cifar-10-batches-py/', filename)
    print(f'Loading {split_class} file', filepath)
    start = time.time()
    with open(filepath, 'rb') as f:
        # NOTE(review): '(unknown).pkl' looks like a placeholder that was
        # meant to interpolate the batch filename — confirm before relying
        # on these URLs. Preserved verbatim here.
        client.create_sample(
            filename, DATASET_BATCHES,
            data_url=f'gs://elvo-platform/test/platform/data'
                     f'/{DATASET_BATCHES}/(unknown).pkl',
            data_content=f,
            split=split_class)
    end = time.time()
    print(f'Took {end - start} seconds', flush=True)


def load_batches():
    """Create DATASET_BATCHES and upload all CIFAR-10 batch files.

    The original training and test loops were identical apart from the
    file list and split name, so the shared work lives in
    ``_upload_batch``.
    """
    client = PlatformClient(API_SERVER, EMAIL, PASSWORD)
    client.create_dataset(
        DATASET_BATCHES,
        description="CIFAR-10 batches from"
                    " http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz")
    training_files = [
        'data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4',
        'data_batch_5'
    ]
    test_files = ['test_batch']

    for filename in training_files:
        _upload_batch(client, filename, 'training')
    for filename in test_files:
        _upload_batch(client, filename, 'test')
def register_mnist_az():
    """Register MNIST PNG images hosted on Azure blob storage.

    Walks a local ``mnist_png`` mirror to derive each sample's name, label
    (the immediate parent directory) and split (the first directory level),
    then registers the corresponding az:// URL without validation.
    """
    client = PlatformClient(API_SERVER, EMAIL, PASSWORD)
    # Plain string literal — the original used an f-string with no
    # placeholders (F541).
    dataset_name = 'mnist-az'
    client.create_dataset(dataset_name,
                          description="MNIST on Azure in PNG form.")
    # Renamed from `dir` to avoid shadowing the builtin.
    dirpath: str
    files: List[str]
    for dirpath, _, files in os.walk('mnist_png'):
        for file in files:
            if not file.endswith('.png'):
                continue
            # Layout assumed: mnist_png/<split>/<label>/<file>.png
            # — TODO confirm against the mirror's actual structure.
            label = dirpath.split('/')[-1]
            split = dirpath.split('/')[1]
            start = time.time()
            sample_name = f"{file.split('.')[0]}-{split}"
            new_dir = dirpath.replace('mnist_png', 'data')
            data_url = f'az://ml-platform/{new_dir}/{file}'
            print(
                f"Registering {sample_name} with label {label},"
                f" split {split}, and data_url {data_url}",
                flush=True)
            ret = client.register_sample(
                sample_name,
                dataset_name,
                data_url=data_url,
                validate=False,
                label=label,
                split=split,
            )
            end = time.time()
            # register_sample reports False when the sample already exists.
            if ret:
                print(f"REGISTERED: processed {file}"
                      f" in {end - start} seconds")
            else:
                print(f"ALREADY EXISTS: processed {file}"
                      f" in {end - start} seconds")
import pathlib from blueno import PlatformClient API_SERVER = '' EMAIL = '' PASSWORD = '' DATASET = '' if __name__ == '__main__': client = PlatformClient(API_SERVER, EMAIL, PASSWORD) print(f'Creating dataset: {DATASET}') client.create_dataset(DATASET, description='First version of the multiphase' ' segmentation data') root_dir = pathlib.Path( '/research/rih-cs/datasets/elvo-multiphase/segmentation_data') for dirpath in root_dir.iterdir(): for filepath in dirpath.iterdir(): if filepath.name.endswith('.jpg'): sample_name = filepath.name[:-len('.jpg')] label = sample_name[0] # either 'P' or 'N' url = f'gs://elvo-platform/multiphase/processed' \ f'/{DATASET}/{filepath.name}' print(f'Uploading sample {sample_name} with label {label} to' f' {url} from {str(filepath)}') with open(filepath, 'rb') as f: client.create_sample(sample_name, DATASET, data_url=url, data_content=f,
def client():
    """Return a PlatformClient that fails fast (class-wide retries off)."""
    # Disable the retry loop so failures surface immediately; note this
    # mutates the class attribute, affecting all PlatformClient instances.
    PlatformClient.retry_limit = 0
    platform_client = PlatformClient(API_SERVER, EMAIL, PASSWORD)
    return platform_client