Example #1
    def setUp(self):
        """ Setup test.
        """
        data = fetch_cifar(datasetdir="/tmp/cifar")
        self.manager = DataManager(input_path=data.input_path,
                                   labels=["label"],
                                   metadata_path=data.metadata_path,
                                   number_of_folds=10,
                                   batch_size=10,
                                   stratify_label="category",
                                   test_size=0.1,
                                   sample_size=0.01)

        class Net(nn.Module):
            def __init__(self):
                super(Net, self).__init__()
                self.conv1 = nn.Conv2d(3, 6, 5)
                self.pool = nn.MaxPool2d(2, 2)
                self.conv2 = nn.Conv2d(6, 16, 5)
                self.fc1 = nn.Linear(16 * 5 * 5, 120)
                self.fc2 = nn.Linear(120, 84)
                self.fc3 = nn.Linear(84, 10)

            def forward(self, x):
                x = self.pool(func.relu(self.conv1(x)))
                x = self.pool(func.relu(self.conv2(x)))
                x = x.view(-1, 16 * 5 * 5)
                x = func.relu(self.fc1(x))
                x = func.relu(self.fc2(x))
                x = self.fc3(x)
                return x

        self.cl = DeepLearningInterface(model=Net(),
                                        optimizer_name="SGD",
                                        momentum=0.9,
                                        learning_rate=0.001,
                                        loss_name="CrossEntropyLoss",
                                        metrics=["accuracy"])
Example #2
import logging
import matplotlib.pyplot as plt
import pynet
from pynet.datasets import DataManager, fetch_registration
from pynet.history import History
from pynet.losses import MSELoss, NCCLoss, RCNetLoss, PCCLoss
from pynet.plotting import Board, update_board
from pynet.utils import setup_logging

setup_logging(level="debug")
logger = logging.getLogger("pynet")
losses = pynet.get_tools(tool_name="losses")

outdir = "/neurospin/nsap/tmp/registration"
data = fetch_registration(datasetdir=outdir)
manager = DataManager(input_path=data.input_path,
                      metadata_path=data.metadata_path,
                      number_of_folds=2,
                      batch_size=8,
                      sampler="random",
                      stratify_label="studies",
                      projection_labels={"studies": ["abide"]},
                      test_size=0.1,
                      add_input=True,
                      sample_size=0.1)

#############################################################################
# Training
# --------
#
# From the available models, load VoxelMorphRegister, VTNetRegister, or
# ADDNet and start the training.
# Note that the first two estimate a non-linear deformation and require
# the input data to be affinely registered, while ADDNet estimates an
# affine transform. The next section shows how to combine them
# efficiently.
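
# A minimal manual training loop, as a sketch only: it relies on the
# DataManager loader API demonstrated in Example #15 plus plain PyTorch.
# The network construction is elided because no constructor signature is
# shown in this excerpt; `net` is assumed to map the stacked
# (moving, fixed) input to a warped moving image.
import torch

net = ...  # e.g. a VoxelMorphRegister instance, built per the pynet docs
optimizer = torch.optim.Adam(net.parameters(), lr=1e-4)
criterion = torch.nn.MSELoss()  # stand-in; the pynet losses above also fit
loaders = manager.get_dataloader(train=True, validation=False, fold_index=0)
for dataitem in loaders.train:
    optimizer.zero_grad()
    warped = net(dataitem.inputs)               # assumed forward signature
    loss = criterion(warped, dataitem.outputs)  # add_input=True: the target
    loss.backward()                             # is the input image itself
    optimizer.step()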
Example #3
# A validation step is a useful way to avoid overfitting. At each epoch, the
# neural network is evaluated on the validation set, but not trained on it.
# If the validation loss starts to grow, it means that the network is
# overfitting the training set, and that it is time to stop the training.
#
# The following cell creates stratified test, train, and validation loaders.

from pynet.datasets import fetch_orientation
from pynet.datasets import DataManager

data = fetch_orientation(
    datasetdir="/tmp/orientation",
    flatten=True)
manager = DataManager(
    input_path=data.input_path,
    labels=["label"],
    metadata_path=data.metadata_path,
    number_of_folds=10,
    batch_size=1000,
    stratify_label="label",
    test_size=0.1)
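
# A short sketch of pulling the loaders for one fold. The get_dataloader
# call is demonstrated in Example #15; exposing the validation split as
# `loaders.validation` is assumed here by symmetry with `loaders.train`.
loaders = manager.get_dataloader(train=True, validation=True, fold_index=0)
for dataitem in loaders.validation:
    print(dataitem.inputs.shape, dataitem.labels)
    break  # one batch is enough to check shapes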


#############################################################################
# Displaying some images of the test dataset.

import numpy as np
from pynet.plotting import plot_data

dataset = manager["test"]
sample = dataset.inputs.reshape(-1, data.height, data.width)
sample = np.expand_dims(sample, axis=1)
plot_data(sample, nb_samples=5)
Example #4
from pynet.datasets import DataManager, fetch_minst
from pynet.plotting import Board, update_board


#############################################################################
# The model will be trained on MNIST, the handwritten-digits dataset. The
# input is an image in R^(28×28), flattened here to a 784-dimensional vector.

def flatten(arr):
    return arr.flatten()

data = fetch_minst(datasetdir="/neurospin/nsap/datasets/minst")
manager = DataManager(
    input_path=data.input_path,
    metadata_path=data.metadata_path,
    stratify_label="label",
    number_of_folds=10,
    batch_size=64,
    test_size=0,
    input_transforms=[flatten],
    add_input=True,
    sample_size=0.05)


#############################################################################
# The Model
# ---------
#
# The model is composed of two sub-networks:
#
# 1. Given x (image), encode it into a distribution over the latent space -
#    referred to as Q(z|x).
# 2. Given z in latent space (code representation of an image), decode it
#    into the image it represents - referred to as P(x|z).
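
# A minimal sketch of the two sub-networks described above, in plain
# PyTorch; this is illustrative, not the model used by the original script.
# The encoder outputs the parameters of Q(z|x); the decoder maps a latent
# code back to a flattened 28x28 image, i.e. P(x|z).
import torch
import torch.nn as nn


class TinyVAE(nn.Module):
    def __init__(self, latent_dim=20):
        super().__init__()
        self.encoder = nn.Sequential(nn.Linear(784, 400), nn.ReLU())
        self.mu = nn.Linear(400, latent_dim)
        self.logvar = nn.Linear(400, latent_dim)
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 400), nn.ReLU(),
            nn.Linear(400, 784), nn.Sigmoid())

    def forward(self, x):
        h = self.encoder(x)
        mu, logvar = self.mu(h), self.logvar(h)
        # reparameterization trick: sample z ~ Q(z|x) while keeping the
        # computation differentiable with respect to mu and logvar
        z = mu + torch.randn_like(mu) * torch.exp(0.5 * logvar)
        return self.decoder(z), mu, logvar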
Example #5
Grad-CAM visualizes which image regions drive a prediction by weighting
the activation map of the last convolutional layer in our model.

Load the data
-------------

Load some images and apply the ImageNet transformation.
You may need to change the 'datasetdir' parameter.
"""

from pynet.datasets import DataManager, fetch_gradcam
from pynet.plotting import plot_data

data = fetch_gradcam(datasetdir="/tmp/gradcam")
manager = DataManager(input_path=data.input_path,
                      metadata_path=data.metadata_path,
                      number_of_folds=2,
                      batch_size=5,
                      test_size=1)
dataset = manager["test"]
print(dataset.inputs.shape)
plot_data(dataset.inputs, nb_samples=5, random=False, rgb=True)

#############################################################################
# Explore different architectures
# -------------------------------
#
# Let's automate this procedure for different networks.
# We need to reload the data for the inception network.
# You may need to change the 'datasetdir' parameter.

import os
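
# The excerpt is cut off here. As an illustration of the loop the comment
# describes, one could iterate over a few torchvision classifiers and run
# the same Grad-CAM routine on each; `compute_gradcam` is a hypothetical
# stand-in for the procedure built earlier in the tutorial (tensor
# conversion is assumed to happen inside it).
import torchvision.models as models

for name, factory in [("vgg19", models.vgg19),
                      ("resnet18", models.resnet18),
                      ("densenet121", models.densenet121)]:
    net = factory(pretrained=True).eval()
    heatmaps = compute_gradcam(net, dataset.inputs)  # hypothetical helper
    print(name, heatmaps.shape)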
Example #6
File: vae_gan_2.py Project: rlouiset/pynet
        if not isinstance(imgtype, list):
            imgtype = [imgtype]
        imgtype = [typemap[key] for key in imgtype]
    transformed_data = []
    for channel_id in range(len(data)):
        if channel_id not in imgtype:
            continue
        arr = data[channel_id]
        transformed_data.append(downsample(arr, scale=3))
    return np.asarray(transformed_data)


manager = DataManager(input_path=data.input_path,
                      metadata_path=data.metadata_path,
                      stratify_label="grade",
                      number_of_folds=10,
                      batch_size=batch_size,
                      test_size=0,
                      input_transforms=[transformer],
                      sample_size=0.2)

########################
# Loss
# ----


def calc_gradient_penalty(model, x, x_gen, w=10):
    """ WGAN-GP gradient penalty.
    """
    assert (x.size() == x_gen.size()), "Real and sampled sizes do not match."
    alpha_size = tuple((len(x), *(1, ) * (x.dim() - 1)))
    alpha_t = torch.cuda.FloatTensor if x.is_cuda else torch.Tensor
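    # The excerpt stops here; a standard WGAN-GP continuation follows as a
    # sketch (not the file's exact code): interpolate between real and
    # generated samples, then penalize interpolant gradients whose L2 norm
    # deviates from 1.
    alpha = alpha_t(*alpha_size).uniform_()
    x_hat = (x.data * alpha + x_gen.data * (1 - alpha)).requires_grad_(True)
    d_hat = model(x_hat)
    gradients = torch.autograd.grad(
        outputs=d_hat, inputs=x_hat,
        grad_outputs=torch.ones_like(d_hat),
        create_graph=True, retain_graph=True, only_inputs=True)[0]
    grad_norm = gradients.view(len(x), -1).norm(2, dim=1)
    return w * ((grad_norm - 1) ** 2).mean()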
Example #7
#
# Use the fetcher of the pynet package.

from pynet.datasets import DataManager, fetch_brats
from pynet.plotting import plot_data
from pynet.transforms import RandomFlipDimensions, Offset

data = fetch_brats(datasetdir="/neurospin/nsap/datasets/brats")
manager = DataManager(
    input_path=data.input_path,
    metadata_path=data.metadata_path,
    output_path=data.output_path,
    projection_labels=None,
    number_of_folds=10,
    batch_size=1,
    stratify_label="grade",
    #input_transforms=[
    #    RandomFlipDimensions(ndims=3, proba=0.5, with_channels=True),
    #    Offset(nb_channels=4, factor=0.1)],
    sampler="random",
    add_input=True,
    test_size=0.1,
    pin_memory=True)
dataset = manager["test"][:1]
print(dataset.inputs.shape, dataset.outputs.shape)
plot_data(dataset.inputs, channel=1, nb_samples=5)
plot_data(dataset.outputs, channel=1, nb_samples=5)

#############################################################################
# Training
# --------
Example #8
# You may need to change the 'datasetdir' parameter.

import os
import numpy as np
from pynet.datasets import DataManager, fetch_echocardiography
from pynet.plotting import plot_data
from pynet.utils import setup_logging

setup_logging(level="info")

data = fetch_echocardiography(datasetdir="/tmp/echocardiography")
manager = DataManager(input_path=data.input_path,
                      metadata_path=data.metadata_path,
                      output_path=data.output_path,
                      number_of_folds=2,
                      stratify_label="label",
                      sampler="random",
                      batch_size=10,
                      test_size=0.1,
                      sample_size=0.2)
dataset = manager["test"]
print(dataset.inputs.shape, dataset.outputs.shape)
data = np.concatenate((dataset.inputs, dataset.outputs), axis=1)
plot_data(data, nb_samples=5)

#############################################################################
# Optimisation
# ------------
#
# From the available models, load the UNet and start the training.
# You may need to change the 'outdir' parameter.
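
# A minimal wiring sketch, with every non-demonstrated name flagged: the
# import paths and the UNet constructor arguments below are assumptions
# (pynet's model zoo is not shown in this excerpt), while the interface
# keywords reuse the pattern demonstrated in Example #1.
from pynet.interfaces import DeepLearningInterface  # assumed import path
from pynet.models import UNet  # assumed import path

net = UNet(num_classes=1, in_channels=1)  # assumed signature
cl = DeepLearningInterface(model=net,
                           optimizer_name="Adam",
                           learning_rate=1e-4,
                           loss_name="BCELoss",
                           metrics=["accuracy"])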
Example #9
import logging
import matplotlib.pyplot as plt
from pynet.datasets import DataManager, fetch_registration
from pynet.history import History
from pynet.losses import MSELoss, NCCLoss, RCNetLoss
from pynet.plotting import plot_history
from pynet.utils import setup_logging

setup_logging(level="debug")
logger = logging.getLogger("pynet")

outdir = "/neurospin/nsap/tmp/registration"
data = fetch_registration(
    datasetdir=outdir)
manager = DataManager(
    input_path=data.input_path,
    metadata_path=data.metadata_path,
    number_of_folds=10,
    batch_size=1,
    sampler="random",
    #stratify_label="centers",
    test_size=0.1,
    add_input=True,
    sample_size=1)

#############################################################################
# Training
# --------
#
# From the available models, load VoxelMorphRegister, VTNetRegister, or
# ADDNet and start the training.
# Note that the first two estimate a non-linear deformation and require
# the input data to be affinely registered, while ADDNet estimates an
# affine transform. The next section shows how to combine them
# efficiently.
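
# A conceptual sketch of that combination (illustrative only; pynet's real
# composition API may differ). Predict a global affine alignment with
# ADDNet first, then refine it with the non-linear field of a
# VoxelMorph-style network; both networks are assumed trained, with the
# two-argument forward signature shown.
def register_pair(moving, fixed, affine_net, deform_net):
    affine_warped = affine_net(moving, fixed)  # global affine pre-alignment
    return deform_net(affine_warped, fixed)    # local non-linear refinement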
Example #10
Load some data.
You may need to change the 'datasetdir' parameter.
"""

import os
from pynet.datasets import DataManager, fetch_genomic_pred
from pynet.utils import setup_logging

setup_logging(level="info")

data = fetch_genomic_pred(datasetdir="/tmp/genomic_pred")
manager = DataManager(input_path=data.input_path,
                      labels=["env0"],
                      metadata_path=data.metadata_path,
                      number_of_folds=2,
                      batch_size=5,
                      test_size=0.2,
                      continuous_labels=True)

#############################################################################
# Basic inspection

import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

train_dataset = manager["train"][0]
X_train = train_dataset.inputs[train_dataset.indices]
y_train = train_dataset.labels[train_dataset.indices]
test_dataset = manager["test"]
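
# A short inspection sketch suggested by the PCA import above (not part of
# the original excerpt): project the genotypes onto two principal
# components and color the points by the continuous target.
pca = PCA(n_components=2)
proj = pca.fit_transform(X_train)
plt.scatter(proj[:, 0], proj[:, 1], c=y_train.squeeze(), cmap="viridis",
            s=10)
plt.colorbar(label="env0")
plt.title("PCA of the training genotypes")
plt.show()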
Example #11
import os
import numpy as np
from pynet.datasets import DataManager, fetch_echocardiography
from pynet.plotting import plot_data
from pynet.utils import setup_logging

setup_logging(level="info")

data = fetch_echocardiography(
    datasetdir="/tmp/echocardiography")
manager = DataManager(
    input_path=data.input_path,
    metadata_path=data.metadata_path,
    output_path=data.output_path,
    number_of_folds=2,
    stratify_label="label",
    sampler="weighted_random",
    batch_size=10,
    test_size=0.1,
    sample_size=(1 if "CI_MODE" not in os.environ else 0.05))
dataset = manager["test"]
print(dataset.inputs.shape, dataset.outputs.shape)
data = np.concatenate((dataset.inputs, dataset.outputs), axis=1)
plot_data(data, nb_samples=5)


#############################################################################
# Optimisation
# ------------
#
# From the available models, load the UNet and start the training.
Example #12
#
# A validation step is a useful way to avoid overfitting. At each epoch, the
# neural network is evaluated on the validation set, but not trained on it.
# If the validation loss starts to grow, it means that the network is
# overfitting the training set, and that it is time to stop the training.
#
# The following cell creates stratified test, train, and validation loaders.
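
# The stopping rule described above, as a small self-contained sketch (not
# part of the original script): stop once the validation loss has failed
# to improve on its best value for `patience` consecutive epochs.

def should_stop(val_losses, patience=3):
    """ Return True when the last `patience` epochs all failed to beat the
    best validation loss seen before them.
    """
    if len(val_losses) <= patience:
        return False
    best_before = min(val_losses[:-patience])
    return min(val_losses[-patience:]) >= best_before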

import os
import numpy as np
from pynet.datasets import fetch_orientation
from pynet.datasets import DataManager

data = fetch_orientation(datasetdir="/tmp/orientation", flatten=True)
manager = DataManager(input_path=data.input_path,
                      labels=["label"],
                      metadata_path=data.metadata_path,
                      number_of_folds=10,
                      batch_size=1000,
                      stratify_label="label",
                      test_size=0.1,
                      sample_size=(1 if "CI_MODE" not in os.environ else 0.1))

#############################################################################
# Displaying some images of the test dataset.

from pynet.plotting import plot_data

dataset = manager["test"]
sample = dataset.inputs.reshape(-1, data.height, data.width)
sample = np.expand_dims(sample, axis=1)
plot_data(sample, nb_samples=5)

#############################################################################
Example #13
import logging
import torch
import matplotlib.pyplot as plt
from pynet.datasets import DataManager, fetch_impac
from pynet.utils import setup_logging

setup_logging(level="info")
logger = logging.getLogger("pynet")

use_toy = False
dtype = "all"

data = fetch_impac(datasetdir="/neurospin/nsap/datasets/impac",
                   mode="train",
                   dtype=dtype)
nb_features = data.nb_features
manager = DataManager(input_path=data.input_path,
                      labels=["participants_asd"],
                      metadata_path=data.metadata_path,
                      number_of_folds=3,
                      batch_size=128,
                      sampler="random",
                      test_size=2,
                      sample_size=1)

if use_toy:
    toy_data = {}
    nb_features = 50
    for name, nb_samples in (("train", 1000), ("test", 2)):
        x1 = torch.randn(nb_samples, 50)
        x2 = torch.randn(nb_samples, 50) + 1.5
        x = torch.cat([x1, x2], dim=0)
        y1 = torch.zeros(nb_samples, 1)
        y2 = torch.ones(nb_samples, 1)
        y = torch.cat([y1, y2], dim=0)
        toy_data[name] = (x, y)
Example #14
"""

#############################################################################
# Import the dataset
# ------------------
#
# You may need to change the 'datasetdir' parameter.

import numpy as np
from pynet.datasets import DataManager, fetch_echocardiography
from pynet.plotting import plot_data

data = fetch_echocardiography(datasetdir="/tmp/echocardiography")
manager = DataManager(input_path=data.input_path,
                      metadata_path=data.metadata_path,
                      output_path=data.output_path,
                      number_of_folds=10,
                      batch_size=10,
                      test_size=0.1)
dataset = manager["test"]
data = np.concatenate((dataset.inputs, dataset.outputs), axis=1)
plot_data(data, nb_samples=5)

#############################################################################
# Optimisation
# ------------
#
# From the available models, load the UNet and start the training.
# You may need to change the 'outdir' parameter.

import os
import torch
Example #15
import numpy as np
from pynet.datasets import DataManager
from pynet.plotting import Board

# Transformer, flip, add_blur and `data` are defined earlier in the
# original file; only their use appears in this excerpt.
board = Board(port=8097, host="http://localhost", env="data-augmentation")
compose_transforms = Transformer()
compose_transforms.register(flip,
                            probability=0.5,
                            axis=0,
                            apply_to=["input", "output"])
compose_transforms.register(add_blur,
                            probability=1,
                            sigma=4,
                            apply_to=["input"])
manager = DataManager(input_path=data.input_path,
                      metadata_path=data.metadata_path,
                      output_path=data.output_path,
                      number_of_folds=2,
                      batch_size=2,
                      test_size=0.1,
                      sample_size=0.1,
                      sampler=None,
                      add_input=True,
                      data_augmentation_transforms=[compose_transforms])
loaders = manager.get_dataloader(train=True, validation=False, fold_index=0)
for dataitem in loaders.train:
    print("-" * 50)
    print(dataitem.inputs.shape, dataitem.outputs.shape, dataitem.labels)
    images = [
        dataitem.inputs[0, 0].numpy(), dataitem.inputs[0, 1].numpy(),
        dataitem.outputs[0, 0].numpy(), dataitem.outputs[0, 1].numpy(),
        dataitem.outputs[0, 4].numpy(), dataitem.outputs[0, 5].numpy()
    ]
    images = np.asarray(images)
    images = np.expand_dims(images, axis=1)
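    # Sketch of a plausible continuation (the original file is truncated
    # here): render the collected panels with pynet's plot_data helper,
    # assuming "from pynet.plotting import plot_data" at module level.
    plot_data(images, nb_samples=5)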
Example #16
import os
import sys
from pynet.datasets import DataManager, fetch_height_biobank
from pynet.utils import setup_logging

# This example cannot run in CI: it accesses NeuroSpin intra filesystems
if "CI_MODE" in os.environ:
    sys.exit(0)

setup_logging(level="info")

data = fetch_height_biobank(datasetdir="/neurospin/tmp/height_bb")
manager = DataManager(input_path=data.input_path,
                      labels=["Height"],
                      metadata_path=data.metadata_path,
                      number_of_folds=2,
                      batch_size=5,
                      test_size=0.2,
                      continuous_labels=True)

#############################################################################
# Basic inspection

import numpy as np
import matplotlib.pyplot as plt

train_dataset = manager["train"][0]
X_train = train_dataset.inputs[train_dataset.indices]
y_train = train_dataset.labels[train_dataset.indices]
test_dataset = manager["test"]
X_test = test_dataset.inputs[test_dataset.indices]
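
# A short inspection sketch (not part of the original excerpt): compare the
# target distributions of the two splits before fitting anything. Height is
# roughly Gaussian, which makes it a convenient first continuous target.
y_test = test_dataset.labels[test_dataset.indices]
plt.hist(y_train.squeeze(), bins=30, alpha=0.7, label="train")
plt.hist(y_test.squeeze(), bins=30, alpha=0.7, label="test")
plt.xlabel("Height")
plt.ylabel("count")
plt.legend()
plt.show()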