# Example #1 (score: 0)
    def loadData(self):
        """Set up ``self.data``, ``self.preview_data`` and the keyword
        argument dicts used to draw training and monitoring batches.

        Two regimes are distinguished via the config:

        * image training (``mode != 'vect-scalar'`` and no custom data
          class): image/label cubes are loaded through ``CNNData``.
        * non-image training: a data class is resolved (from an external
          file or from ``traindata``) and instantiated instead.
        """
        config = self.config
        if self.config.mode != 'vect-scalar' and self.config.data_class_name is None:  # image training
            # Strided prediction only applies to img-img mode and is
            # incompatible with max-fragment-pooling (MFP).
            # FIX: was ``~np.any(...)`` -- bitwise-not only happens to
            # work on np.bool_ and silently breaks on a plain Python
            # bool (``~True == -2`` is truthy); ``not`` is always safe.
            strided = not np.any(config.MFP) and config.mode == 'img-img'

            self.get_batch_kwargs = dict(
                batch_size=config.batch_size,
                strided=strided,
                flip=config.flip_data,
                grey_augment_channels=config.grey_augment_channels,
                ret_info=config.lazy_labels,
                ret_example_weights=config.use_example_weights,
                warp_on=config.warp_on,
                ignore_thresh=config.example_ignore_threshold)

            # the source is replaced in self.testModel to be valid
            self.get_batch_kwargs_test = dict(
                batch_size=config.monitor_batch_size,
                strided=strided,
                flip=config.flip_data,
                grey_augment_channels=config.grey_augment_channels,
                ret_info=config.lazy_labels,
                ret_example_weights=config.use_example_weights,
                warp_on=False,
                ignore_thresh=config.example_ignore_threshold)  # no warp

            self.data = CNNData.CNNData(
                config.patch_size, config.dimensions.pred_stride,
                config.dimensions.offset, config.n_dim, config.n_lab,
                config.anisotropic_data, config.mode, config.zchxy_order,
                config.border_mode, config.pre_process, config.upright_x,
                config.target == 'regression',  # was: True if ... else False
                config.target if config.target in ['malis', 'affinity'] else
                False)  # return affinity graph instead of boundaries

            self.data.addDataFromFile(config.data_path, config.label_path,
                                      config.d_files, config.l_files,
                                      config.cube_prios, config.valid_cubes,
                                      config.downsample_xy)

            if self.config.preview_data_path is not None:
                data = trainutils.h5Load(self.config.preview_data_path)
                # Wrap a single array into a list. (The original check
                # redundantly tested ``isinstance(data, list)`` twice.)
                if not isinstance(data, (tuple, list)):
                    #data = np.transpose(data, (1,2,0)) # this was only a hack for I
                    data = [data]

                # Normalise uint8 preview images to [0, 1] floats.
                data = [d.astype(np.float32) / 255 for d in data]
                self.preview_data = data
            else:
                self.preview_data = None

        else:  # non-image training
            self.get_batch_kwargs = dict(batch_size=config.batch_size)
            self.get_batch_kwargs.update(self.config.data_batch_kwargs)
            # the source is replaced in self.testModel to be valid
            self.get_batch_kwargs_test = dict(
                batch_size=config.monitor_batch_size)
            # A tuple means (file_path, variable_name) for an external class.
            if isinstance(self.config.data_class_name, tuple):
                Data = trainutils.import_variable_from_file(
                    *self.config.data_class_name)
            else:
                Data = getattr(traindata, self.config.data_class_name)

            self.data = Data(**self.config.data_load_kwargs)
            self.preview_data = None
# Example #2 (score: 0)
    def _read_images(self, d_path, l_path, d_files, l_files, downsample_xy):
        """
        Image files on disk are expected to be in order (ch,x,y,z) or (x,y,z)
        But image stacks are returned as (z,ch,x,y) and label as (z,x,y,) irrespective of the order in the file.
        If the image files have no channel this dimension is extended to a singleton dimension.
        """
        data, label, info = [], [], []
        if len(d_files) != len(l_files):
            raise ValueError(
                "d_files and l_files must be lists of same length!")
        for (d_f, d_key), (l_f, l_key) in zip(d_files, l_files):
            print 'Loading %s' % d_f,
            d = ut.h5Load(d_path + d_f, d_key)
            print 'Loading %s' % l_f
            l = ut.h5Load(l_path + l_f, l_key)
            try:
                info_1 = ut.h5Load(l_path + l_f, 'info')
                info.append(info_1)
            except KeyError:
                info.append(None)

            if not self.zchxy_order:
                if len(d.shape) == 4:
                    self.n_ch = d.shape[0]
                    print "Data has %i channels" % self.n_ch
                elif len(d.shape) == 3:  # We have no channels in data
                    self.n_ch = 1
                    d = d[None, :, :, :]  # add (empty) 0-axis

                if l.size == 0:
                    l = np.zeros_like(d[0], dtype=self.ldtype)
                elif self.mode == 'img-scalar':
                    assert len(l.shape) == 1, "Scalar labels must be 1d"

    # Transpose such that access is optimal
                d = np.transpose(d, (3, 0, 1, 2))  # (ch,x,y,z)-->(z,ch,x,y)
                if self.mode == 'img-img':
                    l = np.transpose(l, (2, 0, 1))  #    (x,y,z)-->(z,x,y)
                    d, l = _stripCubes(d, l, self.offset, self.ldtype)

            else:  # data in memory layout:
                if len(d.shape) == 4:
                    self.n_ch = d.shape[1]
                    print "Data has %i channels" % self.n_ch
                elif len(d.shape) == 3:  # We have no channels in data
                    self.n_ch = 1
                    d = d[:, None, :, :]  # add (empty) 0-axis

                if l.size == 0:
                    sh = (d.shape[0], ) + d.shape[2:]
                    l = np.zeros_like(sh, dtype=self.ldtype)
                elif self.mode == 'img-scalar':
                    assert len(l.shape) == 1, "Scalar labels must be 1d"

                if self.mode == 'img-img':
                    d, l = _stripCubes(d, l, self.offset, self.ldtype)

            # determine normalisation depending on int or float type
            if d.dtype in [
                    np.int, np.int8, np.int16, np.int32, np.uint32, np.uint,
                    np.uint8, np.uint16, np.uint32, np.uint32
            ]:
                m = 255
            else:
                m = 1

            d = np.ascontiguousarray(d, dtype=np.float32) / m
            if (self.ldtype is not l.dtype
                    and np.issubdtype(l.dtype, np.integer)):
                m = l.max()
                M = np.iinfo(self.ldtype).max
                if m > M:
                    raise ValueError(
                        "Loading of data: labels must be cast to %s, but %s cannot store value %g, maximum allowed value: %g. You may try to renumber labels."
                        % (self.ldtype, self.ldtype, m, M))

            l = np.ascontiguousarray(l, dtype=self.ldtype)

            if downsample_xy:
                f = int(downsample_xy)
                l_sh = l.shape
                cut = np.mod(l_sh, f)

                d = d[:, :, :l_sh[-2] - cut[-2], :l_sh[-1] - cut[-1]]
                sh = d[:, :, ::f, ::f].shape
                new_d = np.zeros(sh, dtype=np.float32)

                l = l[:, :l_sh[-2] - cut[-2], :l_sh[-1] - cut[-1]]
                sh = l[:, ::f, ::f].shape
                new_l = np.zeros(sh, dtype=self.ldtype)

                for i in xrange(f):
                    for j in xrange(f):
                        new_d += d[:, :, i::f, j::f]
                        new_l += l[:, i::f, j::f]

                d = new_d / f**2
                l = new_l / f**2

            gc.collect()

            print "Internal data.shape=%s, label.shape=%s" % (d.shape, l.shape)
            print '---'
            data.append(d)
            label.append(l)

        return data, label, info
# Example #3 (score: 0)
    def _read_images(self, d_path, l_path, d_files, l_files, downsample_xy):
        """
        Image files on disk are expected to be in order (ch,x,y,z) or (x,y,z)
        But image stacks are returned as (z,ch,x,y) and label as (z,x,y,) irrespective of the order in the file.
        If the image files have no channel this dimension is extended to a singleton dimension.
        """
        data, label, info = [], [], []
        if len(d_files) != len(l_files):
            raise ValueError("d_files and l_files must be lists of same length!")
        for (d_f, d_key), (l_f, l_key) in zip(d_files, l_files):
            print 'Loading %s' % d_f,
            d = ut.h5Load(d_path + d_f, d_key)
            print 'Loading %s' % l_f
            l = ut.h5Load(l_path + l_f, l_key)
            try:
                info_1 = ut.h5Load(l_path + l_f, 'info')
                info.append(info_1)
            except KeyError:
                info.append(None)

            if not self.zchxy_order:
                if len(d.shape) == 4:
                    self.n_ch = d.shape[0]
                    print "Data has %i channels" % self.n_ch
                elif len(d.shape) == 3:  # We have no channels in data
                    self.n_ch = 1
                    d = d[None, :, :, :]  # add (empty) 0-axis

                if l.size == 0:
                    l = np.zeros_like(d[0], dtype=self.ldtype)
                elif self.mode == 'img-scalar':
                    assert len(l.shape) == 1, "Scalar labels must be 1d"

    # Transpose such that access is optimal
                d = np.transpose(d, (3, 0, 1, 2))  # (ch,x,y,z)-->(z,ch,x,y)
                if self.mode == 'img-img':
                    l = np.transpose(l, (2, 0, 1))  #    (x,y,z)-->(z,x,y)
                    d, l = _stripCubes(d, l, self.offset, self.ldtype)

            else:  # data in memory layout:
                if len(d.shape) == 4:
                    self.n_ch = d.shape[1]
                    print "Data has %i channels" % self.n_ch
                elif len(d.shape) == 3:  # We have no channels in data
                    self.n_ch = 1
                    d = d[:, None, :, :]  # add (empty) 0-axis

                if l.size == 0:
                    sh = (d.shape[0], ) + d.shape[2:]
                    l = np.zeros_like(sh, dtype=self.ldtype)
                elif self.mode == 'img-scalar':
                    assert len(l.shape) == 1, "Scalar labels must be 1d"

                if self.mode == 'img-img':
                    d, l = _stripCubes(d, l, self.offset, self.ldtype)

            # determine normalisation depending on int or float type
            if d.dtype in [np.int, np.int8, np.int16, np.int32, np.uint32,
                           np.uint, np.uint8, np.uint16, np.uint32, np.uint32]:
                m = 255
            else:
                m = 1

            d = np.ascontiguousarray(d, dtype=np.float32) / m
            if (self.ldtype is not l.dtype and np.issubdtype(l.dtype, np.integer)):
                m = l.max()
                M = np.iinfo(self.ldtype).max
                if m > M:
                    raise ValueError("Loading of data: labels must be cast to %s, but %s cannot store value %g, maximum allowed value: %g. You may try to renumber labels."
                                     % (self.ldtype, self.ldtype, m, M))

            l = np.ascontiguousarray(l, dtype=self.ldtype)

            if downsample_xy:
                f = int(downsample_xy)
                l_sh = l.shape
                cut = np.mod(l_sh, f)

                d = d[:, :, :l_sh[-2] - cut[-2], :l_sh[-1] - cut[-1]]
                sh = d[:, :, ::f, ::f].shape
                new_d = np.zeros(sh, dtype=np.float32)

                l = l[:, :l_sh[-2] - cut[-2], :l_sh[-1] - cut[-1]]
                sh = l[:, ::f, ::f].shape
                new_l = np.zeros(sh, dtype=self.ldtype)

                for i in xrange(f):
                    for j in xrange(f):
                        new_d += d[:, :, i::f, j::f]
                        new_l += l[:, i::f, j::f]

                d = new_d / f**2
                l = new_l / f**2

            gc.collect()

            print "Internal data.shape=%s, label.shape=%s" % (d.shape, l.shape)
            print '---'
            data.append(d)
            label.append(l)

        return data, label, info
# Example #4 (score: 0)
    def loadData(self):
        """Set up ``self.data``, ``self.preview_data`` and the keyword
        argument dicts used to draw training and monitoring batches.

        Two regimes are distinguished via the config:

        * image training (``mode != 'vect-scalar'`` and no custom data
          class): image/label cubes are loaded through ``CNNData``.
        * non-image training: a data class is resolved (from an external
          file or from ``traindata``) and instantiated instead.
        """
        config = self.config
        if self.config.mode != 'vect-scalar' and self.config.data_class_name is None:  # image training
            # Strided prediction only applies to img-img mode and is
            # incompatible with max-fragment-pooling (MFP).
            # FIX: was ``~np.any(...)`` -- bitwise-not only happens to
            # work on np.bool_ and silently breaks on a plain Python
            # bool (``~True == -2`` is truthy); ``not`` is always safe.
            strided = not np.any(config.MFP) and config.mode == 'img-img'

            self.get_batch_kwargs = dict(
                batch_size=config.batch_size,
                strided=strided,
                flip=config.flip_data,
                grey_augment_channels=config.grey_augment_channels,
                ret_info=config.lazy_labels,
                ret_example_weights=config.use_example_weights,
                warp_on=config.warp_on,
                ignore_thresh=config.example_ignore_threshold)

            # the source is replaced in self.testModel to be valid
            self.get_batch_kwargs_test = dict(
                batch_size=config.monitor_batch_size,
                strided=strided,
                flip=config.flip_data,
                grey_augment_channels=config.grey_augment_channels,
                ret_info=config.lazy_labels,
                ret_example_weights=config.use_example_weights,
                warp_on=False,
                ignore_thresh=config.example_ignore_threshold)  # no warp

            self.data = CNNData.CNNData(
                config.patch_size, config.dimensions.pred_stride,
                config.dimensions.offset, config.n_dim, config.n_lab,
                config.anisotropic_data, config.mode, config.zchxy_order,
                config.border_mode, config.pre_process, config.upright_x,
                config.target == 'regression',  # was: True if ... else False
                config.target if config.target in ['malis', 'affinity'] else
                False)  # return affinity graph instead of boundaries

            self.data.addDataFromFile(config.data_path, config.label_path,
                                      config.d_files, config.l_files,
                                      config.cube_prios, config.valid_cubes,
                                      config.downsample_xy)

            if self.config.preview_data_path is not None:
                data = trainutils.h5Load(self.config.preview_data_path)
                # Wrap a single array into a list. (The original check
                # redundantly tested ``isinstance(data, list)`` twice.)
                if not isinstance(data, (tuple, list)):
                    #data = np.transpose(data, (1,2,0)) # this was only a hack for I
                    data = [data]

                # Normalise uint8 preview images to [0, 1] floats.
                data = [d.astype(np.float32) / 255 for d in data]
                self.preview_data = data
            else:
                self.preview_data = None

        else:  # non-image training
            self.get_batch_kwargs = dict(batch_size=config.batch_size)
            self.get_batch_kwargs.update(self.config.data_batch_kwargs)
            # the source is replaced in self.testModel to be valid
            self.get_batch_kwargs_test = dict(batch_size=config.monitor_batch_size)
            # A tuple means (file_path, variable_name) for an external class.
            if isinstance(self.config.data_class_name, tuple):
                Data = trainutils.import_variable_from_file(*self.config.data_class_name)
            else:
                Data = getattr(traindata, self.config.data_class_name)

            self.data = Data(**self.config.data_load_kwargs)
            self.preview_data = None