Example #1
0
    def iterator(self,
                 mode=None,
                 batch_size=None,
                 num_batches=None,
                 topo=None,
                 targets=None,
                 rng=None):
        """
        Prepare this object for sequential iteration and return it.

        The request is recorded on the instance (`mode`, `batch_size`,
        `_targets`), the resume cursor `cur_idx` is reset to -1 and, for
        the 'sequential' mode, a new SequentialSubsetIterator over
        `data_n_rows` rows is stored as `subset_iterator`.

        Parameters
        ----------
        mode : str
            Iteration scheme; only 'sequential' is accepted.
        batch_size : int, optional
            Stored on the instance. The subset iterator itself is always
            created with a batch size of 1.
        num_batches : int, optional
            Forwarded to the subset iterator.
        topo : object, optional
            Accepted but not used here.
        targets : object, optional
            Stored as `_targets`.
        rng : object, optional
            Accepted but not used; the subset iterator always gets None.

        Returns
        -------
        self

        Raises
        ------
        NotImplementedError
            If `mode` is anything other than 'sequential'.
        """
        # The request is recorded before it is validated, so these
        # attributes are updated even when the mode is rejected below.
        self.mode, self.batch_size, self._targets = mode, batch_size, targets
        self.cur_idx = -1

        if mode != 'sequential':
            raise NotImplementedError(
                'other iteration scheme not supported for now!')

        self.subset_iterator = SequentialSubsetIterator(
            self.data_n_rows, batch_size=1, num_batches=num_batches, rng=None)
        return self
Example #2
0
def test_correct_sequential_slices():
    """
    Check the slices produced for 10 examples in 4 batches of size 3.

    The first three slices must cover [0, 3), [3, 6) and [6, 9); the
    fourth is the short remainder [9, 10). No slice carries a step.
    """
    iterator = SequentialSubsetIterator(10, 3, 4)
    expected_bounds = [(0, 3), (3, 6), (6, 9), (9, 10)]
    for first, last in expected_bounds:
        sl = iterator.next()
        assert sl.start == first
        assert sl.stop == last
        assert sl.step is None
Example #3
0
    def iterator(self, mode=None, batch_size=None, num_batches=None,
                 topo=None, targets=None, rng=None):
        """
        Prepare this object for sequential iteration and return it.

        The request is recorded on the instance (`mode`, `batch_size`,
        `_targets`) and, for the 'sequential' mode, a new
        SequentialSubsetIterator over `data_n_rows` rows is stored as
        `subset_iterator`.

        Parameters
        ----------
        mode : str
            Iteration scheme; only 'sequential' is accepted.
        batch_size : int, optional
            Forwarded to the subset iterator.
        num_batches : int, optional
            Forwarded to the subset iterator.
        topo : object, optional
            Accepted but not used here.
        targets : object, optional
            Stored as `_targets`.
        rng : object, optional
            Accepted but not used; the subset iterator always gets None.

        Returns
        -------
        self

        Raises
        ------
        NotImplementedError
            If `mode` is anything other than 'sequential'.
        """
        # The request is recorded before it is validated, so these
        # attributes are updated even when the mode is rejected below.
        self.mode, self.batch_size, self._targets = mode, batch_size, targets

        if mode != 'sequential':
            raise NotImplementedError('other iteration scheme not supported for now!')

        self.subset_iterator = SequentialSubsetIterator(
            self.data_n_rows, batch_size, num_batches, rng=None)
        return self
Example #4
0
def test_misc_exceptions():
    """
    Check the exceptions raised by the subset iterator classes.

    Calling next() on a bare SubsetIterator and constructing a
    SubsetIterator are both expected to raise NotImplementedError;
    building a SequentialSubsetIterator with rng=0 is expected to raise
    ValueError.
    """
    def fails_with(exc_type, thunk):
        # True only when `thunk` raises `exc_type`; any other exception
        # propagates unchanged.
        try:
            thunk()
        except exc_type:
            return True
        return False

    assert fails_with(
        NotImplementedError,
        lambda: SubsetIterator.__new__(SubsetIterator).next())
    assert fails_with(
        NotImplementedError,
        lambda: SubsetIterator(1, 2, 3))
    assert fails_with(
        ValueError,
        lambda: SequentialSubsetIterator(10, 3, 3, rng=0))
Example #5
0
def test_correct_sequential_slices():
    """
    Check the slices produced for 10 examples in 4 batches of size 3.

    Each slice must start where the previous one stopped, span 3 rows
    except for the final one-row remainder, and have no step.
    """
    iterator = SequentialSubsetIterator(10, 3, 4)
    starts = (0, 3, 6, 9)
    stops = (3, 6, 9, 10)
    for begin, end in zip(starts, stops):
        sl = iterator.next()
        assert sl.start == begin
        assert sl.stop == end
        assert sl.step is None
Example #6
0
def test_sequential_num_batches_and_batch_size():
    """
    Check how SequentialSubsetIterator combines batch_size and num_batches.

    With 10 examples and a batch size of 3, asking for 4 or for 3 batches
    must work and the iterator must raise StopIteration afterwards;
    asking for 5 batches must be rejected with ValueError.
    """
    def drains_after(num_batches):
        # Unexpected exceptions are deliberately not caught here: letting
        # them propagate fails the test with the real traceback instead
        # of an uninformative `assert False`.
        iterator = SequentialSubsetIterator(10, 3, num_batches)
        for _ in range(num_batches):
            iterator.next()
        try:
            iterator.next()
        except StopIteration:
            return True
        return False

    # This should be fine, we have enough examples for 4 batches
    # (with one under-sized batch).
    assert drains_after(4)

    # This should be fine, we have enough examples for 3 batches
    # (with one example to spare).
    assert drains_after(3)

    try:
        # This should fail, since you can't make 5 batches of 3 from 10.
        SequentialSubsetIterator(10, 3, 5)
    except ValueError:
        return
    assert False, 'expected ValueError for 5 batches of 3 from 10 examples'
Example #7
0
def test_sequential_num_batches_and_batch_size():
    """
    Check how SequentialSubsetIterator combines batch_size and num_batches.

    With 10 examples and a batch size of 3, asking for 4 or for 3 batches
    must work and the iterator must raise StopIteration afterwards;
    asking for 5 batches must be rejected with ValueError.
    """
    def drains_after(num_batches):
        # Unexpected exceptions are deliberately not caught here: letting
        # them propagate fails the test with the real traceback instead
        # of an uninformative `assert False`.
        iterator = SequentialSubsetIterator(10, 3, num_batches)
        for _ in range(num_batches):
            iterator.next()
        try:
            iterator.next()
        except StopIteration:
            return True
        return False

    # This should be fine, we have enough examples for 4 batches
    # (with one under-sized batch).
    assert drains_after(4)

    # This should be fine, we have enough examples for 3 batches
    # (with one example to spare).
    assert drains_after(3)

    try:
        # This should fail, since you can't make 5 batches of 3 from 10.
        SequentialSubsetIterator(10, 3, 5)
    except ValueError:
        return
    assert False, 'expected ValueError for 5 batches of 3 from 10 examples'
class SparseDataset(Dataset):
    """
    Dataset backed by a sparse matrix; the dataset is its own iterator.

    Parameters
    ----------
    load_path : str, optional
        File to read with numpy.load. When None, the matrix passed as
        `from_scipy_sparse_dataset` is used instead.
    from_scipy_sparse_dataset : object, optional
        Matrix used as-is when `load_path` is None. It must expose
        `shape` and support indexing with the slices produced by
        SequentialSubsetIterator.
    zipped_npy : bool, optional
        When true, `load_path` is opened through gzip before being handed
        to numpy.load; otherwise numpy.load reads the path directly and
        `.item()` is applied to the result.
    """
    def __init__(self,
                 load_path=None,
                 from_scipy_sparse_dataset=None,
                 zipped_npy=True):

        self.load_path = load_path

        if load_path is not None:
            if zipped_npy:
                print('... loading sparse data set from a zip npy file')
                # Close the gzip handle explicitly instead of leaking it
                # until garbage collection.
                handle = gzip.open(load_path)
                try:
                    loaded = numpy.load(handle)
                finally:
                    handle.close()
                self.sparse_matrix = scipy.sparse.csr_matrix(loaded,
                                                             dtype=floatX)
            else:
                print('... loading sparse data set from a npy file')
                self.sparse_matrix = scipy.sparse.csr_matrix(
                    numpy.load(load_path).item(), dtype=floatX)
        else:
            print('... building from given sparse dataset')
            if from_scipy_sparse_dataset is None:
                # Fail with a clear message rather than an AttributeError
                # on `None.shape` a few lines further down.
                raise ValueError(
                    'either load_path or from_scipy_sparse_dataset '
                    'must be given')
            self.sparse_matrix = from_scipy_sparse_dataset

        self.data_n_rows = self.sparse_matrix.shape[0]
        self.num_examples = self.data_n_rows

    def get_design_matrix(self):
        """
        Return the underlying sparse matrix.
        """
        return self.sparse_matrix

    def get_batch_design(self, batch_size, include_labels=False):
        """
        Restart sequential iteration and return the first batch.

        `include_labels` is accepted for interface compatibility but is
        not used.
        """
        self.iterator(mode='sequential',
                      batch_size=batch_size,
                      num_batches=None,
                      topo=None)
        return self.next()

    def get_batch_topo(self, batch_size):
        """
        Not supported; always raises NotImplementedError.
        """
        raise NotImplementedError('Not implemented for sparse dataset')

    def iterator(self,
                 mode=None,
                 batch_size=None,
                 num_batches=None,
                 topo=None,
                 targets=None,
                 rng=None):
        """
        Prepare this dataset for sequential iteration and return it.

        `mode`, `batch_size` and `targets` are recorded on the instance;
        `topo` and `rng` are accepted but not used. Only the 'sequential'
        mode is supported; any other value raises NotImplementedError.
        """
        self.mode = mode
        self.batch_size = batch_size
        self._targets = targets

        if mode == 'sequential':
            self.subset_iterator = SequentialSubsetIterator(self.data_n_rows,
                                                            batch_size,
                                                            num_batches,
                                                            rng=None)
            return self
        else:
            raise NotImplementedError(
                'other iteration scheme not supported for now!')

    def __iter__(self):
        return self

    def next(self):
        """
        Return the rows of the sparse matrix selected by the next index
        from the subset iterator.

        Whatever the subset iterator raises when it is exhausted
        propagates to the caller. An out-of-range index raises IndexError
        instead of dropping into a debugger.
        """
        indx = self.subset_iterator.next()
        try:
            return self.sparse_matrix[indx]
        except IndexError:
            raise IndexError(
                'minibatch index %r goes beyond the boundary of the '
                '%d-row dataset' % (indx, self.data_n_rows))
Example #9
0
class SparseDataset(Dataset):
    """
    Dataset backed by a sparse matrix; the dataset is its own iterator.

    Parameters
    ----------
    load_path : str, optional
        File to read with numpy.load. When None, the matrix passed as
        `from_scipy_sparse_dataset` is used instead.
    from_scipy_sparse_dataset : object, optional
        Matrix used as-is when `load_path` is None. It must expose
        `shape` and support indexing with the slices produced by
        SequentialSubsetIterator.
    zipped_npy : bool, optional
        When true, `load_path` is opened through gzip before being handed
        to numpy.load; otherwise numpy.load reads the path directly and
        `.item()` is applied to the result.
    """
    def __init__(self, load_path=None, from_scipy_sparse_dataset=None, zipped_npy=True):

        self.load_path = load_path

        if load_path is not None:
            if zipped_npy:
                print('... loading sparse data set from a zip npy file')
                # Close the gzip handle explicitly instead of leaking it
                # until garbage collection.
                handle = gzip.open(load_path)
                try:
                    loaded = numpy.load(handle)
                finally:
                    handle.close()
                self.sparse_matrix = scipy.sparse.csr_matrix(
                    loaded, dtype=floatX)
            else:
                print('... loading sparse data set from a npy file')
                self.sparse_matrix = scipy.sparse.csr_matrix(
                    numpy.load(load_path).item(), dtype=floatX)
        else:
            print('... building from given sparse dataset')
            if from_scipy_sparse_dataset is None:
                # Fail with a clear message rather than an AttributeError
                # on `None.shape` a few lines further down.
                raise ValueError(
                    'either load_path or from_scipy_sparse_dataset '
                    'must be given')
            self.sparse_matrix = from_scipy_sparse_dataset

        self.data_n_rows = self.sparse_matrix.shape[0]
        self.num_examples = self.data_n_rows

    def get_design_matrix(self):
        """
        Return the underlying sparse matrix.
        """
        return self.sparse_matrix

    def get_batch_design(self, batch_size, include_labels=False):
        """
        Restart sequential iteration and return the first batch.

        `include_labels` is accepted for interface compatibility but is
        not used.
        """
        self.iterator(mode='sequential', batch_size=batch_size, num_batches=None, topo=None)
        return self.next()

    def get_batch_topo(self, batch_size):
        """
        Not supported; always raises NotImplementedError.
        """
        raise NotImplementedError('Not implemented for sparse dataset')

    def iterator(self, mode=None, batch_size=None, num_batches=None,
                 topo=None, targets=None, rng=None):
        """
        Prepare this dataset for sequential iteration and return it.

        `mode`, `batch_size` and `targets` are recorded on the instance;
        `topo` and `rng` are accepted but not used. Only the 'sequential'
        mode is supported; any other value raises NotImplementedError.
        """
        self.mode = mode
        self.batch_size = batch_size
        self._targets = targets

        if mode == 'sequential':
            self.subset_iterator = SequentialSubsetIterator(
                self.data_n_rows, batch_size, num_batches, rng=None)
            return self
        else:
            raise NotImplementedError('other iteration scheme not supported for now!')

    def __iter__(self):
        return self

    def next(self):
        """
        Return the rows of the sparse matrix selected by the next index
        from the subset iterator.

        Whatever the subset iterator raises when it is exhausted
        propagates to the caller. An out-of-range index raises IndexError
        instead of dropping into a debugger.
        """
        indx = self.subset_iterator.next()
        try:
            return self.sparse_matrix[indx]
        except IndexError:
            raise IndexError(
                'minibatch index %r goes beyond the boundary of the '
                '%d-row dataset' % (indx, self.data_n_rows))
Example #10
0
class CroppedPatchesDataset(Dataset):
    """
    Dataset of image patches read from an HDF5 file; the dataset is its
    own iterator and yields one batch per run of rows that share an
    image number.

    Parameters
    ----------
    img_shape : object
        Stored on the instance as `img_shape`.
    iter_mode : str, optional
        With "train", next() returns (patches, targets); with any other
        value it additionally returns image numbers and patch locations.
    h5_file : str
        Path of an existing HDF5 file.
    start, stop : object, optional
        Stored on the instance. If either is given and `mode` is None the
        file is opened with mode "r+".
    mode : str, optional
        File-open mode passed to tables.openFile. Defaults to "r+" when
        `start` or `stop` is given and to "r" otherwise.
    """
    def __init__(self,
                 img_shape,
                 iter_mode="fprop",
                 h5_file=None,
                 start=None,
                 stop=None,
                 mode=None):

        # Explicit assignments instead of `self.__dict__.update(locals())`,
        # which also stored a self-reference that had to be deleted again.
        self.img_shape = img_shape
        self.iter_mode = iter_mode
        self.h5_file = h5_file
        self.start = start
        self.stop = stop

        if mode is not None:
            self.mode = mode
        elif start is not None or stop is not None:
            self.mode = "r+"
        else:
            self.mode = "r"

        # The None check keeps the default h5_file=None from crashing
        # inside os.path.isfile with an unrelated TypeError.
        if h5_file is None or not os.path.isfile(h5_file):
            raise ValueError("Please enter a valid file path.")

        self.initialize_dataset(h5_file)

    def initialize_dataset(self, h5_file):
        """
        Open the HDF5 file and bind its `Data` nodes to attributes.

        Sets `X` (Data.Pt), `Y` (Data.Tgt), `imgnos` (Data.Ino), `plocs`
        (Data.Ploc) and `data_n_rows` (number of rows in `Y`).
        """
        self.h5file = tables.openFile(h5_file, mode=self.mode)
        self.dataset = self.h5file.root

        self.X = self.dataset.Data.Pt
        self.Y = self.dataset.Data.Tgt
        self.imgnos = self.dataset.Data.Ino
        self.plocs = self.dataset.Data.Ploc
        # The targets are stored in `Y`; this class never assigns a
        # `targets` attribute.
        self.data_n_rows = self.Y.shape[0]

    def set_iter_mode(self, r_mode):
        """
        Select what next() returns; see `iter_mode` in the class docstring.
        """
        self.iter_mode = r_mode

    def get_design_matrix(self):
        """
        Return the patches node (`X`).
        """
        # The patches are stored in `X`; this class never assigns a
        # `patches` attribute.
        return self.X

    def get_batch_design(self, batch_size, include_labels=False):
        """
        Restart sequential iteration and return the first batch.

        `include_labels` is accepted for interface compatibility but is
        not used.
        """
        self.iterator(mode='sequential',
                      batch_size=batch_size,
                      num_batches=None,
                      topo=None)
        return self.next()

    def get_batch_topo(self, batch_size):
        """
        Not supported; always raises NotImplementedError.
        """
        raise NotImplementedError(
            'Not implemented for cropped patches dataset')

    def iterator(self,
                 mode=None,
                 batch_size=None,
                 num_batches=None,
                 topo=None,
                 targets=None,
                 rng=None):
        """
        Prepare this dataset for sequential iteration and return it.

        `mode`, `batch_size` and `targets` are recorded on the instance
        and the resume cursor `cur_idx` is reset. Rows are always drawn
        one at a time (the subset iterator is built with batch_size=1)
        because next() groups them by image number itself. `topo` and
        `rng` are accepted but not used. Only the 'sequential' mode is
        supported; any other value raises NotImplementedError.
        """
        # NOTE(review): this overwrites the file-open mode stored in
        # `self.mode` by __init__; kept for backward compatibility.
        self.mode = mode
        self.batch_size = batch_size
        self._targets = targets
        self.cur_idx = -1

        if mode == 'sequential':
            self.subset_iterator = SequentialSubsetIterator(
                self.data_n_rows,
                batch_size=1,
                num_batches=num_batches,
                rng=None)
            return self
        else:
            raise NotImplementedError(
                'other iteration scheme not supported for now!')

    def __iter__(self):
        return self

    def next(self):
        """
        Return the next batch: all consecutive rows sharing the image
        number of the first row.

        Returns
        -------
        tuple of lists
            (patches, targets) when `iter_mode` is "train", otherwise
            (patches, targets, image numbers, patch locations).

        Raises
        ------
        StopIteration
            Propagated from the subset iterator when no rows are left.
        """
        if self.cur_idx == -1:
            indx = self.subset_iterator.next()
        else:
            # Resume from the row that ended the previous batch.
            indx = self.cur_idx
            self.cur_idx = -1

        # The subset iterator yields one-row slices; `start` is the row.
        # NOTE(review): the comparison below assumes each `imgnos` entry
        # compares as a single truth value - confirm against the file
        # layout.
        img_no = begining_img_no = self.imgnos[indx.start]

        mini_batch_patches = []
        mini_batch_plocs = []
        mini_batch_imgnos = []
        mini_batch_targets = []

        while True:
            row = indx.start
            mini_batch_patches.append(self.X[row])
            mini_batch_targets.append(self.Y[row])
            mini_batch_imgnos.append(img_no)
            mini_batch_plocs.append(self.plocs[row])

            try:
                indx = self.subset_iterator.next()
            except StopIteration:
                # Data exhausted: hand back what was collected. The next
                # call gets StopIteration from the subset iterator.
                break

            # Check the candidate row BEFORE adding it, so a row of the
            # next image is neither leaked into this batch nor skipped.
            img_no = self.imgnos[indx.start]
            if img_no != begining_img_no:
                self.cur_idx = indx
                break

        if self.iter_mode == "train":
            return (mini_batch_patches, mini_batch_targets)
        else:
            return (mini_batch_patches, mini_batch_targets, mini_batch_imgnos,
                    mini_batch_plocs)
Example #11
0
class CroppedPatchesDataset(Dataset):
    """
    Dataset of image patches read from an HDF5 file; the dataset is its
    own iterator and yields one batch per run of rows that share an
    image number.

    Parameters
    ----------
    img_shape : object
        Stored on the instance as `img_shape`.
    iter_mode : str, optional
        With "train", next() returns (patches, targets); with any other
        value it additionally returns image numbers and patch locations.
    h5_file : str
        Path of an existing HDF5 file.
    start, stop : object, optional
        Stored on the instance. If either is given and `mode` is None the
        file is opened with mode "r+".
    mode : str, optional
        File-open mode passed to tables.openFile. Defaults to "r+" when
        `start` or `stop` is given and to "r" otherwise.
    """
    def __init__(self,
                 img_shape,
                 iter_mode="fprop",
                 h5_file=None,
                 start=None,
                 stop=None,
                 mode=None):

        # Explicit assignments instead of `self.__dict__.update(locals())`,
        # which also stored a self-reference that had to be deleted again.
        self.img_shape = img_shape
        self.iter_mode = iter_mode
        self.h5_file = h5_file
        self.start = start
        self.stop = stop

        if mode is not None:
            self.mode = mode
        elif start is not None or stop is not None:
            self.mode = "r+"
        else:
            self.mode = "r"

        # The None check keeps the default h5_file=None from crashing
        # inside os.path.isfile with an unrelated TypeError.
        if h5_file is None or not os.path.isfile(h5_file):
            raise ValueError("Please enter a valid file path.")

        self.initialize_dataset(h5_file)

    def initialize_dataset(self, h5_file):
        """
        Open the HDF5 file and bind its `Data` nodes to attributes.

        Sets `X` (Data.Pt), `Y` (Data.Tgt), `imgnos` (Data.Ino), `plocs`
        (Data.Ploc) and `data_n_rows` (number of rows in `Y`).
        """
        self.h5file = tables.openFile(h5_file, mode=self.mode)
        self.dataset = self.h5file.root

        self.X = self.dataset.Data.Pt
        self.Y = self.dataset.Data.Tgt
        self.imgnos = self.dataset.Data.Ino
        self.plocs = self.dataset.Data.Ploc
        # The targets are stored in `Y`; this class never assigns a
        # `targets` attribute.
        self.data_n_rows = self.Y.shape[0]

    def set_iter_mode(self, r_mode):
        """
        Select what next() returns; see `iter_mode` in the class docstring.
        """
        self.iter_mode = r_mode

    def get_design_matrix(self):
        """
        Return the patches node (`X`).
        """
        # The patches are stored in `X`; this class never assigns a
        # `patches` attribute.
        return self.X

    def get_batch_design(self, batch_size, include_labels=False):
        """
        Restart sequential iteration and return the first batch.

        `include_labels` is accepted for interface compatibility but is
        not used.
        """
        self.iterator(mode='sequential', batch_size=batch_size, num_batches=None, topo=None)
        return self.next()

    def get_batch_topo(self, batch_size):
        """
        Not supported; always raises NotImplementedError.
        """
        raise NotImplementedError(
            'Not implemented for cropped patches dataset')

    def iterator(self,
            mode=None,
            batch_size=None,
            num_batches=None,
            topo=None,
            targets=None,
            rng=None):
        """
        Prepare this dataset for sequential iteration and return it.

        `mode`, `batch_size` and `targets` are recorded on the instance
        and the resume cursor `cur_idx` is reset. Rows are always drawn
        one at a time (the subset iterator is built with batch_size=1)
        because next() groups them by image number itself. `topo` and
        `rng` are accepted but not used. Only the 'sequential' mode is
        supported; any other value raises NotImplementedError.
        """
        # NOTE(review): this overwrites the file-open mode stored in
        # `self.mode` by __init__; kept for backward compatibility.
        self.mode = mode
        self.batch_size = batch_size
        self._targets = targets
        self.cur_idx = -1

        if mode == 'sequential':
            self.subset_iterator = SequentialSubsetIterator(
                self.data_n_rows,
                batch_size=1, num_batches=num_batches, rng=None)
            return self
        else:
            raise NotImplementedError('other iteration scheme not supported for now!')

    def __iter__(self):
        return self

    def next(self):
        """
        Return the next batch: all consecutive rows sharing the image
        number of the first row.

        Returns
        -------
        tuple of lists
            (patches, targets) when `iter_mode` is "train", otherwise
            (patches, targets, image numbers, patch locations).

        Raises
        ------
        StopIteration
            Propagated from the subset iterator when no rows are left.
        """
        if self.cur_idx == -1:
            indx = self.subset_iterator.next()
        else:
            # Resume from the row that ended the previous batch.
            indx = self.cur_idx
            self.cur_idx = -1

        # The subset iterator yields one-row slices; `start` is the row.
        # NOTE(review): the comparison below assumes each `imgnos` entry
        # compares as a single truth value - confirm against the file
        # layout.
        img_no = begining_img_no = self.imgnos[indx.start]

        mini_batch_patches = []
        mini_batch_plocs = []
        mini_batch_imgnos = []
        mini_batch_targets = []

        while True:
            row = indx.start
            mini_batch_patches.append(self.X[row])
            mini_batch_targets.append(self.Y[row])
            mini_batch_imgnos.append(img_no)
            mini_batch_plocs.append(self.plocs[row])

            try:
                indx = self.subset_iterator.next()
            except StopIteration:
                # Data exhausted: hand back what was collected. The next
                # call gets StopIteration from the subset iterator.
                break

            # Check the candidate row BEFORE adding it, so a row of the
            # next image is neither leaked into this batch nor skipped.
            img_no = self.imgnos[indx.start]
            if img_no != begining_img_no:
                self.cur_idx = indx
                break

        if self.iter_mode == "train":
            return (mini_batch_patches, mini_batch_targets)
        else:
            return (mini_batch_patches, mini_batch_targets, mini_batch_imgnos, mini_batch_plocs)