Ejemplo n.º 1
0
def get_features(path, split, standardize):
    """Load a 2-D feature matrix (examples in rows) from one or more files.

    Parameters
    ----------
    path : str
        File to load. A comma-separated list of paths is loaded one file at
        a time (recursively, with the same `split` / `standardize` flags)
        and the results are concatenated along axis 1. Paths ending in
        '.npy' are read with `np.load`; anything else goes through
        `serial.load`.
    split : bool
        If true, the matrix is concatenated along axis 1 with
        `np.abs(X)` and `np.abs(-X)` (see the review note in the body).
    standardize : bool
        Not supported: any true value raises AssertionError.

    Returns
    -------
    X : ndarray
        The 2-D matrix. Arrays with more than two dimensions are flattened
        with a DefaultViewConverter built from their trailing dimensions.

    Raises
    ------
    AssertionError
        If `standardize` is true.
    """
    if ',' in path:
        parts = [get_features(subpath, split, standardize)
                 for subpath in path.split(',')]
        return np.concatenate(parts, axis=1)

    if path.endswith('.npy'):
        topo_view = np.load(path)
    else:
        topo_view = serial.load(path)

        if 'h5py' in str(type(topo_view)):
            # The container must hold exactly one dataset; the one-element
            # unpacking fails loudly otherwise.
            name, = topo_view.keys()
            # `[()]` reads the whole dataset into memory; it replaces the
            # `.value` attribute, which newer h5py releases no longer have.
            topo_view = topo_view[name][()].T

    if len(topo_view.shape) == 2:
        X = topo_view
    else:
        view_converter = DefaultViewConverter(topo_view.shape[1:])

        print('converting data')
        X = view_converter.topo_view_to_design_mat(topo_view)

    if split:
        # NOTE(review): np.abs(X) and np.abs(-X) are identical, so this just
        # duplicates |X|. A positive/negative-part split was presumably
        # intended; behavior is left unchanged until that is confirmed.
        X = np.concatenate((np.abs(X), np.abs(-X)), axis=1)

    if standardize:
        # Standardizing with the statistics of X itself is wrong when X is
        # a test set (the train mean/std must be used), so this path stays
        # disabled. An explicit raise is used instead of `assert False`
        # because asserts are stripped when Python runs with -O.
        raise AssertionError(
            "standardize is not supported: test data must be standardized "
            "with the training set's mean and std")

    return X
Ejemplo n.º 2
0
def make_viewer(mat, grid_shape=None, patch_shape=None, activation=None, pad=None, is_color = False, rescale = True):
    """Build a PatchViewer showing the filters stored in the rows of `mat`.

    Parameters
    ----------
    mat : ndarray
        2-D array with one filter per row.
    grid_shape : tuple, optional
        Shape of the patch grid. If None, it is chosen by
        `PatchViewer.pick_shape` from the number of rows.
    patch_shape : tuple, optional
        (rows, cols) of each patch. If None, it is chosen by
        `PatchViewer.pick_shape(..., exact=True)` from the number of columns
        of `mat` divided by the number of channels.
    activation : sequence, optional
        Per-filter activation passed to `add_patch`. If its first element
        is iterable, it is treated as several sequences and the i-th entry
        of each one is passed for filter i.
    pad : optional
        Forwarded to the PatchViewer constructor.
    is_color : bool
        If true, patches have 3 channels; otherwise 1.
    rescale : bool
        Forwarded to `add_patch`.

    Returns
    -------
    rval : PatchViewer
        Viewer with one patch added per row of `mat`.
    """
    num_channels = 3 if is_color else 1

    if grid_shape is None:
        grid_shape = PatchViewer.pick_shape(mat.shape[0])
    if patch_shape is None:
        assert mat.shape[1] % num_channels == 0
        # Floor division keeps the pixel count an integer even when true
        # division is in effect (Python 3 or `from __future__ import
        # division`); the assert above guarantees it is exact.
        patch_shape = PatchViewer.pick_shape(mat.shape[1] // num_channels,
                                             exact=True)
        assert patch_shape[0] * patch_shape[1] * num_channels == mat.shape[1]

    rval = PatchViewer(grid_shape, patch_shape, pad=pad, is_color=is_color)
    topo_shape = (patch_shape[0], patch_shape[1], num_channels)
    view_converter = DefaultViewConverter(topo_shape)
    topo_view = view_converter.design_mat_to_topo_view(mat)

    for i in range(mat.shape[0]):
        if activation is None:
            act = None
        elif hasattr(activation[0], '__iter__'):
            # Several activation sequences: pick entry i from each.
            act = [a[i] for a in activation]
        else:
            act = activation[i]

        rval.add_patch(topo_view[i, :], rescale=rescale, activation=act)
    return rval
def plot(w):
    """Tile the filters in the rows of `w` into a single image.

    Filters are grouped into `nblocks` blocks of `filters_per_block`
    consecutive rows. Each block is rendered with its own viewer, blocks are
    tiled into a per-channel viewer, and the per-channel images are tiled
    into the main viewer. Relies on the module-level `model`, `opts`,
    `nplots` and `get_dims`.

    When `opts.splitblocks` is set, every block image is also saved under
    'filters/' as 'filters_chan<chan>_block<block>.png'.

    Parameters
    ----------
    w : ndarray
        2-D array with one filter per row; each row is converted with a
        DefaultViewConverter of shape (opts.height, opts.width, opts.chans).

    Returns
    -------
    ndarray
        A shallow copy of the main viewer's image.
    """
    nblocks = int(model.n_g / model.sparse_gmask.bw_g)
    filters_per_block = model.sparse_gmask.bw_g * model.sparse_hmask.bw_h

    block_viewer = PatchViewer((model.sparse_gmask.bw_g, model.sparse_hmask.bw_h),
                               (opts.height, opts.width),
                               is_color=opts.color,
                               pad=(2, 2))

    chan_viewer = PatchViewer(get_dims(nblocks),
                              (block_viewer.image.shape[0],
                               block_viewer.image.shape[1]),
                              is_color=opts.color,
                              pad=(5, 5))

    main_viewer = PatchViewer(get_dims(nplots),
                              (chan_viewer.image.shape[0],
                               chan_viewer.image.shape[1]),
                              is_color=opts.color,
                              pad=(10, 10))

    topo_shape = [opts.height, opts.width, opts.chans]
    view_converter = DefaultViewConverter(topo_shape)

    # Only create the output directory when it is missing, so that a second
    # run does not fail on the already-existing directory.
    if opts.splitblocks and not os.path.isdir('filters/'):
        os.makedirs('filters/')

    for chan_i in range(nplots):

        # In color mode all channels are shown at once; otherwise one
        # channel per plot.
        viewer_dims = slice(0, None) if opts.color else chan_i

        for bidx in range(nblocks):

            for fidx in range(filters_per_block):
                fi = bidx * filters_per_block + fidx
                topo_view = view_converter.design_mat_to_topo_view(w[fi:fi+1, :])
                # Errors propagate to the caller: the former bare `except`
                # opened pdb and then silently skipped the patch.
                block_viewer.add_patch(topo_view[0, :, :, viewer_dims])

            if opts.splitblocks:
                pl.imshow(block_viewer.image, interpolation='nearest')
                pl.axis('off')
                pl.title('Wv - block %i, chan %i' % (bidx, chan_i))
                # Arguments follow the order of the placeholders
                # (channel first, then block).
                pl.savefig('filters/filters_chan%i_block%i.png' % (chan_i, bidx))

            chan_viewer.add_patch(block_viewer.image[:, :, viewer_dims] - 0.5)
            block_viewer.clear()

        main_viewer.add_patch(chan_viewer.image[:, :, viewer_dims] - 0.5)
        chan_viewer.clear()

    return copy.copy(main_viewer.image)
Ejemplo n.º 4
0
    def set_topological_view(self, V, axes=('b', 0, 1, 'c')):
        """
        Sets the dataset to represent V, where V is a batch
        of topological views of examples.

        Rebuilds `self.view_converter`, `self.X`, `self.X_topo_space` and
        the data specs from V.

        .. todo::

            Why is this parameter named 'V'?

        Parameters
        ----------
        V : ndarray
            Batch of examples in topological form, laid out as described
            by `axes`. Must not contain NaNs.
        axes : tuple
            Axis labels of V. Must contain 0, 1 and 'c', which are used to
            look up the row, column and channel sizes; it is also forwarded
            to DefaultViewConverter.
        """
        assert not contains_nan(V)
        rows = V.shape[axes.index(0)]
        cols = V.shape[axes.index(1)]
        channels = V.shape[axes.index('c')]
        self.view_converter = DefaultViewConverter([rows, cols, channels],
                                                   axes=axes)
        self.X = self.view_converter.topo_view_to_design_mat(V)
        # self.X_topo_space stores a "default" topological space that
        # will be used only when self.iterator is called without a
        # data_specs, and with "topo=True", which is deprecated.
        self.X_topo_space = self.view_converter.topo_space
        assert not contains_nan(self.X)

        # Update data specs
        X_space = VectorSpace(dim=self.X.shape[1])
        X_source = 'features'
        if self.y is None:
            space = X_space
            source = X_source
        else:
            if self.y.ndim == 1:
                dim = 1
            else:
                dim = self.y.shape[-1]
            # This is to support old pickled models
            if getattr(self, 'y_labels', None) is not None:
                y_space = IndexSpace(dim=dim, max_labels=self.y_labels)
            elif getattr(self, 'max_labels', None) is not None:
                y_space = IndexSpace(dim=dim, max_labels=self.max_labels)
            else:
                y_space = VectorSpace(dim=dim)
            y_source = 'targets'

            # The latent source is only part of the specs when latent data
            # is present; with targets but no latents, fall back to
            # (features, targets) instead of failing on `None.shape`.
            latent = getattr(self, 'latent', None)
            if latent is None:
                space = CompositeSpace((X_space, y_space))
                source = (X_source, y_source)
            else:
                latent_space = VectorSpace(dim=latent.shape[-1])
                latent_source = 'latents'
                space = CompositeSpace((X_space, y_space, latent_space))
                source = (X_source, y_source, latent_source)

        self.data_specs = (space, source)
        self.X_space = X_space
        self._iter_data_specs = (X_space, X_source)
Ejemplo n.º 5
0
    def _transform_multi_channel_data(self, X, y):
        """Window the data and convert it to a design matrix with one-hot labels.

        The data is partitioned with `self._partition_data` using
        `self.window_size`, the last two axes of the partitioned features
        are swapped, a singleton axis is inserted at position 2, and the
        result is flattened with a DefaultViewConverter built from
        `self.sample_shape`.

        Returns
        -------
        tuple
            (design matrix, one-hot targets, the view converter used).
            A window's target is 1 if the sum of its labels is positive,
            and the summed value otherwise, before one-hot encoding with
            `self.n_classes` labels.
        """
        windows_X, windows_y = self._partition_data(
            X=X, y=y, partition_size=self.window_size)

        # Swap the last two axes, then add a length-1 axis so the batch is
        # 4-D as required by the ('b', 0, 1, 'c') layout.
        swapped = np.transpose(windows_X, [0, 2, 1])
        n_windows, size_1, size_2 = swapped.shape
        topo_X = np.reshape(swapped, (n_windows, size_1, 1, size_2))

        converter = DefaultViewConverter(shape=self.sample_shape,
                                         axes=('b', 0, 1, 'c'))

        # Flatten to a design matrix and check the conversion round-trips.
        design_X = converter.topo_view_to_design_mat(topo_X)
        restored = converter.design_mat_to_topo_view(design_X)
        assert np.all(topo_X == restored)

        # Collapse each window's labels to a single value, clipped to 1.
        window_labels = np.sum(windows_y, axis=1)
        window_labels[window_labels > 0] = 1
        formatter = OneHotFormatter(max_labels=self.n_classes)
        one_hot_y = formatter.format(window_labels)

        return design_X, one_hot_y, converter
Ejemplo n.º 6
0
    def set_topological_view(self, topo_view, axes=('b', 0, 1, 'c')):
        '''
        Make the dataset represent `topo_view`, a batch of examples in
        topological form.

        Parameters
        ----------
        topo_view : ndarray
            Batch of topological views laid out as described by `axes`.
            Must not contain NaNs.
        axes : tuple
            Axis labels of `topo_view`; must contain 'b', 0, 1 and 'c'.
            The 'b' axis is treated as the frame axis.

        Notes
        -----
        Targets are not supported: an AssertionError is raised when
        `self.y` is not None (after `self.X` has already been replaced).
        '''
        assert not np.isnan(topo_view).any()

        shape = topo_view.shape
        # pretend frames come in as batch dim
        frames = shape[axes.index('b')]
        rows = shape[axes.index(0)]
        cols = shape[axes.index(1)]
        channels = shape[axes.index('c')]

        # The converter only knows about rows/cols/channels; frames are
        # left out of its shape.
        converter = DefaultViewConverter([rows, cols, channels], axes=axes)
        self.view_converter = converter

        self.X = converter.topo_view_to_design_mat(topo_view)
        # Default topological space, used only when self.iterator is called
        # without data_specs and with the deprecated "topo=True".
        self.X_topo_space = converter.topo_space
        assert not np.isnan(self.X).any()

        # Data specs: a single 'features' source whose dimension covers
        # frames as well as the spatial/channel sizes.
        features_space = VectorSpace(dim=frames * rows * cols * channels)
        features_source = 'features'

        assert self.y is None, 'y not supported now'

        self.data_specs = (features_space, features_source)
        self.X_space = features_space
        self._iter_data_specs = (features_space, features_source)
Ejemplo n.º 7
0
    def set_topological_view(self, V, axes=('b', 0, 1, 'c'), start=0):
        """
        Convert a batch of topological views to a design matrix and write
        it into the dataset's HDF5 file.

        Parameters
        ----------
        V : ndarray
            Batch of examples in topological form, laid out as described
            by `axes`. Must not contain NaNs.
        axes : tuple
            Axis labels of V; must contain 0, 1 and 'c'.
        start : int
            Forwarded to `FaceBBoxDDMPytables.fill_hdf5` as `start`
            (presumably the first row to write -- confirm against
            `fill_hdf5`).

        TODO: why is this parameter named 'V'?
        """
        assert not numpy.isnan(V).any()

        dims = V.shape
        topo_shape = [dims[axes.index(0)],
                      dims[axes.index(1)],
                      dims[axes.index('c')]]
        converter = DefaultViewConverter(topo_shape, axes=axes)
        self.view_converter = converter

        design_mat = converter.topo_view_to_design_mat(V)
        assert not numpy.isnan(design_mat).any()

        FaceBBoxDDMPytables.fill_hdf5(h5file=self.h5file,
                                      data_x=design_mat,
                                      start=start)
Ejemplo n.º 8
0
def find_adversary(model,
                   X0,
                   label,
                   P0=None,
                   mu=.1,
                   epsilon=.25,
                   maxits=10,
                   stop_thresh=0.5,
                   griffin_lim=False):
    '''
    Solves:

    y* = argmin_y f(y; label)
    s.t. y >= 0 and ||y-X0|| < e

    where f(y) is the cost the network associates with the pair (y,label)

    This can be solved using the projected gradient method:

    min_y f(y)
    s.t. y >= 0 and ||y-X0|| < e

    z = max(0, y^k - mu.f'(y^k))
    y^k+1 = P(z)

    P(z) = min_u ||u-z|| s.t. {u | ||u-X0|| < e }
    Lagrangian(u,l) = L(u,l) = ||u-z|| + nu*(||u-X0|| - e)
    dL/du = u-z + nu*(u-X0) = 0
    u = (1+nu)^-1 (z + nu*X0)

    KKT:
    ||u-x|| = e
    ||(1/(1+nu))(z + nu*x) - x|| = e
    ||(1/(1+nu))z + ((nu/(1+nu))-1)x|| = e
    ||(1/(1+nu))z - (1/(1+nu))x|| = e
    (1/(1+nu))||z-x|| = e
    nu = max(0,||z-x||/e - 1)

    Implementation notes: the gradient step uses the sign of the gradient
    (Z = Y - mu * sign(g)), and the norm in the multiplier is divided by
    the number of rows of the original X0 before comparing with epsilon.

    function inputs:

    model - pylearn2 dnn model (implements fprop, cost)
    X0 - an example that the model classifies correctly (2-D, frames in rows)
    label - an incorrect label (integer column index of the one-hot target)
    P0 - passed to and updated by griffin_lim_proj when griffin_lim is set;
         truncated to the same rows as X0 when given
    mu - step size of the signed-gradient step
    epsilon - radius used in the projection back towards X0
    maxits - number of iterations (always run in full)
    stop_thresh - currently unused; no early-stopping test is performed
    griffin_lim - if true, apply griffin_lim_proj to each iterate

    returns: (Y, P0) - the final iterate and the (possibly updated) P0
    '''
    n_classes, n_examples = model.get_output_space().dim, X0.shape[0]
    nfft = 2 * (X0.shape[1] - 1)

    # Set-up gradient computation w/ Theano. Only the gradient is compiled:
    # the forward-pass and cost functions were never called.
    in_batch = model.get_input_space().make_theano_batch()
    out_batch = model.get_output_space().make_theano_batch()
    cost = model.cost(out_batch, model.fprop(in_batch))
    dCost = T.grad(cost * n_examples, in_batch)
    grad_theano = theano.function([in_batch, out_batch], dCost)

    input_space = model.get_input_space()
    if isinstance(input_space, Conv2DSpace):
        tframes, dim = input_space.shape
        view_converter = DefaultViewConverter((tframes, dim, 1))
    else:
        tframes = 1
        view_converter = None

    nframes = X0.shape[0]
    thop = 1.
    # Start row of every window of `tframes` rows. Built-in int replaces
    # the `np.int` alias, which recent NumPy releases no longer provide.
    sup = np.arange(0, nframes - tframes + 1, int(tframes / thop))

    if view_converter is not None:

        def grad(batch, labels):
            # Flatten each window of tframes rows into one design-matrix
            # row, format it for the model's input space, and map the
            # gradient back to the (rows, dim) layout.
            data = np.vstack([
                np.reshape(batch[i:i + tframes, :], (tframes * dim, ))
                for i in sup
            ])
            topo_view = grad_theano(
                view_converter.get_formatted_batch(data, input_space), labels)
            design_mat = view_converter.topo_view_to_design_mat(topo_view)
            return np.vstack(
                [np.reshape(r, (tframes, dim)) for r in design_mat])
    else:
        grad = grad_theano

    # one-hot target for every window
    one_hot = np.zeros((len(sup), n_classes), dtype=np.float32)
    one_hot[:, label] = 1

    # Drop trailing rows that do not fill a whole window.
    X0 = X0[:len(sup) * tframes, :]
    if P0 is not None:
        P0 = P0[:len(sup) * tframes, :]

    # projected gradient:
    Y = np.copy(X0)
    for _ in range(maxits):

        # gradient step
        g = grad(Y, one_hot)
        Z = Y - mu * np.sign(g)

        # non-negative projection
        Z = Z * (Z > 0)

        if griffin_lim:
            # Append the mirrored interior columns before projecting. Floor
            # division keeps the slice bound an integer under true
            # division (nfft is even, so the value is unchanged).
            Z, P0 = griffin_lim_proj(np.hstack((Z, Z[:, -2:-nfft // 2 - 1:-1])),
                                     P0,
                                     its=0)

        # maximum allowable signal-to-noise projection
        nu = np.linalg.norm(
            (Z - X0)) / n_examples / epsilon - 1  # lagrange multiplier
        nu = nu * (nu >= 0)
        Y = (Z + nu * X0) / (1 + nu)

    return Y, P0
Ejemplo n.º 9
0
    def _execute(self):
        """Extract pooled features for a dataset and save them to disk.

        Steps, in order:

        1. Build the dataset described by
           `self.dataset_family[self.which_set][self.size]`, check its
           design matrix is float32 with the expected number of examples,
           and keep only the rows where `dataset.y_fine < 2`.
        2. Load the preprocessing pipeline and replace its first item (which
           must be an ExtractPatches) with an ExtractGridPatches of stride
           (1, 1) and patch shape (size, size).
        3. Build the Theano feature expression from `self.model.infer`.
           Only the 'exp_h,exp_g' feature type is usable; every other
           recognised type raises NotImplementedError.
        4. For every full batch, preprocess, compute features, reassemble
           them into a topological view and pool them (mean or max,
           according to `self.pool_mode`) into count x count regions for
           each count in `self.pooling_region_counts`.
        5. Save each pooled output with `np.save` to the matching entry of
           `self.save_paths`, and warn if any NaN features were zeroed.

        Only whole batches are processed: rows beyond the last full batch
        keep the zeros the output arrays were initialised with.

        Raises
        ------
        NotImplementedError
            For unsupported feature types or models with more than one
            entry in 'G_hat'.
        ValueError
            If `self.pool_mode` is neither 'mean' nor 'max'.
        """

        batch_size = self.batch_size
        feature_type = self.feature_type
        pooling_region_counts = self.pooling_region_counts
        dataset_family = self.dataset_family
        which_set = self.which_set
        model = self.model
        size = self.size

        # running count of NaN feature values replaced by 0
        nan = 0

        dataset_descriptor = dataset_family[which_set][size]

        dataset = dataset_descriptor.dataset_maker()
        expected_num_examples = dataset_descriptor.num_examples

        full_X = dataset.get_design_matrix()
        assert full_X.dtype == 'float32'
        num_examples = full_X.shape[0]
        assert num_examples == expected_num_examples

        print 'restricting to examples from classes 0 and 1'
        full_X = full_X[dataset.y_fine < 2, :]

        #update for after restriction
        num_examples = full_X.shape[0]

        assert num_examples > 0

        # The dataset object is only used as a template that is shallow-copied
        # per batch below, so its own data is dropped here.
        dataset.X = None
        dataset.design_loc = None
        dataset.compress = False

        patchifier = ExtractGridPatches(patch_shape=(size, size),
                                        patch_stride=(1, 1))

        pipeline = serial.load(dataset_descriptor.pipeline_path)

        # Swap the pipeline's patch extraction for dense grid extraction.
        assert isinstance(pipeline.items[0], ExtractPatches)
        pipeline.items[0] = patchifier

        print 'defining features'
        V = T.matrix('V')
        assert V.type.dtype == 'float32'
        model.make_pseudoparams()
        d = model.infer(V=V)

        H = d['H_hat']
        Mu1 = d['S_hat']
        G = d['G_hat']
        if len(G) != 1:
            raise NotImplementedError(
                "only supports two layer pd-dbms for now")
        G, = G

        assert H.dtype == 'float32'
        assert Mu1.dtype == 'float32'

        nfeat = model.s3c.nhid + model.dbm.rbms[0].nhid

        # Every branch except 'exp_h,exp_g' builds its expression and then
        # raises, so those feature types are effectively disabled.
        if self.feature_type == 'map_hs':
            feat = (H > 0.5) * Mu1
            raise NotImplementedError("doesn't support layer 2")
        elif self.feature_type == 'map_h':
            feat = T.cast(H > 0.5, dtype='float32')
            raise NotImplementedError("doesn't support layer 2")
        elif self.feature_type == 'exp_hs':
            feat = H * Mu1
            raise NotImplementedError("doesn't support layer 2")
        elif self.feature_type == 'exp_hs_split':
            Z = H * Mu1
            pos = T.clip(Z, 0., 1e32)
            neg = T.clip(-Z, 0, 1e32)
            feat = T.concatenate((pos, neg), axis=1)
            nfeat *= 2
            raise NotImplementedError("doesn't support layer 2")
        elif self.feature_type == 'exp_h,exp_g':
            feat = T.concatenate((H, G), axis=1)
        elif self.feature_type == 'exp_h_thresh':
            feat = H * (H > .01)
            raise NotImplementedError("doesn't support layer 2")
        else:
            raise NotImplementedError()

        assert feat.dtype == 'float32'
        print 'compiling theano function'
        f = function([V], feat)

        # On GPU with many features, wrap the function with `halver`
        # (defined elsewhere).
        if config.device.startswith('gpu') and nfeat >= 4000:
            f = halver(f, nfeat)

        # Compiled pooling over axes 1 and 2 of a 4-D float32 tensor.
        topo_feat_var = T.TensorType(broadcastable=(False, False, False,
                                                    False),
                                     dtype='float32')()
        if self.pool_mode == 'mean':
            region_feat_var = topo_feat_var.mean(axis=(1, 2))
        elif self.pool_mode == 'max':
            region_feat_var = topo_feat_var.max(axis=(1, 2))
        else:
            raise ValueError("Unknown pool mode: " + self.pool_mode)
        region_features = function([topo_feat_var], region_feat_var)

        def average_pool(stride):
            # Pools the current batch's `topo_feat` into a stride x stride
            # grid of regions. `topo_feat` and `ns` are read from the
            # enclosing scope and are only assigned further down, so this
            # must not be called before the batch loop sets them.
            def point(p):
                # Boundary of region p along an axis of length ns
                # (integer division under Python 2).
                return p * ns / stride

            rval = np.zeros(
                (topo_feat.shape[0], stride, stride, topo_feat.shape[3]),
                dtype='float32')

            for i in xrange(stride):
                for j in xrange(stride):
                    rval[:, i, j, :] = region_features(
                        topo_feat[:,
                                  point(i):point(i + 1),
                                  point(j):point(j + 1), :])

            return rval

        # One zero-initialised output array per pooling resolution.
        outputs = [
            np.zeros((num_examples, count, count, nfeat), dtype='float32')
            for count in pooling_region_counts
        ]

        assert len(outputs) > 0

        # Template dataset for the extracted features; copied per batch.
        fd = DenseDesignMatrix(X=np.zeros((1, 1), dtype='float32'),
                               view_converter=DefaultViewConverter(
                                   [1, 1, nfeat]))

        # NOTE(review): 32 is hard-coded; presumably the side length of the
        # input images, making ns the number of patch positions per axis --
        # confirm against the dataset.
        ns = 32 - size + 1
        depatchifier = ReassembleGridPatches(orig_shape=(ns, ns),
                                             patch_shape=(1, 1))

        # Diagnostic output when there is not even one full batch.
        if len(range(0, num_examples - batch_size + 1, batch_size)) <= 0:
            print num_examples
            print batch_size

        # Iterates over full batches only; a trailing partial batch is skipped.
        for i in xrange(0, num_examples - batch_size + 1, batch_size):
            print i
            t1 = time.time()

            d = copy.copy(dataset)
            d.set_design_matrix(full_X[i:i + batch_size, :])

            t2 = time.time()

            #print '\tapplying preprocessor'
            d.apply_preprocessor(pipeline, can_fit=False)
            X2 = np.cast['float32'](d.get_design_matrix())

            t3 = time.time()

            #print '\trunning theano function'
            feat = f(X2)

            t4 = time.time()

            assert feat.dtype == 'float32'

            feat_dataset = copy.copy(fd)

            # Count NaN features and replace them with 0.
            if np.any(np.isnan(feat)):
                nan += np.isnan(feat).sum()
                feat[np.isnan(feat)] = 0

            feat_dataset.set_design_matrix(feat)

            #print '\treassembling features'
            feat_dataset.apply_preprocessor(depatchifier)

            #print '\tmaking topological view'
            topo_feat = feat_dataset.get_topological_view()
            assert topo_feat.shape[0] == batch_size

            t5 = time.time()

            #average pooling
            for output, count in zip(outputs, pooling_region_counts):
                output[i:i + batch_size, ...] = average_pool(count)

            t6 = time.time()

            # Timings: total, then each stage in order.
            print(t6 - t1, t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5)

        for output, save_path in zip(outputs, self.save_paths):
            np.save(save_path, output)

        if nan > 0:
            warnings.warn(str(nan) + ' features were nan')
Ejemplo n.º 10
0
# Remaining command-line options: number of channels, color mode, and
# which layer to inspect (default 0).
parser.add_option('--channels',  action='store', type='int', dest='chans')
parser.add_option('--color', action='store_true',  dest='color', default=False)
parser.add_option('--layer', action='store',  type='int', dest='layer', default=0)
(opts, args) = parser.parse_args()

# One plot per channel, unless the channels are rendered together as a
# single color image (which requires exactly 3 channels).
nplots = opts.chans
if opts.color:
    assert opts.chans == 3
    nplots = 1

def get_dims(nf):
    """Return a near-square (rows, cols) grid with at least `nf` cells.

    The row count is floor(sqrt(nf)); the column count is the smallest
    integer such that rows * cols >= nf.
    """
    rows = numpy.floor(numpy.sqrt(nf))
    cols = numpy.ceil(nf / rows)
    return (int(rows), int(cols))

# Converter between flat filter rows and (height, width, channels) patches.
topo_shape = [opts.height, opts.width, opts.chans]
viewconv = DefaultViewConverter(topo_shape)
# Channel selector: all channels in color mode, channel 0 otherwise.
viewdims = slice(0, None) if opts.color else 0

# load model and retrieve parameters
model = serial.load(opts.path)
# For a TemperedDBN, inspect the RBM at the requested layer; otherwise the
# loaded model is used directly.
if isinstance(model, TemperedDBN):
    rbm = model.rbms[opts.layer]
else:
    rbm = model

# Transposed so that wv is indexed by filter along its first axis.
wv = rbm.Wv.get_value().T
wv_viewer = PatchViewer(get_dims(len(wv)), (opts.height, opts.width),
                        is_color = opts.color, pad=(2,2))
# Convert each row to a topological patch and add it to the viewer.
for i in xrange(len(wv)):
    topo_wvi = viewconv.design_mat_to_topo_view(wv[i:i+1])
    wv_viewer.add_patch(topo_wvi[0])
Ejemplo n.º 11
0
# Remaining command-line options: --top (int, default 5), --mu (scale the
# filters by the model's mu) and --wv_only.
parser.add_option('--top', action='store', type='int', dest='top', default=5)
parser.add_option('--mu', action='store_true',  dest='mu', default=False)
parser.add_option('--wv_only', action='store_true', dest='wv_only', default=False)
(opts, args) = parser.parse_args()

# One plot per channel, unless the channels are rendered together as a
# single color image (which requires exactly 3 channels).
nplots = opts.chans
if opts.color:
    assert opts.chans == 3
    nplots = 1

def get_dims(nf):
    """Pick a near-square (rows, cols) grid able to hold `nf` items.

    Rows is floor(sqrt(nf)); cols is ceil(nf / rows).
    """
    n_rows = numpy.floor(numpy.sqrt(nf))
    n_cols = numpy.ceil(nf / n_rows)
    return (int(n_rows), int(n_cols))

# Converter between flat filter rows and (height, width, channels) patches.
topo_shape = [opts.height, opts.width, opts.chans]
viewconv = DefaultViewConverter(topo_shape)
# Channel selector: all channels in color mode, channel 0 otherwise.
viewdims = slice(0, None) if opts.color else 0

# load model and retrieve parameters
model = serial.load(opts.path)
# Transposed so that wv is indexed by filter along its first axis.
wv = model.Wv.get_value().T
if opts.mu:
    # Scale every filter row by the matching entry of the model's mu.
    wv = wv * model.mu.get_value()[:, None]

view1 = PatchViewer(get_dims(len(wv)), (opts.height, opts.width), is_color = opts.color, pad=(2,2))
# NOTE(review): only the first 48*48 columns of each filter are shown; the
# constant is hard-coded and presumably has to match
# opts.height * opts.width * opts.chans -- confirm.
for i in xrange(len(wv)):
    topo_wvi = viewconv.design_mat_to_topo_view(wv[i:i+1, :48*48])
    view1.add_patch(topo_wvi[0])

view2 = PatchViewer(get_dims(len(wv)), (opts.height, opts.width), is_color = opts.color, pad=(2,2))
for i in xrange(len(wv)):
Ejemplo n.º 12
0
# Largest number of filters found in any element of W; it sizes the
# per-block viewer grid.
max_filters = max([len(Wi) for Wi in W])
print 'max_filters = ', max_filters

# Viewer for the filters of one block.
block_viewer = PatchViewer(get_dims(max_filters),
                           (opts.height, opts.width), 
                           is_color = opts.color,
                           pad=(2,2))

# Viewer that tiles whole block images, one cell per block.
main_viewer = PatchViewer(get_dims(nblocks), 
                          (block_viewer.image.shape[0],
                           block_viewer.image.shape[1]),
                          is_color = opts.color, 
                          pad=(5,5))

# Converter between flat filter rows and (height, width, channels) patches.
topo_shape = [opts.height, opts.width, opts.chans]
view_converter = DefaultViewConverter(topo_shape)

for di, w_di in enumerate(W):

    if opts.k == -1:
        # build "new_w" as linear combination of all previous filters
        if di > 0:
            new_w = numpy.dot(w_di, prev_w)
        else: 
            new_w = w_di
    else:
        new_w = numpy.zeros((len(w_di), opts.height * opts.width)) if di else w_di

    for fi in xrange(len(w_di)):

        if opts.k != -1:
Ejemplo n.º 13
0
    seglen = 30
    x = x[:seglen * fs]

    # make sure format agrees with training data
    if len(x.shape) != 1:
        print 'making mono:'
        x = np.sum(x, axis=1) / 2.  # mono
    if fs != 22050:
        print 'resampling to 22050 hz:'
        import scikits.samplerate as samplerate
        x = samplerate.resample(x, 22050. / fs, 'sinc_best')
        fs = 22050

    if isinstance(input_space, Conv2DSpace):
        tframes, dim = input_space.shape
        view_converter = DefaultViewConverter((tframes, dim, 1))
    else:
        dim = input_space.dim
        tframes = 1
        view_converter = None

    nfft = 2 * (dim - 1)
    nhop = nfft // 2
    nframes = (len(x) - nfft) / nhop
    x = x[:(nframes - 1) * nhop +
          nfft]  # truncate input to multiple of hopsize

    # format batches for 1d/2d nets
    thop = 1.
    sup = np.arange(0, nframes - tframes + 1, np.int(tframes / thop))
    if view_converter:
Ejemplo n.º 14
0
    def __init__(self, config, adv_model, which_set='train'):
        """Open the HDF5 data, load partition statistics and set up the adversary.

        Parameters
        ----------
        config : mapping
            Must provide 'hdf5' (path of the HDF5 file), `which_set`,
            `which_set + '_files'`, 'mean', 'var' and 'tframes'.
        adv_model : model
            Model providing get_input_space, get_output_space, fprop and
            cost; its gradient, forward pass and cost are compiled with
            Theano and exposed as self.grad, self.fprop and self.fcost.
        which_set : str
            One of 'train', 'test' or 'valid'.

        The HDF5 file is opened read-only and kept open on `self.hdf5`.
        """
        assert which_set in ['train', 'test', 'valid']

        # load hdf5 metadata
        self.hdf5 = tables.open_file(config['hdf5'], mode='r')
        data = self.hdf5.get_node('/', 'Data')
        param = self.hdf5.get_node('/', 'Param')
        self.file_index = param.file_index[0]
        self.file_dict = param.file_dict[0]
        self.label_list = param.label_list[0]
        self.targets = param.targets[0]
        self.nfft = param.fft[0]['nfft']

        # load partition information
        self.support = config[which_set]
        self.file_list = config[which_set + '_files']
        # mean and variance are stored flattened to 1-D
        self.mean = config['mean']
        self.mean = self.mean.reshape((np.prod(self.mean.shape), ))
        self.var = config['var']
        self.var = self.var.reshape((np.prod(self.var.shape), ))
        self.istd = np.reciprocal(np.sqrt(self.var))
        self.mask = (self.istd < 20)
        self.tframes = config['tframes']

        # setup adversary: symbolic cost and its gradient w.r.t. the input
        self.adv_model = adv_model
        in_batch = adv_model.get_input_space().make_theano_batch()
        out_batch = adv_model.get_output_space().make_theano_batch()
        cost = adv_model.cost(out_batch, adv_model.fprop(in_batch))
        dCost = T.grad(cost, in_batch)

        grad_theano = theano.function([in_batch, out_batch], dCost)
        fprop_theano = theano.function([in_batch], adv_model.fprop(in_batch))
        fcost_theano = theano.function([in_batch, out_batch], cost)

        self.input_space = adv_model.get_input_space()

        if not isinstance(self.input_space, Conv2DSpace):
            # Non-convolutional input: the compiled functions take design
            # matrices directly and no view converter is needed.
            dim = self.input_space.dim
            tframes = 1
            view_converter = None

            self.grad = grad_theano
            self.fprop = fprop_theano
            self.fcost = fcost_theano

            super(AdversaryDataset, self).__init__(X=data.X, y=data.y)
            return

        # Convolutional input: wrap the compiled functions so that callers
        # can keep passing design matrices.
        tframes, dim = self.input_space.shape
        view_converter = DefaultViewConverter((tframes, dim, 1))

        def to_input(batch):
            # Format a design-matrix batch for the model's input space.
            return view_converter.get_formatted_batch(batch,
                                                      self.input_space)

        def grad(batch, labels):
            topo_view = grad_theano(to_input(batch), labels)
            return view_converter.topo_view_to_design_mat(topo_view)

        def fprop(batch):
            return fprop_theano(to_input(batch))

        def fcost(batch, labels):
            return fcost_theano(to_input(batch), labels)

        self.grad = grad
        self.fprop = fprop
        self.fcost = fcost

        super(AdversaryDataset,
              self).__init__(X=data.X,
                             y=data.y,
                             view_converter=view_converter)
Ejemplo n.º 15
0
# Remaining command-line options: --top (int, default 5), --mu (scale the
# filters by the model's mu) and --wv_only (show only the Wv viewer).
parser.add_option('--top', action='store', type='int', dest='top', default=5)
parser.add_option('--mu', action='store_true',  dest='mu', default=False)
parser.add_option('--wv_only', action='store_true', dest='wv_only', default=False)
(opts, args) = parser.parse_args()

# One plot per channel, unless the channels are rendered together as a
# single color image (which requires exactly 3 channels).
nplots = opts.chans
if opts.color:
    assert opts.chans == 3
    nplots = 1

def get_dims(nf):
    """Compute a near-square (rows, cols) layout with room for `nf` items.

    Rows is floor(sqrt(nf)); cols is ceil(nf / rows).
    """
    row_count = numpy.floor(numpy.sqrt(nf))
    col_count = numpy.ceil(nf / row_count)
    return (int(row_count), int(col_count))

# Converter between flat filter rows and (height, width, channels) patches.
topo_shape = [opts.height, opts.width, opts.chans]
viewconv = DefaultViewConverter(topo_shape)
# Channel selector: all channels in color mode, channel 0 otherwise.
viewdims = slice(0, None) if opts.color else 0

# load model and retrieve parameters
model = serial.load(opts.path)
# Transposed so that wv is indexed by filter along its first axis.
wv = model.Wv.get_value().T
if opts.mu:
    # Scale every filter row by the matching entry of the model's mu.
    wv = wv * model.mu.get_value()[:, None]

wv_viewer = PatchViewer(get_dims(len(wv)), (opts.height, opts.width),
                        is_color = opts.color, pad=(2,2))
# Convert each row to a topological patch and add it to the viewer.
for i in xrange(len(wv)):
    topo_wvi = viewconv.design_mat_to_topo_view(wv[i:i+1])
    wv_viewer.add_patch(topo_wvi[0])
# With --wv_only, display the filter viewer right away.
if opts.wv_only:
    wv_viewer.show()
Ejemplo n.º 16
0
class myDenseDesignMatrix(dense_design_matrix.DenseDesignMatrix):
    """
    A DenseDesignMatrix that stores a per-example `latent` array next to
    the features `X` and the targets `y`.

    When both `y` and `latent` are present the data specs expose three
    sources: ('features', 'targets', 'latents'). When `y` is given without
    `latent`, the dataset falls back to the two-source
    ('features', 'targets') layout instead of failing on `None.shape`.
    """

    _default_seed = (17, 2, 946)

    def __init__(self, X=None, topo_view=None, y=None, latent = None,
                 view_converter=None, axes=('b', 0, 1, 'c'),
                 rng=_default_seed, preprocessor=None, fit_preprocessor=False,
                 X_labels=None, y_labels=None):
        """
        Parameters
        ----------
        X : ndarray, optional
            Design matrix. Required when `topo_view` is not given.
        topo_view : ndarray, optional
            Batch of topological views; converted to a design matrix with
            a DefaultViewConverter. Mutually exclusive with
            `view_converter`.
        y : ndarray, optional
            Targets, one entry/row per example.
        latent : ndarray, optional
            Latent values, one row per example. Exposed as the 'latents'
            source when `y` is also given.
        view_converter : object, optional
            Converter exposing a `topo_space` attribute.
        axes : tuple
            Axis order of `topo_view`.
        rng : object
            Seed or generator handed to `make_np_rng`.
        preprocessor : object, optional
            If given, `preprocessor.apply(self, can_fit=fit_preprocessor)`
            is called at the end of construction.
        fit_preprocessor : bool
            Forwarded to the preprocessor as `can_fit`.
        X_labels : int, optional
            If given, `X` is described by an IndexSpace with this
            `max_labels` instead of a VectorSpace.
        y_labels : int, optional
            If given, `y` is described by an IndexSpace with this
            `max_labels` instead of a VectorSpace.
        """
        self.latent = latent
        self.X = X
        self.y = y
        self.view_converter = view_converter
        self.X_labels = X_labels
        self.y_labels = y_labels

        self._check_labels()

        if topo_view is not None:
            assert view_converter is None
            self.set_topological_view(topo_view, axes)
        else:
            assert X is not None, ("DenseDesignMatrix needs to be provided "
                                   "with either topo_view, or X")
            if view_converter is not None:

                # Get the topo_space (usually Conv2DSpace) from the
                # view_converter
                if not hasattr(view_converter, 'topo_space'):
                    raise NotImplementedError("Not able to get a topo_space "
                                              "from this converter: %s"
                                              % view_converter)

                # self.X_topo_space stores a "default" topological space that
                # will be used only when self.iterator is called without a
                # data_specs, and with "topo=True", which is deprecated.
                self.X_topo_space = view_converter.topo_space
            else:
                self.X_topo_space = None

            # Update data specs, if not done in set_topological_view
            X_source = 'features'
            if X_labels is None:
                X_space = VectorSpace(dim=X.shape[1])
            else:
                if X.ndim == 1:
                    dim = 1
                else:
                    dim = X.shape[-1]
                X_space = IndexSpace(dim=dim, max_labels=X_labels)

            if y is None:
                space = X_space
                source = X_source
            else:
                if y.ndim == 1:
                    dim = 1
                else:
                    dim = y.shape[-1]
                if y_labels is not None:
                    y_space = IndexSpace(dim=dim, max_labels=y_labels)
                else:
                    y_space = VectorSpace(dim=dim)
                y_source = 'targets'

                space, source = self._labelled_specs(X_space, X_source,
                                                     y_space, y_source)

            self.data_specs = (space, source)
            self.X_space = X_space

        self.compress = False
        self.design_loc = None
        self.rng = make_np_rng(rng, which_method="random_integers")
        # Defaults for iterators
        self._iter_mode = resolve_iterator_class('sequential')
        self._iter_topo = False
        self._iter_targets = False
        self._iter_data_specs = (self.X_space, 'features')

        if preprocessor:
            preprocessor.apply(self, can_fit=fit_preprocessor)
        self.preprocessor = preprocessor

    def _labelled_specs(self, X_space, X_source, y_space, y_source):
        """
        Build the (space, source) pair for a dataset that has targets.

        The 'latents' component is appended only when `self.latent` is
        set, so a dataset built with `y` but no `latent` stays usable.
        """
        spaces = [X_space, y_space]
        sources = [X_source, y_source]
        if self.latent is not None:
            spaces.append(VectorSpace(dim=self.latent.shape[-1]))
            sources.append('latents')
        return CompositeSpace(tuple(spaces)), tuple(sources)

    def get_data(self):
        """
        Returns all the data, as it is internally stored.
        The definition and format of these data are described in
        `self.get_data_specs()`.

        Returns
        -------
        data : numpy matrix or tuple of matrices
            `X` alone when there are no targets, `(X, y, latent)` when
            targets and latents are present, and `(X, y)` when targets
            are present but `latent` is None.
        """
        if self.y is None:
            return self.X
        if self.latent is None:
            # Mirrors the two-component data specs built for this case.
            return (self.X, self.y)
        return (self.X, self.y, self.latent)

    def set_topological_view(self, V, axes=('b', 0, 1, 'c')):
        """
        Sets the dataset to represent V, where V is a batch
        of topological views of examples.

        Rebuilds `self.view_converter`, `self.X`, `self.X_topo_space`,
        `self.data_specs`, `self.X_space` and `self._iter_data_specs`.

        Parameters
        ----------
        V : ndarray
            A batch of topological views whose axes are laid out as
            described by `axes`. Must not contain NaN.
        axes : tuple
            Axis order of `V`; must contain 0, 1 and 'c'.
        """
        assert not contains_nan(V)
        rows = V.shape[axes.index(0)]
        cols = V.shape[axes.index(1)]
        channels = V.shape[axes.index('c')]
        self.view_converter = DefaultViewConverter([rows, cols, channels],
                                                   axes=axes)
        self.X = self.view_converter.topo_view_to_design_mat(V)
        # self.X_topo_space stores a "default" topological space that
        # will be used only when self.iterator is called without a
        # data_specs, and with "topo=True", which is deprecated.
        self.X_topo_space = self.view_converter.topo_space
        assert not contains_nan(self.X)

        # Update data specs
        X_space = VectorSpace(dim=self.X.shape[1])
        X_source = 'features'
        if self.y is None:
            space = X_space
            source = X_source
        else:
            if self.y.ndim == 1:
                dim = 1
            else:
                dim = self.y.shape[-1]
            # This is to support old pickled models
            if getattr(self, 'y_labels', None) is not None:
                y_space = IndexSpace(dim=dim, max_labels=self.y_labels)
            elif getattr(self, 'max_labels', None) is not None:
                y_space = IndexSpace(dim=dim, max_labels=self.max_labels)
            else:
                y_space = VectorSpace(dim=dim)
            y_source = 'targets'

            space, source = self._labelled_specs(X_space, X_source,
                                                 y_space, y_source)

        self.data_specs = (space, source)
        self.X_space = X_space
        self._iter_data_specs = (X_space, X_source)

    def get_targets(self):
        """
        Returns the targets `y` as stored (may be None).
        """
        return self.y

    def get_latents(self):
        """
        Returns the latent array as stored (may be None).
        """
        return self.latent

    def get_batch_design(self, batch_size, include_labels=False):
        """
        Returns a random contiguous slice of `batch_size` rows.

        Parameters
        ----------
        batch_size : int
            Number of consecutive examples to return.
        include_labels : bool
            If True, also return the matching targets and latents.

        Returns
        -------
        batch : ndarray or tuple
            Without labels: the feature rows cast to `config.floatX`.
            With labels: `(rx, None)` when the dataset has no targets,
            otherwise `(rx, ry, rlatent)`, where `rlatent` is None if
            the dataset has no latents. Features are not cast in the
            labelled case.

        Raises
        ------
        ValueError
            If `batch_size` exceeds the number of stored examples.
        """

        try:
            idx = self.rng.randint(self.X.shape[0] - batch_size + 1)
        except ValueError:
            if batch_size > self.X.shape[0]:
                reraise_as(ValueError("Requested %d examples from a dataset "
                                      "containing only %d." %
                                      (batch_size, self.X.shape[0])))
            raise
        rx = self.X[idx:idx + batch_size, :]
        if include_labels:
            if self.y is None:
                return rx, None
            ry = self.y[idx:idx + batch_size]
            # Latents are optional; do not subscript None.
            if self.latent is None:
                rlatent = None
            else:
                rlatent = self.latent[idx:idx + batch_size]
            return rx, ry, rlatent
        rx = np.cast[config.floatX](rx)
        return rx

    def get_batch_topo(self, batch_size, include_labels=False):
        """
        Returns a random batch converted to its topological view.

        Parameters
        ----------
        batch_size : int
            Number of consecutive examples to return.
        include_labels : bool
            If True, return `(topo_batch, labels, latents)`; entries that
            the dataset does not have are None. Otherwise return only the
            topological batch.
        """

        if include_labels:
            batch = self.get_batch_design(batch_size, True)
            batch_design, labels = batch[0], batch[1]
            # get_batch_design yields only (X, None) for unlabelled data,
            # so the third element is not always there.
            latents = batch[2] if len(batch) > 2 else None
        else:
            batch_design = self.get_batch_design(batch_size)

        rval = self.view_converter.design_mat_to_topo_view(batch_design)

        if include_labels:
            return rval, labels, latents

        return rval
Ejemplo n.º 17
0
    def _execute(self):
        """
        Extract features for one dataset / patch-size configuration,
        average-pool them on several grid resolutions and save the results.

        For each full batch of examples the configured preprocessing
        pipeline is applied (with its first item replaced by a stride-1
        grid patch extractor), thresholded linear features are computed
        from `self.W` and `self.alpha`, the per-patch features are
        reassembled into a topological view, and that view is
        average-pooled once per entry of `self.pooling_region_counts`.
        Each pooled array is written with `np.save` to the matching entry
        of `self.save_paths`. A warning is issued if NaN features were
        encountered (they are replaced by 0).
        """

        batch_size = self.batch_size
        pooling_region_counts = self.pooling_region_counts
        dataset_family = self.dataset_family
        which_set = self.which_set
        size = self.size

        # Running count of NaN feature entries that were zeroed out.
        nan = 0


        dataset_descriptor = dataset_family[which_set][size]

        dataset = dataset_descriptor.dataset_maker()
        expected_num_examples = dataset_descriptor.num_examples

        full_X = dataset.get_design_matrix()
        num_examples = full_X.shape[0]
        assert num_examples == expected_num_examples

        if self.restrict is not None:
            assert self.restrict[1]  <= full_X.shape[0]

            print 'restricting to examples ',self.restrict[0],' through ',self.restrict[1],' exclusive'
            full_X = full_X[self.restrict[0]:self.restrict[1],:]

            assert self.restrict[1] > self.restrict[0]

        #update for after restriction
        num_examples = full_X.shape[0]

        assert num_examples > 0

        # Detach the full design matrix from the dataset object; shallow
        # copies of it receive one batch at a time in the loop below.
        dataset.X = None
        dataset.design_loc = None
        dataset.compress = False

        # Replace the pipeline's patch extraction step by a dense,
        # stride-1 grid extraction of (size, size) patches.
        patchifier = ExtractGridPatches( patch_shape = (size,size), patch_stride = (1,1) )

        pipeline = serial.load(dataset_descriptor.pipeline_path)

        assert isinstance(pipeline.items[0], ExtractPatches)
        pipeline.items[0] = patchifier


        print 'defining features'
        V = T.matrix('V')


        Z = T.dot(V, self.W)

        # Threshold the projections at alpha: values at or below alpha map to 0.
        alpha = self.alpha
        if self.one_sided:
            feat = T.clip(abs(Z),alpha,1e30)-alpha
        else:
            # Two-sided: keep positive and negative parts as separate features.
            pos = T.clip(Z,alpha,1e30) - alpha
            neg = T.clip(-Z,alpha,1e30) - alpha

            feat = T.concatenate((pos, neg), axis=1)

        assert feat.dtype == 'float32'
        print 'compiling theano function'
        f = function([V],feat)


        # Number of feature columns: doubled in the two-sided case.
        nfeat = self.W.get_value().shape[1] * (2-self.one_sided)

        # NOTE(review): presumably `halver` splits the computation to fit
        # GPU memory for wide feature sets — confirm against its definition.
        if config.device.startswith('gpu') and nfeat >= 4000:
            f = halver(f, nfeat)

        # Compiled helper: mean over axes 1 and 2 of a 4-D float32 tensor.
        topo_feat_var = T.TensorType(broadcastable = (False,False,False,False), dtype='float32')()
        region_features = function([topo_feat_var],
                topo_feat_var.mean(axis=(1,2)) )

        def average_pool( stride ):
            # Pools the current `topo_feat` on a stride x stride grid.
            # `topo_feat` and `ns` are read from the enclosing scope and
            # are assigned further down, before the first call.
            def point( p ):
                # Boundary of region p along one axis. This function uses
                # Python 2 print statements, so `/` on ints is integer
                # division here.
                return p * ns / stride

            rval = np.zeros( (topo_feat.shape[0], stride, stride, topo_feat.shape[3] ) , dtype = 'float32')

            for i in xrange(stride):
                for j in xrange(stride):
                    rval[:,i,j,:] = region_features( topo_feat[:,point(i):point(i+1), point(j):point(j+1),:] )

            return rval

        # One zero-initialised output array per pooling resolution.
        outputs = [ np.zeros((num_examples,count,count,nfeat),dtype='float32') for count in pooling_region_counts ]

        assert len(outputs) > 0

        # Template dataset, shallow-copied per batch to hold the features.
        fd = DenseDesignMatrix(X = np.zeros((1,1),dtype='float32'), view_converter = DefaultViewConverter([1, 1, nfeat] ) )

        # Patch positions per image side for stride-1 extraction.
        # NOTE(review): presumably assumes 32x32 input images — TODO confirm.
        ns = 32 - size + 1
        depatchifier = ReassembleGridPatches( orig_shape  = (ns, ns), patch_shape=(1,1) )

        # Diagnostic output when not even one full batch fits.
        if len(range(0,num_examples-batch_size+1,batch_size)) <= 0:
            print num_examples
            print batch_size

        # Only full batches are processed; rows of `outputs` belonging to a
        # trailing partial batch keep their initial zeros.
        for i in xrange(0,num_examples-batch_size+1,batch_size):
            print i
            t1 = time.time()

            d = copy.copy(dataset)
            d.set_design_matrix(full_X[i:i+batch_size,:])

            t2 = time.time()

            #print '\tapplying preprocessor'
            d.apply_preprocessor(pipeline, can_fit = False)
            X2 = d.get_design_matrix()

            t3 = time.time()

            #print '\trunning theano function'
            feat = f(X2)

            t4 = time.time()

            assert feat.dtype == 'float32'

            feat_dataset = copy.copy(fd)

            # Count NaN features, then replace them by 0.
            if np.any(np.isnan(feat)):
                nan += np.isnan(feat).sum()
                feat[np.isnan(feat)] = 0

            feat_dataset.set_design_matrix(feat)

            #print '\treassembling features'
            feat_dataset.apply_preprocessor(depatchifier)

            #print '\tmaking topological view'
            topo_feat = feat_dataset.get_topological_view()
            assert topo_feat.shape[0] == batch_size

            t5 = time.time()

            #average pooling
            for output, count in zip(outputs, pooling_region_counts):
                output[i:i+batch_size,...] = average_pool(count)

            t6 = time.time()

            # Timings: total, then each of the five stages in order.
            print (t6-t1, t2-t1, t3-t2, t4-t3, t5-t4, t6-t5)

        for output, save_path in zip(outputs, self.save_paths):
            if self.chunk_size is not None:
                # Insert a chunk letter ('A' + chunk_id) before the .npy suffix.
                assert save_path.endswith('.npy')
                save_path_pieces = save_path.split('.npy')
                assert len(save_path_pieces) == 2
                assert save_path_pieces[1] == ''
                save_path = save_path_pieces[0] + '_' + chr(ord('A')+self.chunk_id)+'.npy'
            np.save(save_path,output)


        if nan > 0:
            warnings.warn(str(nan)+' features were nan')
Ejemplo n.º 18
0
def make_viewer(mat,
                grid_shape=None,
                patch_shape=None,
                activation=None,
                pad=None,
                is_color=False,
                rescale=True):
    """
    Build a PatchViewer showing every patch stored in `mat`.

    Parameters
    ----------
    mat : ndarray
        Either a matrix with one flattened patch per row, or a 4D tensor
        in ('b', 0, 1, 'c') format. A matrix is assumed to have been
        flattened the way a ('b', 0, 1, 'c') DefaultViewConverter does it.
    grid_shape : tuple, optional
        (rows, cols) of the viewer grid. When omitted it is chosen by
        `PatchViewer.pick_shape` from the number of patches.
    patch_shape : tuple, optional
        (rows, cols) of one patch. Ignored for 4D input, where it is read
        from `mat.shape[1:3]`. For 2D input without a `patch_shape`, an
        exact shape is picked with `PatchViewer.pick_shape`.
    activation : iterable, optional
        Activation value(s) associated with each patch, forwarded to
        `PatchViewer.add_patch`. If the first element is itself iterable,
        `activation` is treated as several sequences and patch `i`
        receives the list of their i-th elements; otherwise patch `i`
        receives `activation[i]`.
    pad : optional
        Padding between patches, forwarded to PatchViewer.
    is_color : bool
        If True, 2D input is assumed to have 3 channels, otherwise 1.
        Not needed for 4D input: it is derived from `mat.shape[3]`.
    rescale : bool
        Forwarded to `PatchViewer.add_patch`.

    Returns
    -------
    patch_viewer : PatchViewer
        A PatchViewer containing the patches stored in `mat`.
    """

    channels = 3 if is_color else 1

    if grid_shape is None:
        grid_shape = PatchViewer.pick_shape(mat.shape[0])

    if mat.ndim <= 2:
        # Flat patches: work out the patch geometry, then unflatten.
        if patch_shape is None:
            assert mat.shape[1] % channels == 0
            patch_shape = PatchViewer.pick_shape(mat.shape[1] // channels,
                                                 exact=True)
            assert mat.shape[1] == (patch_shape[0] * patch_shape[1] *
                                    channels)
        converter = DefaultViewConverter(
            (patch_shape[0], patch_shape[1], channels))
        topo_view = converter.design_mat_to_topo_view(mat)
    else:
        # Already topological: the geometry comes straight from the shape.
        patch_shape = mat.shape[1:3]
        topo_view = mat
        channels = mat.shape[3]
        is_color = channels > 1

    viewer = PatchViewer(grid_shape, patch_shape, pad=pad, is_color=is_color)
    for idx in xrange(mat.shape[0]):
        if activation is None:
            act = None
        elif hasattr(activation[0], '__iter__'):
            act = [a[idx] for a in activation]
        else:
            act = activation[idx]

        viewer.add_patch(topo_view[idx, :], rescale=rescale, activation=act)
    return viewer
Ejemplo n.º 19
0
def test_init_with_vc():
    """Smoke test: a DenseDesignMatrix can be built from X plus a view converter."""
    seeded = np.random.RandomState([4, 5, 6])
    design = seeded.randn(12, 5)
    converter = DefaultViewConverter([1, 2, 3])
    DenseDesignMatrix(X=design, view_converter=converter)
Ejemplo n.º 20
0
    def __init__(self,
                 which_set,
                 base_path='${PYLEARN2_DATA_PATH}/icml_2013_emotions',
                 start=None,
                 stop=None,
                 preprocessor=None,
                 fit_preprocessor=False,
                 axes=('b', 0, 1, 'c'),
                 fit_test_preprocessor=False):
        """
        Parameters
        ----------
        which_set : str
            Which portion of the dataset to load. Valid values are
            'train' or 'public_test'.
        base_path : str
            The directory containing the .csv files from kaggle.com.
            This directory should be writable; if the .csv files haven't
            already been converted to npy, this class will convert them
            to save memory the next time they are loaded.
        start : int, optional
            First example to keep. When given, `stop` must be given too.
        stop : int, optional
            One past the last example to keep. Only used when `start`
            is given.
        preprocessor : object, optional
            If given, `preprocessor.apply(self, can_fit=fit_preprocessor)`
            is called once the data is loaded.
        fit_preprocessor : bool
            True if the preprocessor is allowed to fit the data.
        axes : tuple
            Axis order handed to the DefaultViewConverter.
        fit_test_preprocessor : bool
            If we construct a test set based on this dataset, should the
            preprocessor be allowed to fit the test set?

        Raises
        ------
        ValueError
            If `which_set` is not a recognized dataset name.
        """

        # Constructor arguments for a matching test set. Built explicitly
        # instead of editing the dict returned by locals(), whose
        # modification is discouraged by the Python documentation.
        # `start` and `stop` are deliberately left out.
        self.test_args = {
            'which_set': 'public_test',
            'base_path': base_path,
            'preprocessor': preprocessor,
            'fit_preprocessor': fit_test_preprocessor,
            'axes': axes,
            'fit_test_preprocessor': fit_test_preprocessor,
        }

        files = {'train': 'train.csv', 'public_test': 'test.csv'}

        try:
            filename = files[which_set]
        except KeyError:
            # str() keeps the message buildable for non-string values.
            raise ValueError("Unrecognized dataset name: " + str(which_set))

        path = base_path + '/' + filename

        path = preprocess(path)

        X, y = self._load_data(path, which_set == 'train')

        if start is not None:
            assert which_set != 'test'
            assert isinstance(start, int)
            assert isinstance(stop, int)
            assert start >= 0
            assert start < stop
            assert stop <= X.shape[0]
            X = X[start:stop, :]
            if y is not None:
                y = y[start:stop, :]

        view_converter = DefaultViewConverter(shape=[48, 48, 1], axes=axes)

        super(EmotionsDataset, self).__init__(X=X,
                                              y=y,
                                              y_labels=7,
                                              view_converter=view_converter)

        if preprocessor:
            preprocessor.apply(self, can_fit=fit_preprocessor)
Ejemplo n.º 21
0
# check for global scaling
if not opts.local:
    # Normalise all samples by the single largest absolute value.
    samples = samples / numpy.abs(samples).max()

##############
# PLOT FILTERS
##############

# (A leftover `pdb.set_trace()` breakpoint was removed here: it halted
# every run of the script before anything was plotted.)
viewer = PatchViewer(get_dims(model.batch_size),
                     (opts.height, opts.width),
                     is_color = opts.color,
                     pad=(2,2))

# Unflatten the samples into (batch, height, width, channels).
topo_shape = [opts.height, opts.width, opts.chans]
view_converter = DefaultViewConverter(topo_shape)
topo_view = view_converter.design_mat_to_topo_view(samples)

for chan_i in xrange(nplots):

    # Colour: use all channels at once; otherwise keep a single channel
    # (sliced with chan_i:chan_i+1 so the channel axis is preserved).
    topo_chan = topo_view if opts.color else topo_view[..., chan_i:chan_i+1]

    for bi in xrange(model.batch_size):
        viewer.add_patch(topo_chan[bi])

    #pl.subplot(1, nplots, chan_i+1)
    #pl.imshow(viewer.image, interpolation=None)
    #pl.axis('off'); pl.title('samples (channel %i)' % chan_i)
    viewer.show()

Ejemplo n.º 22
0
    quit(-1)


# Collect every model stored at model_path.
models = []
try:
    models.extend(serial.load(model_path))
except Exception as e:
    # Best effort: report the problem and carry on with whatever loaded.
    usage()
    print(model_path + " doesn't seem to be a valid model path, I got this error when trying to load it: ")
    print(e)

# load the test set
# Pickle data is binary: open with 'rb' so loading also works on Python 3
# and on platforms that translate line endings in text mode.
with open('preprocessed_test_for_pylearn2.pkl', 'rb') as f:
    dataset = pkl.load(f)
dataset = DenseDesignMatrix(X=dataset, view_converter=DefaultViewConverter(shape=[32, 32, 1], axes=['b', 0, 1, 'c']))

# Debug output: the loaded models and how many there are.
print(models)
predictions = []
print(len(models))

# Build, for each model, a symbolic class prediction for the test set.
for model in models:

    print(model)

    # Use the whole test set as a single batch.
    model.set_batch_size(dataset.X.shape[0])

    # Symbolic input batch in the model's own input space.
    X = model.get_input_space().make_batch_theano()
    Y = model.fprop(X) # forward prop the test data

    # Index of the largest output along axis 1 for each example.
    y = T.argmax(Y, axis=1)
Ejemplo n.º 23
0
class TemporalDenseDesignMatrix(DenseDesignMatrix):
    '''
    A class for representing datasets that can be stored as a dense design
    matrix, but whose examples are slices of width >= 2 rows each.

    The design matrix is kept in `self._X`; `self.X` is set to None after
    construction to prevent other access, and is only restored temporarily
    inside `iterator`.
    '''

    _default_seed = (17, 2, 946)

    def __init__(self, X=None, topo_view=None, y=None,
                 view_converter=None, axes = ('b', 0, 1, 2, 'c'),
                 rng=_default_seed, preprocessor = None, fit_preprocessor=False):
        '''
        Parameters
        ----------
        X : ndarray, 2-dimensional, optional
            Forwarded to the parent constructor. `topo_view` is mandatory
            for this class, so `X` cannot be used on its own.
        topo_view : ndarray
            Required. An array whose first dimension is treated as the
            batch/frame axis; the remaining dimensions are rows, columns
            and channels.
        y : ndarray, optional
            Labels or targets. `set_topological_view` asserts that this
            is None ('y not supported now').
        view_converter : object, optional
            Forwarded to the parent constructor.
        axes : tuple
            Must be exactly ('b', 0, 1, 2, 'c'). The parent constructor
            is called with the reduced axes ('b', 0, 1, 'c').
        rng : object, optional
            Forwarded to the parent constructor.
        preprocessor : object, optional
            Forwarded to the parent constructor.
        fit_preprocessor : bool
            Forwarded to the parent constructor.
        '''

        assert topo_view is not None, (
            'For TemporalDenseDesignMatrix, must provide topo_view (not X)'
        )

        assert axes == ('b', 0, 1, 2, 'c')

        reduced_axes = ('b', 0, 1, 'c')

        super(TemporalDenseDesignMatrix, self).__init__(
            X = X,
            topo_view = topo_view,
            y = y,
            view_converter = view_converter,
            axes = reduced_axes,
            rng = rng,
            preprocessor = preprocessor,
            fit_preprocessor = fit_preprocessor
        )

        self._X = self.X
        self.X = None   # prevent other access

    def set_topological_view(self, topo_view, axes=('b', 0, 1, 'c')):
        '''
        Sets the dataset to represent topo_view, where topo_view is a batch
        of topological views of examples.

        Parameters
        ----------
        topo_view : ndarray
            A batch of topological views laid out as described by `axes`.
            Must not contain NaN.
        axes : tuple
            Axis order of `topo_view`; the 'b' axis is treated as the
            frame axis.
        '''

        assert not np.any(np.isnan(topo_view))
        frames = topo_view.shape[axes.index('b')]    # pretend frames come in as batch dim
        rows = topo_view.shape[axes.index(0)]
        cols = topo_view.shape[axes.index(1)]
        channels = topo_view.shape[axes.index('c')]

        # leave out frames...
        self.view_converter = DefaultViewConverter([rows, cols, channels], axes=axes)

        self.X = self.view_converter.topo_view_to_design_mat(topo_view)
        # self.X_topo_space stores a "default" topological space that
        # will be used only when self.iterator is called without a
        # data_specs, and with "topo=True", which is deprecated.
        self.X_topo_space = self.view_converter.topo_space
        assert not np.any(np.isnan(self.X))

        # Update data specs
        X_space = VectorSpace(dim = frames * rows * cols * channels)
        X_source = 'features'

        assert self.y is None, 'y not supported now'
        space = X_space
        source = X_source

        self.data_specs = (space, source)
        self.X_space = X_space
        self._iter_data_specs = (X_space, X_source)

    @functools.wraps(Dataset.iterator)
    def iterator(self, mode=None, batch_size=None, num_batches=None,
                 topo=None, targets=None, rng=None, data_specs=None,
                 return_tuple=False):
        '''
        Thin wrapper around the parent iterator.

        The parent iterator is asked for 'random_slice' batches of
        `num_frames` rows each (`num_frames` is read from the shape of the
        single 'features' component of `data_specs`), and the result is
        wrapped in a CopyingConcatenatingIterator that groups `batch_size`
        of them. `return_tuple` is not forwarded; the parent is always
        called with `return_tuple=False`.

        Only `mode='shuffled_sequential'` is accepted, and `data_specs`
        must be a (CompositeSpace, sources) pair with exactly one
        component whose source is 'features'.
        '''

        assert mode == 'shuffled_sequential', (
            'Only shuffled_sequential mode is supported'
        )
        assert data_specs is not None, 'Must provide data_specs'
        assert len(data_specs) == 2, 'data_specs must include only one tuple for "features"'
        assert type(data_specs[0]) is CompositeSpace, 'must be composite space...??'
        assert data_specs[0].num_components == 1, 'must only have one component, features'
        assert data_specs[1][0] == 'features', 'data_specs must include only one tuple for "features"'

        output_space = data_specs[0].components[0]
        num_frames = output_space.shape[0]

        if num_batches is None:
            num_batches = 10  # another hack... just determines how often new iterators will be created?
        base_num_batches = num_batches * batch_size

        # Iterates through ONE example at a time
        # BEGIN HUGE HACK  (enable self.X access just for this function)
        self.X = self._X
        try:
            base_iterator = super(TemporalDenseDesignMatrix, self).iterator(
                mode='random_slice',  # to return contiguous bits
                batch_size=num_frames,
                num_batches=base_num_batches,
                topo=topo,
                targets=targets,
                rng=rng,
                data_specs=data_specs,
                return_tuple=False)
        finally:
            # Always hide X again, even if the parent iterator raised.
            self.X = None
        # END HUGE HACK

        return CopyingConcatenatingIterator(base_iterator, how_many = batch_size)
Ejemplo n.º 24
0
class FaceBBoxDDMPytables(dense_design_matrix.DenseDesignMatrix):
    filters = tables.Filters(complib='blosc', complevel=1)
    h5file = None
    """
    DenseDesignMatrix based on PyTables for face bounding boxes.
    """
    def __init__(self, X=None, h5file=None, topo_view=None, y=None,
                 view_converter=None, axes = ('b', 0, 1, 'c'),
                 image_shape=None, receptive_field_shape=None,
                 bbox_conversion_type=ConversionType.GUID,
                 area_ratio=None,
                 stride=None, use_output_map=True, rng=None):
        """
        Parameters
        ----------

        X : ndarray, 2-dimensional, optional
            Should be supplied if `topo_view` is not. A design
            matrix of shape (number examples, number features)
            that defines the dataset.
        topo_view : ndarray, optional
            Should be supplied if X is not.  An array whose first
            dimension is of length number examples. The remaining
            dimensions are xamples with topological significance,
            e.g. for images the remaining axes are rows, columns,
            and channels.
        y : ndarray, 1-dimensional(?), optional
            Labels or targets for each example. The semantics here
            are not quite nailed down for this yet.
        view_converter : object, optional
            An object for converting between design matrices and
            topological views. Currently DefaultViewConverter is
            the only type available but later we may want to add
            one that uses the retina encoding that the U of T group
            uses.
        image_shape: list
            Shape of the images that we are processing.
        receptive_field_size: list
            Size of the receptive field of the convolutional neural network.
        stride: integer
            The stride that we have used for the convolution operation.
        rng : object, optional
            A random number generator used for picking random
            indices into the design matrix when choosing minibatches.
        """

        if rng is None:
            rng = (17, 2, 946)

        assert image_shape is not None
        assert receptive_field_shape is not None
        assert stride is not None

        self.image_shape = image_shape
        self.receptive_field_shape = receptive_field_shape
        self.stride = stride
        self.use_output_map = use_output_map
        self.bbox_conversion_type = bbox_conversion_type
        self.h5file = h5file
        self.area_ratio = area_ratio
        self._deprecated_interface = True
        FaceBBoxDDMPytables.filters = tables.Filters(complib='blosc', complevel=1)


        super(FaceBBoxDDMPytables, self).__init__(X = X,
                                            topo_view = topo_view,
                                            y = y,
                                            view_converter = view_converter,
                                            axes = axes,
                                            rng = rng)

    def set_design_matrix(self, X, start = 0):
        """
        Parameters
        ----------
        X: Images
        """
        assert (len(X.shape) == 2)
        assert self.h5file is not None
        assert not numpy.any(numpy.isnan(X))

        if self.h5file.isopen and (self.h5file.mode == "w" or self.h5file.mode == "r+"):
            self.fill_hdf5(h5file=self.h5file,
                data_x=X,
                start=start)
        else:
            raise ValueError("H5File is not open or not in the writable mode!")

    def set_topological_view(self, V, axes=('b', 0, 1, 'c'), start=0):
        """
        Sets the dataset to represent V, where V is a batch
        of topological views of examples.

        Parameters
        ----------
        V : ndarray
            An array containing a design matrix representation of training
            examples. If unspecified, the entire dataset (`self.X`) is used
            instead.
        TODO: why is this parameter named 'V'?
        """
        assert not numpy.any(numpy.isnan(V))
        rows = V.shape[axes.index(0)]
        cols = V.shape[axes.index(1)]
        channels = V.shape[axes.index('c')]
        self.view_converter = DefaultViewConverter([rows, cols, channels], axes=axes)
        X = self.view_converter.topo_view_to_design_mat(V)
        assert not numpy.any(numpy.isnan(X))

        FaceBBoxDDMPytables.fill_hdf5(h5file = self.h5file,
                                            data_x = X,
                                            start = start)

    @functools.wraps(Dataset.iterator)
    def iterator(self, mode=None, batch_size=None, num_batches=None,
                 topo=None, targets=None, rng=None, data_specs=None,
                 return_tuple=False):

        # build data_specs from topo and targets if needed
        if topo is None:
            topo = getattr(self, '_iter_topo', False)

        if data_specs[0] is not None:
            if isinstance(data_specs[0], Conv2DSpace) or isinstance(data_specs[0].components[0],
                    Conv2DSpace):
                topo = True

        if topo:
            # self.iterator is called without a data_specs, and with
            # "topo=True", so we use the default topological space
            # stored in self.X_topo_space
            assert self.X_topo_space is not None
            X_space = self.X_topo_space
        else:
            X_space = self.X_space

        if targets is None:
            if "targets" in data_specs[1]:
                targets = True
            else:
                targets = False

        if data_specs is None:
            if targets:
                assert self.y is not None
                y_space = data_specs[0].components[1]
                space = CompositeSpace(components=(X_space, y_space))
                source = ('features', 'targets')
            else:
                space = X_space
                source = 'features'

            print space
            data_specs = (space, source)

        # TODO: Refactor
        if mode is None:
            if hasattr(self, '_iter_subset_class'):
                mode = self._iter_subset_class
            else:
                raise ValueError('iteration mode not provided and no default '
                                 'mode set for %s' % str(self))
        else:
            mode = resolve_iterator_class(mode)

        if batch_size is None:
            batch_size = getattr(self, '_iter_batch_size', None)

        if num_batches is None:
            num_batches = getattr(self, '_iter_num_batches', None)

        if rng is None and mode.stochastic:
            rng = self.rng

        if data_specs is None:
            data_specs = self._iter_data_specs

        return FaceBBoxDDMIterator(self,
                                    mode(self.X.shape[0], batch_size, num_batches, rng),
                                    img_shape=self.image_shape,
                                    receptive_field_shape=self.receptive_field_shape,
                                    stride=self.stride,
                                    bbox_conversion_type=self.bbox_conversion_type,
                                    topo=topo,
                                    targets=targets,
                                    area_ratio=self.area_ratio,
                                    use_output_map=self.use_output_map,
                                    data_specs=data_specs,
                                    return_tuple=return_tuple)

    @staticmethod
    def init_hdf5(path=None, shapes=None):
        """
        Initialize hdf5 file to be used as a dataset
        """
        assert shapes is not None

        x_shape, y_shape = shapes
        print "init_hdf5"

        # make pytables
        if path is None:
            if FaceBBoxDDMPytables.h5file is None:
                raise ValueError("path variable should not be empty.")
            else:
                h5file = FaceBBoxDDMPytables.h5file
        else:
                h5file = tables.openFile(path, mode = "w", title = "Google Face bounding boxes Dataset.")

        gcolumns = h5file.createGroup(h5file.root, "Data", "Data")
        atom = tables.Float32Atom() if config.floatX == 'float32' else tables.Float64Atom()

        filters = FaceBBoxDDMPytables.filters

        h5file.createCArray(gcolumns, 'X', atom = atom, shape = x_shape,
                title = "Images", filters = filters)

        h5file.createTable(gcolumns, 'bboxes', BoundingBox,
                title = "Face bounding boxes", filters = filters)

        return h5file, gcolumns

    @staticmethod
    def fill_hdf5(h5file, data_x, data_y = None, node = None, start = 0, batch_size = 5000):
        """
        PyTables tends to crash if you write large data on them at once.
        This function write data on file in batches

        start: the start index to write data
        """

        if node is None:
            node = h5file.root.Data
        if FaceBBoxDDMPytables.h5file is None:
            FaceBBoxDDMPytables.h5file = h5file

        data_size = data_x.shape[0]
        last = numpy.floor(data_size / float(batch_size)) * batch_size
        for i in xrange(0, data_size, batch_size):
            stop = i + numpy.mod(data_size, batch_size) if i >= last else i + batch_size
            assert len(range(start + i, start + stop)) == len(range(i, stop))
            assert (start + stop) <= (node.X.shape[0])

            node.X[start + i: start + stop, :] = data_x[i:stop, :]

            if data_y is not None:
                node.y[start + i: start + stop, :] = data_y[i:stop, :]

            h5file.flush()

    @staticmethod
    def resize(h5file, start, stop, remove_old_node=False):
        if h5file is None:
            raise ValueError("h5file should not be None.")

        data = h5file.root.Data
        node_name = "Data_%s_%s" % (start, stop)
        if remove_old_node:
            try:
                gcolumns = h5file.createGroup('/', node_name, "Data %s" %   node_name)
            except tables.exceptions.NodeError:
                h5file.removeNode('/', node_name, 1)
                gcolumns = h5file.createGroup('/', node_name, "Data %s" % node_name)
        elif node_name in h5file.root:
            return h5file, getattr(h5file.root, node_name)
        else:
            gcolumns = h5file.createGroup('/', node_name, "Data %s" %   node_name)

        if FaceBBoxDDMPytables.h5file is None:
            FaceBBoxDDMPytables.h5file = h5file

        start = 0 if start is None else start
        stop = gcolumns.X.nrows if stop is None else stop

        atom = tables.Float32Atom() if config.floatX == 'float32' else tables.Float64Atom()
        filters = FaceBBoxDDMPytables.filters

        x = h5file.createCArray(gcolumns, 'X', atom = atom, shape = ((stop - start, data.X.shape[1])),
                title = "Images", filters = filters)

        y = h5file.createTable(gcolumns, 'bboxes', BoundingBox,
                title = "Face bounding boxes", filters = filters)

        x[:] = data.X[start:stop]
        bboxes = get_image_bboxes(slice(start, stop), data.bboxes)
        y.append(bboxes)

        if remove_old_node:
            h5file.removeNode('/', "Data", 1)
            h5file.renameNode('/', "Data", node_name)

        h5file.flush()
        return h5file, gcolumns
Ejemplo n.º 25
0
def analyze(config):
    output_path = config.get('output_path');
#     model_file = os.path.join(output_path, 'eeg', 'conv3', 'convolutional_network.pkl');
#     model_file = os.path.join(output_path, 'eeg', 'conv10', 'epochs', 'cnn_epoch94.pkl');
    model_file = '../../../debug/debug_run4/debug_network.pkl';
    with log_timing(log, 'loading convnet model from {}'.format(model_file)):
        model = serial.load(model_file);
        
    input_shape =  model.get_input_space().shape;
        
    config = config.eeg;
    hyper_params = {
                'input_length':input_shape[0], #25+151-1+301-1, # this should leave a single value per channel after convolution
                'hop_size':5,               # reduce amount of data by factor 5
                
                'dataset_root': config.get('dataset_root'),
                'dataset_suffix': config.get('dataset_suffix'),
                'save_path': config.get('save_path'),
        }
        
    dataset_yaml = '''
    !obj:deepthought.datasets.rwanda2013rhythms.EEGDataset.EEGDataset {
                                 name : 'testset',
                                 path : %(dataset_root)s, 
                                 suffix : '_channels', # %(dataset_suffix)s,
                                 subjects : [0],
                                 resample : [400, 100],
                                 start_sample : 2500,
                                 stop_sample  : 3200,     # None (empty) = end of sequence
                  # FIXME:                
#                                  n_fft : 24,
#                                  frame_size : 10, # %(input_length)i,                                
                                 frame_size : %(input_length)i,
                                 
                                 hop_size : %(hop_size)i,           
                                 label_mode : 'rhythm_type',
#                                  save_matrix_path: '../../../debug/debug.pkl'
                            }
'''
    dataset_yaml = dataset_yaml  % hyper_params;
    print dataset_yaml;

    with log_timing(log, 'parsing yaml'):    
        testset = yaml_parse.load(dataset_yaml);
        
#     print testset.subject_partitions;
#     print testset.sequence_partitions;
    
    seq_starts = testset.sequence_partitions;
#     return;
    
#     axes=['b', 0, 1, 'c']
#     def dimshuffle(b01c):
#         default = ('b', 0, 1, 'c')
#         return b01c.transpose(*[default.index(axis) for axis in axes])
#     data = dimshuffle(testset.X);
    
#     design_matrix = model.get_design_matrix()

#     view_converter = DefaultViewConverter([475, 1, 1]);
#     data = view_converter.


#     ## get the labels
#     data_specs= (model.get_output_space(), "targets");
#     it = testset.iterator(
#                            mode='sequential', 
#                            batch_size=100,
#                            data_specs=data_specs);
#     labels = np.hstack([np.argmax(minibatch, axis = 1) for minibatch in it])
#     print labels[0:1000]
# 
#     ## get the predictions
#     minibatch = model.get_input_space().make_theano_batch();
#     output_fn = theano.function(inputs=[minibatch], 
#                                 outputs=T.argmax(model.fprop(minibatch), axis = 1));
#     print "function compiled"
# #     data_specs= (CompositeSpace((
# #                                 model.get_input_space(), 
# #                                 model.get_output_space())), 
# #                 ("features", "targets"));
#                 
#     data_specs= (model.get_input_space(), "features");    
#     it = testset.iterator(
#                             mode='sequential', 
#                             batch_size=100,
#                             data_specs=data_specs);
#     print "iterator ready"
#         
#     y_pred = np.hstack([output_fn(minibatch) for minibatch in it])
#     
#     print y_pred[0:1000]
    
    
    minibatch = model.get_input_space().make_theano_batch();
    output_fn = theano.function(inputs=[minibatch], 
                                outputs=T.argmax(model.fprop(minibatch), axis = 1));
    print "function compiled"
    
    data_specs= (CompositeSpace((
                                model.get_input_space(), 
                                model.get_output_space())), 
                ("features", "targets"));
    it = testset.iterator('sequential',
                          batch_size=100,
                          data_specs=data_specs);
    print "iterator ready"
                    
    y_pred = [];
    y_real = [];                
    for minibatch, target in it:
        y_pred.append(output_fn(minibatch));
        y_real.append(np.argmax(target, axis = 1));
    y_pred = np.hstack(y_pred);
    y_real = np.hstack(y_real);   
    
    print y_pred[0:1000]
    
    print classification_report(y_real, y_pred);
    print confusion_matrix(y_real, y_pred);

    misclass = (y_real != y_pred);
    print misclass.mean();
    
    correct = 0;
    s_real = [];
    s_pred = [];
    s_pred_agg = [];
    
    n_channels = 16;
    channel_scores = np.zeros(n_channels, dtype=np.int);
    
    for i in xrange(len(seq_starts)):
        
        start = seq_starts[i];
        if i < len(seq_starts) - 1:
            stop = seq_starts[i+1];
        else:
            stop = None;
        
        s_real.append(y_real[start]);
        
#         print np.bincount(y_pred[start:stop]);
#         print np.argmax(np.bincount(y_pred[start:stop]));

        s_pred.append(np.argmax(np.bincount(y_pred[start:stop])));
        
        s_pred_agg.append(np.mean(y_pred[start:stop])); # works only for binary classification
        
        seq_misclass = misclass[start:stop].mean();
#         print '{} [{}{}]: {}'.format(i, start, stop, seq_misclass);
        
        if seq_misclass < 0.5: # more correct than incorrect
            correct += 1;
            channel_scores[i%n_channels] += 1;
    
    s_real = np.hstack(s_real);
    s_pred = np.hstack(s_pred);  
    
    print s_real;
    print s_pred;       
    print s_pred_agg;
    
    print 'aggregated'
    print classification_report(s_real, s_pred);
    print confusion_matrix(s_real, s_pred);
    
    s_misclass = (s_real != s_pred);
    print s_misclass.mean();
    
    print channel_scores;
    
    return;
    
    
    
    
    
    
    
    

    input_shape =  model.get_input_space().shape;
    
    print input_shape
    
    view_converter = DefaultViewConverter((input_shape[0], input_shape[1], 1));
    
    data = view_converter.design_mat_to_topo_view(testset.X);
    print data.shape;
                
    X = model.get_input_space().make_theano_batch()
    Y = model.fprop( X )
    Y = T.argmax( Y, axis = 1 ) # needed - otherwise not single value
    output_fn = theano.function( [X], Y );
    


    
#     y_pred = output_fn( data );

    batch_size = 1000;
    y_pred = [];
    batch_start = 0;
    while batch_start < data.shape[0]:
        batch_stop = min(data.shape[0], batch_start + batch_size);
        y_pred.append(output_fn( data[batch_start:batch_stop] ));
#         if batch_start == 0: print y_pred;
        batch_start = batch_stop;
    y_pred = np.hstack(y_pred);

    print testset.labels[0:1000]
    print y_pred[0:1000]

    print classification_report(testset.labels, y_pred);
    print confusion_matrix(testset.labels, y_pred);

    labels = np.argmax(testset.y, axis=1)
    print classification_report(labels, y_pred);
    print confusion_matrix(labels, y_pred);
    
    labels = np.argmax(testset.y, axis=1)
    print classification_report(labels, y_pred);
    print confusion_matrix(labels, y_pred);

    misclass = (labels != y_pred).mean()
    print misclass
    
#     # alternative version from KeepBestParams
#     minibatch = T.matrix('minibatch')
#     output_fn = theano.function(inputs=[minibatch],outputs=T.argmax( model.fprop(minibatch), axis = 1 ));
#     it = testset.iterator('sequential', batch_size=batch_size, targets=False);
#     y_pred = [output_fn(mbatch) for mbatch in it];

#             y_hat = T.argmax(state, axis=1)
#             y = T.argmax(target, axis=1)
#             misclass = T.neq(y, y_hat).mean()
#             misclass = T.cast(misclass, config.floatX)
#             rval['misclass'] = misclass
#             rval['nll'] = self.cost(Y_hat=state, Y=target)
        
    

    log.debug('done');
Ejemplo n.º 26
0
def make_viewer(mat, grid_shape=None, patch_shape=None,
                activation=None, pad=None, is_color = False, rescale = True):
    """
    Build a PatchViewer showing every patch (filter) stored in `mat`.

    Patch dimensions and a pleasant grid layout are guessed when they are
    not supplied.

    Parameters
    ----------
    mat : ndarray
        Either a matrix with one image patch per row, or a 4D tensor in
        ('b', 0, 1, 'c') format. A matrix is assumed to have been
        flattened the way a ('b', 0, 1, 'c') DefaultViewConverter does it.
        Values should lie in [-1, 1] if `rescale` is False. 0. always
        indicates medium gray, with negative values drawn as blacker and
        positive values drawn as whiter.
    grid_shape : tuple, optional
        Two ints giving the shape of the grid in the PatchViewer, in
        (rows, cols) format. If not specified, a shape is picked from the
        number of patches.
    patch_shape : tuple, optional
        Two ints giving the shape of one patch. Ignored when `mat` is 4D,
        in which case the patch shape is read from the shape of `mat`. If
        `mat` is 2D and `patch_shape` is not specified, the shape is
        picked so that it accounts exactly for every feature.
    activation : iterable, optional
        Activation values associated with the patches, drawn as a border
        around each patch whose color intensity increases with the value.
        Either one value per patch (a single border), or a collection of
        such iterables (several borders of differing intensities).
    pad : int, optional
        The amount of padding to add between patches in the displayed image.
    is_color : bool
        If True, assume the images are in color (three channels).
        Not needed if `mat` is in ('b', 0, 1, 'c') format since the number
        of channels is then read from its shape.
    rescale : bool
        Passed on to `PatchViewer.add_patch` for every patch.

    Returns
    -------
    patch_viewer : PatchViewer
        A PatchViewer containing the patches stored in `mat`.
    """

    channels = 3 if is_color else 1

    if grid_shape is None:
        grid_shape = PatchViewer.pick_shape(mat.shape[0])

    if mat.ndim <= 2:
        # Design matrix: recover the topological layout of each row.
        if patch_shape is None:
            n_features = mat.shape[1]
            assert n_features % channels == 0
            patch_shape = PatchViewer.pick_shape(n_features / channels,
                                                 exact = True)
            assert n_features == (patch_shape[0] *
                                  patch_shape[1] *
                                  channels)
        converter = DefaultViewConverter(
            (patch_shape[0], patch_shape[1], channels))
        patches = converter.design_mat_to_topo_view(mat)
    else:
        # Already topological: everything is dictated by the tensor shape.
        patch_shape = mat.shape[1:3]
        patches = mat
        channels = mat.shape[3]
        is_color = channels > 1

    viewer = PatchViewer(grid_shape, patch_shape, pad=pad, is_color = is_color)

    def border_for(index):
        # Activation value(s) to draw around patch number `index`.
        if activation is None:
            return None
        if hasattr(activation[0], '__iter__'):
            return [values[index] for values in activation]
        return activation[index]

    for index in xrange(mat.shape[0]):
        border = border_for(index)
        current = patches[index, :]
        viewer.add_patch(current, rescale=rescale,
                         activation=border)

    return viewer
Ejemplo n.º 27
0
    def _execute(self):
        """
        Extract pooled triangle-code features for a dataset and save them.

        For every example the preprocessing pipeline (with its patch
        extractor replaced by a dense, stride-1 grid) is applied, the
        patches are encoded with `triangle_code` against `model.mu`, the
        per-patch features are reassembled into a topological map, pooled
        into a `num_superpixels` x `num_superpixels` grid and finally
        reduced to `num_output_features` values, each taken from a
        rectangle of that grid. The resulting matrix is written to
        `save_path` with `np.save`.

        Reads the module-level global `num_superpixels`. The loop asserts
        `batch_size == 1`, so only that batch size is supported.
        """

        global num_superpixels
        num_output_features = self.num_output_features
        # Per output feature: feature channel (idxs) and the inclusive
        # bounds of the superpixel rectangle it is pooled over.
        idxs = self.idxs
        top = self.top
        bottom = self.bottom
        left = self.left
        right = self.right

        save_path = self.save_path
        batch_size = self.batch_size
        dataset_family = self.dataset_family
        which_set = self.which_set
        model = self.model
        size = self.size

        # Running count of NaN feature values that were replaced by 0.
        nan = 0

        dataset_descriptor = dataset_family[which_set][size]

        dataset = dataset_descriptor.dataset_maker()
        expected_num_examples = dataset_descriptor.num_examples

        full_X = dataset.get_design_matrix()
        num_examples = full_X.shape[0]
        assert num_examples == expected_num_examples

        # Optionally process only rows restrict[0] .. restrict[1] - 1.
        if self.restrict is not None:
            assert self.restrict[1] <= full_X.shape[0]

            print('restricting to examples ', self.restrict[0], ' through ',
                  self.restrict[1], ' exclusive')
            full_X = full_X[self.restrict[0]:self.restrict[1], :]

            assert self.restrict[1] > self.restrict[0]

        # Recompute the example count now that the restriction is applied.
        num_examples = full_X.shape[0]

        assert num_examples > 0

        # The dataset object is only used as a template that is shallow-
        # copied for every batch below; its own data is dropped here.
        dataset.X = None
        dataset.design_loc = None
        dataset.compress = False

        # Extract a patch at every position (stride 1) instead of whatever
        # patch extraction the stored pipeline used.
        patchifier = ExtractGridPatches(patch_shape=(size, size),
                                        patch_stride=(1, 1))

        pipeline = serial.load(dataset_descriptor.pipeline_path)

        assert isinstance(pipeline.items[0], ExtractPatches)
        pipeline.items[0] = patchifier

        print('defining features')
        V = T.matrix('V')

        mu = model.mu

        feat = triangle_code(V, mu)

        assert feat.dtype == 'float32'
        print('compiling theano function')
        f = function([V], feat)

        nhid = model.mu.get_value().shape[0]

        # NOTE(review): the threshold is tested on the local `nhid` but
        # `model.nhid` is what gets passed to halver; confirm they agree.
        if config.device.startswith('gpu') and nhid >= 4000:
            f = halver(f, model.nhid)

        # Compiled reduction of a 4D feature map over its two middle axes.
        topo_feat_var = T.TensorType(broadcastable=(False, False, False,
                                                    False),
                                     dtype='float32')()
        if self.pool_mode == 'mean':
            region_features = function([topo_feat_var],
                                       topo_feat_var.mean(axis=(1, 2)))
        elif self.pool_mode == 'max':
            region_features = function([topo_feat_var],
                                       topo_feat_var.max(axis=(1, 2)))
        else:
            assert False

        def average_pool(stride):
            # Pools the current `topo_feat` into a stride x stride grid
            # using `region_features` (mean or max, per pool_mode, despite
            # the name). `topo_feat` and `ns` are closed over and are only
            # assigned further down, before the first call.
            def point(p):
                # Boundary of cell p along an axis of length ns.
                return p * ns / stride

            rval = np.zeros(
                (topo_feat.shape[0], stride, stride, topo_feat.shape[3]),
                dtype='float32')

            for i in xrange(stride):
                for j in xrange(stride):
                    rval[:, i, j, :] = region_features(
                        topo_feat[:,
                                  point(i):point(i + 1),
                                  point(j):point(j + 1), :])

            return rval

        output = np.zeros((num_examples, num_output_features), dtype='float32')

        # Template dataset for the encoded features; shallow-copied and
        # refilled for every batch.
        fd = DenseDesignMatrix(X=np.zeros((1, 1), dtype='float32'),
                               view_converter=DefaultViewConverter(
                                   [1, 1, nhid]))

        # Number of stride-1 patch positions along one axis; the constant 32
        # is presumably the side length of the input images - TODO confirm.
        ns = 32 - size + 1
        depatchifier = ReassembleGridPatches(orig_shape=(ns, ns),
                                             patch_shape=(1, 1))

        # Diagnostic output when the loop below would not run at all.
        if len(range(0, num_examples - batch_size + 1, batch_size)) <= 0:
            print(num_examples)
            print(batch_size)

        for i in xrange(0, num_examples - batch_size + 1, batch_size):
            print(i)
            t1 = time.time()

            d = copy.copy(dataset)
            d.set_design_matrix(full_X[i:i + batch_size, :])

            t2 = time.time()

            # Apply the preprocessing pipeline without refitting it.
            d.apply_preprocessor(pipeline, can_fit=False)
            X2 = d.get_design_matrix()

            t3 = time.time()

            # Encode the preprocessed patches.
            feat = f(X2)

            t4 = time.time()

            assert feat.dtype == 'float32'

            feat_dataset = copy.copy(fd)

            # Count NaN feature values and replace them with 0.
            if contains_nan(feat):
                nan += np.isnan(feat).sum()
                feat[np.isnan(feat)] = 0

            feat_dataset.set_design_matrix(feat)

            # Reassemble the per-patch features into one map per example.
            feat_dataset.apply_preprocessor(depatchifier)

            # Fetch the reassembled features as a topological array.
            topo_feat = feat_dataset.get_topological_view()
            assert topo_feat.shape[0] == batch_size

            t5 = time.time()

            # Pool the feature map down to the superpixel grid.
            superpixels = average_pool(num_superpixels)

            assert batch_size == 1

            # Reduce each output feature's rectangle of superpixels to a
            # single value.
            if self.pool_mode == 'mean':
                for j in xrange(num_output_features):
                    output[i:i + batch_size,
                           j] = superpixels[:, top[j]:bottom[j] + 1,
                                            left[j]:right[j] + 1,
                                            idxs[j]].mean()
            elif self.pool_mode == 'max':
                for j in xrange(num_output_features):
                    output[i:i + batch_size,
                           j] = superpixels[:, top[j]:bottom[j] + 1,
                                            left[j]:right[j] + 1,
                                            idxs[j]].max()
            else:
                assert False

            assert output[i:i + batch_size, :].max() < 1e20

            t6 = time.time()

            # Total time for this batch, then the time spent in each stage.
            print((t6 - t1, t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5))

        # When the work is split into chunks, tag the file name with a
        # letter derived from the chunk id ('A' for 0, 'B' for 1, ...).
        if self.chunk_size is not None:
            assert save_path.endswith('.npy')
            save_path_pieces = save_path.split('.npy')
            assert len(save_path_pieces) == 2
            assert save_path_pieces[1] == ''
            save_path = save_path_pieces[0] + '_' + chr(
                ord('A') + self.chunk_id) + '.npy'
        np.save(save_path, output)

        if nan > 0:
            warnings.warn(str(nan) + ' features were nan')
0
    def _execute(self):

        global num_superpixels
        global num_output_features
        global idxs
        global top
        global bottom
        global left
        global right

        save_path = self.save_path
        batch_size = self.batch_size
        dataset_family = self.dataset_family
        which_set = self.which_set
        size = self.size


        nan = 0


        dataset_descriptor = dataset_family[which_set][size]

        dataset = dataset_descriptor.dataset_maker()
        expected_num_examples = dataset_descriptor.num_examples

        full_X = dataset.get_design_matrix()
        num_examples = full_X.shape[0]
        assert num_examples == expected_num_examples

        if self.restrict is not None:
            assert self.restrict[1]  <= full_X.shape[0]

            print 'restricting to examples ',self.restrict[0],' through ',self.restrict[1],' exclusive'
            full_X = full_X[self.restrict[0]:self.restrict[1],:]

            assert self.restrict[1] > self.restrict[0]

        #update for after restriction
        num_examples = full_X.shape[0]

        assert num_examples > 0

        dataset.X = None
        dataset.design_loc = None
        dataset.compress = False

        patchifier = ExtractGridPatches( patch_shape = (size,size), patch_stride = (1,1) )

        pipeline = serial.load(dataset_descriptor.pipeline_path)

        assert isinstance(pipeline.items[0], ExtractPatches)
        pipeline.items[0] = patchifier


        Z = T.matrix('Z')

        pos = T.clip(Z,0.,1e30)
        neg = T.clip(-Z,0.,1e30)

        feat = T.concatenate((pos, neg), axis=1)

        assert feat.dtype == 'float32'
        print 'compiling theano function'
        f = function([Z],feat)

        nhid = 3200 # 2 * num dictionary elems

        if config.device.startswith('gpu') and nhid >= 4000:
            f = halver(f, nhid)

        topo_feat_var = T.TensorType(broadcastable = (False,False,False,False), dtype='float32')()
        region_features = function([topo_feat_var],
                topo_feat_var.mean(axis=(1,2)) )

        def average_pool( stride ):
            def point( p ):
                return p * ns / stride

            rval = np.zeros( (topo_feat.shape[0], stride, stride, topo_feat.shape[3] ) , dtype = 'float32')

            for i in xrange(stride):
                for j in xrange(stride):
                    rval[:,i,j,:] = region_features( topo_feat[:,point(i):point(i+1), point(j):point(j+1),:] )

            return rval

        output =  np.zeros((num_examples,num_output_features),dtype='float32')


        fd = DenseDesignMatrix(X = np.zeros((1,1),dtype='float32'), view_converter = DefaultViewConverter([1, 1, nhid] ) )

        ns = 32 - size + 1
        depatchifier = ReassembleGridPatches( orig_shape  = (ns, ns), patch_shape=(1,1) )

        if len(range(0,num_examples-batch_size+1,batch_size)) <= 0:
            print num_examples
            print batch_size

        for i in xrange(0,num_examples-batch_size+1,batch_size):
            print i
            t1 = time.time()

            d = copy.copy(dataset)
            d.set_design_matrix(full_X[i:i+batch_size,:])

            t2 = time.time()

            #print '\tapplying preprocessor'
            d.apply_preprocessor(pipeline, can_fit = False)
            X2 = d.get_design_matrix()

            t3 = time.time()

            #print '\trunning theano function'

            M.put(s,'batch',X2)

            M.eval(s, 'Z = sparse_codes(batch, dictionary, lambda)')
            Z = M.get(s, 'Z')

            feat = f(np.cast['float32'](Z))

            t4 = time.time()

            assert feat.dtype == 'float32'

            feat_dataset = copy.copy(fd)

            if np.any(np.isnan(feat)):
                nan += np.isnan(feat).sum()
                feat[np.isnan(feat)] = 0

            feat_dataset.set_design_matrix(feat)

            #print '\treassembling features'
            feat_dataset.apply_preprocessor(depatchifier)

            #print '\tmaking topological view'
            topo_feat = feat_dataset.get_topological_view()
            assert topo_feat.shape[0] == batch_size

            t5 = time.time()

            #average pooling
            superpixels = average_pool(num_superpixels)

            assert batch_size == 1

            assert superpixels.shape[0] == batch_size
            assert superpixels.shape[1] == num_superpixels
            assert superpixels.shape[2] == num_superpixels
            assert superpixels.shape[3] == 2 * num_filters

            for j in xrange(num_output_features):
                output[i:i+batch_size, j] = superpixels[:,top[j]:bottom[j]+1,
                        left[j]:right[j]+1, idxs[j]].mean()

            t6 = time.time()

            print (t6-t1, t2-t1, t3-t2, t4-t3, t5-t4, t6-t5)

        if self.chunk_size is not None:
            assert save_path.endswith('.npy')
            save_path_pieces = save_path.split('.npy')
            assert len(save_path_pieces) == 2
            assert save_path_pieces[1] == ''
            save_path = save_path_pieces[0] + '_' + chr(ord('A')+self.chunk_id)+'.npy'
        np.save(save_path,output)


        if nan > 0:
            warnings.warn(str(nan)+' features were nan')
Ejemplo n.º 29
0
    def __init__(self, patient_id, which_set, preprocessor_path, data_dir,
                 leave_one_out_seizure, sample_size_second, batch_size,
                 default_seed=0):
        """
        The Epilepsiae dataset customized for leave-one-seizure-out cross validation.

        Reads the seizure table from ``data_dir``, stores the configuration on
        the instance, loads the samples through ``self.load_data`` and then
        initializes the ``DenseDesignMatrix`` base with a ``('b', 0, 1, 'c')``
        view of shape ``(1, sample_size, 1)``.

        Parameters
        ----------
        patient_id : int
            Patient ID. Accepted for interface compatibility only: this
            constructor never reads it, and the list of recordings below is
            hard-coded.
        which_set : string
            Name of the partition to load (e.g., 'train', 'valid', or 'test').
            Forwarded unchanged to ``load_data``. The original documentation
            states that all data is loaded when no partition is specified;
            that logic lives in ``load_data``.
        preprocessor_path : string
            File path to store the scaler for pre-processing the EEG data.
            Forwarded unchanged to ``load_data``.
        data_dir : string
            Directory that stores the source EEG data. It must contain
            ``RECORDS-WITH-SEIZURES.txt``.
        leave_one_out_seizure : int
            Index of the withheld seizure.
        sample_size_second : int
            Number of seconds used to specify the sample size.
        batch_size : int
            Size of the batch, used to remove a few samples so that the number
            of samples is divisible by the batch size.
        default_seed : int, optional
            Seed for random.

        For preprocessing, see more in
            https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/datasets/preprocessing.py

        For customizing dataset, see more in
            https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/scripts/icml_2013_wrepl/emotions/emotions_dataset.py

        """

        # Recordings to load, relative to data_dir (fixed list).
        files = ['rec_26402102/26402102_0003.mat',
                 'rec_26402102/26402102_0007.mat',
                 'rec_26402102/26402102_0008.mat',
                 'rec_26402102/26402102_0017.mat']
        # Channel names stored as the channel filter for this dataset.
        scalp_channels = np.asarray([   u'FP1',
                                        u'FP2',
                                        u'F3',
                                        u'F4',
                                        u'C3',
                                        u'C4',
                                        u'P3',
                                        u'P4',
                                        u'O1',
                                        u'O2',
                                        u'F7',
                                        u'F8',
                                        u'T3',
                                        u'T4',
                                        u'T5',
                                        u'T6',
                                        u'FZ',
                                        u'CZ',
                                        u'PZ'   ])
        # Get seizure information
        seizure_info = pd.read_table(os.path.join(data_dir, 'RECORDS-WITH-SEIZURES.txt'), sep='\t')
        # Rename '.data' to '.mat' case-insensitively. The dot is escaped so it
        # only matches a literal '.', and regex=True is explicit because
        # case=False is rejected when pandas treats the pattern as a literal
        # (the default from pandas 2.0 on).
        seizure_info['filename'] = seizure_info['filename'].str.replace(
            r'\.data', '.mat', case=False, regex=True)

        # These attributes are assigned before load_data() is called, as in
        # the original ordering.
        self.data_dir = data_dir
        self.files = files
        self.seizure_info = seizure_info
        self.filter_channels = scalp_channels
        self.default_seed = default_seed
        self.leave_one_out_seizure = leave_one_out_seizure
        self.batch_size = batch_size

        X, y, n_channels, sample_size = self.load_data(which_set, sample_size_second, batch_size, preprocessor_path)
        self.n_channels = n_channels
        self.sample_size = sample_size

        view_converter = DefaultViewConverter((1, sample_size, 1))
        view_converter.set_axes(axes=['b', 0, 1, 'c'])

        DenseDesignMatrix.__init__(self, X=X, y=y,
                                   view_converter=view_converter,
                                   axes=['b', 0, 1, 'c'])
Ejemplo n.º 30
0
    def __init__(self,
                 patient_id,
                 which_set,
                 preprocessor_path,
                 data_dir,
                 leave_one_out_seizure,
                 sample_size_second,
                 batch_size,
                 default_seed=0):
        """
        The Epilepsiae dataset customized for leave-one-seizure-out cross validation.

        The constructor reads ``RECORDS-WITH-SEIZURES.txt`` from ``data_dir``,
        records its configuration on the instance, obtains the samples from
        ``self.load_data`` and finally initializes the ``DenseDesignMatrix``
        base class with a ``(1, sample_size, 1)`` topological view using the
        axes ``('b', 0, 1, 'c')``.

        Parameters
        ----------
        patient_id : int
            Patient ID. Not read anywhere in this constructor; the recording
            list is hard-coded. Kept so existing callers keep working.
        which_set : string
            Name of the partition to load (e.g., 'train', 'valid', or 'test').
            Passed straight to ``load_data``. The original documentation says
            all data is loaded when no partition is specified; that behavior
            is implemented by ``load_data``.
        preprocessor_path : string
            File path to store the scaler for pre-processing the EEG data.
            Passed straight to ``load_data``.
        data_dir : string
            Directory that stores the source EEG data, including
            ``RECORDS-WITH-SEIZURES.txt``.
        leave_one_out_seizure : int
            Index of the withheld seizure.
        sample_size_second : int
            Number of seconds used to specify the sample size.
        batch_size : int
            Size of the batch, used to remove a few samples so that the number
            of samples is divisible by the batch size.
        default_seed : int, optional
            Seed for random.

        For preprocessing, see more in
            https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/datasets/preprocessing.py

        For customizing dataset, see more in
            https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/scripts/icml_2013_wrepl/emotions/emotions_dataset.py

        """

        # Fixed list of recordings, given relative to data_dir.
        files = [
            'rec_26402102/26402102_0003.mat', 'rec_26402102/26402102_0007.mat',
            'rec_26402102/26402102_0008.mat', 'rec_26402102/26402102_0017.mat'
        ]
        # Channel names kept as the channel filter for this dataset.
        scalp_channels = np.asarray([
            u'FP1', u'FP2', u'F3', u'F4', u'C3', u'C4', u'P3', u'P4', u'O1',
            u'O2', u'F7', u'F8', u'T3', u'T4', u'T5', u'T6', u'FZ', u'CZ',
            u'PZ'
        ])
        # Get seizure information
        seizure_info = pd.read_table(os.path.join(data_dir,
                                                  'RECORDS-WITH-SEIZURES.txt'),
                                     sep='\t')
        # Case-insensitive '.data' -> '.mat' rename. The pattern is a regular
        # expression, so the dot is escaped to match only a literal '.', and
        # regex=True is passed explicitly: pandas refuses case=False for
        # literal (regex=False) replacement, which is the default from 2.0 on.
        seizure_info['filename'] = seizure_info['filename'].str.replace(
            r'\.data', '.mat', case=False, regex=True)

        # Assigned ahead of the load_data() call, preserving the original
        # ordering.
        self.data_dir = data_dir
        self.files = files
        self.seizure_info = seizure_info
        self.filter_channels = scalp_channels
        self.default_seed = default_seed
        self.leave_one_out_seizure = leave_one_out_seizure
        self.batch_size = batch_size

        X, y, n_channels, sample_size = self.load_data(which_set,
                                                       sample_size_second,
                                                       batch_size,
                                                       preprocessor_path)
        self.n_channels = n_channels
        self.sample_size = sample_size

        view_converter = DefaultViewConverter((1, sample_size, 1))
        view_converter.set_axes(axes=['b', 0, 1, 'c'])

        DenseDesignMatrix.__init__(self,
                                   X=X,
                                   y=y,
                                   view_converter=view_converter,
                                   axes=['b', 0, 1, 'c'])