def _execute(self):
        """
        Extract region-pooled triangle-code features and save them to disk.

        The method reads its whole configuration from ``self``:

        * ``dataset_family[which_set][size]`` selects a dataset descriptor
          providing ``dataset_maker``, ``num_examples`` and ``pipeline_path``.
        * ``restrict``, when not None, is a ``(start, stop)`` pair selecting
          the rows ``start:stop`` of the design matrix.
        * ``model.mu`` is passed to ``triangle_code`` to define the features.
        * ``pool_mode`` (``'mean'`` or ``'max'``) selects the pooling
          reduction.
        * ``top``, ``bottom``, ``left``, ``right`` and ``idxs`` describe, for
          each of the ``num_output_features`` outputs, an inclusive rectangle
          of superpixels and the feature channel to reduce over.
        * ``save_path`` / ``chunk_size`` / ``chunk_id`` control where the
          resulting ``(num_examples, num_output_features)`` float32 array is
          written with ``np.save``.

        The module-level global ``num_superpixels`` gives the side length of
        the superpixel grid.

        NaN feature values are replaced by 0 and counted; a warning with the
        total count is issued at the end if any were seen.
        """

        global num_superpixels
        num_output_features = self.num_output_features
        idxs = self.idxs
        top = self.top
        bottom = self.bottom
        left = self.left
        right = self.right

        save_path = self.save_path
        batch_size = self.batch_size
        dataset_family = self.dataset_family
        which_set = self.which_set
        model = self.model
        size = self.size

        # Running count of NaN feature values encountered.
        nan = 0

        dataset_descriptor = dataset_family[which_set][size]

        dataset = dataset_descriptor.dataset_maker()
        expected_num_examples = dataset_descriptor.num_examples

        full_X = dataset.get_design_matrix()
        num_examples = full_X.shape[0]
        assert num_examples == expected_num_examples

        if self.restrict is not None:
            assert self.restrict[1] <= full_X.shape[0]

            print('restricting to examples ', self.restrict[0], ' through ',
                  self.restrict[1], ' exclusive')
            full_X = full_X[self.restrict[0]:self.restrict[1], :]

            assert self.restrict[1] > self.restrict[0]

        #update for after restriction
        num_examples = full_X.shape[0]

        assert num_examples > 0

        # The design matrix now lives in full_X; clear it on the dataset
        # object so the per-batch shallow copies made below stay small.
        dataset.X = None
        dataset.design_loc = None
        dataset.compress = False

        # Replace the first pipeline stage by a grid extractor with stride 1.
        patchifier = ExtractGridPatches(patch_shape=(size, size),
                                        patch_stride=(1, 1))

        pipeline = serial.load(dataset_descriptor.pipeline_path)

        assert isinstance(pipeline.items[0], ExtractPatches)
        pipeline.items[0] = patchifier

        print('defining features')
        V = T.matrix('V')

        mu = model.mu

        feat = triangle_code(V, mu)

        assert feat.dtype == 'float32'
        print('compiling theano function')
        f = function([V], feat)

        # Number of feature channels, taken from the first axis of mu.
        nhid = model.mu.get_value().shape[0]

        if config.device.startswith('gpu') and nhid >= 4000:
            # Pass the feature count computed above instead of reading
            # model.nhid, which this method never otherwise relies on.
            f = halver(f, nhid)

        topo_feat_var = T.TensorType(broadcastable=(False, False, False,
                                                    False),
                                     dtype='float32')()
        if self.pool_mode == 'mean':
            region_features = function([topo_feat_var],
                                       topo_feat_var.mean(axis=(1, 2)))
        elif self.pool_mode == 'max':
            region_features = function([topo_feat_var],
                                       topo_feat_var.max(axis=(1, 2)))
        else:
            assert False

        def average_pool(stride):
            """
            Pool the current ``topo_feat`` into a ``stride`` x ``stride`` grid.

            Reads ``topo_feat`` and ``ns`` from the enclosing scope at call
            time, so it must only be called inside the batch loop below.
            """
            def point(p):
                # Floor division: the result is used as a slice bound and
                # must stay an integer under true-division semantics too.
                return p * ns // stride

            rval = np.zeros(
                (topo_feat.shape[0], stride, stride, topo_feat.shape[3]),
                dtype='float32')

            for i in xrange(stride):
                for j in xrange(stride):
                    rval[:, i, j, :] = region_features(
                        topo_feat[:,
                                  point(i):point(i + 1),
                                  point(j):point(j + 1), :])

            return rval

        output = np.zeros((num_examples, num_output_features), dtype='float32')

        # Template dataset used to turn flat per-patch features back into a
        # topological view; it is shallow-copied for every batch.
        fd = DenseDesignMatrix(X=np.zeros((1, 1), dtype='float32'),
                               view_converter=DefaultViewConverter(
                                   [1, 1, nhid]))

        # Number of stride-1 patch positions per axis.
        # NOTE(review): the constant 32 looks like the image side length --
        # confirm against the dataset.
        ns = 32 - size + 1
        depatchifier = ReassembleGridPatches(orig_shape=(ns, ns),
                                             patch_shape=(1, 1))

        # Diagnostic output when the loop below would not run at all.
        if len(range(0, num_examples - batch_size + 1, batch_size)) <= 0:
            print(num_examples)
            print(batch_size)

        for i in xrange(0, num_examples - batch_size + 1, batch_size):
            print(i)
            t1 = time.time()

            d = copy.copy(dataset)
            d.set_design_matrix(full_X[i:i + batch_size, :])

            t2 = time.time()

            #print '\tapplying preprocessor'
            d.apply_preprocessor(pipeline, can_fit=False)
            X2 = d.get_design_matrix()

            t3 = time.time()

            #print '\trunning theano function'
            feat = f(X2)

            t4 = time.time()

            assert feat.dtype == 'float32'

            feat_dataset = copy.copy(fd)

            if contains_nan(feat):
                nan += np.isnan(feat).sum()
                feat[np.isnan(feat)] = 0

            feat_dataset.set_design_matrix(feat)

            #print '\treassembling features'
            feat_dataset.apply_preprocessor(depatchifier)

            #print '\tmaking topological view'
            topo_feat = feat_dataset.get_topological_view()
            assert topo_feat.shape[0] == batch_size

            t5 = time.time()

            #average pooling
            superpixels = average_pool(num_superpixels)

            # The scalar reductions below collapse the batch axis as well,
            # so they are only meaningful for a single example per batch.
            assert batch_size == 1

            if self.pool_mode == 'mean':
                for j in xrange(num_output_features):
                    output[i:i + batch_size,
                           j] = superpixels[:, top[j]:bottom[j] + 1,
                                            left[j]:right[j] + 1,
                                            idxs[j]].mean()
            elif self.pool_mode == 'max':
                for j in xrange(num_output_features):
                    output[i:i + batch_size,
                           j] = superpixels[:, top[j]:bottom[j] + 1,
                                            left[j]:right[j] + 1,
                                            idxs[j]].max()
            else:
                assert False

            assert output[i:i + batch_size, :].max() < 1e20

            t6 = time.time()

            # Total time followed by the time spent in each stage.
            print((t6 - t1, t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5))

        if self.chunk_size is not None:
            # Insert "_<letter>" before the extension; the letter is derived
            # from chunk_id ('A' for 0, 'B' for 1, ...).
            assert save_path.endswith('.npy')
            save_path_pieces = save_path.split('.npy')
            assert len(save_path_pieces) == 2
            assert save_path_pieces[1] == ''
            save_path = save_path_pieces[0] + '_' + chr(
                ord('A') + self.chunk_id) + '.npy'
        np.save(save_path, output)

        if nan > 0:
            warnings.warn(str(nan) + ' features were nan')
Exemple #2
0
def train_cnn(lambda_l2,
              dropout1,
              dropout2,
              h1_neurons,
              h2_neurons,
              es_patience,
              batch_size,
              X_train,
              X_train_eyes,
              X_train_headpose,
              y_train,
              X_valid,
              X_valid_eyes,
              X_valid_headpose,
              y_valid,
              use_headpose,
              use_eyes,
              best_weights,
              use_pretrained_model=False):
    """Build and train a small CNN regressor with early stopping.

    The network is conv(16, 3x3) -> maxpool(2x2) -> conv(32, 2x2) ->
    maxpool(2x2) -> dropout -> [optional concat with eye / headpose inputs]
    -> dense -> dropout -> dense -> dense(2 linear outputs).  It is trained
    with Adamax on mean squared error plus an L2 penalty, one mini-batch at
    a time, and evaluated on the full training and validation sets at the
    end of every epoch.

    Parameters
    ----------
    lambda_l2 : weight of the L2 regularization term added to the loss.
    dropout1 : dropout probability applied after the second pooling layer.
    dropout2 : dropout probability applied after the first dense layer.
    h1_neurons, h2_neurons : number of units of the two hidden dense layers.
    es_patience : number of epochs without improvement of the validation
        RMSE after which training stops.
    batch_size : number of training examples per update.
    X_train, X_train_eyes, X_train_headpose, y_train : training inputs and
        targets; they are sliced along their first axis.
    X_valid, X_valid_eyes, X_valid_headpose, y_valid : validation inputs
        and targets.
    use_headpose, use_eyes : whether the corresponding auxiliary input is
        concatenated to the flattened convolutional features.
    best_weights : parameter values loaded into the network after the first
        epoch when ``use_pretrained_model`` is true; also the initial value
        of the returned best weights.
    use_pretrained_model : see ``best_weights``.

    Returns
    -------
    tuple
        ``(best_val_predictions, train_mean_errors, valid_mean_errors,
        train_rmses, valid_rmses, best_valid_rmse, best_valid_mean_error,
        best_weights)``.  ``best_val_predictions`` is ``None`` if no epoch
        ever improved on the initial (infinite) validation RMSE.

    Side effects: appends the layer output shapes to ``results.txt`` and
    prints progress to stdout.
    """
    #describes network architecture
    dataset = {
        'train': {
            'X': X_train,
            'eyes': X_train_eyes,
            'headpose': X_train_headpose,
            'y': y_train
        },
        'valid': {
            'X': X_valid,
            'eyes': X_valid_eyes,
            'headpose': X_valid_headpose,
            'y': y_valid
        }
    }
    input_shape = dataset['train']['X'][0].shape
    l_in = lasagne.layers.InputLayer(shape=(None, input_shape[0],
                                            input_shape[1], input_shape[2]))
    l_conv1 = lasagne.layers.Conv2DLayer(
        l_in,
        num_filters=16,
        filter_size=(3, 3),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotNormal(gain='relu'))
    l_pool1 = lasagne.layers.MaxPool2DLayer(l_conv1, pool_size=(2, 2))
    l_conv2 = lasagne.layers.Conv2DLayer(
        l_pool1,
        num_filters=32,
        filter_size=(2, 2),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotNormal(gain='relu'))
    l_pool2 = lasagne.layers.MaxPool2DLayer(l_conv2, pool_size=(2, 2))
    l_pool2_dropout = lasagne.layers.DropoutLayer(l_pool2, p=dropout1)
    eyes_shape = dataset['train']['eyes'][0].shape
    l_in_eyes = lasagne.layers.InputLayer(shape=(None, eyes_shape[0]))
    headpose_shape = dataset['train']['headpose'][0].shape
    l_in_headpose = lasagne.layers.InputLayer(shape=(None, headpose_shape[0]))

    #concatenates eye and/or headpose information to the net
    # Eyes come before headpose whenever both are enabled.
    extra_inputs = []
    if use_eyes:
        extra_inputs.append(l_in_eyes)
    if use_headpose:
        extra_inputs.append(l_in_headpose)
    if extra_inputs:
        # Flatten the convolutional feature maps so they can be joined with
        # the 2-D auxiliary inputs along the feature axis.
        conv_shape = lasagne.layers.get_output_shape(l_pool2_dropout)
        l_pool2_dropout_reshaped = lasagne.layers.ReshapeLayer(
            l_pool2_dropout,
            (-1, conv_shape[1] * conv_shape[2] * conv_shape[3]))
        l_concat = lasagne.layers.ConcatLayer(
            [l_pool2_dropout_reshaped] + extra_inputs, axis=1)
    else:
        l_concat = l_pool2_dropout

    l_hidden1 = lasagne.layers.DenseLayer(
        l_concat,
        num_units=h1_neurons,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotNormal(gain='relu'))
    l_hidden1_dropout = lasagne.layers.DropoutLayer(l_hidden1, dropout2)
    l_hidden2 = lasagne.layers.DenseLayer(
        l_hidden1_dropout,
        num_units=h2_neurons,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotNormal(gain='relu'))
    #output units are the x and y angles of the gaze
    l_output = lasagne.layers.DenseLayer(
        l_hidden2, num_units=2, nonlinearity=lasagne.nonlinearities.identity)

    #logs structure of the net
    layer_shapes = lasagne.layers.get_output_shape(
        lasagne.layers.get_all_layers(l_output))
    with open("results.txt", "a") as myfile:
        myfile.write("**********" + '\n')
        myfile.write("net structure:\n" + str(layer_shapes) + '\n')
        myfile.write("**********" + '\n')
    #print out the shape of the net
    print(layer_shapes)

    #theano uses symbolic variables to store and process data
    true_output = (T.TensorType(theano.config.floatX,
                                (False, False)))('true_output')
    #training loss: squared error of the stochastic (dropout enabled) output
    #against the true output
    loss_train = T.mean(
        lasagne.objectives.squared_error(
            lasagne.layers.get_output(l_output, deterministic=False),
            true_output))
    #adds l2 regularization to the loss function
    loss_regularization = lasagne.regularization.regularize_network_params(
        l_output, lasagne.regularization.l2)
    params = lasagne.layers.get_all_params(l_output, trainable=True)
    loss_train = loss_train + lambda_l2 * loss_regularization
    #adamax updates weights of the network with the help of the loss function
    updates = lasagne.updates.adamax(loss_train, params)
    #warn instead of raising an error for unused inputs, because headpose
    #and eye information can be omitted by design
    train = theano.function([
        l_in.input_var, l_in_eyes.input_var, l_in_headpose.input_var,
        true_output
    ],
                            loss_train,
                            updates=updates,
                            on_unused_input='warn')
    get_output = theano.function(
        [l_in.input_var, l_in_eyes.input_var, l_in_headpose.input_var],
        lasagne.layers.get_output(l_output, deterministic=True),
        on_unused_input='warn')
    BATCH_SIZE = batch_size
    N_EPOCHS = np.inf
    batch_idx = 0
    epoch = 0

    train_mean_errors = []
    train_rmses = []
    valid_mean_errors = []
    valid_rmses = []

    patience = es_patience
    best_valid_rmse = np.inf
    best_valid_mean_error = np.inf
    best_valid_epoch = 0
    # Defined up front so the return statement cannot hit an unbound name
    # when no epoch improves on the initial RMSE (e.g. NaN validation loss).
    best_val_predictions = None

    #train model with mini-batch gradient descent until early stopping
    #decides to end it; print out progress during training
    while epoch < N_EPOCHS:
        train(dataset['train']['X'][batch_idx:batch_idx + BATCH_SIZE],
              dataset['train']['eyes'][batch_idx:batch_idx + BATCH_SIZE],
              dataset['train']['headpose'][batch_idx:batch_idx + BATCH_SIZE],
              dataset['train']['y'][batch_idx:batch_idx + BATCH_SIZE])
        batch_idx += BATCH_SIZE
        if batch_idx >= dataset['train']['X'].shape[0]:
            # End of an epoch: rewind and evaluate.
            batch_idx = 0
            epoch += 1

            # NOTE(review): the pretrained weights are loaded only after
            # the first epoch has already been trained -- confirm that this
            # ordering is intended.
            if use_pretrained_model and epoch == 1:
                lasagne.layers.set_all_param_values(l_output, best_weights)

            val_predictions = get_output(dataset['valid']['X'],
                                         dataset['valid']['eyes'],
                                         dataset['valid']['headpose'])
            train_predictions = get_output(dataset['train']['X'],
                                           dataset['train']['eyes'],
                                           dataset['train']['headpose'])

            train_mean_error = degrees(
                mean_absolute_error(dataset['train']['y'], train_predictions))
            print("Epoch {} training accuracy (mean error in degrees): {}".
                  format(epoch, train_mean_error))
            valid_mean_error = degrees(
                mean_absolute_error(dataset['valid']['y'], val_predictions))
            print("Epoch {} validation accuracy (mean error in degrees): {}".
                  format(epoch, valid_mean_error))
            train_mean_errors.append(train_mean_error)
            valid_mean_errors.append(valid_mean_error)
            train_rmse = degrees(
                sqrt(
                    mean_squared_error(dataset['train']['y'],
                                       train_predictions)))
            print("Epoch {} training accuracy (RMSE in degrees): {}".format(
                epoch, train_rmse))
            valid_rmse = degrees(
                sqrt(mean_squared_error(dataset['valid']['y'],
                                        val_predictions)))
            improved = valid_rmse < best_valid_rmse
            # Green marks a new best validation RMSE, red anything else.
            print("Epoch {} validation accuracy (RMSE in degrees): {}".
                  format(epoch,
                         colored(valid_rmse, 'green' if improved else 'red')))
            train_rmses.append(train_rmse)
            valid_rmses.append(valid_rmse)

            if improved:
                best_valid_rmse = valid_rmse
                best_valid_mean_error = valid_mean_error
                best_valid_epoch = epoch
                best_weights = lasagne.layers.get_all_param_values(l_output)
                best_val_predictions = val_predictions
            elif best_valid_epoch + patience <= epoch:
                print(colored("Early stopping.", 'blue'))
                print(
                    colored(
                        "Best valid rmse was " + str(best_valid_rmse) +
                        " at epoch " + str(best_valid_epoch), 'blue'))
                print(
                    colored(
                        "Best valid mean error was " +
                        str(best_valid_mean_error) + " at epoch " +
                        str(best_valid_epoch), 'blue'))
                # Restore the best parameters before handing back control.
                lasagne.layers.set_all_param_values(l_output, best_weights)
                break

    return (best_val_predictions, train_mean_errors, valid_mean_errors,
            train_rmses, valid_rmses, best_valid_rmse, best_valid_mean_error,
            best_weights)
Exemple #3
0
def TensorType(dtype, shape):
    """Return a ``tt.TensorType`` for *dtype* derived from a concrete shape.

    Every dimension of *shape* equal to 1 is marked broadcastable; a scalar
    *shape* is treated as a one-element shape.
    """
    broadcast_pattern = np.atleast_1d(shape) == 1
    dtype_name = str(dtype)
    return tt.TensorType(dtype_name, broadcast_pattern)
Exemple #4
0
def multinomial(random_state,
                size=None,
                n=1,
                pvals=[0.5, 0.5],
                ndim=None,
                dtype='int64'):
    """
    Draw experiment counts from one or more multinomial distributions.

    Each one-dimensional slice along the last axis of `pvals` defines one
    distribution over L possible outcomes.

    Parameters
    ----------
    random_state
        Forwarded unchanged as the first input of the random op.
    size
        Vector of shape information for the output.  It may also supply
        the "nmulti" (leading) part of the shape of `pvals`.  A -1 in the
        k'th position from the right borrows the k'th position from the
        right of nmulti (see the examples).  The default ``None`` means
        size=nmulti.
    n
        Number of experiments simulated per multinomial.  Either a scalar
        or a tensor; it is broadcast to shape "nmulti".
    pvals
        Tensor of shape "nmulti+(L,)" with the outcome probabilities of
        each distribution.  numpy.allclose(pvals.sum(axis=-1), 1) must
        hold.
    ndim
        Forwarded, together with `size` and `n`, to the dimension /
        broadcast inference helper.
    dtype
        dtype of the returned counts.

    Returns
    -------
    tensor
        A tensor with len(size)+1 dimensions whose last dimension has
        length L, holding the experiment counts in the requested
        ``dtype``.  Its shape is derived from both `size` and
        `pvals.shape`; for the return value rval,
        "numpy.allclose(rval.sum(axis=-1), n)" holds.

    Extended Summary
    ----------------
    Simulating n experiments from each multinomial of a batch of size B:

        size=None, pvals.shape=(B,L) --> rval.shape=[B,L]

        rval[i,j] counts outcome j for the i'th distribution (row) of
        pvals.

    With an explicit size:

        size=(1,-1), pvals.shape=(A,B,L)
        --> rval.shape=[1,B,L], and requires that A==1.

        rval[k,i,j] counts outcome j for the distribution given by
        pvals[k,i].

    Using size to broadcast pvals:

        size=(10, 1, -1), pvals.shape=(A, B, L)
        --> rval.shape=[10,1,B,L], and requires that A==1.

        rval[l,k,i,j] counts outcome j for the distribution given by
        pvals[k,i], in the l'th of 10 draws.

    """
    n_var = tensor.as_tensor_variable(n)
    pvals_var = tensor.as_tensor_variable(pvals)

    # Select index 0 along the last axis (stand-in for pvals[..., 0]); the
    # result has the "nmulti" shape used for shape/broadcast inference.
    nmulti_like = pvals_var.T[0].T
    ndim, size, leading_bcast = _infer_ndim_bcast(ndim, size, n_var,
                                                  nmulti_like)

    # The output gains one trailing axis whose broadcastability is that of
    # the last axis of pvals.
    out_bcast = leading_bcast + (pvals_var.type.broadcastable[-1], )
    out_type = tensor.TensorType(dtype=dtype, broadcastable=out_bcast)

    sampler = RandomFunction(multinomial_helper, out_type, ndim_added=1)
    return sampler(random_state, size, n_var, pvals_var)
Exemple #5
0
 def make_node(self, x):
     """Build the Apply node mapping a GpuArray variable to a TensorType one.

     The single output has the same dtype and broadcastable pattern as `x`.
     Raises TypeError (carrying `x`) when `x` is not of GpuArrayType.
     """
     if isinstance(x.type, GpuArrayType):
         host_type = tensor.TensorType(dtype=x.dtype,
                                       broadcastable=x.broadcastable)
         return Apply(self, [x], [host_type()])
     raise TypeError(x)
Exemple #6
0
    def _execute(self):
        """
        Extract spatially pooled features for one dataset split and save them.

        The method reads its whole configuration from ``self``:

        * ``dataset_family[which_set][size]`` selects a dataset descriptor
          providing ``dataset_maker``, ``num_examples`` and ``pipeline_path``.
        * ``restrict``, when not None, is a ``(start, stop)`` pair selecting
          the rows ``start:stop`` of the design matrix.
        * ``model.e_step.variational_inference`` provides ``H_hat`` and
          ``S_hat``, which are combined according to ``feature_type``
          (``'map_hs'``, ``'map_h'``, ``'exp_hs'``, ``'exp_hs_split'``,
          ``'exp_h'`` or ``'exp_h_thresh'``).
        * ``pool_mode`` (``'mean'`` or ``'max'``) selects the pooling
          reduction.
        * For every ``count`` in ``pooling_region_counts`` one float32 array
          of shape ``(num_examples, count, count, nfeat)`` is produced and
          written with ``np.save`` to the matching entry of ``save_paths``
          (with a chunk letter suffix when ``chunk_size`` is not None).

        NaN feature values are replaced by 0 and counted; a warning with the
        total count is issued at the end if any were seen.

        Raises
        ------
        NotImplementedError
            If ``feature_type`` is not one of the supported names.
        ValueError
            If ``pool_mode`` is neither ``'mean'`` nor ``'max'``.
        """

        batch_size = self.batch_size
        feature_type = self.feature_type
        pooling_region_counts = self.pooling_region_counts
        dataset_family = self.dataset_family
        which_set = self.which_set
        model = self.model
        size = self.size

        # Running count of NaN feature values encountered.
        nan = 0

        dataset_descriptor = dataset_family[which_set][size]

        dataset = dataset_descriptor.dataset_maker()
        expected_num_examples = dataset_descriptor.num_examples

        full_X = dataset.get_design_matrix()
        assert full_X.dtype == 'float32'
        num_examples = full_X.shape[0]
        assert num_examples == expected_num_examples

        if self.restrict is not None:
            assert self.restrict[1] <= full_X.shape[0]

            # The doubled spaces reproduce the output of the former
            # comma-separated print statement exactly.
            print('restricting to examples  %s  through  %s  exclusive' %
                  (self.restrict[0], self.restrict[1]))
            full_X = full_X[self.restrict[0]:self.restrict[1], :]

            assert self.restrict[1] > self.restrict[0]

        #update for after restriction
        num_examples = full_X.shape[0]

        assert num_examples > 0

        # The design matrix now lives in full_X; clear it on the dataset
        # object so the per-batch shallow copies made below stay small.
        dataset.X = None
        dataset.design_loc = None
        dataset.compress = False

        # Replace the first pipeline stage by a grid extractor with stride 1.
        patchifier = ExtractGridPatches(patch_shape=(size, size),
                                        patch_stride=(1, 1))

        pipeline = serial.load(dataset_descriptor.pipeline_path)

        assert isinstance(pipeline.items[0], ExtractPatches)
        pipeline.items[0] = patchifier

        print('defining features')
        V = T.matrix('V')
        assert V.type.dtype == 'float32'
        model.make_pseudoparams()
        d = model.e_step.variational_inference(V=V)

        H = d['H_hat']
        Mu1 = d['S_hat']

        assert H.dtype == 'float32'
        assert Mu1.dtype == 'float32'

        nfeat = model.nhid

        if feature_type == 'map_hs':
            feat = (H > 0.5) * Mu1
        elif feature_type == 'map_h':
            feat = T.cast(H > 0.5, dtype='float32')
        elif feature_type == 'exp_hs':
            feat = H * Mu1
        elif feature_type == 'exp_hs_split':
            # Positive and negative parts become separate channels, which
            # doubles the number of features.
            Z = H * Mu1
            pos = T.clip(Z, 0., 1e32)
            neg = T.clip(-Z, 0, 1e32)
            feat = T.concatenate((pos, neg), axis=1)
            nfeat *= 2
        elif feature_type == 'exp_h':
            feat = H
        elif feature_type == 'exp_h_thresh':
            feat = H * (H > .01)
        else:
            raise NotImplementedError()

        assert feat.dtype == 'float32'
        print('compiling theano function')
        f = function([V], feat)

        if config.device.startswith('gpu') and nfeat >= 4000:
            f = halver(f, nfeat)

        topo_feat_var = T.TensorType(broadcastable=(False, False, False,
                                                    False),
                                     dtype='float32')()
        if self.pool_mode == 'mean':
            region_feat_var = topo_feat_var.mean(axis=(1, 2))
        elif self.pool_mode == 'max':
            region_feat_var = topo_feat_var.max(axis=(1, 2))
        else:
            raise ValueError("Unknown pool mode: " + self.pool_mode)
        region_features = function([topo_feat_var], region_feat_var)

        def average_pool(stride):
            """
            Pool the current ``topo_feat`` into a ``stride`` x ``stride`` grid.

            Reads ``topo_feat`` and ``ns`` from the enclosing scope at call
            time, so it must only be called inside the batch loop below.
            """
            def point(p):
                # Floor division: the result is used as a slice bound and
                # must stay an integer under true-division semantics too.
                return p * ns // stride

            rval = np.zeros(
                (topo_feat.shape[0], stride, stride, topo_feat.shape[3]),
                dtype='float32')

            for i in xrange(stride):
                for j in xrange(stride):
                    rval[:, i, j, :] = region_features(
                        topo_feat[:,
                                  point(i):point(i + 1),
                                  point(j):point(j + 1), :])

            return rval

        # One output array per requested pooling grid size.
        outputs = [
            np.zeros((num_examples, count, count, nfeat), dtype='float32')
            for count in pooling_region_counts
        ]

        assert len(outputs) > 0

        # Template dataset used to turn flat per-patch features back into a
        # topological view; it is shallow-copied for every batch.
        fd = DenseDesignMatrix(X=np.zeros((1, 1), dtype='float32'),
                               view_converter=DefaultViewConverter(
                                   [1, 1, nfeat]))

        # Number of stride-1 patch positions per axis.
        # NOTE(review): the constant 32 looks like the image side length --
        # confirm against the dataset.
        ns = 32 - size + 1
        depatchifier = ReassembleGridPatches(orig_shape=(ns, ns),
                                             patch_shape=(1, 1))

        # Diagnostic output when the loop below would not run at all.
        if len(range(0, num_examples - batch_size + 1, batch_size)) <= 0:
            print(num_examples)
            print(batch_size)

        for i in xrange(0, num_examples - batch_size + 1, batch_size):
            print(i)
            t1 = time.time()

            d = copy.copy(dataset)
            d.set_design_matrix(full_X[i:i + batch_size, :])

            t2 = time.time()

            #print '\tapplying preprocessor'
            d.apply_preprocessor(pipeline, can_fit=False)
            X2 = np.cast['float32'](d.get_design_matrix())

            t3 = time.time()

            #print '\trunning theano function'
            feat = f(X2)

            t4 = time.time()

            assert feat.dtype == 'float32'

            feat_dataset = copy.copy(fd)

            if np.any(np.isnan(feat)):
                nan += np.isnan(feat).sum()
                feat[np.isnan(feat)] = 0

            feat_dataset.set_design_matrix(feat)

            #print '\treassembling features'
            feat_dataset.apply_preprocessor(depatchifier)

            #print '\tmaking topological view'
            topo_feat = feat_dataset.get_topological_view()
            assert topo_feat.shape[0] == batch_size

            t5 = time.time()

            #average pooling
            for output, count in zip(outputs, pooling_region_counts):
                output[i:i + batch_size, ...] = average_pool(count)

            t6 = time.time()

            # Total time followed by the time spent in each stage, printed
            # as a single tuple.
            print((t6 - t1, t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5))

        for output, save_path in zip(outputs, self.save_paths):
            if self.chunk_size is not None:
                # Insert "_<letter>" before the extension; the letter is
                # derived from chunk_id ('A' for 0, 'B' for 1, ...).
                assert save_path.endswith('.npy')
                save_path_pieces = save_path.split('.npy')
                assert len(save_path_pieces) == 2
                assert save_path_pieces[1] == ''
                save_path = save_path_pieces[0] + '_' + chr(
                    ord('A') + self.chunk_id) + '.npy'
            np.save(save_path, output)

        if nan > 0:
            warnings.warn(str(nan) + ' features were nan')
Exemple #7
0
 def make_node(self, x):
     """Build the Apply node for a 2-dimensional input.

     `x` is converted to a tensor variable and must have exactly two
     dimensions.  The single output is a variable of an int8 TensorType
     with an empty broadcastable pattern.
     """
     x = theano.tensor.as_tensor_variable(x)
     assert x.ndim == 2
     int8_scalar_type = T.TensorType(dtype='int8', broadcastable=[])
     return theano.Apply(self, [x], [int8_scalar_type()])
Exemple #8
0
    def setUp(self):
        """Create the shared variables and compiled functions used by the tests.

        Builds a conv3D forward graph (``H``), a convTransp3D reconstruction
        (``R``), the summed squared reconstruction error (``reconsObj``) and
        compiled functions for values, shapes and gradients.
        """
        super(TestConv3D, self).setUp()
        utt.seed_rng()
        self.rng = N.random.RandomState(utt.fetch_seed())

        # Default mode with check_py_code turned off on a private copy.
        mode = copy.copy(theano.compile.mode.get_default_mode())
        mode.check_py_code = False

        def labelled(variable, label):
            # Attach a name to a variable and hand it back.
            variable.name = label
            return variable

        def recons_grads():
            # Fresh gradients of the objective w.r.t. W, H, V and b.
            return [
                T.grad(self.reconsObj, wrt)
                for wrt in (self.W, self.H, self.V, self.b)
            ]

        unit_5d = (1, 1, 1, 1, 1)

        # Placeholder values only; W, V and d are allocated without being
        # initialised.
        self.W = labelled(shared(N.ndarray(shape=unit_5d, dtype=floatX)), 'W')
        self.b = labelled(shared(N.zeros(1, dtype=floatX)), 'b')
        self.rb = labelled(shared(N.zeros(1, dtype=floatX)), 'rb')
        self.V = labelled(shared(N.ndarray(shape=unit_5d, dtype=floatX)), 'V')
        self.d = labelled(shared(N.ndarray(shape=(3, ), dtype=int)), 'd')

        # Forward pass and its value / shape functions.
        self.H = labelled(conv3D(self.V, self.W, self.b, self.d), 'H')
        self.H_func = function([], self.H, mode=mode)
        self.H_shape_func = function([], self.H.shape, mode=mode)

        self.RShape = labelled(T.vector(dtype='int64'), 'RShape')

        # Transposed convolution applied to a free 5-D input.
        self.otherH = T.TensorType(floatX, (False, ) * 5)(name='otherH')
        self.transp = labelled(
            convTransp3D(self.W, self.rb, self.d, self.otherH, self.RShape),
            'transp')
        self.transp_func = function([self.otherH, self.RShape],
                                    self.transp,
                                    mode=mode)

        # Reconstruction of V from H.
        self.R = labelled(
            convTransp3D(self.W, self.rb, self.d, self.H, self.RShape), 'R')
        self.R_func = function([self.RShape], self.R, mode=mode)
        self.R_shape_func = function([self.RShape], self.R.shape)

        # Summed squared reconstruction error.
        diff = labelled(self.V - self.R, 'diff')
        sqr = labelled(T.sqr(diff), 'sqr')
        self.reconsObj = labelled(T.sum(sqr), 'reconsObj')
        self.reconsObjFunc = function([self.RShape], self.reconsObj, mode=mode)

        self.gradientsFunc = function([self.RShape], recons_grads(),
                                      mode=mode)

        self.check_c_against_python = function([self.RShape], recons_grads(),
                                               mode='DEBUG_MODE')

        self.dCdW_shape_func = function([self.RShape],
                                        T.grad(self.reconsObj, self.W).shape,
                                        mode=mode)
Exemple #9
0
# Create the output directory via the project's serial helper.
serial.mkdir(outdir)

# Sanity check: the input directory must contain exactly the expected
# number of entries before any processing starts.
paths = os.listdir(base)
if len(paths) != expected_num_images:
    raise AssertionError("Something is wrong with your " + base \
            + "directory. It should contain " + str(expected_num_images) + \
            " image files, but contains " + str(len(paths)))

# Side length of the square smoothing kernel.
kernel_shape = 7

from theano import tensor as T
from pylearn2.utils import sharedX
from pylearn2.datasets.preprocessing import gaussian_filter
from theano.tensor.nnet import conv2d

# Symbolic float32 4-D input whose first and last axes are broadcastable
# (length 1); the test value below uses the layout (1, 32, 32, 1).
X = T.TensorType(dtype='float32', broadcastable=(True, False, False, True))()
from theano import config
if config.compute_test_value == 'raise':
    X.tag.test_value = np.zeros((1, 32, 32, 1), dtype=X.dtype)
# Keep a handle on the original input variable before X is rebound.
orig_X = X
filter_shape = (1, 1, kernel_shape, kernel_shape)
filters = sharedX(gaussian_filter(kernel_shape).reshape(filter_shape))

# Move the last axis to position 1 so X matches the 4-D filter layout.
X = X.dimshuffle(0, 3, 1, 2)

convout = conv2d(X, filters=filters, border_mode='full')

# For each pixel, subtract the filtered value of its kernel_shape x
# kernel_shape (7x7) neighborhood: the 'full' convolution output is cropped
# by `mid` on every side so that it lines up with X again.
mid = int(np.floor(kernel_shape / 2.))
centered_X = X - convout[:, :, mid:-mid, mid:-mid]
Exemple #10
0
 def __init__(self, n_in=None, n_out=None,
              base_network=None, data_map=None, data_map_i=None,
              shared_params_network=None,
              mask=None, sparse_input=False, target='classes', train_flag=False, eval_flag=False):
   """
   Set up the input variables and the empty bookkeeping state of the network.

   :param int n_in: input dim of the network
   :param dict[str,(int,int)] n_out: output dim of the network.
     The first int is the number of classes; the second int is 1 if the
     target is sparse, i.e. we will get the indices.
   :param dict[str,theano.Variable] data_map: if given, used for x/y (data_map_i is then required)
   :param dict[str,theano.Variable] data_map_i: if given, used for i/j
   :param LayerNetwork|None base_network: optional base network from which x/y/i/j/n_in/n_out are derived.
     data_map takes precedence over base_network.
   :param LayerNetwork|()->LayerNetwork|None shared_params_network: optional network to share params with.
     It is an error if a param cannot be shared.
   :param str mask: e.g. "unity" or None ("dropout")
   :param bool sparse_input: for SourceLayer
   :param str target: default target
   :param bool train_flag: marks that we are used for training
   :param bool eval_flag: marks that we are used for evaluation
   """
   # Resolve the output dims: a private copy of the argument, or the base
   # network's dict itself (not copied).
   if n_out is not None:
     n_out = n_out.copy()
   else:
     assert base_network is not None
     n_out = base_network.n_out

   if n_in is None:
     assert "data" in n_out
     n_in = n_out["data"][0]

   if "data" in n_out:
     assert 1 <= n_out["data"][1] <= 2  # maybe obsolete check...
     data_dim = n_out["data"][1] + 1  # one more because of batch-dim
   else:
     data_dim = 3
     n_out["data"] = (n_in, data_dim - 1)  # small hack: support input-data as target

   # Pick the data / index variables: data_map first, then base_network,
   # otherwise create fresh symbolic inputs.
   if data_map is not None:
     assert data_map_i is not None
     self.y = data_map
     self.x = data_map["data"]
     self.j = data_map_i
     self.i = data_map_i["data"]
   elif base_network is not None:
     self.x, self.y = base_network.x, base_network.y
     self.i, self.j = base_network.i, base_network.j
   else:
     if data_dim >= 3:
       dtype = "float32"
     else:
       dtype = "int32"
     self.x = T.TensorType(dtype, ((False,) * data_dim))('x')
     self.y = {"data": self.x}
     self.i = T.bmatrix('i')  # :type: theano.Variable
     self.j = {"data": self.i}

   if base_network is None:
     self.epoch = T.constant(0, name="epoch", dtype="int32")
     self.tags = T.bmatrix('tags')
   else:
     self.epoch = base_network.epoch
     self.tags = base_network.tags

   self.constraints = {}
   self.total_constraints = T.constant(0)
   Layer.initialize_rng()

   self.n_in = n_in
   self.n_out = n_out
   self.hidden = {}  # :type: dict[str,ForwardLayer|RecurrentLayer]
   self.train_params_vars = []  # :type: list[theano.compile.sharedvalue.SharedVariable]
   self.description = None  # :type: LayerNetworkDescription | None
   self.train_param_args = None  # :type: dict[str]
   self.recurrent = False  # any of the from_...() functions will set this

   # Options taken over from the constructor arguments.
   self.default_mask = mask
   self.sparse_input = sparse_input
   self.default_target = target
   self.train_flag = train_flag
   self.eval_flag = eval_flag

   # Empty result containers.
   self.output = {}  # :type: dict[str,FramewiseOutputLayer]
   self.known_grads = {}  # :type: dict[theano.Variable,theano.Variable]
   self.json_content = "{}"
   self.costs = {}
   self.total_cost = T.constant(0)
   self.objective = None
   self.update_step = 0
   self.errors = {}
   self.loss = None
   self.ctc_priors = None
   self.calc_step_base = None
   self.calc_steps = []

   self.base_network = base_network
   self.shared_params_network = shared_params_network
Exemple #11
0
def main():
    """Train and evaluate a network with leave-subject-out cross-validation.

    All configuration is read from module-level names (``filename``,
    ``subjectsFilename``, ``filename_aug``, ``augment``, ``model``,
    ``init_pars``, ``num_epochs`` and ``batch_size``).

    One fold is created per unique subject number.  For each fold a fresh
    network is built and trained with Adam; every time the validation
    accuracy improves, the test set is evaluated and the network weights are
    saved with ``np.savez``.  After all folds, the per-fold scores and the
    per-epoch loss/accuracy curves are stored with ``scipy.io.savemat``.

    Raises:
        ValueError: if ``model`` is not one of ``'1dconv'``, ``'maxpool'``,
            ``'lstm'`` or ``'mix'``.
    """
    # Load the dataset
    print("Loading data...")
    data, labels = load_data(filename)
    mat = scipy.io.loadmat(subjectsFilename, mat_dtype=True)
    subjNumbers = np.squeeze(mat['subjectNum'])     # subject IDs for each trial

    if augment:
        # Aggregate augmented data and labels
        data_aug, labels_aug = load_data(filename_aug)
        data = np.concatenate((data, data_aug), axis=1)
        labels = np.vstack((labels, labels_aug))

    # Create folds based on subject numbers (leave-subject-out x-validation)
    fold_pairs = []
    for i in np.unique(subjNumbers):
        ts = subjNumbers == i
        tr = np.squeeze(np.nonzero(np.bitwise_not(ts)))     # Training indices
        ts = np.squeeze(np.nonzero(ts))                     # Test indices
        if augment:
            # Augmented samples were appended after the originals, so the
            # matching augmented training indices are offset by the number
            # of original trials.
            tr = np.concatenate((tr, tr + subjNumbers.size))
        np.random.shuffle(tr)       # Shuffle indices
        np.random.shuffle(ts)
        fold_pairs.append((tr, ts))

    # Initializing output variables
    validScores, testScores = [], []
    trainLoss = np.zeros((len(fold_pairs), num_epochs))
    validLoss = np.zeros((len(fold_pairs), num_epochs))
    validEpochAccu = np.zeros((len(fold_pairs), num_epochs))
    for foldNum, fold in enumerate(fold_pairs):
        print('Beginning fold {0} out of {1}'.format(foldNum+1, len(fold_pairs)))
        # Divide the dataset into train, validation and test sets
        (X_train, y_train), (X_val, y_val), (X_test, y_test) = reformatInput(data, labels, fold)
        X_train = X_train.astype("float32", casting='unsafe')
        X_val = X_val.astype("float32", casting='unsafe')
        X_test = X_test.astype("float32", casting='unsafe')

        # Prepare Theano variables for inputs and targets
        input_var = T.TensorType('floatX', ((False,) * 5))()        # Notice the () at the end
        target_var = T.ivector('targets')
        # Create neural network model (selected by the module-level `model`)
        print("Building model and compiling functions...")
        if model == '1dconv':
            network = build_convpool_conv1d(input_var)
        elif model == 'maxpool':
            network = build_convpool_max(input_var)
        elif model == 'lstm':
            network = build_convpool_lstm(input_var)
        elif model == 'mix':
            network = build_convpool_mix(input_var)
        else:
            # Without this, `network` would be undefined (first fold) or
            # silently reused from the previous fold.
            raise ValueError("Unknown model type: {0!r}".format(model))

        # Initialize parameters with previously saved ones.
        if init_pars:
            # NOTE(review): this file name is spelled differently from the
            # one written by np.savez below; kept unchanged because existing
            # checkpoints may rely on it.
            with np.load('weigths_lasg{0}.npz'.format(foldNum)) as f:
                # Extract CNN parameters only (not the FC layers)
                param_values = [f['arr_%d' % i] for i in range(14)]
                layers = lasagne.layers.get_all_layers(network)
                lasagne.layers.set_all_param_values(layers[83], param_values)

        # Create a loss expression for training, i.e., a scalar objective we want
        # to minimize (for our multi-class problem, it is the cross-entropy loss):
        prediction = lasagne.layers.get_output(network)
        loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
        loss = loss.mean()
        # We could add some weight decay as well here, see lasagne.regularization.

        # Create update expressions for training, i.e., how to modify the
        # parameters at each training step. Here we use the Adam optimizer.
        params = lasagne.layers.get_all_params(network, trainable=True)
        updates = lasagne.updates.adam(loss, params, learning_rate=0.001)

        # Create a loss expression for validation/testing. The crucial difference
        # here is that we do a deterministic forward pass through the network,
        # disabling dropout layers.
        test_prediction = lasagne.layers.get_output(network, deterministic=True)
        test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                                target_var)
        test_loss = test_loss.mean()
        # As a bonus, also create an expression for the classification accuracy:
        test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                          dtype=theano.config.floatX)

        # Compile a function performing a training step on a mini-batch (by giving
        # the updates dictionary) and returning the corresponding training loss:
        train_fn = theano.function([input_var, target_var], loss, updates=updates)

        # Compile a second function computing the validation loss and accuracy:
        val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

        # Finally, launch the training loop.
        print("Starting training...")
        best_validation_accu = 0
        # Test accuracy at the best validation epoch.  Stays NaN when the
        # validation accuracy never rises above zero, instead of raising
        # NameError or leaking the value from the previous fold.
        av_test_acc = float('nan')
        # We iterate over epochs:
        for epoch in range(num_epochs):
            # In each epoch, we do a full pass over the training data:
            train_err = 0
            train_batches = 0
            start_time = time.time()
            for batch in iterate_minibatches(X_train, y_train, batch_size, shuffle=False):
                inputs, targets = batch
                train_err += train_fn(inputs, targets)
                train_batches += 1

            # And a full pass over the validation data:
            val_err = 0
            val_acc = 0
            val_batches = 0
            for batch in iterate_minibatches(X_val, y_val, batch_size, shuffle=False):
                inputs, targets = batch
                err, acc = val_fn(inputs, targets)
                val_err += err
                val_acc += acc
                val_batches += 1
            av_train_err = train_err / train_batches
            av_val_err = val_err / val_batches
            av_val_acc = val_acc / val_batches
            # Then we print the results for this epoch:
            print("Epoch {} of {} took {:.3f}s".format(
                epoch + 1, num_epochs, time.time() - start_time))
            print("  training loss:\t\t{:.6f}".format(av_train_err))
            print("  validation loss:\t\t{:.6f}".format(av_val_err))
            print("  validation accuracy:\t\t{:.2f} %".format(av_val_acc * 100))

            trainLoss[foldNum, epoch] = av_train_err
            validLoss[foldNum, epoch] = av_val_err
            validEpochAccu[foldNum, epoch] = av_val_acc * 100

            if av_val_acc > best_validation_accu:
                best_validation_accu = av_val_acc

                # New best validation accuracy: compute and print the test
                # error.  The accumulators get their own names so they do not
                # shadow the symbolic `test_acc` expression above.
                test_err_sum = 0
                test_acc_sum = 0
                test_batches = 0
                for batch in iterate_minibatches(X_test, y_test, batch_size, shuffle=False):
                    inputs, targets = batch
                    err, acc = val_fn(inputs, targets)
                    test_err_sum += err
                    test_acc_sum += acc
                    test_batches += 1

                av_test_err = test_err_sum / test_batches
                av_test_acc = test_acc_sum / test_batches
                print("Final results:")
                print("  test loss:\t\t\t{:.6f}".format(av_test_err))
                print("  test accuracy:\t\t{:.2f} %".format(av_test_acc * 100))
                # Dump the network weights to a file like this:
                np.savez('weights_lasg_{0}_{1}'.format(model, foldNum), *lasagne.layers.get_all_param_values(network))
        validScores.append(best_validation_accu * 100)
        testScores.append(av_test_acc * 100)
        print('-'*50)
        print("Best validation accuracy:\t\t{:.2f} %".format(best_validation_accu * 100))
        print("Best test accuracy:\t\t{:.2f} %".format(av_test_acc * 100))
    scipy.io.savemat('cnn_lasg_{0}_results'.format(model),
                     {'validAccu': validScores,
                      'testAccu': testScores,
                      'trainLoss': trainLoss,
                      'validLoss': validLoss,
                      'validEpochAccu': validEpochAccu
                      })
Exemple #12
0
    def _execute(self):
        """
        Compute pooled features for each example and save them with np.save.

        For every batch of the (optionally restricted) design matrix this
        method extracts stride-1 grid patches, obtains codes `Z` for them
        through the module-level `M` / `s` objects, splits the codes into
        their positive and negative parts, reassembles the result into a
        topological view, average-pools it into a
        num_superpixels x num_superpixels grid and fills the matching rows
        of the output matrix from the regions described by the globals
        `top`, `bottom`, `left`, `right` and `idxs`.

        Besides the attributes read from `self`, the method depends on
        module-level names that are not defined here: `num_superpixels`,
        `num_output_features`, `idxs`, `top`, `bottom`, `left`, `right`,
        `num_filters`, `M` and `s`.

        The loop body asserts batch_size == 1. A warning is emitted at the
        end if any NaN feature values had to be replaced by 0.
        """

        global num_superpixels
        global num_output_features
        global idxs
        global top
        global bottom
        global left
        global right

        save_path = self.save_path
        batch_size = self.batch_size
        dataset_family = self.dataset_family
        which_set = self.which_set
        size = self.size


        # running count of NaN feature values that were replaced by 0
        nan = 0


        dataset_descriptor = dataset_family[which_set][size]

        dataset = dataset_descriptor.dataset_maker()
        expected_num_examples = dataset_descriptor.num_examples

        full_X = dataset.get_design_matrix()
        num_examples = full_X.shape[0]
        assert num_examples == expected_num_examples

        if self.restrict is not None:
            assert self.restrict[1]  <= full_X.shape[0]

            print 'restricting to examples ',self.restrict[0],' through ',self.restrict[1],' exclusive'
            full_X = full_X[self.restrict[0]:self.restrict[1],:]

            assert self.restrict[1] > self.restrict[0]

        #update for after restriction
        num_examples = full_X.shape[0]

        assert num_examples > 0

        # The data now lives in full_X; clear these fields on the dataset
        # object, which is shallow-copied once per batch below.
        dataset.X = None
        dataset.design_loc = None
        dataset.compress = False

        patchifier = ExtractGridPatches( patch_shape = (size,size), patch_stride = (1,1) )

        pipeline = serial.load(dataset_descriptor.pipeline_path)

        # Replace the first pipeline step of the loaded pipeline with the
        # stride-1 grid patch extractor.
        assert isinstance(pipeline.items[0], ExtractPatches)
        pipeline.items[0] = patchifier


        Z = T.matrix('Z')

        # Split the codes into positive and negative parts (both clipped to
        # be non-negative) and concatenate them along axis 1.
        pos = T.clip(Z,0.,1e30)
        neg = T.clip(-Z,0.,1e30)

        feat = T.concatenate((pos, neg), axis=1)

        assert feat.dtype == 'float32'
        print 'compiling theano function'
        f = function([Z],feat)

        nhid = 3200 # 2 * num dictionary elems

        # With nhid fixed at 3200 above, this branch is never taken.
        if config.device.startswith('gpu') and nhid >= 4000:
            f = halver(f, nhid)

        # Compiled function returning the mean over axes 1 and 2 of a 4-D
        # float32 tensor.
        topo_feat_var = T.TensorType(broadcastable = (False,False,False,False), dtype='float32')()
        region_features = function([topo_feat_var],
                topo_feat_var.mean(axis=(1,2)) )

        def average_pool( stride ):
            # Pools `topo_feat` into a stride x stride grid of region means.
            # `topo_feat` and `ns` are read from the enclosing scope; both
            # are assigned further down, before this function is called.
            def point( p ):
                # NOTE(review): the result is used as a slice bound, so this
                # assumes Python 2 integer division -- confirm there is no
                # `from __future__ import division` in this module.
                return p * ns / stride

            rval = np.zeros( (topo_feat.shape[0], stride, stride, topo_feat.shape[3] ) , dtype = 'float32')

            for i in xrange(stride):
                for j in xrange(stride):
                    rval[:,i,j,:] = region_features( topo_feat[:,point(i):point(i+1), point(j):point(j+1),:] )

            return rval

        output =  np.zeros((num_examples,num_output_features),dtype='float32')


        # Template dataset; shallow-copied for each batch to hold its features.
        fd = DenseDesignMatrix(X = np.zeros((1,1),dtype='float32'), view_converter = DefaultViewConverter([1, 1, nhid] ) )

        # Number of patch positions per side; the constant 32 is presumably
        # the input image side length -- TODO confirm.
        ns = 32 - size + 1
        depatchifier = ReassembleGridPatches( orig_shape  = (ns, ns), patch_shape=(1,1) )

        # Diagnostic output when the loop below would not run at all.
        if len(range(0,num_examples-batch_size+1,batch_size)) <= 0:
            print num_examples
            print batch_size

        # Only full batches are visited by this range.
        for i in xrange(0,num_examples-batch_size+1,batch_size):
            print i
            t1 = time.time()

            d = copy.copy(dataset)
            d.set_design_matrix(full_X[i:i+batch_size,:])

            t2 = time.time()

            #print '\tapplying preprocessor'
            d.apply_preprocessor(pipeline, can_fit = False)
            X2 = d.get_design_matrix()

            t3 = time.time()

            #print '\trunning theano function'

            # `M` and `s` are module-level names not defined in this method;
            # presumably a MATLAB/Octave bridge and its session handle --
            # TODO confirm. The patches are sent over, coded remotely, and
            # the codes fetched back (rebinding the local name Z).
            M.put(s,'batch',X2)

            M.eval(s, 'Z = sparse_codes(batch, dictionary, lambda)')
            Z = M.get(s, 'Z')

            feat = f(np.cast['float32'](Z))

            t4 = time.time()

            assert feat.dtype == 'float32'

            feat_dataset = copy.copy(fd)

            # Count NaN entries and zero them out.
            if np.any(np.isnan(feat)):
                nan += np.isnan(feat).sum()
                feat[np.isnan(feat)] = 0

            feat_dataset.set_design_matrix(feat)

            #print '\treassembling features'
            feat_dataset.apply_preprocessor(depatchifier)

            #print '\tmaking topological view'
            topo_feat = feat_dataset.get_topological_view()
            assert topo_feat.shape[0] == batch_size

            t5 = time.time()

            #average pooling
            superpixels = average_pool(num_superpixels)

            assert batch_size == 1

            assert superpixels.shape[0] == batch_size
            assert superpixels.shape[1] == num_superpixels
            assert superpixels.shape[2] == num_superpixels
            # `num_filters` is not defined in this method; it must exist at
            # module level.
            assert superpixels.shape[3] == 2 * num_filters

            # Each output feature j is the mean of channel idxs[j] over the
            # inclusive superpixel rectangle top[j]..bottom[j] by
            # left[j]..right[j].
            for j in xrange(num_output_features):
                output[i:i+batch_size, j] = superpixels[:,top[j]:bottom[j]+1,
                        left[j]:right[j]+1, idxs[j]].mean()

            t6 = time.time()

            # Timings: total, then each stage between consecutive timestamps.
            print (t6-t1, t2-t1, t3-t2, t4-t3, t5-t4, t6-t5)

        # When chunking is enabled, insert '_<letter>' before the '.npy'
        # extension, where the letter is 'A' offset by self.chunk_id.
        if self.chunk_size is not None:
            assert save_path.endswith('.npy')
            save_path_pieces = save_path.split('.npy')
            assert len(save_path_pieces) == 2
            assert save_path_pieces[1] == ''
            save_path = save_path_pieces[0] + '_' + chr(ord('A')+self.chunk_id)+'.npy'
        np.save(save_path,output)


        if nan > 0:
            warnings.warn(str(nan)+' features were nan')
Exemple #13
0
    def make_node(self, mean_anom, eccen):
        """Build the Apply node for this op.

        The single output has the upcast dtype of the two inputs, the same
        number of dimensions as ``mean_anom`` and no broadcastable axes.
        """
        result_dtype = theano.scalar.upcast(mean_anom.dtype, eccen.dtype)
        no_broadcast = [False] * mean_anom.ndim
        result_type = tt.TensorType(dtype=result_dtype,
                                    broadcastable=no_broadcast)

        node_inputs = [mean_anom, eccen]
        node_outputs = [result_type()]
        return gof.Apply(self, node_inputs, node_outputs)
Exemple #14
0
def new_tensor(name, ndim, dtype):
    """Return a named symbolic tensor of the given rank and dtype.

    None of the ``ndim`` dimensions is marked as broadcastable.
    """
    import theano.tensor as TT

    broadcast_pattern = (False, ) * ndim
    tensor_type = TT.TensorType(dtype, broadcast_pattern)
    return tensor_type(name)
Exemple #15
0
def main(num_epochs=NEPOCH):
    """Build, optionally resume, train and evaluate the SNLI attention model.

    The network embeds hypothesis and premise with shared weights, computes
    an attention matrix between them ("attend"), compares each sentence with
    its attention-weighted counterpart ("compare"), averages over the
    sequence dimension ("aggregate") and classifies into 3 classes.

    Hyper-parameters come from module-level constants (``BSIZE``, ``WEMAP``,
    ``EMBDHIDA``, ``EMBDHIDB``, ``COMPHIDA``, ``COMPHIDB``, ``OUTHID``,
    ``DPOUT``, ``LR``, ``UPDATEWE``) and the checkpoint name from the
    module-level ``filename``.  Parameters are pickled to
    ``params/params_<filename>.pkl`` after every epoch and reloaded from that
    file on start-up if it exists.

    Args:
        num_epochs: number of passes over the training batches.

    A ``KeyboardInterrupt`` during training drops into ``pdb``.
    """
    print("Loading data ...")
    snli = SNLI(batch_size=BSIZE)
    train_batches = list(snli.train_minibatch_generator())
    dev_batches = list(snli.dev_minibatch_generator())
    test_batches = list(snli.test_minibatch_generator())
    # Row-normalise the embedding matrix, shape (# vocab size, WE_DIM); the
    # small constant avoids division by zero for all-zero rows.
    W_word_embedding = snli.weight / \
                       (numpy.linalg.norm(snli.weight, axis=1).reshape(snli.weight.shape[0], 1) + \
                        0.00001)
    del snli

    print("Building network ...")
    ########### input layers ###########
    # hypothesis
    input_var_h = T.TensorType('int32', [False, False])('hypothesis_vector')
    input_var_h.tag.test_value = numpy.hstack(
        (numpy.random.randint(1, 10000, (BSIZE, 18),
                              'int32'), numpy.zeros(
                                  (BSIZE, 6)).astype('int32')))
    l_in_h = lasagne.layers.InputLayer(shape=(BSIZE, None),
                                       input_var=input_var_h)

    input_mask_h = T.TensorType('int32', [False, False])('hypo_mask')
    input_mask_h.tag.test_value = numpy.hstack((numpy.ones(
        (BSIZE, 18), dtype='int32'), numpy.zeros((BSIZE, 6), dtype='int32')))
    input_mask_h.tag.test_value[1, 18:22] = 1
    l_mask_h = lasagne.layers.InputLayer(shape=(BSIZE, None),
                                         input_var=input_mask_h)

    # premise
    input_var_p = T.TensorType('int32', [False, False])('premise_vector')
    input_var_p.tag.test_value = numpy.hstack(
        (numpy.random.randint(1, 10000, (BSIZE, 16),
                              'int32'), numpy.zeros(
                                  (BSIZE, 3)).astype('int32')))
    l_in_p = lasagne.layers.InputLayer(shape=(BSIZE, None),
                                       input_var=input_var_p)

    input_mask_p = T.TensorType('int32', [False, False])('premise_mask')
    input_mask_p.tag.test_value = numpy.hstack((numpy.ones(
        (BSIZE, 16), dtype='int32'), numpy.zeros((BSIZE, 3), dtype='int32')))
    input_mask_p.tag.test_value[1, 16:18] = 1
    l_mask_p = lasagne.layers.InputLayer(shape=(BSIZE, None),
                                         input_var=input_mask_p)
    ###################################

    # Word embeddings; the premise layer shares the hypothesis layer's W.
    l_hypo_embed = lasagne.layers.EmbeddingLayer(
        l_in_h,
        input_size=W_word_embedding.shape[0],
        output_size=W_word_embedding.shape[1],
        W=W_word_embedding)

    l_prem_embed = lasagne.layers.EmbeddingLayer(
        l_in_p,
        input_size=W_word_embedding.shape[0],
        output_size=W_word_embedding.shape[1],
        W=l_hypo_embed.W)

    # EMBEDDING MAPPING: linear projection to WEMAP units, shared weights
    l_hypo_reduced_embed = DenseLayer3DInput(l_hypo_embed,
                                             num_units=WEMAP,
                                             b=None,
                                             nonlinearity=None)
    l_hypo_embed_dpout = lasagne.layers.DropoutLayer(l_hypo_reduced_embed,
                                                     p=DPOUT,
                                                     rescale=True)
    l_prem_reduced_embed = DenseLayer3DInput(l_prem_embed,
                                             num_units=WEMAP,
                                             W=l_hypo_reduced_embed.W,
                                             b=None,
                                             nonlinearity=None)
    l_prem_embed_dpout = lasagne.layers.DropoutLayer(l_prem_reduced_embed,
                                                     p=DPOUT,
                                                     rescale=True)

    # ATTEND: two-layer ReLU network applied to both sentences (shared W)
    l_hypo_embed_hid1 = DenseLayer3DInput(
        l_hypo_embed_dpout,
        num_units=EMBDHIDA,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)
    l_hypo_embed_hid1_dpout = lasagne.layers.DropoutLayer(l_hypo_embed_hid1,
                                                          p=DPOUT,
                                                          rescale=True)
    l_hypo_embed_hid2 = DenseLayer3DInput(
        l_hypo_embed_hid1_dpout,
        num_units=EMBDHIDB,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)

    l_prem_embed_hid1 = DenseLayer3DInput(
        l_prem_embed_dpout,
        num_units=EMBDHIDA,
        W=l_hypo_embed_hid1.W,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)
    l_prem_embed_hid1_dpout = lasagne.layers.DropoutLayer(l_prem_embed_hid1,
                                                          p=DPOUT,
                                                          rescale=True)
    l_prem_embed_hid2 = DenseLayer3DInput(
        l_prem_embed_hid1_dpout,
        num_units=EMBDHIDB,
        W=l_hypo_embed_hid2.W,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)

    # Attention scores between the two sentences.  Both sides must come from
    # the *second* hidden layer: the previous code passed l_hypo_embed_hid1
    # here, which left l_hypo_embed_hid2's output unused and paired an
    # EMBDHIDA-wide hypothesis representation with an EMBDHIDB-wide premise
    # representation.
    l_e = ComputeEmbeddingPool([l_hypo_embed_hid2, l_prem_embed_hid2])
    # Attention-weighted hypothesis, aligned to the premise ('col' direction)
    l_hypo_weighted = AttendOnEmbedding([l_hypo_reduced_embed, l_e],
                                        masks=[l_mask_h, l_mask_p],
                                        direction='col')
    # Attention-weighted premise, aligned to the hypothesis ('row' direction)
    l_prem_weighted = AttendOnEmbedding([l_prem_reduced_embed, l_e],
                                        masks=[l_mask_h, l_mask_p],
                                        direction='row')

    # COMPARE: concatenate each sentence with the other's weighted version
    # along the feature axis, then apply a shared two-layer ReLU network.
    l_hypo_premwtd = lasagne.layers.ConcatLayer(
        [l_hypo_reduced_embed, l_prem_weighted], axis=2)
    l_prem_hypowtd = lasagne.layers.ConcatLayer(
        [l_prem_reduced_embed, l_hypo_weighted], axis=2)

    l_hypo_premwtd_dpout = lasagne.layers.DropoutLayer(l_hypo_premwtd,
                                                       p=DPOUT,
                                                       rescale=True)
    l_hypo_comphid1 = DenseLayer3DInput(
        l_hypo_premwtd_dpout,
        num_units=COMPHIDA,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)

    l_hypo_comphid1_dpout = lasagne.layers.DropoutLayer(l_hypo_comphid1,
                                                        p=DPOUT,
                                                        rescale=True)
    l_hypo_comphid2 = DenseLayer3DInput(
        l_hypo_comphid1_dpout,
        num_units=COMPHIDB,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)

    l_prem_hypowtd_dpout = lasagne.layers.DropoutLayer(l_prem_hypowtd,
                                                       p=DPOUT,
                                                       rescale=True)
    l_prem_comphid1 = DenseLayer3DInput(
        l_prem_hypowtd_dpout,
        num_units=COMPHIDA,
        W=l_hypo_comphid1.W,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)
    l_prem_comphid1_dpout = lasagne.layers.DropoutLayer(l_prem_comphid1,
                                                        p=DPOUT,
                                                        rescale=True)
    l_prem_comphid2 = DenseLayer3DInput(
        l_prem_comphid1_dpout,
        num_units=COMPHIDB,
        W=l_hypo_comphid2.W,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)

    # AGGREGATE: masked mean over the sequence axis, then concatenate
    l_hypo_mean = MeanOverDim(l_hypo_comphid2, mask=l_mask_h, dim=1)
    l_prem_mean = MeanOverDim(l_prem_comphid2, mask=l_mask_p, dim=1)

    l_v1v2 = lasagne.layers.ConcatLayer([l_hypo_mean, l_prem_mean], axis=1)
    l_v1v2_dpout = lasagne.layers.DropoutLayer(l_v1v2, p=DPOUT, rescale=True)

    l_outhid1 = lasagne.layers.DenseLayer(
        l_v1v2_dpout,
        num_units=OUTHID,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)
    l_outhid1_dpout = lasagne.layers.DropoutLayer(l_outhid1,
                                                  p=DPOUT,
                                                  rescale=True)

    l_outhid2 = lasagne.layers.DenseLayer(
        l_outhid1_dpout,
        num_units=OUTHID,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)

    l_output = lasagne.layers.DenseLayer(
        l_outhid2,
        num_units=3,
        b=None,
        nonlinearity=lasagne.nonlinearities.softmax)

    ########### target, cost, validation, etc. ##########
    target_values = T.ivector('target_output')
    target_values.tag.test_value = numpy.asarray([
        1,
    ] * BSIZE, dtype='int32')

    # Stochastic outputs (dropout active) are used for training ...
    network_output = lasagne.layers.get_output(l_output)
    network_prediction = T.argmax(network_output, axis=1)
    error_rate = T.mean(T.neq(network_prediction, target_values))

    # ... deterministic ("clean") outputs for evaluation.
    network_output_clean = lasagne.layers.get_output(l_output,
                                                     deterministic=True)
    network_prediction_clean = T.argmax(network_output_clean, axis=1)
    error_rate_clean = T.mean(T.neq(network_prediction_clean, target_values))

    cost = T.mean(
        T.nnet.categorical_crossentropy(network_output, target_values))
    cost_clean = T.mean(
        T.nnet.categorical_crossentropy(network_output_clean, target_values))

    # Retrieve all parameters from the network
    all_params = lasagne.layers.get_all_params(l_output)
    if not UPDATEWE:
        # Keep the word embeddings fixed.
        all_params.remove(l_hypo_embed.W)

    param_shapes = [param.shape.eval() for param in all_params]
    numparams = sum(numpy.prod(shape) for shape in param_shapes)
    print("Number of params: {}\nName\t\t\tShape\t\t\tSize".format(numparams))
    print("-----------------------------------------------------------------")
    for item, shape in zip(all_params, param_shapes):
        # Convert to str explicitly: a width spec such as ':24' raises
        # TypeError on Python 3 for objects without their own __format__.
        print("{0:24}{1:24}{2}".format(str(item), str(shape),
                                       numpy.prod(shape)))

    # if exist param file then load params
    param_path = 'params' + os.sep + 'params_' + filename + '.pkl'
    if os.path.isfile(param_path):
        print("Resuming from file: " + param_path)
        # NOTE: unpickling executes arbitrary code; only resume from
        # checkpoint files you created yourself.
        with open(param_path, 'rb') as param_file:
            all_param_values = cPickle.load(param_file)
        for p, v in zip(all_params, all_param_values):
            p.set_value(v)

    # Compute Adagrad updates for training
    print("Computing updates ...")
    updates = lasagne.updates.adagrad(cost, all_params, LR)

    # Theano functions for training and computing cost
    print("Compiling functions ...")
    train = theano.function([
        l_in_h.input_var, l_mask_h.input_var, l_in_p.input_var,
        l_mask_p.input_var, target_values
    ], [cost, error_rate],
                            updates=updates)
    # For debugging NaNs, pass
    # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=False)
    compute_cost = theano.function([
        l_in_h.input_var, l_mask_h.input_var, l_in_p.input_var,
        l_mask_p.input_var, target_values
    ], [cost_clean, error_rate_clean])

    def evaluate(mode):
        """Return (mean cost, mean error rate) over the 'dev' or 'test' batches."""
        if mode == 'dev':
            data = dev_batches
        elif mode == 'test':
            data = test_batches
        else:
            raise ValueError("mode must be 'dev' or 'test', got %r" % (mode,))

        set_cost = 0.
        set_error_rate = 0.
        # Incremental running means over the batches seen so far.
        for batches_seen, (hypo, hm, premise, pm, truth) in enumerate(data, 1):
            _cost, _error = compute_cost(hypo, hm, premise, pm, truth)
            set_cost = (1.0 - 1.0 / batches_seen) * set_cost + \
                       1.0 / batches_seen * _cost
            set_error_rate = (1.0 - 1.0 / batches_seen) * set_error_rate + \
                             1.0 / batches_seen * _error

        return set_cost, set_error_rate

    print("Done. Evaluating scratch model ...")
    dev_set_cost, dev_set_error = evaluate('dev')
    print("BEFORE TRAINING: dev cost %f, error %f" %
          (dev_set_cost, dev_set_error))
    print("Training ...")
    try:
        for epoch in range(num_epochs):
            train_set_cost = 0.
            train_set_error = 0.
            start = time.time()

            for batches_seen, (hypo, hm, premise, pm,
                               truth) in enumerate(train_batches, 1):
                _cost, _error = train(hypo, hm, premise, pm, truth)
                train_set_cost = (1.0 - 1.0 / batches_seen) * train_set_cost + \
                                 1.0 / batches_seen * _cost
                train_set_error = (1.0 - 1.0 / batches_seen) * train_set_error + \
                                  1.0 / batches_seen * _error
                if (batches_seen * BSIZE) % 5000 == 0:
                    end = time.time()
                    print("Sample %d %.2fs, lr %.4f, train cost %f, error %f" %
                          (batches_seen * BSIZE, end - start, LR,
                           train_set_cost, train_set_error))
                    start = end

                if (batches_seen * BSIZE) % 100000 == 0:
                    dev_set_cost, dev_set_error = evaluate('dev')
                    print("***dev cost %f, error %f" %
                          (dev_set_cost, dev_set_error))

            # save parameters (the file is closed even if pickling fails)
            all_param_values = [p.get_value() for p in all_params]
            with open(param_path, 'wb') as param_file:
                cPickle.dump(all_param_values, param_file)

            dev_set_cost, dev_set_error = evaluate('dev')
            test_set_cost, test_set_error = evaluate('test')

            print("epoch %d, cost: train %f dev %f test %f;\n"
                  "         error train %f dev %f test %f" %
                  (epoch, train_set_cost, dev_set_cost, test_set_cost,
                   train_set_error, dev_set_error, test_set_error))
    except KeyboardInterrupt:
        # Drop into the debugger so the interrupted state can be inspected.
        pdb.set_trace()
Exemple #16
0
def make_training_functions(model):
    """Compile the Theano functions used to train and evaluate ``model``.

    Hyper-parameters are read from the module-level ``cfg`` mapping
    (``batch_size``, ``learning_rate``, ``reg``, ``momentum``).

    Args:
        model: mapping whose ``'l_out'`` entry is the network's output layer.

    Returns:
        A pair ``(tfuncs, tvars)``:

        * ``tfuncs`` maps ``'update_iter'``, ``'error_rate'``, ``'loss'`` and
          ``'pred'`` to functions taking a batch index into the shared data,
          and ``'dout'`` to a function taking an input tensor directly.
        * ``tvars`` exposes the symbolic inputs, the shared data buffers,
          the batch slice/index and the shared learning rate.
    """
    # l_out is the output layer of the model.
    l_out = model['l_out']
    # Symbolic index of the mini-batch to process.
    batch_index = T.iscalar('batch_index')
    # X is a 5-D float32 tensor, y a 1-D int32 vector.
    # Original layout note: "bct01" (presumably batch, channel, and three
    # spatial/temporal axes -- TODO confirm).
    X = T.TensorType('float32', [False]*5)('X')
    y = T.TensorType('int32', [False]*1)('y')
    out_shape = lasagne.layers.get_output_shape(l_out)
    #log.info('output_shape = {}'.format(out_shape))

    # Symbolic slice selecting the rows of mini-batch `batch_index`.
    batch_slice = slice(batch_index*cfg['batch_size'], (batch_index+1)*cfg['batch_size'])

    # Network output for input X (stochastic layers such as dropout active).
    out = lasagne.layers.get_output(l_out, X)
    # Deterministic output: dropout layers are disabled.
    dout = lasagne.layers.get_output(l_out, X, deterministic=True)
    # All network parameters, used to build the update expressions.
    params = lasagne.layers.get_all_params(l_out)
    l2_norm = lasagne.regularization.regularize_network_params(l_out,
            lasagne.regularization.l2)
    # The learning rate may be given as a dict; its entry for key 0 is then
    # used as the initial value.
    if isinstance(cfg['learning_rate'], dict):
        # A shared variable lets the rate be changed after compilation.
        learning_rate = theano.shared(np.float32(cfg['learning_rate'][0]))
    else:
        learning_rate = theano.shared(np.float32(cfg['learning_rate']))

    softmax_out = T.nnet.softmax( out )
    loss = T.cast(T.mean(T.nnet.categorical_crossentropy(softmax_out, y)), 'float32')
    pred = T.argmax( dout, axis=1 )
    error_rate = T.cast( T.mean( T.neq(pred, y) ), 'float32' )
    # L2-regularised loss.
    reg_loss = loss + cfg['reg']*l2_norm
    # Momentum SGD updates for params.
    updates = lasagne.updates.momentum(reg_loss, params, learning_rate, cfg['momentum'])

    # Shared buffers holding the data; the labels are stored as float32 and
    # cast back to int32 when substituted for y below.
    X_shared = lasagne.utils.shared_empty(5, dtype='float32')
    y_shared = lasagne.utils.shared_empty(1, dtype='float32')

    # Substitutions feeding one mini-batch of the shared buffers into X / y.
    input_givens = {
        X: X_shared[batch_slice],
    }
    labelled_givens = {
        X: X_shared[batch_slice],
        y: T.cast( y_shared[batch_slice], 'int32'),
    }

    dout_fn = theano.function([X], dout)
    # Only the batch-indexed prediction function is compiled; an earlier
    # `theano.function([X], pred)` was immediately overwritten by it.
    pred_fn = theano.function([batch_index], pred, givens=input_givens)

    update_iter = theano.function([batch_index], reg_loss,
            updates=updates, givens=labelled_givens)

    error_rate_fn = theano.function([batch_index], error_rate,
            givens=labelled_givens)

    loss_fn = theano.function([batch_index], reg_loss,
            givens=labelled_givens)

    tfuncs = {'update_iter':update_iter,
             'error_rate':error_rate_fn,
             'loss':loss_fn,
             'dout' : dout_fn,
             'pred' : pred_fn,
            }
    tvars = {'X' : X,
             'y' : y,
             'X_shared' : X_shared,
             'y_shared' : y_shared,
             'batch_slice' : batch_slice,
             'batch_index' : batch_index,
             'learning_rate' : learning_rate,
            }
    return tfuncs, tvars
Exemple #17
0
def TensorType(dtype, shape, broadcastable=None):
    """Build a ``tt.TensorType`` for ``dtype``.

    When ``broadcastable`` is not supplied, the pattern is derived from
    ``shape``: a dimension is flagged as broadcastable exactly when its
    length equals 1.
    """
    pattern = broadcastable
    if pattern is None:
        # atleast_1d lets a scalar shape be treated as a one-element shape.
        pattern = np.atleast_1d(shape) == 1
    return tt.TensorType(str(dtype), pattern)
Exemple #18
0
    def train(self,
              params,
              indir,
              outdir,
              wdir,
              fid_lst_tra,
              fid_lst_val,
              X_vals,
              Y_vals,
              cfg,
              params_savefile,
              trialstr='',
              cont=None):
        """Train the model with an LSE or WGAN objective and save the best parameters.

        The method compiles the Theano training functions according to
        ``self._errtype`` ('LSE' or 'WGAN'), then runs epochs from 1 (or from
        the reloaded epoch + 1) up to ``cfg.train_max_nbepochs``. After each
        epoch the validation cost is computed, the '-last.pkl' parameters and
        the '-trainingstate-last.pkl' training state are written, and, once
        ``cfg.train_min_nbepochs`` is reached, the parameters are written to
        ``params_savefile`` whenever the validation cost improves. Training
        stops early when the validation cost has not improved for
        ``cfg.train_cancel_nodecepochs`` epochs.

        Parameters
        ----------
        params
            Trainable parameters; used for the Adam updates (and printed) in
            the LSE case only.
        indir, outdir, wdir
            Passed to ``data.load_inoutset`` when loading each training batch.
            ``outdir`` is also passed to ``data.loadfile`` to count frames.
        fid_lst_tra
            Sequence of training file ids; batches are drawn from it by index.
        fid_lst_val
            Not used by this method.
        X_vals, Y_vals
            Validation inputs and targets.
        cfg
            Configuration object. NOTE: ``cfg.train_min_nbepochs`` and
            ``cfg.train_max_nbepochs`` are overwritten in place when
            ``cfg.train_nbepochs_scalewdata`` is set and
            ``cfg.train_batch_lengthmax`` is not None.
        params_savefile
            Path where the best parameters are saved; its extension-less stem
            is reused for the '-last.pkl', '-trainingstate-last.pkl' and
            figure files.
        trialstr
            String inserted in the epoch log line and in figure file names.
        cont
            If truthy and a '-trainingstate-last.pkl' file exists, the
            training state is reloaded from it and training resumes.

        Returns
        -------
        dict
            Keys 'epoch_stopped', 'worst_val', 'best_epoch' (-1 if no epoch
            was saved) and 'best_val'.

        Raises
        ------
        ValueError
            If ``self._errtype`` is unknown, if a training or validation cost
            is nan, if (LSE only) the validation cost reaches
            ``cfg.train_cancel_validthresh * worst_val``, or if no model was
            saved during training.
        """

        print('Model initial status before training')
        worst_val = data.cost_0pred_rmse(Y_vals)  # RMSE
        print("    0-pred validation RMSE = {} (100%)".format(worst_val))
        init_pred_rms = data.prediction_rms(self._model, [X_vals])
        print('    initial RMS of prediction = {}'.format(init_pred_rms))
        init_val = data.cost_model_prediction_rmse(self._model, [X_vals],
                                                   Y_vals)
        best_val = None
        print("    initial validation RMSE = {} ({:.4f}%)".format(
            init_val, 100.0 * init_val / worst_val))

        # Only whole batches are used: the shuffled index built in each epoch
        # covers the first nbbatches * cfg.train_batch_size file ids only.
        nbbatches = int(len(fid_lst_tra) / cfg.train_batch_size)
        print('    using {} batches of {} sentences each'.format(
            nbbatches, cfg.train_batch_size))
        print('    model #parameters={}'.format(self._model.nbParams()))

        # Every training file is loaded once here, only to count its frames.
        nbtrainframes = 0
        for fid in fid_lst_tra:
            X = data.loadfile(outdir, fid)
            nbtrainframes += X.shape[0]
        frameshift = 0.005  # TODO
        print('    Training set: {} sentences, #frames={} ({})'.format(
            len(fid_lst_tra), nbtrainframes,
            time.strftime('%H:%M:%S', time.gmtime(
                (nbtrainframes * frameshift)))))
        print('    #parameters/#frames={:.2f}'.format(
            float(self._model.nbParams()) / nbtrainframes))
        if cfg.train_nbepochs_scalewdata and not cfg.train_batch_lengthmax is None:
            # During an epoch, the whole data is _not_ seen by the training since cfg.train_batch_lengthmax is limited and smaller than the sentence size.
            # To compensate for it and make the config below less dependent on the data, the min and max nbepochs are scaled according to the missing number of frames seen.
            # TODO Should consider only non-silent frames, many recordings have a lot of pre and post silences
            # NOTE: this overwrites the two epoch limits on the caller's cfg object.
            epochcoef = nbtrainframes / float(
                (cfg.train_batch_lengthmax * len(fid_lst_tra)))
            print('    scale number of epochs wrt number of frames')
            cfg.train_min_nbepochs = int(cfg.train_min_nbepochs * epochcoef)
            cfg.train_max_nbepochs = int(cfg.train_max_nbepochs * epochcoef)
            print('        train_min_nbepochs={}'.format(
                cfg.train_min_nbepochs))
            print('        train_max_nbepochs={}'.format(
                cfg.train_max_nbepochs))

        if self._errtype == 'WGAN':
            print('Preparing critic for WGAN...')
            critic_input_var = T.tensor3(
                'critic_input'
            )  # Either real data to predict/generate, or, fake data that has been generated

            [critic, layer_critic, layer_cond] = self._model.build_critic(
                critic_input_var,
                self._model._input_values,
                self._model.vocoder,
                self._model.insize,
                use_LSweighting=(cfg.train_LScoef > 0.0),
                LSWGANtransfreqcutoff=self._LSWGANtransfreqcutoff,
                LSWGANtranscoef=self._LSWGANtranscoef,
                use_WGAN_incnoisefeature=self._WGAN_incnoisefeature)

            # Create expression for passing real data through the critic
            real_out = lasagne.layers.get_output(critic)
            # Create expression for passing fake data through the critic
            genout = lasagne.layers.get_output(self._model.net_out)
            indict = {
                layer_critic: lasagne.layers.get_output(self._model.net_out),
                layer_cond: self._model._input_values
            }
            fake_out = lasagne.layers.get_output(critic, indict)

            # Create generator's loss expression
            # Force LSE for low frequencies, otherwise the WGAN noise makes the voice hoarse.
            print('WGAN Weighted LS - Generator part')

            # Per-feature weights, concatenated in the order:
            # f0, spectrum, noise (if any), vuv (if any).
            wganls_weights_els = []
            wganls_weights_els.append([0.0])  # For f0
            specvs = np.arange(self._model.vocoder.specsize(),
                               dtype=theano.config.floatX)
            if cfg.train_LScoef == 0.0:
                wganls_weights_els.append(
                    np.ones(self._model.vocoder.specsize())
                )  # No special weighting for spec
            else:
                wganls_weights_els.append(
                    nonlin_sigmoidparm(
                        specvs,
                        sp.freq2fwspecidx(self._LSWGANtransfreqcutoff,
                                          self._model.vocoder.fs,
                                          self._model.vocoder.specsize()),
                        self._LSWGANtranscoef))  # For spec
            if self._model.vocoder.noisesize() > 0:
                if self._WGAN_incnoisefeature:
                    noisevs = np.arange(self._model.vocoder.noisesize(),
                                        dtype=theano.config.floatX)
                    wganls_weights_els.append(
                        nonlin_sigmoidparm(
                            noisevs,
                            sp.freq2fwspecidx(self._LSWGANtransfreqcutoff,
                                              self._model.vocoder.fs,
                                              self._model.vocoder.noisesize()),
                            self._LSWGANtranscoef))  # For noise
                else:
                    wganls_weights_els.append(
                        np.zeros(self._model.vocoder.noisesize()))
            if self._model.vocoder.vuvsize() > 0:
                wganls_weights_els.append([0.0])  # For vuv
            wganls_weights_ = np.hstack(wganls_weights_els)

            # TODO build wganls_weights_ for LSE instead for WGAN, for consistency with the paper

            # wganls_weights_ = np.hstack((wganls_weights_, wganls_weights_, wganls_weights_)) # That would be for MLPG using deltas
            wganls_weights_ *= (1.0 - cfg.train_LScoef)

            lserr = lasagne.objectives.squared_error(genout,
                                                     self._target_values)
            wganls_weights_ls = theano.shared(value=(1.0 - wganls_weights_),
                                              name='wganls_weights_ls')

            wganpart = fake_out * np.mean(
                wganls_weights_
            )  # That's a way to automatically balance the WGAN and LSE costs wrt the LSE spectral weighting
            lsepart = lserr * wganls_weights_ls  # Spectral weighting as complement of the WGAN part spectral weighting

            generator_loss = -wganpart.mean() + lsepart.mean(
            )  # A term in [-oo,oo] and one in [0,oo] ... why not, LSE has to be small enough for WGAN to do something.

            generator_lossratio = abs(wganpart.mean()) / abs(lsepart.mean())

            critic_loss = fake_out.mean() - real_out.mean(
            )  # For clarity: we want to maximise real-fake -> -(real-fake) -> fake-real

            # Improved training for Wasserstein GAN
            # Gradient penalty: the critic is evaluated on a mix of generated
            # and critic-input data (weighted by epsi), and the squared
            # deviation of its gradient norm from 1 is added to the critic loss.
            epsi = T.TensorType(dtype=theano.config.floatX,
                                broadcastable=(False, True, True))()
            mixed_X = (epsi * genout) + (1 - epsi) * critic_input_var
            indict = {
                layer_critic: mixed_X,
                layer_cond: self._model._input_values
            }
            output_D_mixed = lasagne.layers.get_output(critic, inputs=indict)
            grad_mixed = T.grad(T.sum(output_D_mixed), mixed_X)
            norm_grad_mixed = T.sqrt(T.sum(T.square(grad_mixed), axis=[1, 2]))
            grad_penalty = T.mean(T.square(norm_grad_mixed - 1))
            critic_loss = critic_loss + cfg.train_pg_lambda * grad_penalty

            # Create update expressions for training
            critic_params = lasagne.layers.get_all_params(critic,
                                                          trainable=True)
            critic_updates = lasagne.updates.adam(
                critic_loss,
                critic_params,
                learning_rate=cfg.train_D_learningrate,
                beta1=cfg.train_D_adam_beta1,
                beta2=cfg.train_D_adam_beta2)
            print('    Critic architecture')
            print_network(critic, critic_params)

            generator_params = lasagne.layers.get_all_params(
                self._model.net_out, trainable=True)
            generator_updates = lasagne.updates.adam(
                generator_loss,
                generator_params,
                learning_rate=cfg.train_G_learningrate,
                beta1=cfg.train_G_adam_beta1,
                beta2=cfg.train_G_adam_beta2)
            self._optim_updates.extend([generator_updates, critic_updates])
            print('    Generator architecture')
            print_network(self._model.net_out, generator_params)

            # Compile functions performing a training step on a mini-batch (according
            # to the updates dictionary) and returning the corresponding score:
            print('Compiling generator training function...')
            generator_train_fn_ins = [self._model._input_values]
            generator_train_fn_ins.append(self._target_values)
            generator_train_fn_outs = [generator_loss, generator_lossratio]
            train_fn = theano.function(generator_train_fn_ins,
                                       generator_train_fn_outs,
                                       updates=generator_updates)
            # Same generator loss, compiled without the parameter updates, for validation.
            train_validation_fn = theano.function(generator_train_fn_ins,
                                                  generator_loss,
                                                  no_default_updates=True)
            print('Compiling critic training function...')
            critic_train_fn_ins = [
                self._model._input_values, critic_input_var, epsi
            ]
            critic_train_fn = theano.function(critic_train_fn_ins,
                                              critic_loss,
                                              updates=critic_updates)
            # Same critic loss, compiled without the parameter updates, for validation.
            critic_train_validation_fn = theano.function(
                critic_train_fn_ins, critic_loss, no_default_updates=True)

        elif self._errtype == 'LSE':
            print('    LSE Training')
            print_network(self._model.net_out, params)
            predicttrain_values = lasagne.layers.get_output(
                self._model.net_out, deterministic=False)
            costout = (predicttrain_values - self._target_values)**2

            self.cost = T.mean(
                costout)  # self.cost = T.mean(T.sum(costout, axis=-1)) ?

            print("    creating parameters updates ...")
            # Learning rate and epsilon are stored in cfg as base-10 exponents.
            updates = lasagne.updates.adam(
                self.cost,
                params,
                learning_rate=float(10**cfg.train_learningrate_log10),
                beta1=float(cfg.train_adam_beta1),
                beta2=float(cfg.train_adam_beta2),
                epsilon=float(10**cfg.train_adam_epsilon_log10))

            self._optim_updates.append(updates)
            print("    compiling training function ...")
            train_fn = theano.function(self._model.inputs +
                                       [self._target_values],
                                       self.cost,
                                       updates=updates)
        else:
            raise ValueError('Unknown err type "' + self._errtype +
                             '"')  # pragma: no cover

        costs = defaultdict(list)
        epochs_modelssaved = []
        epochs_durs = []
        nbnodecepochs = 0
        # NOTE: this rebinds the name that held the generator's Adam updates in
        # the WGAN branch above (train_fn is already compiled at this point).
        # From here on it counts the generator training steps.
        generator_updates = 0
        epochstart = 1
        if cont and os.path.exists(
                os.path.splitext(params_savefile)[0] +
                '-trainingstate-last.pkl'):
            print('    reloading previous training state ...')
            savedcfg, extras, rngstate = self.loadTrainingState(
                os.path.splitext(params_savefile)[0] +
                '-trainingstate-last.pkl', cfg)
            np.random.set_state(rngstate)
            cost_val = extras['cost_val']
            # Restoring some local variables
            costs = extras['costs']
            epochs_modelssaved = extras['epochs_modelssaved']
            epochs_durs = extras['epochs_durs']
            generator_updates = extras['generator_updates']
            epochstart = extras['epoch'] + 1
            # Restore the saving criteria only if none of those 3 cfg values changed:
            if (savedcfg.train_min_nbepochs == cfg.train_min_nbepochs) and (
                    savedcfg.train_max_nbepochs == cfg.train_max_nbepochs
            ) and (savedcfg.train_cancel_nodecepochs
                   == cfg.train_cancel_nodecepochs):
                best_val = extras['best_val']
                nbnodecepochs = extras['nbnodecepochs']

        print_log("    start training ...")
        # NOTE(review): if epochstart > cfg.train_max_nbepochs (e.g. after
        # reloading a finished run) the loop body never runs and `epoch` is
        # unbound when building the return value below.
        for epoch in range(epochstart, 1 + cfg.train_max_nbepochs):
            timeepochstart = time.time()
            rndidx = np.arange(
                int(nbbatches * cfg.train_batch_size)
            )  # Need to restart from ordered state to make the shuffling repeatable after reloading training state, the shuffling will be different anyway
            np.random.shuffle(rndidx)
            rndidxb = np.split(rndidx, nbbatches)
            # NOTE(review): cost_tra is reset once per epoch, not per batch. In
            # the WGAN case the last generator cost (and gen_ratio) is therefore
            # appended again for every following batch in which the generator
            # is not trained. Confirm this is intended.
            cost_tra = None
            costs_tra_batches = []
            costs_tra_gen_wgan_lse_ratios = []
            costs_tra_critic_batches = []
            load_times = []
            train_times = []
            for k in xrange(nbbatches):

                timeloadstart = time.time()
                print_tty('\r    Training batch {}/{}'.format(
                    1 + k, nbbatches))

                # Load training data online, because data is often too heavy to hold in memory
                fid_lst_trab = [fid_lst_tra[bidx] for bidx in rndidxb[k]]
                X_trab, _, Y_trab, _, W_trab = data.load_inoutset(
                    indir,
                    outdir,
                    wdir,
                    fid_lst_trab,
                    length=cfg.train_batch_length,
                    lengthmax=cfg.train_batch_lengthmax,
                    maskpadtype=cfg.train_batch_padtype,
                    cropmode=cfg.train_batch_cropmode)

                if 0:  # Plot batch (disabled debugging aid)
                    import matplotlib.pyplot as plt
                    plt.ion()
                    plt.imshow(Y_trab[0, ].T,
                               origin='lower',
                               aspect='auto',
                               interpolation='none',
                               cmap='jet')
                    from IPython.core.debugger import Pdb
                    Pdb().set_trace()

                load_times.append(time.time() - timeloadstart)
                print_tty(' (iter load: {:.6f}s); training '.format(
                    load_times[-1]))

                timetrainstart = time.time()
                if self._errtype == 'WGAN':

                    # One random mixing coefficient per batch item, used for the gradient penalty.
                    random_epsilon = np.random.uniform(
                        size=(cfg.train_batch_size, 1, 1)).astype('float32')
                    critic_returns = critic_train_fn(
                        X_trab, Y_trab,
                        random_epsilon)  # Train the critic
                    costs_tra_critic_batches.append(float(critic_returns))

                    # TODO The params below are supposed to ensure the critic is "almost" fully converged
                    #      when training the generator. How to evaluate this? Is it the case currently?
                    if (generator_updates <
                            25) or (generator_updates % 500
                                    == 0):  # TODO Params hardcoded
                        critic_runs = 10  # TODO Params hardcoded 10
                    else:
                        critic_runs = 5  # TODO Params hardcoded 5
                    # martinarjovsky: "- Loss of the critic should never be negative, since outputing 0 would yeald a better loss so this is a huge red flag."
                    # if critic_returns>0 and k%critic_runs==0: # Train only if the estimate of the Wasserstein distance makes sense, and, each N critic iteration TODO Doesn't work well though
                    if k % critic_runs == 0:  # Train each N critic iteration
                        # Train the generator
                        trainargs = [X_trab]
                        trainargs.append(Y_trab)
                        [cost_tra, gen_ratio] = train_fn(*trainargs)
                        cost_tra = float(cost_tra)
                        generator_updates += 1

                        # Disabled sample plotting; it references Y_preds and
                        # nbsamples, which are only assigned in the plotting
                        # section at the end of an epoch.
                        if 0:
                            log_plot_samples(
                                Y_vals,
                                Y_preds,
                                nbsamples=nbsamples,
                                fname=os.path.splitext(params_savefile)[0] +
                                '-fig_samples_' + trialstr +
                                '{:07}.png'.format(generator_updates),
                                vocoder=self._model.vocoder,
                                title='E{} I{}'.format(epoch,
                                                       generator_updates))

                elif self._errtype == 'LSE':
                    train_returns = train_fn(X_trab, Y_trab)
                    # train_fn returns the mean squared error; report its square root.
                    cost_tra = np.sqrt(float(train_returns))

                train_times.append(time.time() - timetrainstart)

                if not cost_tra is None:
                    print_tty(
                        'err={:.4f} (iter train: {:.4f}s)                  '.
                        format(cost_tra, train_times[-1]))
                    if np.isnan(cost_tra):  # pragma: no cover
                        print_log(
                            '    previous costs: {}'.format(costs_tra_batches))
                        print_log('    E{} Batch {}/{} train cost = {}'.format(
                            epoch, 1 + k, nbbatches, cost_tra))
                        raise ValueError('ERROR: Training cost is nan!')
                    costs_tra_batches.append(cost_tra)
                    if self._errtype == 'WGAN':
                        costs_tra_gen_wgan_lse_ratios.append(gen_ratio)
            # Wipe the per-batch progress line from the terminal.
            print_tty(
                '\r                                                           \r'
            )
            # NOTE(review): the 0.1 factors applied to the logged WGAN costs
            # are unexplained hard-coded scalings — confirm their purpose.
            if self._errtype == 'WGAN':
                costs['model_training'].append(0.1 *
                                               np.mean(costs_tra_batches))
                if cfg.train_LScoef > 0:
                    costs['model_training_wgan_lse_ratio'].append(
                        0.1 * np.mean(costs_tra_gen_wgan_lse_ratios))
            else:
                costs['model_training'].append(np.mean(costs_tra_batches))

            # Eval validation cost
            cost_validation_rmse = data.cost_model_prediction_rmse(
                self._model, [X_vals], Y_vals)
            costs['model_rmse_validation'].append(cost_validation_rmse)

            if self._errtype == 'WGAN':
                train_validation_fn_args = [X_vals]
                train_validation_fn_args.append(Y_vals)
                costs['model_validation'].append(0.1 * data.cost_model_mfn(
                    train_validation_fn, train_validation_fn_args))
                costs['critic_training'].append(
                    np.mean(costs_tra_critic_batches))
                # The list holds len(X_vals) references to one single random draw.
                random_epsilon = [
                    np.random.uniform(size=(1, 1)).astype('float32')
                ] * len(X_vals)
                critic_train_validation_fn_args = [
                    X_vals, Y_vals, random_epsilon
                ]
                costs['critic_validation'].append(
                    data.cost_model_mfn(critic_train_validation_fn,
                                        critic_train_validation_fn_args))
                # Mean of the critic validation cost over the last
                # cfg.train_validation_ltm_winlen epochs; this is the value
                # used as the model selection criterion in the WGAN case.
                costs['critic_validation_ltm'].append(
                    np.mean(costs['critic_validation']
                            [-cfg.train_validation_ltm_winlen:]))

                cost_val = costs['critic_validation_ltm'][-1]
            elif self._errtype == 'LSE':
                cost_val = costs['model_rmse_validation'][-1]

            print_log(
                "    E{}/{} {}  cost_tra={:.6f} (load:{}s train:{}s)  cost_val={:.6f} ({:.4f}% RMSE)  {} MiB GPU {} MiB RAM"
                .format(epoch, cfg.train_max_nbepochs, trialstr,
                        costs['model_training'][-1],
                        time2str(np.sum(load_times)),
                        time2str(np.sum(train_times)), cost_val,
                        100 * cost_validation_rmse / worst_val,
                        nvidia_smi_gpu_memused(), proc_memresident()))
            sys.stdout.flush()

            if np.isnan(cost_val):
                raise ValueError('ERROR: Validation cost is nan!')
            if (self._errtype == 'LSE') and (
                    cost_val >= cfg.train_cancel_validthresh * worst_val):
                raise ValueError(
                    'ERROR: Validation cost blew up! It is higher than {} times the worst possible values'
                    .format(cfg.train_cancel_validthresh))

            # The parameters of the latest epoch are always saved, regardless of their quality.
            self._model.saveAllParams(os.path.splitext(params_savefile)[0] +
                                      '-last.pkl',
                                      cfg=cfg,
                                      printfn=print_log,
                                      extras={'cost_val': cost_val})

            # Save model parameters
            if epoch >= cfg.train_min_nbepochs:  # Assume no model is good enough before cfg.train_min_nbepochs
                if ((best_val is None) or (cost_val < best_val)
                    ):  # Among all trials of hyper-parameter optimisation
                    best_val = cost_val
                    self._model.saveAllParams(params_savefile,
                                              cfg=cfg,
                                              printfn=print_log,
                                              extras={'cost_val': cost_val},
                                              infostr='(E{} C{:.4f})'.format(
                                                  epoch, best_val))
                    epochs_modelssaved.append(epoch)
                    nbnodecepochs = 0
                else:
                    # Counts consecutive epochs without improvement (early-stop criterion below).
                    nbnodecepochs += 1

            if cfg.train_log_plot:
                print_log('    saving plots')
                log_plot_costs(costs,
                               worst_val,
                               fname=os.path.splitext(params_savefile)[0] +
                               '-fig_costs_' + trialstr + '.svg',
                               epochs_modelssaved=epochs_modelssaved)

                # Predict at most 2 validation samples for the sample figure.
                nbsamples = 2
                nbsamples = min(nbsamples, len(X_vals))
                Y_preds = []
                for sampli in xrange(nbsamples):
                    # A leading axis of length 1 is added before calling predict.
                    Y_preds.append(
                        self._model.predict(
                            np.reshape(
                                X_vals[sampli],
                                [1] + [s for s in X_vals[sampli].shape]))[0, ])

                # '_best' if the model was saved during this epoch, '_last' otherwise.
                plotsuffix = ''
                if len(epochs_modelssaved
                       ) > 0 and epochs_modelssaved[-1] == epoch:
                    plotsuffix = '_best'
                else:
                    plotsuffix = '_last'
                log_plot_samples(Y_vals,
                                 Y_preds,
                                 nbsamples=nbsamples,
                                 fname=os.path.splitext(params_savefile)[0] +
                                 '-fig_samples_' + trialstr + plotsuffix +
                                 '.png',
                                 vocoder=self._model.vocoder,
                                 title='E{}'.format(epoch))

            epochs_durs.append(time.time() - timeepochstart)
            # Time estimates are based on the median duration of the last 10 epochs.
            print_log('    ET: {}   max TT: {}s   train ~time left: {}'.format(
                time2str(epochs_durs[-1]),
                time2str(
                    np.median(epochs_durs[-10:]) * cfg.train_max_nbepochs),
                time2str(
                    np.median(epochs_durs[-10:]) *
                    (cfg.train_max_nbepochs - epoch))))

            # Persist everything that the `cont` reload path above restores.
            self.saveTrainingState(os.path.splitext(params_savefile)[0] +
                                   '-trainingstate-last.pkl',
                                   cfg=cfg,
                                   printfn=print_log,
                                   extras={
                                       'cost_val': cost_val,
                                       'best_val': best_val,
                                       'costs': costs,
                                       'epochs_modelssaved':
                                       epochs_modelssaved,
                                       'epochs_durs': epochs_durs,
                                       'nbnodecepochs': nbnodecepochs,
                                       'generator_updates': generator_updates,
                                       'epoch': epoch
                                   })

            if nbnodecepochs >= cfg.train_cancel_nodecepochs:  # pragma: no cover
                print_log(
                    'WARNING: validation error did not decrease for {} epochs. Early stop!'
                    .format(cfg.train_cancel_nodecepochs))
                break

        if best_val is None:
            raise ValueError('No model has been saved during training!')
        return {
            'epoch_stopped':
            epoch,
            'worst_val':
            worst_val,
            'best_epoch':
            epochs_modelssaved[-1] if len(epochs_modelssaved) > 0 else -1,
            'best_val':
            best_val
        }
Exemple #19
0
class DifferentialEquation(Op):
    r"""
    Specify an ordinary differential equation

    .. math::
        \dfrac{dy}{dt} = f(y,t,p) \quad y(t_0) = y_0

    Parameters
    ----------

    func : callable
        Function specifying the differential equation. Must take arguments y (n_states,), t (scalar), p (n_theta,)
    times : array
        Array of times at which to evaluate the solution of the differential equation.
    n_states : int
        Dimension of the differential equation.  For scalar differential equations, n_states=1.
        For vector valued differential equations, n_states = number of differential equations in the system.
    n_theta : int
        Number of parameters in the differential equation.
    t0 : float
        Time corresponding to the initial condition

    Examples
    --------

    .. code-block:: python

        def odefunc(y, t, p):
            #Logistic differential equation
            return p[0] * y[0] * (1 - y[0])

        times = np.arange(0.5, 5, 0.5)

        ode_model = DifferentialEquation(func=odefunc, times=times, n_states=1, n_theta=1, t0=0)
    """
    _itypes = [
        tt.TensorType(floatX, (False, )),  # y0 as 1D floatX vector
        tt.TensorType(floatX, (False, )),  # theta as 1D floatX vector
    ]
    _otypes = [
        tt.TensorType(
            floatX, (False, False)),  # model states as floatX of shape (T, S)
        tt.TensorType(
            floatX, (False, False, False)
        ),  # sensitivities as floatX of shape (T, S, len(y0) + len(theta))
    ]
    __props__ = ("func", "times", "n_states", "n_theta", "t0")

    def __init__(self, func, times, *, n_states, n_theta, t0=0):
        """Validate the ODE specification and prepare the augmented system.

        Parameters
        ----------
        func : callable
            Right-hand side of the differential equation.
        times : sequence
            Time points at which the solution is evaluated.
        n_states : int
            Number of states; must be at least 1.
        n_theta : int
            Number of parameters; must be positive.
        t0 : float
            Time of the initial condition; it is prepended to ``times`` to
            form the integration grid.

        Raises
        ------
        ValueError
            If ``func`` is not callable, ``n_states < 1`` or ``n_theta <= 0``.
        """
        if not callable(func):
            raise ValueError("Argument func must be callable.")
        if n_states < 1:
            raise ValueError("Argument n_states must be at least 1.")
        if n_theta <= 0:
            raise ValueError("Argument n_theta must be positive.")

        # Public attributes: problem dimensions and definition
        self.n_states = n_states
        self.n_theta = n_theta
        self.n_p = n_states + n_theta
        self.func = func
        self.t0 = t0
        # Stored as a tuple so the attribute listed in __props__ is immutable.
        self.times = tuple(times)
        self.n_times = len(times)

        # Private attributes: integration grid starting at t0, the system
        # augmented by utils.augment_system, and the initial sensitivities.
        self._augmented_times = np.insert(times, 0, t0).astype(floatX)
        self._augmented_func = utils.augment_system(func, n_states, n_theta)
        self._sens_ic = utils.make_sens_ic(n_states, n_theta, floatX)

        # Caches keyed by the hash of the symbolic inputs
        self._apply_nodes = {}
        self._output_sensitivities = {}

    def _system(self, Y, t, p):
        r"""Right-hand side handed to odeint for the augmented system.

        Parameters
        ----------
        Y : array
            augmented vector: the first ``n_states`` entries are the states,
            the remaining entries are forwarded as the sensitivities
        t : float
            current time
        p : array
            parameter vector (y0, theta)

        Returns
        -------
        array
            state derivatives followed by the sensitivity derivatives
        """
        split = self.n_states
        states = Y[:split]
        sensitivities = Y[split:]
        dydt, ddt_dydp = self._augmented_func(states, t, p, sensitivities)
        return np.concatenate([dydt, ddt_dydp])

    def _simulate(self, y0, theta):
        """Integrate the augmented system and return ``(states, sensitivities)``.

        ``states`` has one row per entry of ``self.times``; the sensitivities
        are reshaped to ``(n_times, n_states, n_p)``.
        """
        # Starting point: initial states followed by the initial sensitivities
        initial = np.concatenate([y0, self._sens_ic])
        # The parameter vector seen by the system is (y0, theta)
        parameters = np.concatenate([y0, theta])

        trajectory = scipy.integrate.odeint(
            func=self._system,
            y0=initial,
            t=self._augmented_times,
            args=(parameters, ),
        ).astype(floatX)

        # Row 0 belongs to t0, which was prepended to the requested times.
        n = self.n_states
        states = trajectory[1:, :n]
        sensitivities = trajectory[1:, n:].reshape(
            self.n_times, n, self.n_p)
        return states, sensitivities

    def make_node(self, y0, theta):
        """Create the Apply node with fresh symbolic states and sensitivities.

        The sensitivities output is also stored in
        ``self._output_sensitivities`` under the hash of ``(y0, theta)`` so
        that it can be looked up again from the inputs.
        """
        inputs = (y0, theta)
        key = hash(inputs)
        _log.debug("make_node for inputs {}".format(key))

        states_type, sens_type = self._otypes[0], self._otypes[1]
        states_out = states_type()
        sens_out = sens_type()

        # keep the symbolic sensitivities reachable, e.g. for the grad method
        self._output_sensitivities[key] = sens_out
        return Apply(self, inputs, (states_out, sens_out))

    def __call__(self, y0, theta, return_sens=False, **kwargs):
        """Apply the Op to ``y0`` and ``theta`` and return the symbolic states.

        Lists and tuples are length-checked, both inputs are converted to
        floatX tensors and compared against ``self._itypes``. When Theano test
        values are enabled, the ODE is simulated on the inputs' test values,
        the result is checked for dtype and shape, and attached to the outputs.

        Parameters
        ----------
        y0 : initial states (sequence or tensor)
        theta : ODE parameters (sequence or tensor)
        return_sens : bool
            If True, return ``(states, sens)`` instead of ``states`` only.
        **kwargs
            Forwarded to the parent ``__call__``.

        Raises
        ------
        ShapeError
            Wrong length of a list/tuple input, or wrong shape of a simulated
            test value.
        ValueError
            A converted input does not have the expected tensor type.
        DtypeError
            A simulated test value has the wrong dtype.
        """
        # Plain Python sequences can be length-checked before conversion.
        if isinstance(y0, (list, tuple)):
            if len(y0) != self.n_states:
                raise ShapeError(
                    "Length of y0 is wrong.",
                    actual=(len(y0), ),
                    expected=(self.n_states, ),
                )
        if isinstance(theta, (list, tuple)):
            if len(theta) != self.n_theta:
                raise ShapeError(
                    "Length of theta is wrong.",
                    actual=(len(theta), ),
                    expected=(self.n_theta, ),
                )

        # Convert to floatX tensors whose axis 0 is not broadcastable.
        y0 = tt.cast(tt.unbroadcast(tt.as_tensor_variable(y0), 0), floatX)
        theta = tt.cast(tt.unbroadcast(tt.as_tensor_variable(theta), 0),
                        floatX)
        for i, (input_val, itype) in enumerate(zip((y0, theta), self._itypes)):
            if input_val.type == itype:
                continue
            raise ValueError(
                f"Input {i} of type {input_val.type} does not have the expected type of {itype}"
            )

        # The parent implementation builds the symbolic outputs via make_node.
        states, sens = super().__call__(y0, theta, **kwargs)

        if theano.config.compute_test_value != "off":
            # Run the simulation on the test values of the inputs.
            y0_test = get_test_value(y0)
            theta_test = get_test_value(theta)
            test_states, test_sens = self._simulate(y0=y0_test,
                                                    theta=theta_test)

            # dtype checks
            states_dtype = self._otypes[0].dtype
            if not test_states.dtype == states_dtype:
                raise DtypeError(
                    "Simulated states have the wrong type.",
                    actual=test_states.dtype,
                    expected=states_dtype,
                )
            sens_dtype = self._otypes[1].dtype
            if not test_sens.dtype == sens_dtype:
                raise DtypeError(
                    "Simulated sensitivities have the wrong type.",
                    actual=test_sens.dtype,
                    expected=sens_dtype,
                )

            # shape checks
            expected_states_shape = (self.n_times, self.n_states)
            expected_sens_shape = expected_states_shape + (self.n_p, )
            if test_states.shape != expected_states_shape:
                raise ShapeError(
                    "Simulated states have the wrong shape.",
                    test_states.shape,
                    expected_states_shape,
                )
            if test_sens.shape != expected_sens_shape:
                raise ShapeError(
                    "Simulated sensitivities have the wrong shape.",
                    test_sens.shape,
                    expected_sens_shape,
                )

            # Everything is consistent: expose the results as test values.
            states.tag.test_value = test_states
            sens.tag.test_value = test_sens

        return (states, sens) if return_sens else states

    def perform(self, node, inputs_storage, output_storage):
        """Compute states and sensitivities and write them to the output cells.

        ``inputs_storage`` holds ``y0`` and ``theta`` (in that order); the two
        results of ``self._simulate`` are stored in ``output_storage[0][0]``
        and ``output_storage[1][0]`` respectively.
        """
        y0 = inputs_storage[0]
        theta = inputs_storage[1]
        # a single forward pass yields both the states and the sensitivities
        states, sens = self._simulate(y0, theta)
        output_storage[0][0] = states
        output_storage[1][0] = sens

    def infer_shape(self, fgraph, node, input_shapes):
        """Return the static output shapes of the Op.

        The states output is ``(n_times, n_states)`` and the sensitivities
        output is ``(n_times, n_states, n_p)``; the input shapes themselves do
        not influence the result.
        """
        # Unpacking also enforces that exactly two input shapes were supplied.
        _y0_shape, _theta_shape = input_shapes
        states_shape = (self.n_times, self.n_states)
        sens_shape = states_shape + (self.n_p, )
        return [states_shape, sens_shape]

    def grad(self, inputs, output_grads):
        """Return symbolic gradients with respect to ``y0`` and ``theta``.

        The sensitivities cached by ``make_node`` for these inputs are reused
        when available; otherwise the Op is applied again to obtain them.
        Only the gradient of the first output (``output_grads[0]``) is used.
        """
        cache_key = hash(tuple(inputs))
        _log.debug("grad w.r.t. inputs {}".format(cache_key))

        # look up the symbolic sensitivity output, rebuilding it on a miss
        if cache_key not in self._output_sensitivities:
            _log.debug("No cached sensitivities found!")
            _, sens = self.__call__(*inputs, return_sens=True)
        else:
            sens = self._output_sensitivities[cache_key]
        ograds = output_grads[0]

        # sens is (n_times, n_states, n_p), ograds is (n_times, n_states):
        # one scalar per parameter, the sensitivity weighted by the output
        # gradient and summed over times and states
        per_param = [
            tt.sum(sens[:, :, p] * ograds) for p in range(self.n_p)
        ]

        # the first n_states entries belong to y0, the remainder to theta
        y0_grad = tt.stack(per_param[:self.n_states])
        theta_grad = tt.stack(per_param[self.n_states:])
        return y0_grad, theta_grad
Exemple #20
0
# Read the remaining settings from the shared configuration dict
# (inputParamsConfigAll is defined outside this fragment).
positive_set_ratio = inputParamsConfigAll['positive_set_ratio']
dropout = inputParamsConfigAll['dropout']
nonlinearityToUse = inputParamsConfigAll['nonlinearityToUse']
augmentationFlag = inputParamsConfigAll['augmentationFlag']


# Map the configured nonlinearity name onto the matching Lasagne function;
# any name other than 'relu', 'tanh' or 'sigmoid' aborts the script.
if nonlinearityToUse == 'relu':
    nonLinearity = lasagne.nonlinearities.rectify
elif nonlinearityToUse == 'tanh':
    nonLinearity = lasagne.nonlinearities.tanh
elif nonlinearityToUse == 'sigmoid':
    nonLinearity = lasagne.nonlinearities.sigmoid
else:
    raise Exception(
        'nonlinearityToUse: Unsupported nonlinearity type has been selected for the network, retry with a supported one!')
# 5-D float32 tensor type (no broadcastable axes) for the network input,
# plus an int32 vector for the targets.
dtensor5 = T.TensorType('float32', (False,) * 5)
input_var = dtensor5('inputs')
target_var = T.ivector('targets')

# Keyword bundle describing the network; input_shape comes from outside this
# fragment, and dropout is coerced to float here.
inputParamsNetwork = dict(shape=input_shape,dropout=float(dropout), nonLinearity=nonLinearity)

##############################
##############################
# And load them again later on like this:
with np.load(pathSavedNetwork) as f:
    param_values = [f['arr_%d' % i] for i in range(len(f.files))]

#Reshape the FC layer of saved CNN into FCN form    
#param_values[4] = param_values[4].reshape((64,32,7,7,5))
W4_new = np.zeros((64,32,7,7,4)).astype('float32')
for i in range(0,param_values[4].shape[1]): #weights for each node in FC layer form the columns
Exemple #21
0
def main():
    """Build, compile and train the attention-based premise/hypothesis network.

    The premise and the hypothesis are embedded with a shared embedding
    layer, encoded by two LSTMs (the hypothesis LSTM takes the sliced premise
    LSTM output as its initial cell state), combined by an attention layer
    and classified with a softmax layer.  Training uses Adam on the
    cross-entropy plus an L2 penalty on the output layer.

    Roughly five times per epoch the running training loss/accuracy is
    printed and the whole validation set is evaluated.  Training can be
    stopped early with Ctrl-C.

    Relies on module-level configuration (``MAX_LENGTH_PREM``,
    ``MAX_LENGTH_HYPO``, ``VOCAB_SIZE``, ``WORD_VECTOR_SIZE``, ``K_HIDDEN``,
    ``GRAD_CLIP``, ``DROPOUT_RATE``, ``NUM_LABELS``, ``REGU``,
    ``LEARNING_RATE``, ``NUM_EPOCHS``, ``TRAIN_SIZE``, ``VAL_SIZE``,
    ``BATCH_SIZE``), on ``word_vector_init``, ``embedding_w_mask``,
    ``data_train``, ``data_val`` and on ``get_batch_data``.
    """
    import sys
    from lasagne.regularization import l2, regularize_layer_params

    print("Building network ...")
    # Note in Rocktaschel's paper he first used a linear layer to transform wordvector
    # into vector of size K_HIDDEN. I'm assuming that this is equivalent to update W.
    input_var_type = T.TensorType('int32', [False] * 2)
    var_name = "input"
    input_var_prem = input_var_type(var_name)
    input_var_hypo = input_var_type(var_name)

    # Input and mask layers for the premise
    l_in_prem = lasagne.layers.InputLayer(shape=(None, MAX_LENGTH_PREM),
                                          input_var=input_var_prem)
    l_mask_prem = lasagne.layers.InputLayer(shape=(None, MAX_LENGTH_PREM))
    # Input and mask layers for the hypothesis
    l_in_hypo = lasagne.layers.InputLayer(shape=(None, MAX_LENGTH_HYPO),
                                          input_var=input_var_hypo)
    l_mask_hypo = lasagne.layers.InputLayer(shape=(None, MAX_LENGTH_HYPO))

    # Word embedding layers: both sentences are concatenated along the time
    # axis so that they share a single embedding matrix, then split again.
    l_in_prem_hypo = lasagne.layers.ConcatLayer([l_in_prem, l_in_hypo], axis=1)
    l_in_embedding = lasagne.layers.EmbeddingLayer(l_in_prem_hypo,
                                                   VOCAB_SIZE,
                                                   WORD_VECTOR_SIZE,
                                                   W=word_vector_init,
                                                   name='EmbeddingLayer')
    l_in_embedding_dropout = lasagne.layers.DropoutLayer(l_in_embedding,
                                                         p=DROPOUT_RATE,
                                                         rescale=True)
    l_in_prem_embedding = lasagne.layers.SliceLayer(l_in_embedding_dropout,
                                                    slice(0, MAX_LENGTH_PREM),
                                                    axis=1)
    # NOTE(review): the hypothesis slice is taken from the embedding *before*
    # dropout, unlike the premise slice -- confirm this asymmetry is intended.
    l_in_hypo_embedding = lasagne.layers.SliceLayer(
        l_in_embedding,
        slice(MAX_LENGTH_PREM, MAX_LENGTH_PREM + MAX_LENGTH_HYPO),
        axis=1)

    # LSTM layer for premise
    l_lstm_prem = lasagne.layers.LSTMLayer_withCellOut(
        l_in_prem_embedding,
        K_HIDDEN,
        peepholes=False,
        grad_clipping=GRAD_CLIP,
        nonlinearity=lasagne.nonlinearities.tanh,
        mask_input=l_mask_prem,
        only_return_final=False)

    # The slicelayer extracts the cell output of the premise sentence
    l_lstm_prem_out = lasagne.layers.SliceLayer(l_lstm_prem, -1, axis=1)

    # LSTM layer for hypothesis
    # LSTM for premise and LSTM for hypothesis have different parameters
    l_lstm_hypo = lasagne.layers.LSTMLayer(
        l_in_hypo_embedding,
        K_HIDDEN,
        peepholes=False,
        grad_clipping=GRAD_CLIP,
        nonlinearity=lasagne.nonlinearities.tanh,
        cell_init=l_lstm_prem_out,
        mask_input=l_mask_hypo)

    # Isolate the last hidden unit output
    l_hypo_out = lasagne.layers.SliceLayer(l_lstm_hypo, -1, axis=1)

    # Attention layer
    l_attention = lasagne.layers.AttentionLayer([l_lstm_prem, l_lstm_hypo],
                                                K_HIDDEN,
                                                mask_input=l_mask_prem)
    l_attention_dropout = lasagne.layers.DropoutLayer(l_attention,
                                                      p=DROPOUT_RATE,
                                                      rescale=True)
    # A softmax layer create probability distribution of the prediction
    l_out = lasagne.layers.DenseLayer(
        l_attention_dropout,
        num_units=NUM_LABELS,
        W=lasagne.init.Normal(),
        nonlinearity=lasagne.nonlinearities.softmax)

    # Stochastic (dropout active) output for training, deterministic output
    # for evaluation.
    network_output_train = lasagne.layers.get_output(l_out,
                                                     deterministic=False)
    network_output_test = lasagne.layers.get_output(l_out, deterministic=True)

    # Theano tensor for the targets
    target_values = T.ivector('target_output')

    # Training loss: mean cross-entropy of the stochastic output plus L2.
    l2_penalty = regularize_layer_params(l_out, l2) * REGU
    cost = lasagne.objectives.categorical_crossentropy(
        network_output_train, target_values).mean() + l2_penalty
    # Validation loss: same objective, but on the deterministic output so
    # that dropout noise does not leak into the reported validation loss.
    val_cost = lasagne.objectives.categorical_crossentropy(
        network_output_test, target_values).mean() + l2_penalty

    # Retrieve all parameters from the network
    all_params = lasagne.layers.get_all_params(l_out)

    # Compute ADAM updates for training
    print("Computing updates ...")
    updates = lasagne.updates.adam(cost,
                                   all_params,
                                   masks=[('EmbeddingLayer.W',
                                           embedding_w_mask)],
                                   learning_rate=LEARNING_RATE,
                                   beta1=0.9,
                                   beta2=0.999,
                                   epsilon=1e-08)

    # Accuracy is measured on the deterministic output for both functions.
    accuracy = T.mean(T.eq(T.argmax(network_output_test, axis=1),
                           target_values),
                      dtype=theano.config.floatX)

    print("Compiling functions ...")
    function_inputs = [
        l_in_prem.input_var, l_mask_prem.input_var, l_in_hypo.input_var,
        l_mask_hypo.input_var, target_values
    ]
    train = theano.function(function_inputs, [cost, accuracy],
                            updates=updates,
                            allow_input_downcast=True)
    # Theano function computing the validation loss and accuracy
    validate = theano.function(function_inputs, [val_cost, accuracy],
                               allow_input_downcast=True)

    # Number of batches between two reports (five reports per epoch).  Floor
    # division keeps the modulo test below exact on Python 3, and the lower
    # bound of 1 avoids a modulo-by-zero for very small training sets.
    batches_per_report = max(1, TRAIN_SIZE // BATCH_SIZE // 5)

    print("Training ...")
    sys.stdout.flush()
    try:
        for epoch in range(NUM_EPOCHS):
            n = 0
            avg_cost = 0.0
            count = 0
            sub_epoch = 0
            train_acc = 0
            while n < TRAIN_SIZE:
                X_prem, X_prem_mask, X_hypo, X_hypo_mask, y = get_batch_data(
                    n, data_train)
                err, acc = train(X_prem, X_prem_mask, X_hypo, X_hypo_mask, y)
                avg_cost += err
                train_acc += acc
                n += BATCH_SIZE
                count += 1

                if (n // BATCH_SIZE) % batches_per_report == 0:
                    sub_epoch += 1
                    avg_cost /= count
                    print(
                        "Sub epoch {} average loss = {}, accuracy = {}".format(
                            sub_epoch, avg_cost, train_acc / count * 100))
                    avg_cost = 0
                    count = 0
                    train_acc = 0

                    # Calculate validation loss and accuracy
                    m = 0
                    val_err = 0
                    val_acc = 0
                    val_batches = 0
                    while m < VAL_SIZE:
                        X_prem, X_prem_mask, X_hypo, X_hypo_mask, y = get_batch_data(
                            m, data_val)
                        err, acc = validate(X_prem, X_prem_mask, X_hypo,
                                            X_hypo_mask, y)
                        val_err += err
                        val_acc += acc
                        val_batches += 1
                        m += BATCH_SIZE

                    print("  validation loss:\t\t{:.6f}".format(val_err /
                                                                val_batches))
                    print("  validation accuracy:\t\t{:.2f} %".format(
                        val_acc / val_batches * 100))
                    sys.stdout.flush()

    except KeyboardInterrupt:
        # Ctrl-C is the intended way to stop training early.
        pass
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import theano
from theano import tensor as T
#tensor3 =T.Tensortype(broadcastable=(False, False, False),dtype='float32')
#x =tensor3()
# Build a symbolic variable straight from a TensorType: the broadcastable
# pattern carries one False per dimension, so ndim = 1 yields a vector.
ndim = 1
dtype = 'float32'
name = None
broadcast = tuple(False for _ in range(ndim))

x = T.TensorType(dtype, broadcast)(name)
Exemple #23
0
def _center_pair_filter(filt_shape):
    """Return a float32 ``(1, 1) + filt_shape`` filter with two 0.5 taps.

    When the filter has an even size in z, e.g. (9, 9, 4), a delta function
    cannot be centred, so the two central z entries at the x/y centre are
    each set to 0.5 as the next best thing (output range is preserved).
    """
    filt = np.zeros(filt_shape).astype('float32')
    cx = int(np.floor(filt_shape[0] / 2.0))
    cy = int(np.floor(filt_shape[1] / 2.0))
    cz = int(np.floor(filt_shape[2] / 2.0))
    filt[cx, cy, cz - 1] = 1
    filt[cx, cy, cz] = 1
    filt = filt * 0.5
    return np.reshape(filt, (1, 1, filt_shape[0], filt_shape[1], filt_shape[2]))


def _z_cut_points(depth, z_depth, z_step=80):
    """Return the flat list ``[start0, end0, start1, end1, ...]`` of z chunks.

    Chunks are ``z_step`` slices long; consecutive chunks overlap by
    ``z_depth`` slices and the last chunk ends at ``depth``.
    """
    cuts = [0]
    start = 0
    while True:
        end = start + z_step
        if end > depth:
            cuts.append(depth)
            return cuts
        next_start = end - z_depth  # overlap between consecutive chunks
        if next_start <= start:
            # Without this guard the loop would never advance.
            raise ValueError('z_depth (%r) must be smaller than the z chunk size (%r)'
                             % (z_depth, z_step))
        cuts.append(end)
        cuts.append(next_start)
        start = next_start


def CreateLungInteriorMaskFCN(inputParamsConfigLocal, inputParamsLungInteriorMaskFCN):
    """Propagate a lung-interior mask through a linear "mask" FCN.

    A network built by ``Build_3dfcn_mask`` is loaded with all-ones 1x1x1
    filters and one centred averaging filter in place of the first fully
    connected layer, so that it only propagates the mask through the model.
    The mask volume is cut into overlapping z chunks, each chunk is pushed
    through the network and the chunk outputs are stitched back together.

    Parameters
    ----------
    inputParamsConfigLocal : dict
        Keys read: ``'input_shape'``, ``'dropout'``, ``'numberOfLayers'``
        (2 or 3) and ``'biasInitVal'``.
    inputParamsLungInteriorMaskFCN : dict
        Keys read: ``'currentCaseName'`` (something like
        ``p0012_20000101_s3000561.npy``), ``'masterFolderLidc'``,
        ``'masterFolderLungInterior'``, ``'cutPointFlag'`` (must be 1),
        ``'z_depth'`` and ``'fcnLayerFilterSize'``.

    Returns
    -------
    numpy.ndarray
        Integer array that is 1 wherever the propagated mask is positive.

    Raises
    ------
    ValueError
        If ``fcnLayerFilterSize`` does not have odd sizes in dimensions 1, 2
        and an even size in dimension 3, if ``cutPointFlag`` is not 1, if
        ``numberOfLayers`` is neither 2 nor 3, or if ``z_depth`` is not
        smaller than the z chunk size.
    """
    currentCaseName = inputParamsLungInteriorMaskFCN['currentCaseName']
    masterFolderLidc = inputParamsLungInteriorMaskFCN['masterFolderLidc']
    masterFolderLungInterior = inputParamsLungInteriorMaskFCN['masterFolderLungInterior']
    cutPointFlag = inputParamsLungInteriorMaskFCN['cutPointFlag']
    z_depth = inputParamsLungInteriorMaskFCN['z_depth']
    fcnLayerFilterSize = inputParamsLungInteriorMaskFCN['fcnLayerFilterSize']

    # The fully connected layer filter constructed below expects dimensions 1,2 of
    # fcnLayerFilterSize to be odd, and dimension 3 even. Ideally one would convolve
    # with a delta function; a different filter definition is needed for other sizes.
    if (np.mod(fcnLayerFilterSize[0], 2) != 1) or (np.mod(fcnLayerFilterSize[1], 2) != 1) or (np.mod(fcnLayerFilterSize[2], 2) != 0):
        raise ValueError('MaskFCN>>CreateNoduleMaskFCN>>fcnLayerFilterSize: expected dimensions 1,2 to be odd, dimension 3 to be even!')
    # Cut points are only defined for cutPointFlag == 1; fail early and
    # clearly instead of hitting an undefined name after building the network.
    if cutPointFlag != 1:
        raise ValueError('CreateLungInteriorMaskFCN: only cutPointFlag == 1 is supported, got %r' % (cutPointFlag,))

    inputParamsConfigAll = inputParamsConfigLocal
    input_shape = inputParamsConfigAll['input_shape']
    dropout = inputParamsConfigAll['dropout']
    numberOfLayers = inputParamsConfigAll['numberOfLayers']
    biasInitVal = inputParamsConfigAll['biasInitVal']

    if numberOfLayers not in (2, 3):
        raise ValueError('CreateLungInteriorMaskFCN: numberOfLayers must be 2 or 3, got %r' % (numberOfLayers,))

    # The initial weights are irrelevant: every parameter is overwritten below.
    weight_init = lasagne.init.Normal()
    biasInit = lasagne.init.Constant(biasInitVal)
    # Linear activation, since the mask should merely be propagated through the model.
    nonLinearity = lasagne.nonlinearities.linear
    inputParamsNetwork = dict(n_layer=numberOfLayers, shape=input_shape, dropout=float(dropout),
                              nonLinearity=nonLinearity, biasInit=biasInit)

    dtensor5 = T.TensorType('float32', (False,) * 5)
    input_var = dtensor5('inputs')
    network_fcn_mask = Build_3dfcn_mask(weight_init, inputParamsNetwork, fcnLayerFilterSize, input_var)
    # Only needed to read off the filter shape of the first fully connected layer.
    param_values_fcn_default = lasagne.layers.get_all_param_values(network_fcn_mask)

    def unit_filter():
        return np.ones((1, 1, 1, 1, 1)).astype('float32')

    def zero_bias():
        return np.zeros((1,)).astype('float32')

    # Index (in the flat W/b parameter list) of the first fully connected
    # layer's weights: W4 for a 2-layer network, W6 for a 3-layer network.
    fc_index = 4 if numberOfLayers == 2 else 6
    n_weight_layers = 4 if numberOfLayers == 2 else 5
    param_values_mask = []
    for layer in range(n_weight_layers):
        if 2 * layer == fc_index:
            fc_shape = np.shape(param_values_fcn_default[fc_index])[2:]
            param_values_mask.append(_center_pair_filter(fc_shape))
        else:
            param_values_mask.append(unit_filter())
        param_values_mask.append(zero_bias())

    lasagne.layers.set_all_param_values(network_fcn_mask, param_values_mask)  # load the weights/biases

    mask_prediction = lasagne.layers.get_output(network_fcn_mask, deterministic=True)
    val_fn = theano.function([input_var], [mask_prediction])

    ################################################################################
    ######Now load the lung interior mask, and shove it into the network
    ################################################################################
    full_mask_path = os.path.join(masterFolderLungInterior, Path_create(currentCaseName)[:-4])
    mat_name = 'lungInterior_' + currentCaseName[:-4] + '.mat'
    if os.path.isfile(os.path.join(full_mask_path, mat_name)):
        lungInteriorData = sio.loadmat(os.path.join(full_mask_path, mat_name))
        full_mask = lungInteriorData['currentFullVolBin']  # this returns uint8
    else:
        # No lung interior mask on disk: read the corresponding unique mask
        # so that an all-zero fake mask of the proper size can be produced.
        uniqueMask_path = os.path.join(masterFolderLidc, Path_create(currentCaseName)[:-4])
        tmp_name = 'uniqueStats_' + currentCaseName[:-4] + '.mat'
        uniqueStatsData = sio.loadmat(os.path.join(uniqueMask_path, tmp_name))
        unique_mask = uniqueStatsData['allMaxRadiologistMsk']  # this returns uint8
        full_mask = np.zeros(np.shape(unique_mask))

    # NOTE(review): original author asked whether the mask needs to go via
    # int16 before float32 -- confirm the dtype conversion is right.
    currentMask = full_mask.astype('float32')
    currentMask = currentMask.reshape((1, 1, 512, 512, currentMask.shape[2]))

    xCutPoints = [0, 512]
    yCutPoints = [0, 512]
    zCutPoints = _z_cut_points(currentMask.shape[4], z_depth)

    # If the last chunk is too thin in z, take 20 slices from the previous
    # chunk and add them to the last one.  (Needs at least two chunks.)
    if len(zCutPoints) >= 4 and (zCutPoints[-1] - zCutPoints[-2]) <= 16:
        zCutPoints[-3] = zCutPoints[-3] - 20
        zCutPoints[-2] = zCutPoints[-2] - 20

    sub_vol_one = None
    # Cut point lists hold (start, end) pairs, hence the floor division by 2.
    for i in range(len(xCutPoints) // 2):
        for j in range(len(yCutPoints) // 2):
            for k in range(len(zCutPoints) // 2):
                xStart = xCutPoints[2 * i]
                xEnd = xCutPoints[2 * i + 1]
                yStart = yCutPoints[2 * j]
                yEnd = yCutPoints[2 * j + 1]
                zStart = zCutPoints[2 * k]
                zEnd = zCutPoints[2 * k + 1]
                print(xStart, xEnd - 1, yStart, yEnd - 1, zStart, zEnd - 1)
                asd = currentMask[0, 0, xStart:xEnd, yStart:yEnd, zStart:zEnd]
                # put subvolume in 5D form for input to FCN
                asd = asd.reshape((1, 1, asd.shape[0], asd.shape[1], asd.shape[2]))
                test_pred_full_mask = val_fn(asd)[0]
                # go from e.g. (1,1,120,120,25) to (120,120,25)
                tmp_sub_vol = test_pred_full_mask.squeeze()

                if xStart == xCutPoints[0] and yStart == yCutPoints[0]:
                    # NOTE: when the volume is split N times, the size loss caused by the
                    # first fully connected (now convolutional) layer is repeated N times:
                    # a (9,9,4) filter costs -4+1=-3 slices per sub volume.
                    if sub_vol_one is None:
                        # first chunk: nothing to stitch onto yet
                        sub_vol_one = tmp_sub_vol
                    else:
                        # Append the sub volumes back to back, overwriting the bad slices:
                        # the margin is 2 because there is one max pool in z and the last
                        # 2 slices are not correctly convolved.
                        sub_vol_one = np.concatenate((sub_vol_one[:, :, :-2], tmp_sub_vol[:, :, 3:]), axis=2)

    # Convert to binary; values of 0.5 occur due to the z elongation of the
    # fully connected layer filter.
    sub_vol_one_fin = (sub_vol_one > 0.0).astype('int')

    return sub_vol_one_fin
Exemple #24
0
# Same as above. The broadcastable pattern indicates the dimensions of the variable.
# True means the length of the axis for that dimension is 1.
# An empty list is a special case meaning scalar.
# pattern 	interpretation
# [] 	scalar
# [True] 	1D scalar (vector of length 1)
# [True, True] 	2D scalar (1x1 matrix)
# [False] 	vector
# [False, False] 	matrix
# [False] * n 	nD tensor
# [True, False] 	row (1xN matrix)
# [False, True] 	column (Mx1 matrix)
# [False, True, False] 	A Mx1xP tensor (a)
# [True, False, False] 	A 1xNxP tensor (b)
# [False, False, False] 	A MxNxP tensor (pattern of a + b)
# int32 variable named 'myvar' with an empty broadcastable pattern, i.e. a
# scalar according to the table above.  Note that x is rebound just below.
x = T.TensorType(dtype='int32', broadcastable=())('myvar')

# config dependent float type (config.floatX is float 64 by default on x86_64)
x = T.scalar(name='x', dtype=T.config.floatX)
report(x)

# 1-dimensional vector (ndarray).
v = T.vector(dtype=T.config.floatX, name='v')
report(v)

# 2-dimensional ndarray in which the number of rows is guaranteed to be 1.
v = T.row(name=None, dtype=T.config.floatX)
report(v)

# 2-dimensional ndarray in which the number of columns is guaranteed to be 1.
v = T.col(name=None, dtype=T.config.floatX)
Exemple #25
0
 def make_node(self, *inputs):
     """Build the Apply node: one matrix output with the first input's dtype.

     Every input is converted to a tensor variable first; the single output
     is a fresh variable of a 2-d, non-broadcastable TensorType.
     """
     tensor_inputs = [tt.as_tensor_variable(arg) for arg in inputs]
     matrix_type = tt.TensorType(tensor_inputs[0].dtype, (False, False))
     return gof.Apply(self, tensor_inputs, [matrix_type()])
Exemple #26
0
from theano import tensor as T
from theano.sandbox.neighbours import images2neibs

# 4-d float32 input with no broadcastable axes, unrolled into 2x2 neighbourhoods.
X = T.TensorType(broadcastable=(False,) * 4, dtype='float32')()
Y = images2neibs(X, (2, 2))

# Scalar cost of a linear map applied to the neighbourhoods ...
W = T.matrix()
Z = T.dot(Y, W)
cost = Z.sum()

# ... and its symbolic gradient with respect to W (result is not kept).
T.grad(cost, W)
    def createNetwork(self, networkName, folderName, cnnLayers, kernel_Shapes,
                      intermediate_ConnectedLayers, n_classes,
                      sampleSize_Train, sampleSize_Test, batch_Size,
                      applyBatchNorm, numberEpochToApplyBatchNorm,
                      activationType, dropout_Rates, pooling_Params,
                      weights_Initialization_CNN, weights_Initialization_FCN,
                      weightsFolderName, weightsTrainedIdx, softmax_Temp):
        """Store the model configuration on ``self`` and build the network.

        All arguments are saved as attributes (note that ``pooling_Params``
        is stored as ``pooling_scales`` and the two ``weights_Initialization_*``
        arguments as ``weight_Initialization_*``).  The receptive field and
        the central voxels for training and testing are derived from them,
        four symbolic 5-D float32 inputs are created, and
        ``self.generateNetworkLayers`` is called to build the layers.

        ``sampleSize_Train`` and ``sampleSize_Test`` are indexed at positions
        0-2 to form the input shapes
        ``(batch_Size, 1, size[0], size[1], size[2])``.
        """
        # ============= Model Parameters Passed as arguments ================
        self.networkName = networkName
        self.folderName = folderName
        self.cnnLayers = cnnLayers
        self.n_classes = n_classes
        self.kernel_Shapes = kernel_Shapes
        self.intermediate_ConnectedLayers = intermediate_ConnectedLayers
        self.pooling_scales = pooling_Params
        self.dropout_Rates = dropout_Rates
        self.activationType = activationType
        self.weight_Initialization_CNN = weights_Initialization_CNN
        self.weight_Initialization_FCN = weights_Initialization_FCN
        self.weightsFolderName = weightsFolderName
        self.weightsTrainedIdx = weightsTrainedIdx
        self.batch_Size = batch_Size
        self.sampleSize_Train = sampleSize_Train
        self.sampleSize_Test = sampleSize_Test
        self.applyBatchNorm = applyBatchNorm
        self.numberEpochToApplyBatchNorm = numberEpochToApplyBatchNorm
        self.softmax_Temp = softmax_Temp

        # Compute the CNN receptive field
        stride = 1
        self.receptiveField = computeReceptiveField(self.kernel_Shapes, stride)

        # ======== Calculated Attributes =========
        self.centralVoxelsTrain = getCentralVoxels(self.sampleSize_Train,
                                                   self.receptiveField)
        self.centralVoxelsTest = getCentralVoxels(self.sampleSize_Test,
                                                  self.receptiveField)

        # --------- Now we build the model -------- #

        print("...[STATUS]: Building the Network model...")

        # Symbolic 5-D float32 inputs of the CNN (no broadcastable axes)
        tensor5 = T.TensorType(dtype='float32',
                               broadcastable=(False, False, False, False,
                                              False))
        self.inputNetwork_Train = tensor5()
        self.inputNetwork_Test = tensor5()
        self.inputNetwork_Train_Bottom = tensor5()
        self.inputNetwork_Test_Bottom = tensor5()

        # Define input shapes to the network
        inputSampleShape_Train = (self.batch_Size, 1, self.sampleSize_Train[0],
                                  self.sampleSize_Train[1],
                                  self.sampleSize_Train[2])
        inputSampleShape_Test = (self.batch_Size, 1, self.sampleSize_Test[0],
                                 self.sampleSize_Test[1],
                                 self.sampleSize_Test[2])

        print(" - Shape of input subvolume (Training): {}".format(
            inputSampleShape_Train))
        print(" - Shape of input subvolume (Testing): {}".format(
            inputSampleShape_Test))

        # TODO change cnnLayers name by networkLayers
        self.generateNetworkLayers(cnnLayers, kernel_Shapes,
                                   self.pooling_scales, inputSampleShape_Train,
                                   inputSampleShape_Test,
                                   self.inputNetwork_Train,
                                   self.inputNetwork_Train_Bottom,
                                   self.inputNetwork_Test,
                                   self.inputNetwork_Test_Bottom,
                                   intermediate_ConnectedLayers)
Exemple #28
0
def train_gan(
        separate_funcs=False,
        D_training_repeats=1,
        G_learning_rate_max=0.0010,
        D_learning_rate_max=0.0010,
        G_smoothing=0.999,
        adam_beta1=0.0,
        adam_beta2=0.99,
        adam_epsilon=1e-8,
        minibatch_default=16,
        minibatch_overrides={},
        rampup_kimg=40 / speed_factor,
        rampdown_kimg=0,
        lod_initial_resolution=4,
        lod_training_kimg=400 / speed_factor,
        lod_transition_kimg=400 / speed_factor,
        #lod_training_kimg       = 40,
        #lod_transition_kimg     = 40,
        total_kimg=10000 / speed_factor,
        dequantize_reals=False,
        gdrop_beta=0.9,
        gdrop_lim=0.5,
        gdrop_coef=0.2,
        gdrop_exp=2.0,
        drange_net=[-1, 1],
        drange_viz=[-1, 1],
        image_grid_size=None,
        #tick_kimg_default       = 1,
        tick_kimg_default=50 / speed_factor,
        tick_kimg_overrides={
            32: 20,
            64: 10,
            128: 10,
            256: 5,
            512: 2,
            1024: 1
        },
        image_snapshot_ticks=4,
        network_snapshot_ticks=40,
        image_grid_type='default',
        #resume_network_pkl      = '006-celeb128-progressive-growing/network-snapshot-002009.pkl',
        resume_network_pkl=None,
        resume_kimg=0,
        resume_time=0.0):

    # Train a generator G and a discriminator D with a level-of-detail (LOD)
    # schedule, periodically writing image grids and network pickles.
    #
    # Overall flow:
    #   1. Load the dataset and either resume (G, D) from a pickle or build
    #      them from config.G / config.D; create the smoothed generator Gs.
    #   2. Compile `gen_fn` (runs Gs) and save the initial real/fake grids.
    #   3. Loop until cur_nimg reaches total_kimg * 1000, recompiling the
    #      training functions whenever floor/ceil of the current LOD changes.
    #   4. Save 'network-final.pkl', close the dataset and create the empty
    #      marker file '_training-done.txt' in the result sub-directory.
    #
    # Parameters (as used in this body):
    #   separate_funcs         -- NOTE(review): never read in this function.
    #   D_training_repeats     -- number of D_train_fn calls per G_train_fn call.
    #   G/D_learning_rate_max  -- multiplied by the ramp coefficient to obtain
    #                             the learning rates stored in G_lrate/D_lrate.
    #   G_smoothing            -- `beta` for G.create_temporally_smoothed_version.
    #   adam_beta1/beta2/epsilon -- forwarded to adam().
    #   minibatch_default/overrides -- minibatch size; overrides are looked up
    #                             by the current resolution.
    #   rampup_kimg, rampdown_kimg -- forwarded to misc.rampup and
    #                             misc.rampdown_linear.
    #   lod_initial_resolution -- resolution whose log2 determines initial_lod.
    #   lod_training_kimg, lod_transition_kimg -- lengths used by the LOD
    #                             schedule (see the loop below).
    #   total_kimg             -- training stops at total_kimg * 1000 images.
    #   dequantize_reals       -- add uniform noise in [-0.5, 0.5] to the reals.
    #   gdrop_beta/lim/coef/exp -- parameters of the D.gdrop_strength update.
    #   drange_net             -- target range for misc.adjust_dynamic_range.
    #   drange_viz             -- `drange` used when saving fake image grids.
    #   image_grid_size        -- snapshot grid size; derived from G's output
    #                             shape when None.
    #   tick_kimg_default/overrides -- tick length in kimg, by resolution.
    #   image_snapshot_ticks, network_snapshot_ticks -- snapshot periods in ticks.
    #   image_grid_type        -- only 'default' is accepted; anything else
    #                             raises ValueError.
    #   resume_network_pkl     -- pickle path relative to config.result_dir.
    #   resume_kimg, resume_time -- starting image count (in kimg) and the
    #                             elapsed time to credit to earlier runs.
    #
    # NOTE: this is Python 2 code (print statements, xrange). Expressions such
    # as `1920 / w` and `cur_nimg / 1000` rely on `/` between ints -- presumably
    # intended as floor division; verify before porting to Python 3.

    # Load dataset and build networks.
    training_set, drange_orig = load_dataset()
    # training_set is the object the dataset module produces after parsing
    # the h5 file; drange_orig is training_set.get_dynamic_range()
    # (per the original author's note -- load_dataset is defined elsewhere).

    if resume_network_pkl:
        print 'Resuming', resume_network_pkl
        G, D, _ = misc.load_pkl(
            os.path.join(config.result_dir, resume_network_pkl))
    else:
        G = network.Network(num_channels=training_set.shape[1],
                            resolution=training_set.shape[2],
                            label_size=training_set.labels.shape[1],
                            **config.G)
        D = network.Network(num_channels=training_set.shape[1],
                            resolution=training_set.shape[2],
                            label_size=training_set.labels.shape[1],
                            **config.D)
    Gs = G.create_temporally_smoothed_version(beta=G_smoothing,
                                              explicit_updates=True)

    # G and D are either unpickled through misc or constructed by the
    # network module. Gs is always rebuilt from G; the third element of a
    # resumed pickle is discarded.

    misc.print_network_topology_info(G.output_layers)
    misc.print_network_topology_info(D.output_layers)

    # Setup snapshot image grid.
    # Configure the layout of the intermediate snapshot images.
    if image_grid_type == 'default':
        if image_grid_size is None:
            w, h = G.output_shape[3], G.output_shape[2]
            image_grid_size = np.clip(1920 / w, 3,
                                      16), np.clip(1080 / h, 2, 16)
        example_real_images, snapshot_fake_labels = training_set.get_random_minibatch(
            np.prod(image_grid_size), labels=True)
        snapshot_fake_latents = random_latents(np.prod(image_grid_size),
                                               G.input_shape)
    else:
        raise ValueError('Invalid image_grid_type', image_grid_type)

    # Theano input variables and compile generation func.
    print 'Setting up Theano...'
    real_images_var = T.TensorType('float32', [False] *
                                   len(D.input_shape))('real_images_var')
    # <class 'theano.tensor.var.TensorVariable'>
    # print type(real_images_var),real_images_var
    real_labels_var = T.TensorType(
        'float32', [False] * len(training_set.labels.shape))('real_labels_var')
    fake_latents_var = T.TensorType('float32', [False] *
                                    len(G.input_shape))('fake_latents_var')
    fake_labels_var = T.TensorType(
        'float32', [False] * len(training_set.labels.shape))('fake_labels_var')
    # Every name ending in _var is a symbolic input tensor.
    G_lrate = theano.shared(np.float32(0.0))
    D_lrate = theano.shared(np.float32(0.0))
    # Shared variables holding the learning rates: initialised to 0.0 here
    # and overwritten with set_value() on every training iteration.
    gen_fn = theano.function([fake_latents_var, fake_labels_var],
                             Gs.eval_nd(fake_latents_var,
                                        fake_labels_var,
                                        ignore_unused_inputs=True),
                             on_unused_input='ignore')

    # gen_fn is a compiled function.
    #   inputs:  [fake_latents_var, fake_labels_var]
    #   output:  Gs.eval_nd(fake_latents_var, fake_labels_var, ignore_unused_inputs=True)
    # The bare string below is a no-op expression kept by the original author
    # as a reminder of the theano.function signature.
    '''
    def function(inputs, 
                outputs=None, 
                mode=None, 
                updates=None, 
                givens=None, 
                no_default_updates=False, 
                accept_inplace=False, 
                name=None, 
                rebuild_strict=True, 
                allow_input_downcast=None, 
                profile=None, 
                on_unused_input=None)
    '''

    # (end of the generation-function setup)

    # Misc init.
    # log2 of the generator's output resolution.
    resolution_log2 = int(np.round(np.log2(G.output_shape[2])))
    # lod = level of detail; initial_lod is the number of log2 steps between
    # lod_initial_resolution and the full resolution, clamped at 0.
    initial_lod = max(
        resolution_log2 - int(np.round(np.log2(lod_initial_resolution))), 0)
    cur_lod = 0.0
    # Sentinel values: the loop clamps cur_lod to >= 0, so these never match
    # the computed floor/ceil and the training functions are always compiled
    # on the first iteration.
    min_lod, max_lod = -1.0, -2.0
    fake_score_avg = 0.0

    # Save example images.
    snapshot_fake_images = gen_fn(snapshot_fake_latents, snapshot_fake_labels)
    result_subdir = misc.create_result_subdir(config.result_dir,
                                              config.run_desc)
    misc.save_image_grid(example_real_images,
                         os.path.join(result_subdir, 'reals.png'),
                         drange=drange_orig,
                         grid_size=image_grid_size)
    misc.save_image_grid(snapshot_fake_images,
                         os.path.join(result_subdir, 'fakes%06d.png' % 0),
                         drange=drange_viz,
                         grid_size=image_grid_size)

    # Training loop.
    # This is the actual main training entry point.
    # Note that training never leaves the outermost while loop, so operations
    # such as switching resolution necessarily happen inside it.

    # Number of images processed so far.
    cur_nimg = int(resume_kimg * 1000)
    cur_tick = 0

    tick_start_nimg = cur_nimg
    tick_start_time = time.time()
    tick_train_out = []
    # Shift the start time back so reported elapsed time includes resume_time.
    train_start_time = tick_start_time - resume_time
    while cur_nimg < total_kimg * 1000:

        # Calculate current LOD.
        # Compute the current level of detail.
        cur_lod = initial_lod
        if lod_training_kimg or lod_transition_kimg:
            # tlod counts completed (training + transition) phases; the image
            # count is converted to kimg scaled by speed_factor.
            tlod = (cur_nimg / (1000.0 / speed_factor)) / (lod_training_kimg +
                                                           lod_transition_kimg)
            cur_lod -= np.floor(tlod)
            if lod_transition_kimg:
                # Over the final lod_transition_kimg share of each phase this
                # term ramps linearly from 0 to 1, fading in the next level.
                cur_lod -= max(
                    1.0 + (np.fmod(tlod, 1.0) - 1.0) *
                    (lod_training_kimg + lod_transition_kimg) /
                    lod_transition_kimg, 0.0)
            cur_lod = max(cur_lod, 0.0)

        # Look up resolution-dependent parameters.
        cur_res = 2**(resolution_log2 - int(np.floor(cur_lod)))
        # Current resolution.
        minibatch_size = minibatch_overrides.get(cur_res, minibatch_default)
        tick_duration_kimg = tick_kimg_overrides.get(cur_res,
                                                     tick_kimg_default)

        # Update network config.
        # Refresh the learning rates and the networks' current LOD.
        lrate_coef = misc.rampup(cur_nimg / 1000.0, rampup_kimg)
        lrate_coef *= misc.rampdown_linear(cur_nimg / 1000.0, total_kimg,
                                           rampdown_kimg)
        G_lrate.set_value(np.float32(lrate_coef * G_learning_rate_max))
        D_lrate.set_value(np.float32(lrate_coef * D_learning_rate_max))
        if hasattr(G, 'cur_lod'): G.cur_lod.set_value(np.float32(cur_lod))
        if hasattr(D, 'cur_lod'): D.cur_lod.set_value(np.float32(cur_lod))

        # Setup training func for current LOD.
        new_min_lod, new_max_lod = int(np.floor(cur_lod)), int(
            np.ceil(cur_lod))

        #print " cur_lod%f\n  min_lod %f\n new_min_lod %f\n max_lod %f\n new_max_lod %f\n"%(cur_lod,min_lod,new_min_lod,max_lod,new_max_lod)

        # Recompile only when the integer LOD bracket changes.
        if min_lod != new_min_lod or max_lod != new_max_lod:
            print 'Compiling training funcs...'
            min_lod, max_lod = new_min_lod, new_max_lod

            # Pre-process reals.
            real_images_expr = real_images_var
            if dequantize_reals:
                rnd = theano.sandbox.rng_mrg.MRG_RandomStreams(
                    lasagne.random.get_rng().randint(1, 2147462579))
                epsilon_noise = rnd.uniform(size=real_images_expr.shape,
                                            low=-0.5,
                                            high=0.5,
                                            dtype='float32')
                real_images_expr = T.cast(
                    real_images_expr, 'float32'
                ) + epsilon_noise  # match original implementation of Improved Wasserstein
            real_images_expr = misc.adjust_dynamic_range(
                real_images_expr, drange_orig, drange_net)
            if min_lod > 0:  # compensate for shrink_based_on_lod
                # Upsample axes 2 and 3 by repetition, by a factor 2**min_lod.
                real_images_expr = T.extra_ops.repeat(real_images_expr,
                                                      2**min_lod,
                                                      axis=2)
                real_images_expr = T.extra_ops.repeat(real_images_expr,
                                                      2**min_lod,
                                                      axis=3)

            # Optimize loss.
            G_loss, D_loss, real_scores_out, fake_scores_out = evaluate_loss(
                G, D, min_lod, max_lod, real_images_expr, real_labels_var,
                fake_latents_var, fake_labels_var, **config.loss)
            G_updates = adam(G_loss,
                             G.trainable_params(),
                             learning_rate=G_lrate,
                             beta1=adam_beta1,
                             beta2=adam_beta2,
                             epsilon=adam_epsilon).items()

            D_updates = adam(D_loss,
                             D.trainable_params(),
                             learning_rate=D_lrate,
                             beta1=adam_beta1,
                             beta2=adam_beta2,
                             epsilon=adam_epsilon).items()

            # D_train_fn applies only the discriminator updates and returns
            # [G_loss, D_loss, real_scores_out, fake_scores_out].
            D_train_fn = theano.function([
                real_images_var, real_labels_var, fake_latents_var,
                fake_labels_var
            ], [G_loss, D_loss, real_scores_out, fake_scores_out],
                                         updates=D_updates,
                                         on_unused_input='ignore')
            # G_train_fn returns nothing; it applies the generator updates
            # together with the updates of the smoothed copy Gs.
            G_train_fn = theano.function([fake_latents_var, fake_labels_var],
                                         [],
                                         updates=G_updates + Gs.updates,
                                         on_unused_input='ignore')

        # D_training_repeats discriminator steps, then one generator step.
        # Only the discriminator steps advance cur_nimg.
        for idx in xrange(D_training_repeats):
            mb_reals, mb_labels = training_set.get_random_minibatch(
                minibatch_size,
                lod=cur_lod,
                shrink_based_on_lod=True,
                labels=True)
            mb_train_out = D_train_fn(
                mb_reals, mb_labels,
                random_latents(minibatch_size, G.input_shape),
                random_labels(minibatch_size, training_set))
            cur_nimg += minibatch_size
            tick_train_out.append(mb_train_out)
        G_train_fn(random_latents(minibatch_size, G.input_shape),
                   random_labels(minibatch_size, training_set))

        # Fade in D noise if we're close to becoming unstable
        # NOTE(review): index 1 of D_train_fn's outputs is D_loss, not
        # fake_scores_out (index 3), despite the variable name -- confirm this
        # is intended. mb_train_out is also unbound if D_training_repeats is 0.
        fake_score_cur = np.clip(np.mean(mb_train_out[1]), 0.0, 1.0)
        # Exponential moving average with decay gdrop_beta.
        fake_score_avg = fake_score_avg * gdrop_beta + fake_score_cur * (
            1.0 - gdrop_beta)
        gdrop_strength = gdrop_coef * (max(fake_score_avg - gdrop_lim, 0.0)**
                                       gdrop_exp)
        if hasattr(D, 'gdrop_strength'):
            D.gdrop_strength.set_value(np.float32(gdrop_strength))

        # Perform maintenance operations once per tick.
        # A tick ends after tick_duration_kimg kimg, or when training is done.
        if cur_nimg >= tick_start_nimg + tick_duration_kimg * 1000 or cur_nimg >= total_kimg * 1000:
            cur_tick += 1
            cur_time = time.time()
            tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0
            tick_start_nimg = cur_nimg
            tick_time = cur_time - tick_start_time
            tick_start_time = cur_time
            # Mean of each D_train_fn output over all minibatches of the tick.
            tick_train_avg = tuple(
                np.mean(np.concatenate([np.asarray(v).flatten()
                                        for v in vals]))
                for vals in zip(*tick_train_out))
            tick_train_out = []

            # Print progress.
            print 'tick %-5d kimg %-8.1f lod %-5.2f minibatch %-4d time %-12s sec/tick %-9.1f sec/kimg %-6.1f Dgdrop %-8.4f Gloss %-8.4f Dloss %-8.4f Dreal %-8.4f Dfake %-8.4f' % (
                (cur_tick, cur_nimg / 1000.0, cur_lod, minibatch_size,
                 misc.format_time(cur_time - train_start_time), tick_time,
                 tick_time / tick_kimg, gdrop_strength) + tick_train_avg)

            # Visualize generated images.
            # Uses the fixed snapshot latents/labels chosen before training.
            if cur_tick % image_snapshot_ticks == 0 or cur_nimg >= total_kimg * 1000:
                snapshot_fake_images = gen_fn(snapshot_fake_latents,
                                              snapshot_fake_labels)
                misc.save_image_grid(snapshot_fake_images,
                                     os.path.join(
                                         result_subdir,
                                         'fakes%06d.png' % (cur_nimg / 1000)),
                                     drange=drange_viz,
                                     grid_size=image_grid_size)

            # Save network snapshot every N ticks.
            if cur_tick % network_snapshot_ticks == 0 or cur_nimg >= total_kimg * 1000:
                misc.save_pkl(
                    (G, D, Gs),
                    os.path.join(
                        result_subdir,
                        'network-snapshot-%06d.pkl' % (cur_nimg / 1000)))

    # Write final results.
    misc.save_pkl((G, D, Gs), os.path.join(result_subdir, 'network-final.pkl'))
    training_set.close()
    print 'Done.'
    # Create an empty marker file signalling that training completed.
    with open(os.path.join(result_subdir, '_training-done.txt'), 'wt'):
        pass
Exemple #29
0
    # set those directories to something meaningful in your environment
    data_dir = "/home/valor/workspace/DLCV_ProtFun/data/full/processed_single_64/1A0H"
    grid_file = "/home/valor/workspace/DLCV_ProtFun/data/full/processed_single_64/1A0H/grid.memmap"

    # visualize the original grid
    test_grid = np.memmap(grid_file, mode='r', dtype=floatX).reshape(
        (1, 1, 64, 64, 64))
    log.debug(test_grid.shape)
    viewer = MoleculeView(data_dir=data_dir,
                          data={"density": test_grid[0, 0]},
                          info={"name": "test"})
    viewer.density3d()
    grid_side = test_grid.shape[3]

    # initialize the rotation layer
    input_grid = T.TensorType(floatX, (False, ) * 5)()
    input_layer = lasagne.layers.InputLayer(shape=(1, 1, grid_side, grid_side,
                                                   grid_side),
                                            input_var=input_grid)
    rotate_layer = GridRotationLayer(incoming=input_layer,
                                     grid_side=grid_side,
                                     n_channels=1,
                                     interpolation='nearest')

    # create a small function to test the rotation layer
    func = theano.function(inputs=[input_grid],
                           outputs=lasagne.layers.get_output(rotate_layer))

    # show 10 different rotations of the test grid
    import time
Exemple #30
0
    def create_theano_functions(self,
                                target_var,
                                deterministic_training=False):
        """Compile the training function and create the monitor functions.

        Builds the symbolic training and evaluation losses for
        ``self.final_layer``, derives the parameter updates, stores every
        parameter touched by training in ``self.all_params``, compiles
        ``self.train_func`` and finally delegates to
        ``self.monitor_manager.create_theano_functions``.

        Args:
            target_var: Symbolic target variable. When ``None`` it is created
                with the dtype and number of dimensions of an example target,
                taken from ``self.dataset.get_dummy_y()`` if the dataset
                provides that method and from the first test-set batch
                otherwise.
            deterministic_training: Passed as ``deterministic`` to
                ``lasagne.layers.get_output`` for the training prediction.
                The evaluation prediction always uses ``deterministic=True``.
        """
        if target_var is None:
            has_dummy_y = hasattr(self.dataset, 'get_dummy_y')
            if has_dummy_y:
                log.info("Use dataset-supplied dummy y to determine "
                         "shape and type of target variable")
                dummy_y = self.dataset.get_dummy_y()
            else:
                log.info(
                    "Automatically determine size of target variable by example..."
                )
                # get a dummy batch and determine target size
                # use test set since it is smaller
                # maybe memory is freed quicker

                # Temporarily switch off reloading while the sets are built;
                # the finally clause restores the flag even if the provider
                # raises, so the dataset is never left non-reloadable.
                was_reloadable = self.dataset.reloadable
                self.dataset.reloadable = False
                try:
                    test_set = self.dataset_provider.get_train_valid_test(
                        self.dataset)['test']
                finally:
                    self.dataset.reloadable = was_reloadable
                batches = self.iterator.get_batches(test_set, shuffle=False)
                # Builtin next() instead of the Python-2-only .next() method.
                dummy_batch = next(batches)
                dummy_y = dummy_batch[1]
                del test_set
            # tensor with as many dimensions as y
            target_type = T.TensorType(dtype=dummy_y.dtype,
                                       broadcastable=[False] *
                                       len(dummy_y.shape))
            target_var = target_type()
            if not has_dummy_y:
                self.dataset.ensure_is_loaded()

        prediction = lasagne.layers.get_output(
            self.final_layer, deterministic=deterministic_training)

        # test as in during testing not as in "test set"
        test_prediction = lasagne.layers.get_output(self.final_layer,
                                                    deterministic=True)
        # Loss function might need layers or not...
        # NOTE(review): a TypeError raised *inside* a two-argument loss is
        # also caught here and triggers the three-argument retry.
        try:
            loss = self.loss_expression(prediction, target_var).mean()
            test_loss = self.loss_expression(test_prediction,
                                             target_var).mean()
        except TypeError:
            loss = self.loss_expression(prediction, target_var,
                                        self.final_layer).mean()
            test_loss = self.loss_expression(test_prediction, target_var,
                                             self.final_layer).mean()

        # create parameter update expressions
        params = lasagne.layers.get_all_params(self.final_layer,
                                               trainable=True)
        updates = self.updates_expression(loss, params)
        if self.updates_modifier is not None:
            # put norm constraints on all layer, for now fixed to max kernel norm
            # 2 and max col norm 0.5
            updates = self.updates_modifier.modify(updates, self.final_layer)
        input_var = lasagne.layers.get_all_layers(
            self.final_layer)[0].input_var
        # Store all parameters, including update params like adam params,
        # needed for resetting to best model after early stop.
        # Layer params are requested without the trainable filter, so
        # non-trainable ones are included as well.
        self.all_params = lasagne.layers.get_all_params(self.final_layer)
        # The optimizer's own state variables appear only as keys of
        # `updates`; append those that are not already layer parameters.
        for param in updates.keys():
            if param not in self.all_params:
                self.all_params.append(param)

        # No outputs: calling train_func only applies the updates.
        self.train_func = theano.function([input_var, target_var],
                                          updates=updates)
        self.monitor_manager.create_theano_functions(input_var, target_var,
                                                     test_prediction,
                                                     test_loss)