Example #1
    valid_y = valid_y.reshape((valid_y.shape[0], 1))
    test_y = test_y.reshape((test_y.shape[0], 1))

    input_dim = train_x.shape[1]

    if args.reinit:
        init_batch_size = 16
        init_batch = train_x[:size][-init_batch_size:]

    else:
        init_batch = None

    if args.model == 'BHN_MLPWN':
        model = MLPWeightNorm_BHN(lbda=lbda,
                                  perdatapoint=perdatapoint,
                                  srng=RandomStreams(seed=args.seed + 2000),
                                  prior=prior,
                                  coupling=coupling,
                                  n_hiddens=n_hiddens,
                                  n_units=n_units,
                                  input_dim=input_dim,
                                  flow=args.flow,
                                  init_batch=init_batch)
    elif args.model == 'MCdropout_MLP':
        model = MCdropout_MLP(n_hiddens=n_hiddens, n_units=n_units)
    else:
        raise Exception('no model named `{}`'.format(args.model))

    va_rec_name = name + '_recs'
    tr_rec_name = name + '_recs_train'  # TODO (we're already saving the valid_recs!)
    save_path = name + '.params.npy'
Example #2
import os
import cPickle
import numpy
try:
    import pylab
except ImportError:
    print("pylab isn't available. If you use its functionality, it will crash.")
    print("It can be installed with 'pip install matplotlib'")

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

# Don't use a Python long, as that doesn't work on 32-bit computers.
numpy.random.seed(0xbeef)
theano_rng = RandomStreams(seed=numpy.random.randint(1 << 30))
theano.config.warn.subtensor_merge_bug = False

from theano.compat.python2x import OrderedDict

signal_width = 1000


def load_fruitspeech(fruit_list=['apple', 'pineapple']):
    # Check if dataset is in the data directory.
    data_path = os.path.join(os.path.split(__file__)[0], "data")
    if not os.path.exists(data_path):
        os.makedirs(data_path)

    dataset = 'audio.tar.gz'
    data_file = os.path.join(data_path, dataset)
Example #3
    def __init__(self,
                 input,
                 nvis,
                 nhid=None,
                 nvis_dec=None,
                 nhid_dec=None,
                 rnd=None,
                 bhid=None,
                 cost_type=CostType.MeanSquared,
                 momentum=1,
                 num_pieces=1,
                 L2_reg=-1,
                 L1_reg=-1,
                 sparse_initialize=False,
                 nonlinearity=NonLinearity.TANH,
                 W=None,
                 b=None,
                 bvis=None,
                 tied_weights=True,
                 reverse=False):

        assert reverse is False
        self.input = input
        self.nvis = nvis
        self.nhid = nhid
        self.bhid = bhid
        self.bvis = bvis
        self.momentum = momentum
        self.nonlinearity = nonlinearity
        self.tied_weights = tied_weights
        self.gparams = None
        self.reverse = reverse
        self.activation = self.get_non_linearity_fn()
        self.catched_params = {}

        if cost_type == CostType.MeanSquared:
            self.cost_type = CostType.MeanSquared
        elif cost_type == CostType.CrossEntropy:
            self.cost_type = CostType.CrossEntropy

        if rnd is None:
            self.rnd = np.random.RandomState(1231)
        else:
            self.rnd = rnd

        self.srng = RandomStreams(seed=1231)

        self.hidden = AEHiddenLayer(input=input,
                                    n_in=nvis,
                                    n_out=nhid,
                                    num_pieces=num_pieces,
                                    n_in_dec=nvis_dec,
                                    W=W,
                                    b=b,
                                    n_out_dec=nhid_dec,
                                    activation=self.activation,
                                    tied_weights=tied_weights,
                                    sparse_initialize=sparse_initialize,
                                    rng=rnd)

        self.params = self.hidden.params

        self.sparse_initialize = sparse_initialize

        self.L1_reg = L1_reg
        self.L2_reg = L2_reg

        self.L1 = 0
        self.L2 = 0

        if input is not None:
            self.x = input
        else:
            self.x = T.matrix('x_input', dtype=theano.config.floatX)
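
The constructor above calls self.get_non_linearity_fn(), which this example does not show. A minimal sketch of such a helper, assuming NonLinearity simply enumerates TANH, SIGMOID and RELU (the member names are assumptions, not taken from the source):

    def get_non_linearity_fn(self):
        # Hypothetical helper: map the NonLinearity flag onto a Theano activation.
        if self.nonlinearity == NonLinearity.TANH:
            return T.tanh
        elif self.nonlinearity == NonLinearity.SIGMOID:
            return T.nnet.sigmoid
        elif self.nonlinearity == NonLinearity.RELU:
            return lambda z: T.maximum(0.0, z)
        else:
            return None  # linear output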
Example #4
    def __init__(self,
                 numvis,
                 numnote,
                 numfac,
                 numvel,
                 numvelfac,
                 numacc,
                 numaccfac,
                 numjerk,
                 seq_len_to_train,
                 seq_len_to_predict,
                 output_type='real',
                 coststart=4,
                 vis_corruption_type="zeromask",
                 vis_corruption_level=0.0,
                 numpy_rng=None,
                 theano_rng=None):
        self.numvis = numvis
        self.numnote = numnote
        self.numfac = numfac
        self.numvel = numvel
        self.numvelfac = numvelfac
        self.numacc = numacc
        self.numaccfac = numaccfac
        self.numjerk = numjerk
        self.seq_len_to_train = seq_len_to_train
        self.seq_len_to_predict = seq_len_to_predict
        self.output_type = output_type
        self.vis_corruption_type = vis_corruption_type
        self.vis_corruption_level = theano.shared(value=numpy.array(
            [vis_corruption_level]),
                                                  name='vis_corruption_level')
        self.coststart = coststart
        self.inputs = T.matrix(name='inputs')

        if not numpy_rng:
            self.numpy_rng = numpy.random.RandomState(1)
        else:
            self.numpy_rng = numpy_rng

        if not theano_rng:
            theano_rng = RandomStreams(1)

        self.wxf_left = theano.shared(value=self.numpy_rng.normal(
            size=(numvis + numnote, numfac)).astype(theano.config.floatX) *
                                      0.01,
                                      name='wxf_left')  # U
        self.wxf_right = theano.shared(value=self.numpy_rng.normal(
            size=(numvis + numnote, numfac)).astype(theano.config.floatX) *
                                       0.01,
                                       name='wxf_right')  # V
        self.wv = theano.shared(value=self.numpy_rng.uniform(
            low=-0.01, high=+0.01,
            size=(numfac, numvel)).astype(theano.config.floatX),
                                name='wv')  # W
        self.wvf_left = theano.shared(value=self.numpy_rng.uniform(
            low=-0.01, high=+0.01,
            size=(numvel, numvelfac)).astype(theano.config.floatX),
                                      name='wvf_left')
        self.wvf_right = theano.shared(value=self.numpy_rng.uniform(
            low=-0.01, high=+0.01,
            size=(numvel, numvelfac)).astype(theano.config.floatX),
                                       name='wvf_right')
        self.wa = theano.shared(value=self.numpy_rng.uniform(
            low=-0.01, high=+0.01,
            size=(numvelfac, numacc)).astype(theano.config.floatX),
                                name='wa')
        self.waf_left = theano.shared(value=self.numpy_rng.uniform(
            low=-0.01, high=+0.01,
            size=(numacc, numaccfac)).astype(theano.config.floatX),
                                      name='waf_left')
        self.waf_right = theano.shared(value=self.numpy_rng.uniform(
            low=-0.01, high=+0.01,
            size=(numacc, numaccfac)).astype(theano.config.floatX),
                                       name='waf_right')
        self.wj = theano.shared(value=self.numpy_rng.uniform(
            low=-0.01, high=+0.01,
            size=(numaccfac, numjerk)).astype(theano.config.floatX),
                                name='wj')
        self.bx = theano.shared(
            value=0.0 *
            numpy.ones(numvis + numnote, dtype=theano.config.floatX),
            name='bx')
        self.bv = theano.shared(value=0.0 *
                                numpy.ones(numvel, dtype=theano.config.floatX),
                                name='bv')
        self.ba = theano.shared(value=0.0 *
                                numpy.ones(numacc, dtype=theano.config.floatX),
                                name='ba')
        self.bj = theano.shared(
            value=0.0 * numpy.ones(numjerk, dtype=theano.config.floatX),
            name='bj')
        self.params = [
            self.wxf_left, self.wxf_right, self.wv, self.wvf_left,
            self.wvf_right, self.wa, self.waf_left, self.waf_right, self.wj,
            self.bx, self.bv, self.ba, self.bj
        ]

        self._inputframes = [None] * self.seq_len_to_predict
        self._inputframes_and_notebook = [None] * self.seq_len_to_predict
        self._xfactors_left = [None] * self.seq_len_to_predict
        self._xfactors_right = [None] * self.seq_len_to_predict
        self._vels = [None] * self.seq_len_to_predict
        self._accs = [None] * self.seq_len_to_predict
        self._prejerks = [None] * self.seq_len_to_predict
        self._recons_with_notebook = [None] * self.seq_len_to_predict

        #extract all input frames and project onto input/output filters:
        for t in range(self.seq_len_to_predict):
            if t < self.seq_len_to_train:
                self._inputframes[t] = self.inputs[:,
                                                   t * numvis:(t + 1) * numvis]
            else:
                self._inputframes[t] = T.zeros(
                    (self._inputframes[0].shape[0], self.numvis))

            if t > 3:
                if self.vis_corruption_type == 'zeromask':
                    self._inputframes[t] = theano_rng.binomial(
                        size=self._inputframes[t].shape,
                        n=1,
                        p=1.0 - self.vis_corruption_level,
                        dtype=theano.config.floatX) * self._inputframes[t]
                elif self.vis_corruption_type == 'mixedmask':
                    self._inputframes[t] = theano_rng.binomial(
                        size=self._inputframes[t].shape,
                        n=1,
                        p=1.0 - self.vis_corruption_level / 2,
                        dtype=theano.config.floatX) * self._inputframes[t]
                    self._inputframes[t] = (1 - theano_rng.binomial(
                        size=self._inputframes[t].shape,
                        n=1,
                        p=1.0 - self.vis_corruption_level / 2,
                        dtype=theano.config.floatX)) * self._inputframes[t]
                elif self.vis_corruption_type == 'gaussian':
                    self._inputframes[t] = theano_rng.normal(
                        size=self._inputframes[t].shape,
                        avg=0.0,
                        std=self.vis_corruption_level,
                        dtype=theano.config.floatX) + self._inputframes[t]
                else:
                    assert False, "vis_corruption type not understood"
            self._inputframes_and_notebook[t] = T.concatenate(
                (self._inputframes[t],
                 T.zeros((self._inputframes[t].shape[0], self.numnote))), 1)
            self._recons_with_notebook[t] = self._inputframes_and_notebook[t]

        for t in range(4, self.seq_len_to_predict):
            self._xfactors_left[t - 4] = T.dot(
                self._recons_with_notebook[t - 4], self.wxf_left)
            self._xfactors_right[t - 4] = T.dot(
                self._recons_with_notebook[t - 4], self.wxf_right)
            self._xfactors_left[t - 3] = T.dot(
                self._recons_with_notebook[t - 3], self.wxf_left)
            self._xfactors_right[t - 3] = T.dot(
                self._recons_with_notebook[t - 3], self.wxf_right)
            self._xfactors_left[t - 2] = T.dot(
                self._recons_with_notebook[t - 2], self.wxf_left)
            self._xfactors_right[t - 2] = T.dot(
                self._recons_with_notebook[t - 2], self.wxf_right)
            self._xfactors_left[t - 1] = T.dot(
                self._recons_with_notebook[t - 1], self.wxf_left)
            self._xfactors_right[t - 1] = T.dot(
                self._recons_with_notebook[t - 1], self.wxf_right)
            self._xfactors_left[t] = T.dot(self._recons_with_notebook[t],
                                           self.wxf_left)
            self._xfactors_right[t] = T.dot(self._recons_with_notebook[t],
                                            self.wxf_right)

            #re-infer current velocities v12 and v23:
            self._prevel01 = T.dot(
                self._xfactors_left[t - 4] * self._xfactors_right[t - 3],
                self.wv) + self.bv
            self._prevel12 = T.dot(
                self._xfactors_left[t - 3] * self._xfactors_right[t - 2],
                self.wv) + self.bv
            self._prevel23 = T.dot(
                self._xfactors_left[t - 2] * self._xfactors_right[t - 1],
                self.wv) + self.bv
            self._prevel34 = T.dot(
                self._xfactors_left[t - 1] * self._xfactors_right[t],
                self.wv) + self.bv

            #re-infer acceleration a123:
            self._preacc012 = T.dot(
                T.dot(T.nnet.sigmoid(self._prevel01), self.wvf_left) *
                T.dot(T.nnet.sigmoid(self._prevel12), self.wvf_right),
                self.wa) + self.ba
            self._preacc123 = T.dot(
                T.dot(T.nnet.sigmoid(self._prevel12), self.wvf_left) *
                T.dot(T.nnet.sigmoid(self._prevel23), self.wvf_right),
                self.wa) + self.ba
            self._preacc234 = T.dot(
                T.dot(T.nnet.sigmoid(self._prevel23), self.wvf_left) *
                T.dot(T.nnet.sigmoid(self._prevel34), self.wvf_right),
                self.wa) + self.ba

            if t == 4:
                self._prejerks[t - 1] = T.dot(
                    T.dot(T.nnet.sigmoid(self._preacc012), self.waf_left) *
                    T.dot(T.nnet.sigmoid(self._preacc123), self.waf_right),
                    self.wj) + self.bj

            #infer jerk as weighted sum of past and re-infered:
            self._prejerks[t] = self._prejerks[t - 1]

            #fill in all remaining activations from top-level jerk and past:
            self._accs[t] = T.dot(
                T.dot(T.nnet.sigmoid(self._prejerks[t]), self.wj.T) *
                T.dot(self._preacc123, self.waf_left),
                self.waf_right.T) + self.ba
            self._vels[t] = T.dot(
                T.dot(self._accs[t], self.wa.T) * T.dot(
                    self._prevel23, self.wvf_left), self.wvf_right.T) + self.bv
            self._recons_with_notebook[t] = T.dot(
                T.dot(self._recons_with_notebook[t - 1], self.wxf_left) *
                T.dot(self._vels[t], self.wv.T), self.wxf_right.T) + self.bx

        self._prediction = T.concatenate(
            [pred[:, :self.numvis] for pred in self._recons_with_notebook], 1)
        self._notebook = T.concatenate(
            [pred[:, self.numvis:] for pred in self._recons_with_notebook], 1)
        if self.output_type == 'binary':
            self._prediction_for_training = T.concatenate([
                T.nnet.sigmoid(pred[:, :self.numvis]) for pred in self.
                _recons_with_notebook[self.coststart:self.seq_len_to_train]
            ], 1)
        else:
            self._prediction_for_training = T.concatenate([
                pred[:, :self.numvis] for pred in self.
                _recons_with_notebook[self.coststart:self.seq_len_to_train]
            ], 1)

        print(self.output_type)
        if self.output_type == 'real':
            self._cost = T.mean(
                (self._prediction_for_training -
                 self.inputs[:, self.coststart *
                             self.numvis:self.seq_len_to_train *
                             self.numvis])**2)
        elif self.output_type == 'binary':
            self._cost = -T.mean(
                self.inputs[:, self.coststart *
                            self.numvis:self.seq_len_to_train * self.numvis] *
                T.log(self._prediction_for_training) +
                (1.0 - self.inputs[:, self.coststart * self.numvis:self.
                                   seq_len_to_train * self.numvis]) *
                T.log(1.0 - self._prediction_for_training))

        self._grads = T.grad(self._cost, self.params)

        self.prediction = theano.function([self.inputs], self._prediction)
        self.notebook = theano.function([self.inputs], self._notebook)
        self.vels = [theano.function([self.inputs], v) for v in self._vels[4:]]
        self.accs = [theano.function([self.inputs], a) for a in self._accs[4:]]
        self.jerks = [
            theano.function([self.inputs], j) for j in self._prejerks[4:]
        ]
        self.cost = theano.function([self.inputs], self._cost)
        self.grads = theano.function([self.inputs], self._grads)

        def get_cudandarray_value(x):
            if type(x) == theano.sandbox.cuda.CudaNdarray:
                return numpy.array(x.__array__()).flatten()
            else:
                return x.flatten()

        self.grad = lambda x: numpy.concatenate(
            [get_cudandarray_value(g) for g in self.grads(x)])
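
A hedged usage sketch for the constructor above; the class name ("PredictiveGatingPyramid") and every size below are illustrative assumptions, not taken from the source:

# Hypothetical usage of the model defined by the __init__ above.
model = PredictiveGatingPyramid(numvis=20, numnote=5, numfac=40, numvel=20,
                                numvelfac=40, numacc=10, numaccfac=20,
                                numjerk=5, seq_len_to_train=10,
                                seq_len_to_predict=12)
batch = numpy.random.randn(16, 10 * 20).astype(theano.config.floatX)
print(model.cost(batch))               # mean squared prediction error
print(model.prediction(batch).shape)   # (16, 12 * 20)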
Example #5
    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10, finetune_lr=0.1, input_x=None, label=None):

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        # Wudi added the mean and standard deviation of the activation values to examine the neural net.
        # Reference: "Understanding the difficulty of training deep feedforward neural networks", Xavier Glorot, Yoshua Bengio
        self.out_mean = []
        self.out_std = []

        assert self.n_layers > 0
        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        if input_x is None:
            self.x = T.matrix('x')  # the data is presented as rasterized images
        else: 
            self.x = input_x
        if label is None:
            self.y = T.ivector('y')  # the labels are presented as 1D vector
                                     # of [int] labels
        else:
            self.y = label

        for i in xrange(self.n_layers):
            if i == 0:
                input_size = n_ins
                layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            self.out_mean.append(T.mean(sigmoid_layer.output))
            self.out_std.append(T.std(sigmoid_layer.output))

            self.params.extend(sigmoid_layer.params)
            # Construct an RBM that shared weights with this layer
            if i == 0:
                rbm_layer = GBRBM(input=layer_input, n_in=input_size,
                                  n_hidden=hidden_layers_sizes[i], W=None,
                                  hbias=None, vbias=None, numpy_rng=None,
                                  transpose=False, activation=T.nnet.sigmoid,
                                  theano_rng=None, name='grbm', W_r=None,
                                  dropout=0, dropconnect=0)
            else:
                rbm_layer = RBM(numpy_rng=numpy_rng,
                                theano_rng=theano_rng,
                                input=layer_input,
                                n_visible=input_size,
                                n_hidden=hidden_layers_sizes[i],
                                W=sigmoid_layer.W,
                                hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        self.params.extend(self.logLayer.params)

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)

        #################################################
        # Wudi changed the annealing learning rate:
        #################################################
        self.state_learning_rate = theano.shared(numpy.asarray(finetune_lr,
                                                 dtype=theano.config.floatX),
                                                 borrow=True)
Example #6
# Carlos Morato, PhD.
# [email protected]
# Deep Learning for Advanced Robot Perception
#
# Naive LSTM to learn one-char to one-char mapping
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.utils import np_utils
from theano.tensor.shared_randomstreams import RandomStreams
# fix random seed for reproducibility
numpy.random.seed(7)
srng = RandomStreams(7)
# define the raw dataset
alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# create mapping of characters to integers (0-25) and the reverse
char_to_int = dict((c, i) for i, c in enumerate(alphabet))
int_to_char = dict((i, c) for i, c in enumerate(alphabet))
# prepare the dataset of input to output pairs encoded as integers
seq_length = 1
dataX = []
dataY = []
for i in range(0, len(alphabet) - seq_length, 1):
    seq_in = alphabet[i:i + seq_length]
    seq_out = alphabet[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
    print(seq_in, '->', seq_out)
# reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (len(dataX), seq_length, 1))
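
The example stops after reshaping X. A hedged sketch of how this naive one-char-to-one-char LSTM tutorial typically continues (layer size, epochs and batch size are illustrative assumptions):

# normalize inputs and one-hot encode the targets
X = X / float(len(alphabet))
y = np_utils.to_categorical(dataY)
# create and fit the model
model = Sequential()
model.add(LSTM(32, input_shape=(X.shape[1], X.shape[2])))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])
model.fit(X, y, epochs=500, batch_size=1, verbose=2)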
Example #7
    def __init__(
            self,
            numpy_rng,
            theano_rng=None,
            cfg=None,  # the network configuration
            dnn_shared=None,
            shared_layers=[],
            input=None):
        self.layers = []
        self.params = []
        self.delta_params = []

        self.cfg = cfg
        self.n_ins = cfg.n_ins
        self.n_outs = cfg.n_outs
        self.hidden_layers_sizes = cfg.hidden_layers_sizes
        self.hidden_layers_number = len(self.hidden_layers_sizes)
        self.activation = cfg.activation

        self.do_maxout = cfg.do_maxout
        self.pool_size = cfg.pool_size

        self.max_col_norm = cfg.max_col_norm
        self.l1_reg = cfg.l1_reg
        self.l2_reg = cfg.l2_reg

        self.non_updated_layers = cfg.non_updated_layers

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))
        # allocate symbolic variables for the data
        if input is None:
            self.x = T.matrix('x')
        else:
            self.x = input
        if cfg.multi_label is True:
            self.y = T.imatrix('y')
        else:
            self.y = T.ivector('y')

        for i in xrange(self.hidden_layers_number):
            # construct the hidden layer
            if i == 0:
                input_size = self.n_ins
                layer_input = self.x
            else:
                input_size = self.hidden_layers_sizes[i - 1]
                layer_input = self.layers[-1].output

            W = None
            b = None
            if (i in shared_layers):
                W = dnn_shared.layers[i].W
                b = dnn_shared.layers[i].b
            if self.do_maxout == True:
                hidden_layer = HiddenLayer(rng=numpy_rng,
                                           input=layer_input,
                                           n_in=input_size,
                                           n_out=self.hidden_layers_sizes[i] *
                                           self.pool_size,
                                           W=W,
                                           b=b,
                                           activation=(lambda x: 1.0 * x),
                                           do_maxout=True,
                                           pool_size=self.pool_size)
            else:
                hidden_layer = HiddenLayer(rng=numpy_rng,
                                           input=layer_input,
                                           n_in=input_size,
                                           n_out=self.hidden_layers_sizes[i],
                                           W=W,
                                           b=b,
                                           activation=self.activation)
            # add the layer to our list of layers
            self.layers.append(hidden_layer)
            # if the layer index is included in self.non_updated_layers, parameters of this layer will not be updated
            if (i not in self.non_updated_layers):
                self.params.extend(hidden_layer.params)
                self.delta_params.extend(hidden_layer.delta_params)
        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(input=self.layers[-1].output,
                                           n_in=self.hidden_layers_sizes[-1],
                                           n_out=self.n_outs,
                                           multi_label=cfg.multi_label)

        if self.n_outs > 0:
            self.layers.append(self.logLayer)
            self.params.extend(self.logLayer.params)
            self.delta_params.extend(self.logLayer.delta_params)

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)

        if self.l1_reg is not None:
            for i in xrange(self.hidden_layers_number):
                W = self.layers[i].W
                self.finetune_cost += self.l1_reg * (abs(W).sum())

        if self.l2_reg is not None:
            for i in xrange(self.hidden_layers_number):
                W = self.layers[i].W
                self.finetune_cost += self.l2_reg * T.sqr(W).sum()
Example #8
    minibatch_size = 100
    input_dim = 784
    # number of hidden units in encoder (x -> z) network
    encoder_hidden_dim = 500
    # number of hidden units in decoder (z -> x) network
    decoder_hidden_dim = 500
    # number of latent variables
    latent_dim = 2  # pairs of mu and sigma

    # l2 regularization weight
    lamda = 0.001
    # learning rate
    learning_rate = 0.02

    # random number generator used for sampling latent variables
    srng = RandomStreams(seed=123)

    # input to the network
    x = T.fmatrix(name='x')

    # build the model
    l_input = lasagne.layers.InputLayer(shape=(None, input_dim), input_var=x)

    l_encoder_hidden = lasagne.layers.DenseLayer(
        l_input,
        num_units=encoder_hidden_dim,
        W=lasagne.init.Normal(0.01),
        b=lasagne.init.Normal(0.01),
        nonlinearity=lasagne.nonlinearities.tanh)

    l_encoder_mu = lasagne.layers.DenseLayer(
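
The example is cut off in the middle of defining l_encoder_mu. A hedged sketch of how such a Lasagne VAE encoder usually continues from this point, reusing the srng defined above for the reparameterization trick (layer names and details are assumptions):

    # Hypothetical continuation: mean and log-sigma heads of the encoder.
    l_encoder_mu = lasagne.layers.DenseLayer(
        l_encoder_hidden,
        num_units=latent_dim,
        W=lasagne.init.Normal(0.01),
        b=lasagne.init.Normal(0.01),
        nonlinearity=None)
    l_encoder_log_sigma = lasagne.layers.DenseLayer(
        l_encoder_hidden,
        num_units=latent_dim,
        W=lasagne.init.Normal(0.01),
        b=lasagne.init.Normal(0.01),
        nonlinearity=None)

    # Reparameterization trick: z = mu + sigma * eps with eps ~ N(0, 1).
    mu = lasagne.layers.get_output(l_encoder_mu)
    log_sigma = lasagne.layers.get_output(l_encoder_log_sigma)
    z = mu + T.exp(log_sigma) * srng.normal(mu.shape)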
Example #9
    def __init__(self, rng, x, n_in, n_h, p=0.0, training=0, rnn_batch_training=False):
        """ Initialise a gated recurrent unit

        :param rng: random state, fixed value for randome state for reproducible objective results
        :param x: input to a network
        :param n_in: number of input features
        :type n_in: integer
        :param n_h: number of hidden units
        :type n_h: integer
        :param p: the probability of dropout
        :param training: a binary value to indicate training or testing (for dropout training)
        """

        self.n_in = int(n_in)
        self.n_h  = int(n_h)

        self.rnn_batch_training = rnn_batch_training

        self.input = x

        if p > 0.0:
            if training==1:
                srng = RandomStreams(seed=123456)
                self.input = T.switch(srng.binomial(size=x.shape,p=p), x, 0)
            else:
                self.input =  (1-p) * x

        self.W_xz = theano.shared(value = np.asarray(rng.normal(0.0, old_div(1.0,np.sqrt(n_in)),
                     size=(n_in, n_h)), dtype=config.floatX), name = 'W_xz')
        self.W_hz = theano.shared(value = np.asarray(rng.normal(0.0, old_div(1.0,np.sqrt(n_h)),
                     size=(n_h, n_h)), dtype=config.floatX), name = 'W_hz')

        self.W_xr = theano.shared(value = np.asarray(rng.normal(0.0, old_div(1.0,np.sqrt(n_in)),
                     size=(n_in, n_h)), dtype=config.floatX), name = 'W_xr')
        self.W_hr = theano.shared(value = np.asarray(rng.normal(0.0, old_div(1.0,np.sqrt(n_h)),
                     size=(n_h, n_h)), dtype=config.floatX), name = 'W_hr')

        self.W_xh = theano.shared(value = np.asarray(rng.normal(0.0, old_div(1.0,np.sqrt(n_in)),
                     size=(n_in, n_h)), dtype=config.floatX), name = 'W_xh')
        self.W_hh = theano.shared(value = np.asarray(rng.normal(0.0, old_div(1.0,np.sqrt(n_h)),
                     size=(n_h, n_h)), dtype=config.floatX), name = 'W_hh')

        self.b_z = theano.shared(value = np.zeros((n_h, ), dtype = config.floatX), name = 'b_z')

        self.b_r = theano.shared(value = np.zeros((n_h, ), dtype = config.floatX), name = 'b_r')

        self.b_h = theano.shared(value = np.zeros((n_h, ), dtype = config.floatX), name = 'b_h')

        if self.rnn_batch_training:
            self.h0 = theano.shared(value=np.zeros((1, n_h), dtype = config.floatX), name = 'h0')
            self.c0 = theano.shared(value=np.zeros((1, n_h), dtype = config.floatX), name = 'c0')

            self.h0 = T.repeat(self.h0, x.shape[1], 0)
            self.c0 = T.repeat(self.c0, x.shape[1], 0)
        else:
            self.h0 = theano.shared(value=np.zeros((n_h, ), dtype = config.floatX), name = 'h0')
            self.c0 = theano.shared(value=np.zeros((n_h, ), dtype = config.floatX), name = 'c0')


        ## pre-compute these for fast computation
        self.Wzx = T.dot(self.input, self.W_xz)
        self.Wrx = T.dot(self.input, self.W_xr)
        self.Whx = T.dot(self.input, self.W_xh)

        [self.h, self.c], _ = theano.scan(self.gru_as_activation_function,
                                               sequences = [self.Wzx, self.Wrx, self.Whx],
                                               outputs_info = [self.h0, self.c0])  #


        self.output = self.h

        self.params = [self.W_xz, self.W_hz, self.W_xr, self.W_hr, self.W_xh, self.W_hh,
                       self.b_z, self.b_r, self.b_h]

        self.L2_cost = (self.W_xz ** 2).sum() + (self.W_hz ** 2).sum() + (self.W_xr ** 2).sum() + (self.W_hr ** 2).sum() + (self.W_xh ** 2).sum() + (self.W_hh ** 2).sum()
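
The scan call above relies on self.gru_as_activation_function, which this example does not include. A minimal sketch of such a step function, following the standard GRU update equations (the exact form is an assumption):

    def gru_as_activation_function(self, Wzx_t, Wrx_t, Whx_t, h_tm1, c_tm1=None):
        # Sketch of one GRU step; not taken from the original source.
        z_t = T.nnet.sigmoid(Wzx_t + T.dot(h_tm1, self.W_hz) + self.b_z)  # update gate
        r_t = T.nnet.sigmoid(Wrx_t + T.dot(h_tm1, self.W_hr) + self.b_r)  # reset gate
        h_can = T.tanh(Whx_t + r_t * T.dot(h_tm1, self.W_hh) + self.b_h)  # candidate state
        h_t = (1.0 - z_t) * h_tm1 + z_t * h_can
        c_t = h_t  # returned only so the interface matches outputs_info=[h0, c0]
        return h_t, c_t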
Example #10
    def __init__(self, rng, x, n_in, n_h, n_out, p=0.0, training=0, rnn_batch_training=False):
        """ Initialise all the components in a LSTM block, including input gate, output gate, forget gate, peephole connections

        :param rng: random state, fixed value for randome state for reproducible objective results
        :param x: input to a network
        :param n_in: number of input features
        :type n_in: integer
        :param n_h: number of hidden units
        :type n_h: integer
        :param p: the probability of dropout
        :param training: a binary value to indicate training or testing (for dropout training)
        """

        self.input = x

        if p > 0.0:
            if training==1:
                srng = RandomStreams(seed=123456)
                self.input = T.switch(srng.binomial(size=x.shape,p=p), x, 0)
            else:
                self.input =  (1-p) * x

        self.n_in = int(n_in)
        self.n_h  = int(n_h)

        self.rnn_batch_training = rnn_batch_training

        # random initialisation
        Wx_value = np.asarray(rng.normal(0.0, old_div(1.0,np.sqrt(n_in)), size=(n_in, n_h)), dtype=config.floatX)
        Wh_value = np.asarray(rng.normal(0.0, old_div(1.0,np.sqrt(n_h)), size=(n_h, n_h)), dtype=config.floatX)
        Wc_value = np.asarray(rng.normal(0.0, old_div(1.0,np.sqrt(n_h)), size=(n_h, )), dtype=config.floatX)
        Wy_value = np.asarray(rng.normal(0.0, old_div(1.0,np.sqrt(n_out)), size=(n_out, n_h)), dtype=config.floatX)

        # Input gate weights
        self.W_xi = theano.shared(value=Wx_value, name='W_xi')
        self.W_hi = theano.shared(value=Wh_value, name='W_hi')
        self.w_ci = theano.shared(value=Wc_value, name='w_ci')
        self.W_yi = theano.shared(value=Wy_value, name='W_yi')

        # random initialisation
        Uh_value = np.asarray(rng.normal(0.0, old_div(1.0,np.sqrt(n_h)), size=(n_h, n_out)), dtype=config.floatX)

        # Output gate weights
        self.U_ho = theano.shared(value=Uh_value, name='U_ho')

        # random initialisation
        Wx_value = np.asarray(rng.normal(0.0, old_div(1.0,np.sqrt(n_in)), size=(n_in, n_h)), dtype=config.floatX)
        Wh_value = np.asarray(rng.normal(0.0, old_div(1.0,np.sqrt(n_h)), size=(n_h, n_h)), dtype=config.floatX)
        Wc_value = np.asarray(rng.normal(0.0, old_div(1.0,np.sqrt(n_h)), size=(n_h, )), dtype=config.floatX)

        # Forget gate weights
        self.W_xf = theano.shared(value=Wx_value, name='W_xf')
        self.W_hf = theano.shared(value=Wh_value, name='W_hf')
        self.w_cf = theano.shared(value=Wc_value, name='w_cf')

        # random initialisation
        Wx_value = np.asarray(rng.normal(0.0, old_div(1.0,np.sqrt(n_in)), size=(n_in, n_h)), dtype=config.floatX)
        Wh_value = np.asarray(rng.normal(0.0, old_div(1.0,np.sqrt(n_h)), size=(n_h, n_h)), dtype=config.floatX)
        Wc_value = np.asarray(rng.normal(0.0, old_div(1.0,np.sqrt(n_h)), size=(n_h, )), dtype=config.floatX)

        # Output gate weights
        self.W_xo = theano.shared(value=Wx_value, name='W_xo')
        self.W_ho = theano.shared(value=Wh_value, name='W_ho')
        self.w_co = theano.shared(value=Wc_value, name='w_co')

        # random initialisation
        Wx_value = np.asarray(rng.normal(0.0, old_div(1.0,np.sqrt(n_in)), size=(n_in, n_h)), dtype=config.floatX)
        Wh_value = np.asarray(rng.normal(0.0, old_div(1.0,np.sqrt(n_h)), size=(n_h, n_h)), dtype=config.floatX)
        Wc_value = np.asarray(rng.normal(0.0, old_div(1.0,np.sqrt(n_h)), size=(n_h, )), dtype=config.floatX)

        # Cell weights
        self.W_xc = theano.shared(value=Wx_value, name='W_xc')
        self.W_hc = theano.shared(value=Wh_value, name='W_hc')

        # bias
        self.b_i = theano.shared(value=np.zeros((n_h, ), dtype=config.floatX), name='b_i')
        self.b_f = theano.shared(value=np.zeros((n_h, ), dtype=config.floatX), name='b_f')
        self.b_o = theano.shared(value=np.zeros((n_h, ), dtype=config.floatX), name='b_o')
        self.b_c = theano.shared(value=np.zeros((n_h, ), dtype=config.floatX), name='b_c')
        self.b   = theano.shared(value=np.zeros((n_out, ), dtype=config.floatX), name='b')

        ### make a layer

        # initial value of hidden and cell state
        if self.rnn_batch_training:
            self.h0 = theano.shared(value=np.zeros((1, n_h), dtype = config.floatX), name = 'h0')
            self.c0 = theano.shared(value=np.zeros((1, n_h), dtype = config.floatX), name = 'c0')
            self.y0 = theano.shared(value=np.zeros((1, n_out), dtype = config.floatX), name = 'y0')

            self.h0 = T.repeat(self.h0, x.shape[1], 0)
            self.c0 = T.repeat(self.c0, x.shape[1], 0)
            self.y0 = T.repeat(self.y0, x.shape[1], 0)
        else:
            self.h0 = theano.shared(value=np.zeros((n_h, ), dtype = config.floatX), name = 'h0')
            self.c0 = theano.shared(value=np.zeros((n_h, ), dtype = config.floatX), name = 'c0')
            self.y0 = theano.shared(value=np.zeros((n_out, ), dtype = config.floatX), name = 'y0')


        self.Wix = T.dot(self.input, self.W_xi)
        self.Wfx = T.dot(self.input, self.W_xf)
        self.Wcx = T.dot(self.input, self.W_xc)
        self.Wox = T.dot(self.input, self.W_xo)

        [self.h, self.c, self.y], _ = theano.scan(self.recurrent_fn, sequences = [self.Wix, self.Wfx, self.Wcx, self.Wox],
                                                             outputs_info = [self.h0, self.c0, self.y0])

        self.output = self.y
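
As with the previous example, the step function passed to scan (self.recurrent_fn) is not shown. A hedged sketch assuming a standard peephole LSTM step with previous-output feedback (W_yi) and a linear readout (U_ho); the exact form is an assumption:

    def recurrent_fn(self, Wix_t, Wfx_t, Wcx_t, Wox_t, h_tm1, c_tm1, y_tm1):
        # Sketch of one peephole-LSTM step; not taken from the original source.
        i_t = T.nnet.sigmoid(Wix_t + T.dot(h_tm1, self.W_hi) +
                             T.dot(y_tm1, self.W_yi) + self.w_ci * c_tm1 + self.b_i)
        f_t = T.nnet.sigmoid(Wfx_t + T.dot(h_tm1, self.W_hf) +
                             self.w_cf * c_tm1 + self.b_f)
        c_t = f_t * c_tm1 + i_t * T.tanh(Wcx_t + T.dot(h_tm1, self.W_hc) + self.b_c)
        o_t = T.nnet.sigmoid(Wox_t + T.dot(h_tm1, self.W_ho) +
                             self.w_co * c_t + self.b_o)
        h_t = o_t * T.tanh(c_t)
        y_t = T.dot(h_t, self.U_ho) + self.b  # linear output projection
        return h_t, c_t, y_t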
Example #11
    def __init__(self, rng, x, n_in, n_h, n_out, p, training, rnn_batch_training=False):
        """ This is to initialise a standard RNN hidden unit

        :param rng: random state, fixed value for randome state for reproducible objective results
        :param x: input data to current layer
        :param n_in: dimension of input data
        :param n_h: number of hidden units/blocks
        :param n_out: dimension of output data
        :param p: the probability of dropout
        :param training: a binary value to indicate training or testing (for dropout training)
        """
        self.input = x

        if p > 0.0:
            if training==1:
                srng = RandomStreams(seed=123456)
                self.input = T.switch(srng.binomial(size=x.shape,p=p), x, 0)
            else:
                self.input = (1 - p) * x

        self.n_in  = int(n_in)
        self.n_h   = int(n_h)
        self.n_out = int(n_out)

        self.rnn_batch_training = rnn_batch_training

        # random initialisation
        Wx_value = np.asarray(rng.normal(0.0, old_div(1.0,np.sqrt(n_in)), size=(n_in, n_h)), dtype=config.floatX)
        Wh_value = np.asarray(rng.normal(0.0, old_div(1.0,np.sqrt(n_h)), size=(n_h, n_h)), dtype=config.floatX)
        Wy_value = np.asarray(rng.normal(0.0, old_div(1.0,np.sqrt(n_out)), size=(n_out, n_h)), dtype=config.floatX)
        Ux_value = np.asarray(rng.normal(0.0, old_div(1.0,np.sqrt(n_in)), size=(n_in, n_out)), dtype=config.floatX)
        Uh_value = np.asarray(rng.normal(0.0, old_div(1.0,np.sqrt(n_h)), size=(n_h, n_out)), dtype=config.floatX)
        Uy_value = np.asarray(rng.normal(0.0, old_div(1.0,np.sqrt(n_out)), size=(n_out, n_out)), dtype=config.floatX)

        # Input gate weights
        self.W_xi = theano.shared(value=Wx_value, name='W_xi')
        self.W_hi = theano.shared(value=Wh_value, name='W_hi')
        self.W_yi = theano.shared(value=Wy_value, name='W_yi')

        # Output gate weights
        self.U_xi = theano.shared(value=Ux_value, name='U_xi')
        self.U_hi = theano.shared(value=Uh_value, name='U_hi')
        self.U_yi = theano.shared(value=Uy_value, name='U_yi')

        # bias
        self.b_i = theano.shared(value=np.zeros((n_h, ), dtype=config.floatX), name='b_i')
        self.b   = theano.shared(value=np.zeros((n_out, ), dtype=config.floatX), name='b')


        # initial value of hidden and cell state and output
        if self.rnn_batch_training:
            self.h0 = theano.shared(value=np.zeros((1, n_h), dtype = config.floatX), name = 'h0')
            self.c0 = theano.shared(value=np.zeros((1, n_h), dtype = config.floatX), name = 'c0')
            self.y0 = theano.shared(value=np.zeros((1, n_out), dtype = config.floatX), name = 'y0')

            self.h0 = T.repeat(self.h0, x.shape[1], 0)
            self.c0 = T.repeat(self.c0, x.shape[1], 0)
            self.y0 = T.repeat(self.y0, x.shape[1], 0)
        else:
            self.h0 = theano.shared(value=np.zeros((n_h, ), dtype = config.floatX), name = 'h0')
            self.c0 = theano.shared(value=np.zeros((n_h, ), dtype = config.floatX), name = 'c0')
            self.y0 = theano.shared(value=np.zeros((n_out, ), dtype = config.floatX), name = 'y0')


        self.Wix = T.dot(self.input, self.W_xi)

        [self.h, self.c, self.y], _ = theano.scan(self.recurrent_as_activation_function, sequences = [self.Wix],
                                                                      outputs_info = [self.h0, self.c0, self.y0])

        self.output = self.y

        self.params = [self.W_xi, self.W_hi, self.W_yi, self.U_hi, self.b_i, self.b]

        self.L2_cost = (self.W_xi ** 2).sum() + (self.W_hi ** 2).sum() + (self.W_yi ** 2).sum() + (self.U_hi ** 2).sum()
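
Here too the step function (self.recurrent_as_activation_function) is not included. A minimal sketch consistent with the weights defined above; the exact equations are assumptions:

    def recurrent_as_activation_function(self, Wix_t, h_tm1, c_tm1, y_tm1):
        # Sketch of one vanilla-RNN step; not taken from the original source.
        h_t = T.nnet.sigmoid(Wix_t + T.dot(h_tm1, self.W_hi) +
                             T.dot(y_tm1, self.W_yi) + self.b_i)
        c_t = h_t                             # kept only to match outputs_info
        y_t = T.dot(h_t, self.U_hi) + self.b  # linear readout
        return h_t, c_t, y_t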
Example #12
File: SdA.py  Project: slykid/Python3
    def __init__(
        self,
        numpy_rng,
        theano_rng=None,
        n_ins=784,
        hidden_layers_sizes=[500, 500],
        n_outs=10,
        corruption_levels=[0.1, 0.1]
    ):

        self.sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        self.x = T.matrix('x')
        self.y = T.ivector('y')

        for i in range(self.n_layers):

            if i == 0:
                input_size = n_ins
                layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)

            self.sigmoid_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)

            dA_layer = dA(numpy_rng=numpy_rng,
                          theano_rng=theano_rng,
                          input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layers_sizes[i],
                          W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b)

            self.dA_layers.append(dA_layer)

        self.logLayer = LogisticRegression(input=self.sigmoid_layers[-1].output,
                                           n_in=hidden_layers_sizes[-1],
                                           n_out=n_outs
                                           )

        self.params.extend(self.logLayer.params)
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)
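
This constructor follows the SdA class from the deeplearning.net tutorial, which continues with a method that builds one pretraining function per dA layer. A sketch along the lines of that tutorial, assuming the usual imports (theano, theano.In) and the tutorial's dA.get_cost_updates interface; the default corruption and learning-rate values are illustrative:

    def pretraining_functions(self, train_set_x, batch_size):
        index = T.lscalar('index')               # minibatch index
        corruption_level = T.scalar('corruption')
        learning_rate = T.scalar('lr')
        batch_begin = index * batch_size
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for dA_layer in self.dA_layers:
            cost, updates = dA_layer.get_cost_updates(corruption_level,
                                                      learning_rate)
            fn = theano.function(
                inputs=[index,
                        theano.In(corruption_level, value=0.2),
                        theano.In(learning_rate, value=0.1)],
                outputs=cost,
                updates=updates,
                givens={self.x: train_set_x[batch_begin:batch_end]})
            pretrain_fns.append(fn)
        return pretrain_fns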
Example #13
import theano.tensor as T
from theano import function
from theano.tensor.shared_randomstreams import RandomStreams
import numpy

"""
demo for how to define a function with a random variable.
use case:
where we want to define a function having a random variable, for example, introducing minor corruptions in inputs.
"""

random = RandomStreams(seed = 42)

a = random.normal((1,3))
b = T.dmatrix('b')
f = a * b
g = function([b], f)
print("Invocation1: ", g(numpy.ones((1,3))))
print("Invocation2: ", g(numpy.ones((1,3))))
print("Invocation3: ", g(numpy.ones((1,3))))