Ejemplo n.º 1
0
def main():
    """Build a HiddenLayer from the module-level ``conf`` and serve it over gRPC.

    Blocks forever (sleeping in the main thread) until KeyboardInterrupt,
    then stops the server immediately.
    """
    print("{}; lr: {}; SG {}; SG lr: {}; layer size: {}".format(
        conf["layer_name"], conf["learning_rate"], conf["enable_SG"],
        conf["sg_learning_rate"], conf["layer_size"]))

    layer = HiddenLayer(conf["layer_name"], conf["upper_layer"],
                        conf["lower_layer"], conf["lower_layer_size"],
                        conf["layer_size"], relu, relu_prime,
                        conf["learning_rate"], conf["enable_SG"],
                        conf["sg_learning_rate"])
    # fresh (non-restored) weight initialization
    layer.init_weights(None)

    # Allow very large layer-data messages (128 MiB) in both directions.
    max_msg_len = 128 * 1024 * 1024
    grpc_options = [
        ('grpc.max_send_message_length', max_msg_len),
        ('grpc.max_receive_message_length', max_msg_len),
    ]
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=4),
                         options=grpc_options)
    nn_pb2_grpc.add_LayerDataExchangeServicer_to_server(layer, server)
    server.add_insecure_port(conf["listen_on"])
    server.start()

    # grpc serves from worker threads; keep the main thread alive until Ctrl-C.
    try:
        while True:
            time.sleep(24 * 60 * 60)
    except KeyboardInterrupt:
        server.stop(0)
Ejemplo n.º 2
0
    def __init__(self,rng,params,cost_function='mse',optimizer = RMSprop):
        """Denoising autoencoder with tied encoder/decoder weights.

        Builds a symbolic graph n_output -> 1024 -> 2048 -> 1024 -> n_output
        and compiles three Theano functions on self: ``train`` (one optimizer
        step returning the cost), ``predictions`` (reconstruction) and
        ``mid_layer`` (the 2048-d encoding).

        :param rng: Theano random stream; used for the corruption noise and
            passed to the weight/bias initializers.
        :param params: dict with keys "lr", "batch_size", "n_output",
            "corruption_level".
        :param cost_function: key understood by get_err_fn (default 'mse').
        :param optimizer: optimizer class, instantiated as
            optimizer(cost, self.params, lr=lr).
        """

        lr=params["lr"]
        batch_size=params["batch_size"]
        n_output=params['n_output']
        corruption_level=params["corruption_level"]

        X = T.matrix(name="input",dtype=dtype) # batch of sequence of vector
        Y = T.matrix(name="output",dtype=dtype) # batch of sequence of vector
        is_train = T.iscalar('is_train') # pseudo boolean for switching between training and prediction

        # Binomial keep/drop mask, one decision per 3-component group.
        # n_output/3 assumes n_output is a multiple of 3 and Python 2 integer
        # division -- TODO confirm.
        bin_noise=rng.binomial(size=(batch_size,n_output/3,1), n=1,p=1 - corruption_level,dtype=theano.config.floatX)
        #bin_noise_3d= T.reshape(T.concatenate((bin_noise, bin_noise,bin_noise),axis=1),(batch_size,n_output/3,3))
        # replicate the per-group mask across the 3 components
        bin_noise_3d= T.concatenate((bin_noise, bin_noise,bin_noise),axis=2)

        # Gaussian noise applied only where the binomial mask keeps the group.
        noise= rng.normal(size=(batch_size,n_output), std=0.03, avg=0.0,dtype=theano.config.floatX)
        noise_bin=T.reshape(noise,(batch_size,n_output/3,3))*bin_noise_3d
        X_train=T.reshape(noise_bin,(batch_size,n_output))+X

        # corrupted input at train time, clean input at prediction time
        X_tilde= T.switch(T.neq(is_train, 0), X_train, X)

        W_1_e =u.init_weight(shape=(n_output,1024),rng=rng,name="w_hid",sample="glorot")
        b_1_e=u.init_bias(1024,rng)

        W_2_e =u.init_weight(shape=(1024,2048),rng=rng,name="w_hid",sample="glorot")
        b_2_e=u.init_bias(2048,rng)

        # Decoder weights are the transposed encoder weights (tied weights);
        # only the decoder biases are independent parameters.
        W_2_d = W_2_e.T
        b_2_d=u.init_bias(1024,rng)

        W_1_d = W_1_e.T
        b_1_d=u.init_bias(n_output,rng)

        h_1_e=HiddenLayer(rng,X_tilde,0,0, W=W_1_e,b=b_1_e,activation=nn.relu)
        h_2_e=HiddenLayer(rng,h_1_e.output,0,0, W=W_2_e,b=b_2_e,activation=nn.relu)
        h_2_d=HiddenLayer(rng,h_2_e.output,0,0, W=W_2_d,b=b_2_d,activation=u.do_nothing)
        h_1_d=LogisticRegression(rng,h_2_d.output,0,0, W=W_1_d,b=b_1_d)

        self.output = h_1_d.y_pred

        # Encoder W/b train directly; the tied decoder weights follow them, so
        # only the decoder biases are appended as extra trainables.
        self.params =h_1_e.params+h_2_e.params
        self.params.append(b_2_d)
        self.params.append(b_1_d)

        cost=get_err_fn(self,cost_function,Y)
        L2_reg=0.0001
        L2_sqr = theano.shared(0.)
        for param in self.params:
            # NOTE(review): this sums squares of param[0] and param[1] only
            # (the first two rows/elements of each tensor), not the whole
            # tensor -- verify this is the intended L2 penalty.
            L2_sqr += (T.sum(param[0] ** 2)+T.sum(param[1] ** 2))

        cost += L2_reg*L2_sqr

        _optimizer = optimizer(cost, self.params, lr=lr)
        self.train = theano.function(inputs=[X,Y,is_train],outputs=cost,updates=_optimizer.getUpdates(),allow_input_downcast=True)
        self.predictions = theano.function(inputs = [X,is_train], outputs = self.output,allow_input_downcast=True)
        self.mid_layer = theano.function(inputs = [X,is_train], outputs = h_2_e.output,allow_input_downcast=True)
        self.n_param=count_params(self.params)
Ejemplo n.º 3
0
    def __init__(self, cropsize, batch_size, nkerns=[10, 10], filters=[11, 6]):
        """Two LeNet conv+pool layers followed by two fully-connected layers.

        The last layer has a single output unit with activation=None, so the
        network produces one linear value per example.

        NOTE(review): nkerns/filters are mutable default arguments -- shared
        across calls; safe only while callers never mutate them.

        :param cropsize: input image side length (1-channel square images).
        :param batch_size: minibatch size baked into the conv image_shape.
        :param nkerns: feature-map counts for the two conv layers.
        :param filters: filter side lengths for the two conv layers.
        """
        self.X_batch, self.y_batch = T.tensor4('x'), T.matrix('y')
        self.layers, self.params = [], []
        rng = np.random.RandomState(23455)
        layer0 = LeNetConvPoolLayer(rng,
                                    input=self.X_batch,
                                    image_shape=(batch_size, 1, cropsize,
                                                 cropsize),
                                    filter_shape=(nkerns[0], 1, filters[0],
                                                  filters[0]),
                                    poolsize=(2, 2))
        self.layers += [layer0]
        self.params += layer0.params
        # 400 - 11 + 1 = 390 / 2 = 195
        # 300 - 11 + 1 = 290 / 2 = 145
        # feature-map side after valid conv + 2x2 pooling (Python 2 integer
        # division -- assumes an even intermediate size; TODO confirm)
        map_size = (cropsize - filters[0] + 1) / 2
        layer1 = LeNetConvPoolLayer(rng,
                                    input=layer0.output,
                                    image_shape=(batch_size, nkerns[0],
                                                 map_size, map_size),
                                    filter_shape=(nkerns[1], nkerns[0],
                                                  filters[1], filters[1]),
                                    poolsize=(2, 2))
        self.layers += [layer1]
        self.params += layer1.params
        # 195 - 6 + 1 = 190 / 2 = 95
        # 145 - 6 + 1 = 140 / 2 = 70
        map_size = (map_size - filters[1] + 1) / 2
        # flatten conv feature maps to (batch, features) for the dense layers
        layer2_input = layer1.output.flatten(2)

        # fully-connected layer (activation=None, i.e. linear)
        layer2 = HiddenLayer(rng,
                             input=layer2_input,
                             n_in=nkerns[1] * map_size * map_size,
                             n_out=1000,
                             activation=None)
        self.layers += [layer2]
        self.params += layer2.params

        layer3 = HiddenLayer(rng,
                             input=layer2.output,
                             n_in=1000,
                             n_out=1,
                             activation=None)
        self.layers += [layer3]
        self.params += layer3.params

        # total trainable parameter count (Python 2 print statement)
        nparams = np.sum(
            [p.get_value().flatten().shape[0] for p in self.params])
        print "model contains %i parameters" % nparams
        self.output = self.layers[-1].output
Ejemplo n.º 4
0
    def build_symbolic_graph(self):
        """Wire up the symbolic MLP: input/target variables, the hidden stack,
        and the output layer; store them all on self.
        """
        # symbolic variables (default dtype is floatX)
        self.x = T.matrix('x')
        # integer targets get an int vector, real-valued targets a matrix
        self.y = T.ivector('y') if self.target_is_int else T.matrix('y')

        # Stack hidden layers; each consumes the previous layer's output.
        self.hidden_layers = []
        prev_out, prev_size = self.x, self.n_in
        for idx, layer_size in enumerate(self.n_h):
            layer = HiddenLayer(self.rng,
                                input=prev_out,
                                n_in=prev_size,
                                n_out=layer_size,
                                activation=self.activations[idx],
                                dropout=self.dropout,
                                params=self.params_init[idx])
            self.hidden_layers.append(layer)
            prev_out, prev_size = layer.output, layer_size

        # Output layer consumes the last hidden layer's activation.
        self.output_layer = OutputLayer(self.rng,
                                        input=self.hidden_layers[-1].output,
                                        n_in=prev_size,
                                        n_out=self.n_out,
                                        non_linearities=self.activations[-1],
                                        params=self.params_init[-1])

        self.layers = self.hidden_layers + [self.output_layer]
Ejemplo n.º 5
0
    def test_forward_pass(self):
        """forward() maps an (in_dim, batch) matrix to an (out_dim, batch) matrix."""
        # GIVEN a 2 -> 4 layer and a batch of two column-vector samples
        n_in = 2
        n_out = 4
        layer = HiddenLayer(n_in, n_out, ActivationLiterals.RELU, InitLiterals.NORMAL)

        samples = np.array([[1, 2],
                            [4, 5]])
        n_samples = samples.shape[1]

        # WHEN
        result = layer.forward(samples)

        # THEN the input really has n_in rows and the output is (n_out, n_samples)
        self.assertEqual(samples.shape[0], n_in)
        self.assertEqual(result.shape, (n_out, n_samples))
Ejemplo n.º 6
0
    def __init__(self, input_size, output_size, hidden_layer_sizes):
        """Assemble the input -> hidden... -> output chain and wire each layer
        to its neighbours.

        :param input_size: width of the input layer.
        :param output_size: width of the output layer.
        :param hidden_layer_sizes: iterable of hidden-layer widths (may be empty).
        """
        self.learning_rate = 0.1

        self.input_layer = InputLayer(input_size)
        self.output_layer = OutputLayer(output_size)
        self.hidden_layers = [HiddenLayer(size) for size in hidden_layer_sizes]

        # Each hidden layer is initialized with its predecessor and successor
        # in the chain input -> hidden[0] -> ... -> hidden[-1] -> output.
        chain = [self.input_layer] + self.hidden_layers + [self.output_layer]
        for prev_layer, layer, next_layer in zip(chain, chain[1:], chain[2:]):
            layer.initialize(prev_layer, next_layer)

        # The output layer sees the last hidden layer, or the input layer
        # directly when there are no hidden layers.
        self.output_layer.initialize(chain[-2])
Ejemplo n.º 7
0
    def __init__(self, rng, in_x, in_size, architecture, activation=T.tanh):
        """Single feedforward deep neural network.

        :param rng: random state for the layer initializers.
        :param in_x: symbolic input variable.
        :param in_size: width of the input.
        :param architecture: list of hidden-layer widths (must be non-empty).
        :param activation: activation used by every layer (default T.tanh).
        """
        self.layers = []
        self.params = []
        self.n_layers = len(architecture)

        assert self.n_layers > 0

        self.x = in_x

        # Stack the layers; each one consumes the previous layer's output.
        prev_output = self.x
        prev_size = in_size
        for layer_size in architecture:
            layer = HiddenLayer(rng=rng,
                                input=prev_output,
                                n_in=prev_size,
                                n_out=layer_size,
                                activation=activation)
            self.layers.append(layer)
            self.params.extend(layer.params)
            prev_output = layer.output
            prev_size = layer_size

        self.output = self.layers[-1].output
Ejemplo n.º 8
0
    def __init__(self, input_size, hidden_size, output_size, init_weight=0.01):
        """Two-layer net: Hidden -> ReLU -> Hidden, capped by softmax-with-loss.

        :param input_size: input feature count.
        :param hidden_size: hidden layer width.
        :param output_size: number of output classes.
        :param init_weight: scale for the Gaussian weight initialization.
        """
        self.input_size = input_size

        # Gaussian-initialized weights, zero biases (same draw order as ever:
        # W1, b1, W2, b2 -- keeps RNG state identical).
        self.params = {
            'W1': init_weight * np.random.randn(input_size, hidden_size),
            'b1': np.zeros(hidden_size),
            'W2': init_weight * np.random.randn(hidden_size, output_size),
            'b2': np.zeros(output_size),
        }

        # Ordered so a forward/backward pass can iterate the layers in sequence.
        self.layers = OrderedDict([
            ('Hidden1', HiddenLayer(self.params['W1'], self.params['b1'])),
            ('ReLU1', ReLULayer()),
            ('Hidden2', HiddenLayer(self.params['W2'], self.params['b2'])),
        ])
        self.lastLayer = SoftmaxWithLossLayer()
Ejemplo n.º 9
0
    def _build(self, args):
        """Variational autoencoder-style graph: a 2-layer ReLU encoder, a
        Gaussian latent code z (reparameterization trick), a 2-layer ReLU
        decoder, a tanh reconstruction head and a 2-unit linear prediction
        head (both heads fed from decoder2).
        """
        # encoder: input_dim -> hidden_dim_1 -> hidden_dim_2
        self.encoder1 = HiddenLayer(input_dim=self.input_dim,
                                    output_dim=args.hidden_dim_1,
                                    act=tf.nn.relu,
                                    dropout=self.dropout,
                                    logging=self.logging)(self.inputs)

        self.encoder2 = HiddenLayer(input_dim=args.hidden_dim_1,
                                    output_dim=args.hidden_dim_2,
                                    act=tf.nn.relu,
                                    dropout=self.dropout,
                                    logging=self.logging)(self.encoder1)

        # linear (identity-activation) heads producing the latent Gaussian's
        # mean and log-scale parameters
        self.z_mean = HiddenLayer(input_dim=args.hidden_dim_2,
                                  output_dim=args.hidden_dim_3,
                                  act=lambda x: x,
                                  dropout=self.dropout,
                                  logging=self.logging)(self.encoder2)

        self.z_log_std = HiddenLayer(input_dim=args.hidden_dim_2,
                                     output_dim=args.hidden_dim_3,
                                     act=lambda x: x,
                                     dropout=self.dropout,
                                     logging=self.logging)(self.encoder2)

        # reparameterization trick: z = mean + eps * exp(z_log_std / 2);
        # the /2 means z_log_std is effectively treated as a log-variance
        self.z = self.z_mean + tf.random_normal(
            shape=[self.batch_size, args.hidden_dim_3]) * tf.exp(
                self.z_log_std / 2.)

        # decoder mirrors the encoder: hidden_dim_3 -> hidden_dim_2 -> hidden_dim_1
        self.decoder1 = HiddenLayer(input_dim=args.hidden_dim_3,
                                    output_dim=args.hidden_dim_2,
                                    act=tf.nn.relu,
                                    dropout=self.dropout,
                                    logging=self.logging)(self.z)

        self.decoder2 = HiddenLayer(input_dim=args.hidden_dim_2,
                                    output_dim=args.hidden_dim_1,
                                    act=tf.nn.relu,
                                    dropout=self.dropout,
                                    logging=self.logging)(self.decoder1)

        # tanh-bounded reconstruction back to input_dim
        self.reconstructions = HiddenLayer(input_dim=args.hidden_dim_1,
                                           output_dim=self.input_dim,
                                           act=tf.nn.tanh,
                                           dropout=self.dropout,
                                           logging=self.logging)(self.decoder2)

        # separate 2-unit linear head off decoder2 (presumably binary
        # classification logits -- verify against the loss that consumes it)
        self.preds = HiddenLayer(input_dim=args.hidden_dim_1,
                                 output_dim=2,
                                 act=lambda x: x,
                                 dropout=self.dropout,
                                 logging=self.logging)(self.decoder2)
Ejemplo n.º 10
0
    def __init__(self, nkerns=[48], miniBatchSize=200):
        """conv+pool -> tanh hidden -> logistic regression over 65x65 tiles.

        Builds a 3-layer classifier with 2 output classes; the layers are
        collected in self.layers.

        NOTE(review): nkerns is a mutable default argument -- shared across
        calls; safe only while callers never mutate it.

        :param nkerns: number of convolution kernels in layer 0.
        :param miniBatchSize: batch size baked into the conv image_shape.
        """
        rng = numpy.random.RandomState(23455)
        nClasses = 2
        nMaxPool = 2
        nHidden = 200

        # tiles are 65x65, single channel
        self.p = 65
        #self.x = T.tensor3('x')     # membrane data set
        self.x = T.tensor4('x')  # membrane mini-batch
        self.y = T.ivector('y')  # 1D vector of [int] labels
        self.miniBatchSize = miniBatchSize

        # Reshape matrix of rasterized images # to a 4D tensor,
        # compatible with the LeNetConvPoolLayer
        #layer0_input = self.x.reshape((self.miniBatchSize, 1, self.p, self.p))
        layer0_input = self.x

        #--------------------------------------------------
        # LAYER 0
        # layer0 convolution+max pool reduces image dimensions by:
        # 65 -> 62 -> 31
        #--------------------------------------------------
        fs0 = 4  # filter size, layer 0
        # Python 2 integer division; the assert pins the expected output size
        os0 = (self.p - fs0 + 1) / nMaxPool  # image out size 0
        assert (os0 == 31)
        layer0 = LeNetConvPoolLayer(rng,
                                    input=layer0_input,
                                    image_shape=(self.miniBatchSize, 1, self.p,
                                                 self.p),
                                    filter_shape=(nkerns[0], 1, fs0, fs0),
                                    poolsize=(nMaxPool, nMaxPool))

        #--------------------------------------------------
        # LAYER 1
        # Fully connected layer (tanh), goes from
        # X  -> 200
        #--------------------------------------------------
        # flatten conv feature maps to (batch, features)
        layer1_input = layer0.output.flatten(2)
        layer1 = HiddenLayer(rng,
                             input=layer1_input,
                             n_in=nkerns[0] * os0 * os0,
                             n_out=nHidden,
                             activation=T.tanh)

        #--------------------------------------------------
        # LAYER 2
        # Classification via a logistic regression layer
        # 200 -> 2
        #--------------------------------------------------
        # classify the values of the fully-connected layer
        layer2 = LogisticRegression(input=layer1.output,
                                    n_in=nHidden,
                                    n_out=nClasses)

        self.layers = (layer0, layer1, layer2)
Ejemplo n.º 11
0
    def _build(self, args):
        """Three ReLU hidden layers followed by a linear layer that produces
        one score per label (raw logits).
        """
        widths = [self.input_dim, args.hidden_dim_1,
                  args.hidden_dim_2, args.hidden_dim_3]

        # Chain the hidden layers, keeping every activation around so they
        # stay addressable as self.layer1 .. self.layer3.
        activations = []
        signal = self.inputs
        for n_in, n_out in zip(widths, widths[1:]):
            signal = HiddenLayer(input_dim=n_in,
                                 output_dim=n_out,
                                 act=tf.nn.relu,
                                 dropout=self.dropout,
                                 logging=self.logging)(signal)
            activations.append(signal)
        self.layer1, self.layer2, self.layer3 = activations

        # Final layer uses the identity activation: raw per-label scores.
        self.preds = HiddenLayer(input_dim=args.hidden_dim_3,
                                 output_dim=self.num_labels,
                                 act=lambda x: x,
                                 dropout=self.dropout,
                                 logging=self.logging)(self.layer3)
Ejemplo n.º 12
0
 def build_test_model(self, data):
     """Rebuild the LSTM -> Tanh hidden -> ReLU hidden -> full-connect graph
     from saved parameters and evaluate it on the given test data.

     :param data: tuple (data_x, data_y, maxlen) of word-index rows, integer
         labels and the padded sequence length.
     :return: average of full_connect.errors(y) over the test batches;
         NOTE(review): in the single-example branch the return value is the
         output of full_connect.get_predict(), not an error rate -- verify
         how callers consume it.
     """
     rng = np.random.RandomState(3435)
     # parameter tensors saved by a previous training run
     lstm_params, hidden_params, hidden_relu_params, full_connect_params = self.load_trained_params()
     data_x, data_y, maxlen = data
     test_len = len(data_x)
     n_test_batches = test_len // self.batch_size
     x = T.matrix('x')
     y = T.ivector('y')
     index = T.lscalar()
     # embedding lookup table; x carries word indices into Words
     Words = theano.shared(value=self.word_vectors, name="Words", borrow=True)
     input_width = self.hidden_sizes[0]
     layer0_input = T.cast(Words[T.cast(x.flatten(), dtype="int32")], dtype=floatX).reshape((self.batch_size, maxlen, input_width))
     lstm = LSTM(dim=input_width, batch_size=self.batch_size, number_step=maxlen, params=lstm_params)
     # feed_foward (sic) runs the recurrence; mean pooling aggregates over time
     layer0_input = lstm.feed_foward(layer0_input)
     lstm.mean_pooling_input(layer0_input)
     hidden_sizes = [self.hidden_sizes[0], self.hidden_sizes[0]]
     hidden_layer = HiddenLayer(rng, hidden_sizes=hidden_sizes, input_vectors=lstm.output, activation=utils.Tanh, name="Hidden_Tanh", W=hidden_params[0], b=hidden_params[1]) 
     hidden_layer.predict()
     hidden_layer_relu = HiddenLayer(rng, hidden_sizes=hidden_sizes, input_vectors=hidden_layer.output, W=hidden_relu_params[0], b=hidden_relu_params[1])
     hidden_layer_relu.predict()
     # hidden_layer_dropout = HiddenLayerDropout(rng, hidden_sizes=self.hidden_sizes[:2], input_vectors=lstm.output, W=hidden_layer.W, b=hidden_layer.b)
     full_connect = FullConnectLayer(rng, layers_size=[self.hidden_sizes[0], self.hidden_sizes[-1]], 
                                     input_vector=hidden_layer_relu.output, W=full_connect_params[0], b=full_connect_params[1])
     full_connect.predict()
     # test set as shared variables so `givens` can slice by batch index
     test_data_x = theano.shared(np.asarray(data_x, dtype=floatX), borrow=True)
     test_data_y = theano.shared(np.asarray(data_y, dtype='int32'), borrow=True)
   
     errors = 0.
     if test_len == 1:
         # single example: return the raw prediction output for batch 0;
         # y is unused here, hence on_unused_input='ignore'
         test_model = theano.function([index],outputs=full_connect.get_predict(), on_unused_input='ignore', givens={
             x: test_data_x[index * self.batch_size: (index + 1) * self.batch_size],
             y: test_data_y[index * self.batch_size: (index + 1) * self.batch_size]
         })
         index = 0
         avg_errors = test_model(index)
     else:
         # average the per-batch error over all full batches
         test_model = theano.function([index], outputs=full_connect.errors(y), givens={
             x: test_data_x[index * self.batch_size: (index + 1) * self.batch_size],
             y: test_data_y[index * self.batch_size: (index + 1) * self.batch_size]
         })
         for i in xrange(n_test_batches):
             errors += test_model(i)
         avg_errors = errors / n_test_batches
     return avg_errors
Ejemplo n.º 13
0
    def build_symbolic_graph(self):
        """Assemble the (optionally denoising) autoencoder graph.

        Creates self.theano_rng, self.x, self.corrupted, self.hidden_layer,
        self.output_layer and self.layers.  When self.tie_weights is set, the
        output layer reuses the transposed hidden-layer weight matrix.
        """
        self.theano_rng = RandomStreams(self.rng.randint(100))

        # allocate symbolic variables (defaults to floatX)
        self.x = T.matrix('x')

        # Fixed: compare against None with `is`/`is not` (PEP 8 E711), not
        # `== None`, which invokes __eq__ and is the wrong identity test.
        if self.denoising is not None:
            self.corrupted = self.get_corrupted_input(self.x, self.denoising)
        else:
            # multiply by ones to get a distinct symbolic node carrying x's values
            self.corrupted = self.x * T.ones_like(self.x)

        # encoder: n_in -> n_h
        self.hidden_layer = HiddenLayer(self.rng,
                                        input=self.corrupted,
                                        n_in=self.n_in,
                                        n_out=self.n_h,
                                        activation=self.activations[0],
                                        dropout=self.dropout,
                                        params=self.params_init[0])

        if self.tie_weights:
            # Decoder weight is the transposed encoder weight; keep (or create
            # a zero-initialized) independent decoder bias b2.
            if self.params_init[1] is None:
                self.params_init[1] = [
                    self.hidden_layer.params[0].T,
                    theano.shared(np.zeros(self.n_in,
                                           dtype=theano.config.floatX),
                                  name='b2')
                ]
            else:
                self.params_init[1] = [
                    self.hidden_layer.params[0].T, self.params_init[1][1]
                ]

        # decoder: n_h -> n_in (reconstruction of the input)
        self.output_layer = OutputLayer(self.rng,
                                        input=self.hidden_layer.output,
                                        n_in=self.n_h,
                                        n_out=self.n_in,
                                        non_linearities=self.activations[-1],
                                        params=self.params_init[1])

        self.layers = [self.hidden_layer, self.output_layer]
Ejemplo n.º 14
0
    def __init__(self, rng, params, cost_function='mse', optimizer=RMSprop):
        """CNN regressor: four conv(+pool) stages -> 1024-unit ReLU hidden
        layer -> linear output of size params['n_output'].

        Compiles two Theano functions on self: ``train`` (one optimizer step
        returning the cost) and ``predictions``.

        :param rng: random state passed to the layer constructors and dropout.
        :param params: dict with keys "lr", "batch_size", "seq_length",
            "n_output".
        :param cost_function: key understood by get_err_fn (default 'mse').
        :param optimizer: optimizer class, instantiated as
            optimizer(cost, self.params, lr=lr).
        """

        lr = params["lr"]
        batch_size = params["batch_size"]
        sequence_length = params["seq_length"]

        X = T.matrix(name="input", dtype=dtype)  # batch of sequence of vector
        Y = T.matrix(name="output", dtype=dtype)  # batch of sequence of vector
        is_train = T.iscalar(
            'is_train'
        )  # pseudo boolean for switching between training and prediction

        #CNN global parameters.
        subsample = (1, 1)
        p_1 = 0.5  # dropout probability after layer 1
        border_mode = "same"
        cnn_batch_size = batch_size
        pool_size = (2, 2)

        #Layer1: conv2+pool+drop
        filter_shape = (128, 1, 10, 10)
        # assumes each input row is a flattened 144x176 single-channel frame
        input_shape = (cnn_batch_size, 1, 144, 176
                       )  #input_shape= (samples, channels, rows, cols)
        input = X.reshape(input_shape)
        c1 = ConvLayer(rng,
                       input,
                       filter_shape,
                       input_shape,
                       border_mode,
                       subsample,
                       activation=nn.relu)
        p1 = PoolLayer(c1.output,
                       pool_size=pool_size,
                       input_shape=c1.output_shape)
        dl1 = DropoutLayer(rng, input=p1.output, prob=p_1, is_train=is_train)

        #Layer2: conv2+pool
        subsample = (1, 1)
        filter_shape = (256, p1.output_shape[1], 3, 3)
        c2 = ConvLayer(rng,
                       dl1.output,
                       filter_shape,
                       p1.output_shape,
                       border_mode,
                       subsample,
                       activation=nn.relu)
        p2 = PoolLayer(c2.output,
                       pool_size=pool_size,
                       input_shape=c2.output_shape)

        #Layer3: conv2+pool
        filter_shape = (256, p2.output_shape[1], 3, 3)
        c3 = ConvLayer(rng,
                       p2.output,
                       filter_shape,
                       p2.output_shape,
                       border_mode,
                       subsample,
                       activation=nn.relu)
        p3 = PoolLayer(c3.output,
                       pool_size=pool_size,
                       input_shape=c3.output_shape)

        #Layer4: conv2+pool
        filter_shape = (128, p3.output_shape[1], 3, 3)
        c4 = ConvLayer(rng,
                       p3.output,
                       filter_shape,
                       p3.output_shape,
                       border_mode,
                       subsample,
                       activation=nn.relu)
        p4 = PoolLayer(c4.output,
                       pool_size=pool_size,
                       input_shape=c4.output_shape)

        #Layer5: hidden
        # flatten the last pooled feature maps to (batch, features)
        n_in = reduce(lambda x, y: x * y, p4.output_shape[1:])
        x_flat = p4.output.flatten(2)

        h1 = HiddenLayer(rng, x_flat, n_in, 1024, activation=nn.relu)

        #Layer6: linear regression head
        lreg = LogisticRegression(rng, h1.output, 1024, params['n_output'])
        self.output = lreg.y_pred

        self.params = c1.params + c2.params + c3.params + c4.params + h1.params + lreg.params

        cost = get_err_fn(self, cost_function, Y)
        L2_reg = 0.0001
        L2_sqr = theano.shared(0.)
        for param in self.params:
            # NOTE(review): this penalizes only param[0] and param[1] (the
            # first two rows/elements of each tensor), not the whole tensor --
            # verify this is the intended L2 term.
            L2_sqr += (T.sum(param[0]**2) + T.sum(param[1]**2))

        cost += L2_reg * L2_sqr

        _optimizer = optimizer(cost, self.params, lr=lr)
        self.train = theano.function(inputs=[X, Y, is_train],
                                     outputs=cost,
                                     updates=_optimizer.getUpdates(),
                                     allow_input_downcast=True)
        self.predictions = theano.function(inputs=[X, is_train],
                                           outputs=self.output,
                                           allow_input_downcast=True)
        self.n_param = count_params(self.params)
Ejemplo n.º 15
0
def cifar_fast_net(batch_size=128,n_epochs=300,test_frequency=13, learning_rate=0.001):
    """Train a cuda-convnet style 'CIFAR fast' network on CIFAR-10.

    Architecture: three conv+pool layers (max pooling, then two average
    poolings) -> 64-unit fully-connected layer -> 10-way softmax.  Training
    is hand-rolled SGD with per-layer momentum and weight decay, early
    stopping on validation error, a one-off learning-rate drop at epoch 50,
    and pickling of the best parameters seen so far.

    Parameters
    ----------
    batch_size : int
        Minibatch size shared by the train/valid/test theano functions.
    n_epochs : int
        Maximum number of passes over the training set.
    test_frequency : int
        Unused in this body; validation cadence is ``validation_frequency``.
    learning_rate : float
        Unused in this body; the per-layer ``epsW``/``epsB`` values passed
        to the layers below are what actually drive the updates.
    """

    # Fixed seeds so each layer's weight initialisation is reproducible.
    # NOTE(review): rng6 is created but never used.
    rng1 = numpy.random.RandomState(23455)
    rng2 = numpy.random.RandomState(12423)
    rng3 = numpy.random.RandomState(23245)
    rng4 = numpy.random.RandomState(12123)
    rng5 = numpy.random.RandomState(25365)
    rng6 = numpy.random.RandomState(15323)
    train_set_x, train_set_y = load_cifar_data(['data_batch_1','data_batch_2','data_batch_3','data_batch_4'])
    valid_set_x, valid_set_y = load_cifar_data(['data_batch_5'],WHICHSET='valid')
    test_set_x, test_set_y = load_cifar_data(['test_batch'],WHICHSET='test')

    # Number of minibatches per split; Python 2 integer division truncates.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    index = T.lscalar()  # minibatch index

    x = T.matrix('x')    # rasterized images
    y = T.ivector('y')   # integer class labels

    # Reshape to (batch, channel, height, width), then dimshuffle to the
    # cuda-convnet layout (channel, height, width, batch) expected by the
    # image_shape arguments below.
    img_input = x.reshape((batch_size,3,32,32))
    img_input = img_input.dimshuffle(1,2,3,0)
####define the layers:
    conv_pool1 = LeNetConvPoolLayer(rng=rng1,input=img_input,
                                    filter_shape=(3,5,5,32),
                                    image_shape=(3,32,32,batch_size),
                                    activation='vshape',
                                    poolsize=(3,3),poolstride=2,pad=2,
                                    convstride=1,initW=0.0001,initB=0,partial_sum=4,
                                    pooling='max',
                                    epsW=0.001,
                                    epsB=0.002,
                                    momW=0.9,
                                    momB=0.9,
                                    wc=0.004
                                    )

    conv_pool2 = LeNetConvPoolLayer(rng=rng2,input=conv_pool1.output,
                                    filter_shape=(32,5,5,32),
                                    image_shape=(32,16,16,batch_size),
                                    activation='vshape',
                                    poolsize=(3,3),poolstride=2,pad=2,
                                    convstride=1,initW=0.01,initB=0,partial_sum=4,
                                    pooling='average',
                                    epsW=0.001,
                                    epsB=0.002,
                                    momW=0.9,
                                    momB=0.9,
                                    wc=0.004)
    conv_pool3 = LeNetConvPoolLayer(rng=rng3,input=conv_pool2.output,
                                    filter_shape=(32,5,5,64),
                                    image_shape=(32,8,8,batch_size),
                                    activation='vshape',
                                    poolsize=(3,3),poolstride=2,pad=2,
                                    convstride=1,initW=0.01,initB=0,partial_sum=4,
                                    pooling='average',
                                    epsW=0.001,
                                    epsB=0.002,
                                    momW=0.9,
                                    momB=0.9,
                                    wc=0.004)

    # Back to (batch, channel, h, w) and flatten for the fully-connected layers.
    layer4_input = conv_pool3.output.dimshuffle(3,0,1,2).flatten(2)
    #fc_64 = HiddenLayer(rng=rng4,input=layer4_input,n_in=64*4*4,n_out=64,initW=0.1,initB=0)
    fc_64 = HiddenLayer(rng=rng4,input=layer4_input,n_in=64*4*4,n_out=64,initW=0.1,initB=0,
                        epsW=0.001,
                        epsB=0.002,
                        momW=0.9,
                        momB=0.9,
                        wc=0.03)
    fc_10 = LogisticRegression(input=fc_64.output,rng=rng5,n_in=64,n_out=10,initW=0.1,
                               epsW=0.001,
                                epsB=0.002,
                                momW=0.9,
                                momB=0.9,
                                wc=0.03)
####build the models:
    cost = fc_10.negative_log_likelihood(y)
    test_model = theano.function([index], fc_10.errors(y),
             givens={
                x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]})

    validate_model = theano.function([index], fc_10.errors(y),
            givens={
                x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

    # Collect per-layer parameters and their hyperparameters into parallel
    # lists so the update rules below can be written once and zipped.
    # pgradWs/pgradbs are shared variables holding the previous update step
    # (the momentum "velocity").
    Ws = [conv_pool1.W, conv_pool2.W, conv_pool3.W, fc_64.W, fc_10.W]
    pgradWs = [conv_pool1.grad_W, conv_pool2.grad_W, conv_pool3.grad_W, fc_64.grad_W, fc_10.grad_W]

    bs = [conv_pool1.b, conv_pool2.b, conv_pool3.b, fc_64.b, fc_10.b]
    pgradbs = [conv_pool1.grad_b, conv_pool2.grad_b, conv_pool3.grad_b, fc_64.grad_b, fc_10.grad_b]

    momWs = [conv_pool1.momW, conv_pool2.momW, conv_pool3.momW, fc_64.momW, fc_10.momW]
    momBs = [conv_pool1.momB, conv_pool2.momB, conv_pool3.momB, fc_64.momB, fc_10.momB]
    wcs = [conv_pool1.wc, conv_pool2.wc, conv_pool3.wc, fc_64.wc, fc_10.wc]
    epsWs = [conv_pool1.epsW, conv_pool2.epsW, conv_pool3.epsW, fc_64.epsW, fc_10.epsW]
    epsBs = [conv_pool1.epsB, conv_pool2.epsB, conv_pool3.epsB, fc_64.epsB, fc_10.epsB]

    gradWs = T.grad(cost, Ws)
    gradbs = T.grad(cost, bs)
    updates = []
    # SGD + momentum with L2 weight decay on the weights:
    # step = -eps*grad - wc*eps*W + mom*previous_step; biases get no decay.
    for W_i, gradW_i, momW_i, wc_i, epsW_i, pgW_i in zip(Ws,gradWs,momWs,wcs, epsWs,pgradWs):
        grad_i = - epsW_i*gradW_i - wc_i*epsW_i*W_i + momW_i*pgW_i
        updates.append((W_i, W_i+grad_i))
        updates.append((pgW_i,grad_i))

    for b_i, gradb_i, momB_i, epsB_i, pgB_i in zip(bs,gradbs,momBs, epsBs,pgradbs):
        grad_i = - epsB_i*gradb_i + momB_i*pgB_i
        updates.append((b_i, b_i+grad_i))
        updates.append((pgB_i,grad_i))






    train_model = theano.function([index], cost, updates=updates,
          givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatche before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        #below is the code for reduce learning_rate
        ###########################################

        # One-off learning-rate schedule: at epoch 50, divide every layer's
        # eps by 10 and rebuild the update graph plus the train function.
        # NOTE(review): this assumes epsW/epsB enter the graph as plain
        # Python numbers (so new values require recompilation) -- confirm
        # against the layer classes.
        if epoch == 50:
            epsWs = [k/10.0 for k in epsWs]
            epsBs = [k/10.0 for k in epsBs]
            print 'reduce eps by a factor of 10'
            updates = []
            for W_i, gradW_i, momW_i, wc_i, epsW_i, pgW_i in zip(Ws,gradWs,momWs,wcs, epsWs,pgradWs):
                grad_i = - epsW_i*gradW_i - wc_i*epsW_i*W_i + momW_i*pgW_i
                updates.append((W_i, W_i+grad_i))
                updates.append((pgW_i,grad_i))

            for b_i, gradb_i, momB_i, epsB_i, pgB_i in zip(bs,gradbs,momBs, epsBs,pgradbs):
                grad_i = - epsB_i*gradb_i + momB_i*pgB_i
                updates.append((b_i, b_i+grad_i))
                updates.append((pgB_i,grad_i))
            train_model = theano.function([index], cost, updates=updates,
              givens={
                x: train_set_x[index * batch_size: (index + 1) * batch_size],
                y: train_set_y[index * batch_size: (index + 1) * batch_size]})

        ##############################################
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                      (epoch, minibatch_index + 1, n_train_batches, \
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    # Snapshot current weights (copies, so later training
                    # does not overwrite the best-so-far values).
                    conv_pool1.bestW = conv_pool1.W.get_value().copy()
                    conv_pool1.bestB = conv_pool1.b.get_value().copy()
                    conv_pool2.bestW = conv_pool2.W.get_value().copy()
                    conv_pool2.bestB = conv_pool2.b.get_value().copy()
                    conv_pool3.bestW = conv_pool3.W.get_value().copy()
                    conv_pool3.bestB = conv_pool3.b.get_value().copy()
                    fc_64.bestW = fc_64.W.get_value().copy()
                    fc_64.bestB = fc_64.b.get_value().copy()
                    fc_10.bestW = fc_10.W.get_value().copy()
                    fc_10.bestB = fc_10.b.get_value().copy()

                    ##saving current best
                    print 'saving current best params..'
                    current_params = (conv_pool1.bestW,conv_pool1.bestB,conv_pool2.bestW,
                    conv_pool2.bestB,conv_pool3.bestW,conv_pool3.bestB,fc_64.bestW,fc_64.bestB,
                    fc_10.bestW,fc_10.bestB,momWs,momBs,epsWs,epsBs,wcs)
                    outfile = file('current_best_params.pkl','wb')
                    cPickle.dump(current_params,outfile)
                    outfile.close()


                    # test it on the test set
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of best '
                           'model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i,'\
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
Ejemplo n.º 16
0
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                image_shape=signals_shape,
                filter_shape=filters_shape, poolsize=(2, 2))

    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
            image_shape=(batch_size, flt_channels, layer1_w, layer1_h),
            filter_shape=(60, flt_channels, 3, 3), poolsize=(2, 2))


    layer2 = LeNetConvPoolLayer(rng, input=layer1.output,
                image_shape=(batch_size, 60, layer2_w, layer2_h),
                filter_shape=(90, 60, 3, 3), poolsize=(2, 2))
    layer3_input = layer2.output.flatten(2)


    layer3 = HiddenLayer(rng, input=layer3_input, n_in=90 * layer3_w * layer3_h  ,
                         n_out=500, activation=T.tanh)
  

    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8)   # change the number of output labels


    cost = layer4.negative_log_likelihood(y)

    classify = theano.function([index], outputs=layer4.get_output_labels(y),
                               givens={
                                   x: test_set_x[index * batch_size: (index + 1) * batch_size],
                                   y: test_set_y[index * batch_size: (index + 1) * batch_size]})
           
    # load weights  
    print 'loading weights state'
    f = file('weights.save', 'rb')
  def __init__(self, configfile, train = False):
    """Build a three-context convolutional network from a config file.

    Constructs one conv + k-max-pooling layer applied with *shared* weights
    to the left / middle / right context matrices, a hidden layer (also
    weight-shared) embedding the two entity NER label vectors, a joint
    hidden layer over the concatenation of all of the above plus one extra
    scalar feature, and a 23-way logistic regression output.  When
    ``train`` is False the parameters are loaded from the pickled network
    file named in the config.

    Parameters
    ----------
    configfile : str
        Path to a configuration file understood by ``readConfig``.
    train : bool
        If True, use the configured batch size and leave the parameters
        randomly initialised; if False, force batch size 1 and load
        parameters from the configured network file.
    """

    self.config = readConfig(configfile)

    # Number of extra hand-crafted features appended before the joint layer.
    self.addInputSize = 1
    logger.info("additional mlp input")

    # Pull hyperparameters out of the config, logging each one.
    wordvectorfile = self.config["wordvectors"]
    logger.info("wordvectorfile " + str(wordvectorfile))
    networkfile = self.config["net"]
    logger.info("networkfile " + str(networkfile))
    hiddenunits = int(self.config["hidden"])
    logger.info("hidden units " + str(hiddenunits))
    hiddenunitsNER = 50
    if "hiddenunitsNER" in self.config:
      hiddenunitsNER = int(self.config["hiddenunitsNER"])
    logger.info("hidden units NER " + str(hiddenunitsNER))
    learning_rate = float(self.config["lrate"])
    logger.info("learning rate " + str(learning_rate))
    if train:
      self.batch_size = int(self.config["batchsize"])
    else:
      self.batch_size = 1
    logger.info("batch size " + str(self.batch_size))
    self.filtersize = [1,int(self.config["filtersize"])]
    nkerns = [int(self.config["nkerns"])]
    logger.info("nkerns " + str(nkerns))
    pool = [1, int(self.config["kmax"])]

    self.contextsize = int(self.config["contextsize"])
    logger.info("contextsize " + str(self.contextsize))

    # Clamp filter width to the context width so the convolution is valid.
    if self.contextsize < self.filtersize[1]:
      logger.info("setting filtersize to " + str(self.contextsize))
      self.filtersize[1] = self.contextsize
    logger.info("filtersize " + str(self.filtersize))

    sizeAfterConv = self.contextsize - self.filtersize[1] + 1

    # Clamp k (of k-max pooling) to the post-convolution width.
    sizeAfterPooling = -1
    if sizeAfterConv < pool[1]:
      logger.info("setting poolsize to " + str(sizeAfterConv))
      pool[1] = sizeAfterConv
    sizeAfterPooling = pool[1]
    logger.info("kmax pooling: k = " + str(pool[1]))

    # reading word vectors
    self.wordvectors, self.vectorsize = readWordvectors(wordvectorfile)

    # One extra row per token on top of the word vector itself.
    self.representationsize = self.vectorsize + 1

    rng = numpy.random.RandomState(23455)
    if train:
      seed = rng.get_state()[1][0]
      logger.info("seed: " + str(seed))

    # allocate symbolic variables for the data
    self.index = T.lscalar()  # index to a [mini]batch
    self.xa = T.matrix('xa')   # left context
    self.xb = T.matrix('xb')   # middle context
    self.xc = T.matrix('xc')   # right context
    self.y = T.imatrix('y')   # label (only present in training)
    self.yNER1 = T.imatrix('yNER1') # label for first entity
    self.yNER2 = T.imatrix('yNER2') # label for second entity
    ishape = [self.representationsize, self.contextsize]  # this is the size of context matrizes

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    logger.info('... building the model')

    # Reshape input matrix to be compatible with our LeNetConvPoolLayer
    layer0a_input = self.xa.reshape((self.batch_size, 1, ishape[0], ishape[1]))
    layer0b_input = self.xb.reshape((self.batch_size, 1, ishape[0], ishape[1]))
    layer0c_input = self.xc.reshape((self.batch_size, 1, ishape[0], ishape[1]))

    self.y_reshaped = self.y.reshape((self.batch_size, 1))
    yNER1reshaped = self.yNER1.reshape((self.batch_size, 1))
    yNER2reshaped = self.yNER2.reshape((self.batch_size, 1))

    # Construct convolutional pooling layer:
    # Glorot-style uniform initialisation bound from fan-in/fan-out.
    filter_shape = (nkerns[0], 1, self.representationsize, self.filtersize[1])
    poolsize=(pool[0], pool[1])
    fan_in = numpy.prod(filter_shape[1:])
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
              numpy.prod(poolsize))
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))
    # the convolution weight matrix
    convW = theano.shared(numpy.asarray(
           rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
           dtype=theano.config.floatX),
                               borrow=True)
    # the bias is a 1D tensor -- one bias per output feature map
    b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
    convB = theano.shared(value=b_values, borrow=True)

    # The same convW/convB shared variables are passed to all three
    # layers, so the three contexts are convolved with tied weights.
    self.layer0a = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0a_input,
            image_shape=(self.batch_size, 1, ishape[0], ishape[1]),
            filter_shape=filter_shape, poolsize=poolsize)
    self.layer0b = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0b_input,
            image_shape=(self.batch_size, 1, ishape[0], ishape[1]),
            filter_shape=filter_shape, poolsize=poolsize)
    self.layer0c = LeNetConvPoolLayer(rng, W=convW, b=convB, input=layer0c_input,
            image_shape=(self.batch_size, 1, ishape[0], ishape[1]),
            filter_shape=filter_shape, poolsize=poolsize)

    #layer0_output = T.concatenate([self.layer0a.output, self.layer0b.output, self.layer0c.output], axis = 3)
    layer0aflattened = self.layer0a.output.flatten(2).reshape((self.batch_size, nkerns[0] * sizeAfterPooling))
    layer0bflattened = self.layer0b.output.flatten(2).reshape((self.batch_size, nkerns[0] * sizeAfterPooling))
    layer0cflattened = self.layer0c.output.flatten(2).reshape((self.batch_size, nkerns[0] * sizeAfterPooling))
    layer0_output = T.concatenate([layer0aflattened, layer0bflattened, layer0cflattened], axis = 1)

    # NER label embeddings for the two entities, with tied weights
    # (layer1b reuses layer1a's W and b).
    # NOTE(review): n_in=6 implies the NER label is a 6-dim vector -- confirm.
    self.layer1a = HiddenLayer(rng = rng, input = self.yNER1, n_in = 6, n_out = hiddenunitsNER, activation = T.tanh)
    self.layer1b = HiddenLayer(rng = rng, input = self.yNER2, n_in = 6, n_out = hiddenunitsNER, activation = T.tanh, W = self.layer1a.W, b = self.layer1a.b)


    layer2_input = T.concatenate([layer0_output, self.layer1a.output, self.layer1b.output], axis = 1)
    layer2_inputSize = 3 * nkerns[0] * sizeAfterPooling + 2 * hiddenunitsNER

    # Append the extra hand-crafted feature(s) to the joint representation.
    self.additionalFeatures = T.matrix('additionalFeatures')
    additionalFeatsShaped = self.additionalFeatures.reshape((self.batch_size, 1))
    layer2_input = T.concatenate([layer2_input, additionalFeatsShaped], axis = 1)
    layer2_inputSize += self.addInputSize

    self.layer2 = HiddenLayer(rng, input=layer2_input, n_in=layer2_inputSize,
                         n_out=hiddenunits, activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    self.layer3 = LogisticRegression(input=self.layer2.output, n_in=hiddenunits, n_out=23)

    # create a list of all model parameters
    # Only layer0a and layer1a appear: layer0b/layer0c and layer1b share
    # their parameters, so listing them again would duplicate updates.
    self.paramList = [self.layer3.params, self.layer2.params, self.layer1a.params, self.layer0a.params]
    self.params = []
    for p in self.paramList:
      self.params += p
      logger.info(p)

    if not train:
      self.gotNetwork = 1
      # load parameters
      if not os.path.isfile(networkfile):
        logger.error("network file does not exist")
        self.gotNetwork = 0
      else:
        # Parameters were pickled sequentially in self.params order.
        save_file = open(networkfile, 'rb')
        for p in self.params:
          p.set_value(cPickle.load(save_file), borrow=False)
        save_file.close()
Ejemplo n.º 18
0
def learnAndPredict(Ti, C, TOList):
    """Train a pairwise CNN on traces Ti with connectivity labels C, then
    score every ordered pair in each matrix of TOList.

    The input matrices are normalised in place: rows ``[:1000]`` are
    min-max scaled then standardised with the *training* matrix's
    statistics, and row ``1000`` (an extra global trace appended to every
    sample -- presumably a population/stimulus signal, TODO confirm) is
    normalised with its own statistics.  Training pairs alternate between
    positive (C[s][t]==1) and negative pairs, each a random window of
    length ``L`` from the two traces plus the global trace.  The network
    is three conv layers -> tanh hidden layer -> 2-way softmax, trained
    with momentum SGD, a ramped momentum schedule, a max column-norm
    constraint, and a linearly decaying learning rate.

    Relies on module-level constants: SEED, NN, NUM_TRAIN, L, Q, POOL,
    nkerns, batch_size, n_epochs, learning_rate0, start_time0.

    Parameters
    ----------
    Ti : 2-D array, training traces (modified in place by normalisation).
    C : 2-D 0/1 array, ground-truth connectivity between trace pairs.
    TOList : list of 2-D arrays to score (modified in place).

    Returns
    -------
    list of (N, N) arrays, one per entry of TOList, each scaled by its max.
    """

    rng = np.random.RandomState(SEED)
    learning_rate = learning_rate0
    print np.mean(Ti[1000, :])
    # Normalise the first 1000 rows: min-max to [0,1], then z-score.
    aminW = np.amin(Ti[:1000, :])
    amaxW = np.amax(Ti[:1000, :])
    Ti[:1000, :] = (Ti[:1000, :] - aminW) / (amaxW - aminW)
    astdW = np.std(Ti[:1000, :])
    ameanW = np.mean(Ti[:1000, :])
    Ti[:1000, :] = (Ti[:1000, :] - ameanW) / astdW
    # Row 1000 (the global trace) gets its own normalisation statistics.
    aminacW = np.amin(Ti[1000, :])
    amaxacW = np.amax(Ti[1000, :])
    print aminW, amaxW, aminacW, amaxacW
    Ti[1000, :] = (Ti[1000, :] - aminacW) / (amaxacW - aminacW)
    astdacW = np.std(Ti[1000, :])
    ameanacW = np.mean(Ti[1000, :])
    Ti[1000, :] = (Ti[1000, :] - ameanacW) / astdacW

    # Normalise each prediction matrix with the TRAINING statistics above,
    # so train and test inputs share the same scale.
    ile__ = len(TOList)
    ileList = np.zeros(ile__)
    for titer in range(len(TOList)):
        print np.mean(TOList[titer][1000, :])
        TOList[titer][:1000, :] = (TOList[titer][:1000, :] - aminW) / (amaxW -
                                                                       aminW)
        TOList[titer][:1000, :] = (TOList[titer][:1000, :] - ameanW) / astdW
        TOList[titer][1000, :] = (TOList[titer][1000, :] -
                                  aminacW) / (amaxacW - aminacW)
        TOList[titer][1000, :] = (TOList[titer][1000, :] - ameanacW) / astdacW
        _, ileList[titer] = TOList[titer].shape

    _, ile = Ti.shape
    N = NN

    # Build the training set: alternate positive and negative pairs,
    # each sample a random length-L window of [source, target, global].
    data = []
    yyy = []
    need = 1
    BYL = {}
    j = 0
    dwa = 0
    ONES = []
    ZEROS = []
    for i in range(NN):
        for j in range(NN):
            if i != j:
                if C[i][j] == 1:
                    ONES.append((i, j))
                else:
                    ZEROS.append((i, j))
    Nones = len(ONES)
    rng.shuffle(ONES)
    Nzeros = len(ZEROS)
    print Nones
    print Nzeros
    Needed = NUM_TRAIN / 2
    onesPerPair = Needed / Nones + 1
    onesIter = 0
    jj = 0
    while jj < NUM_TRAIN:
        if jj % 300000 == 0:
            print jj / 300000,
        need = 1 - need
        if need == 1:
            # Positive pairs are cycled; each may be reused with new windows.
            pairNo = onesIter % Nones
            ppp = onesIter / Nones
            s, t = ONES[pairNo]
            shift = rng.randint(0, ile - L)
            onesIter += 1
        if need == 0:
            # Negative pairs are sampled without replacement.
            zer = rng.randint(Nzeros)
            s, t = ZEROS[zer]
            del ZEROS[zer]
            Nzeros -= 1
            shift = rng.randint(0, ile - L)
        x = np.hstack((Ti[s][shift:shift + L], Ti[t][shift:shift + L],
                       Ti[1000][shift:shift + L]))
        y = C[s][t]
        data.append(x)
        yyy.append(y)
        jj += 1

    data = np.array(data, dtype=theano.config.floatX)
    # Deterministic 96/2/2 train/test/valid split per 100 samples.
    is_train = np.array(([0] * 96 + [1, 1, 2, 2]) * (NUM_TRAIN / 100))
    yyy = np.array(yyy)

    train_set_x0, train_set_y0 = np.array(
        data[is_train == 0]), yyy[is_train == 0]
    test_set_x, test_set_y = np.array(data[is_train == 1]), yyy[is_train == 1]
    valid_set_x, valid_set_y = np.array(
        data[is_train == 2]), yyy[is_train == 2]
    n_train_batches = len(train_set_y0) / batch_size
    n_valid_batches = len(valid_set_y) / batch_size
    n_test_batches = len(test_set_y) / batch_size
    epoch = T.scalar()
    index = T.lscalar()
    x = T.matrix('x')
    inone2 = T.matrix('inone2')
    y = T.ivector('y')
    print '... building the model'
    #-------- my layers -------------------

    #---------------------
    # Each sample is a 3xL "image": source row, target row, global row.
    layer0_input = x.reshape((batch_size, 1, 3, L))
    Cx = 5
    layer0 = ConvolutionalLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 1, 3, L),
                                filter_shape=(nkerns[0], 1, 2, Cx),
                                poolsize=(1, 1),
                                fac=0)
    # Track output height/width after each valid convolution.
    ONE = (3 - 2 + 1) / 1
    L2 = (L - Cx + 1) / 1
    #---------------------
    Cx2 = 5
    layer1 = ConvolutionalLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], ONE, L2),
                                filter_shape=(nkerns[1], nkerns[0], 2, Cx2),
                                poolsize=(1, 1),
                                activation=ReLU,
                                fac=0)
    ONE = (ONE - 2 + 1) / 1
    L3 = (L2 - Cx2 + 1) / 1
    #---------------------
    Cx3 = 1
    layer1b = ConvolutionalLayer(rng,
                                 input=layer1.output,
                                 image_shape=(batch_size, nkerns[1], ONE, L3),
                                 filter_shape=(nkerns[2], nkerns[1], 1, Cx3),
                                 poolsize=(1, POOL),
                                 activation=ReLU,
                                 fac=0)
    ONE = (ONE - 1 + 1) / 1
    L4 = (L3 - Cx3 + 1) / POOL

    REGx = 100
    #---------------------
    layer2_input = layer1b.output.flatten(2)
    print layer2_input.shape
    use_b = False
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[2] * L4,
                         n_out=REGx,
                         activation=T.tanh,
                         use_bias=use_b)
    layer3 = LogisticRegression(input=layer2.output, n_in=REGx, n_out=2)

    cost = layer3.negative_log_likelihood(y)
    # Shared buffers reused at prediction time (filled via set_value below).
    out_x2 = theano.shared(
        np.asarray(np.zeros((N, L)), dtype=theano.config.floatX))
    inone2 = theano.shared(
        np.asarray(np.zeros((1, L)), dtype=theano.config.floatX))
    inone3 = theano.shared(
        np.asarray(np.zeros((1, L)), dtype=theano.config.floatX))
    inone4 = theano.shared(
        np.asarray(np.zeros((1, L)), dtype=theano.config.floatX))
    test_set_x = theano.shared(
        np.asarray(test_set_x, dtype=theano.config.floatX))
    train_set_x = theano.shared(
        np.asarray(train_set_x0, dtype=theano.config.floatX))
    train_set_y = T.cast(
        theano.shared(np.asarray(train_set_y0, dtype=theano.config.floatX)),
        'int32')
    test_set_y = T.cast(
        theano.shared(np.asarray(test_set_y, dtype=theano.config.floatX)),
        'int32')
    valid_set_y = T.cast(
        theano.shared(np.asarray(valid_set_y, dtype=theano.config.floatX)),
        'int32')
    valid_set_x = theano.shared(
        np.asarray(valid_set_x, dtype=theano.config.floatX))

    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # Momentum ramps linearly from mom_start to mom_end over all epochs.
    mom_start = 0.5
    mom_end = 0.98
    mom_epoch_interval = n_epochs * 1.0
    #### @@@@@@@@@@@
    class_params0 = [layer3, layer2, layer1, layer1b, layer0]
    class_params = [param for layer in class_params0 for param in layer.params]

    gparams = []
    for param in class_params:
        gparam = T.grad(cost, param)
        gparams.append(gparam)
    # One velocity shared variable per parameter.
    gparams_mom = []
    for param in class_params:
        gparam_mom = theano.shared(
            np.zeros(param.get_value(borrow=True).shape,
                     dtype=theano.config.floatX))
        gparams_mom.append(gparam_mom)
    mom = ifelse(
        epoch < mom_epoch_interval,
        mom_start * (1.0 - epoch / mom_epoch_interval) + mom_end *
        (epoch / mom_epoch_interval), mom_end)
    updates = OrderedDict()
    for gparam_mom, gparam in zip(gparams_mom, gparams):
        updates[gparam_mom] = mom * gparam_mom - (1. -
                                                  mom) * learning_rate * gparam
    # Apply the step, then clip 2-D parameters to a max column norm
    # (sqrt(15)) -- the max-norm regularisation from the dropout paper.
    for param, gparam_mom in zip(class_params, gparams_mom):
        stepped_param = param + updates[gparam_mom]
        squared_filter_length_limit = 15.0
        if param.get_value(borrow=True).ndim == 2:
            col_norms = T.sqrt(T.sum(T.sqr(stepped_param), axis=0))
            desired_norms = T.clip(col_norms, 0,
                                   T.sqrt(squared_filter_length_limit))
            scale = desired_norms / (1e-7 + col_norms)
            updates[param] = stepped_param * scale
        else:
            updates[param] = stepped_param

    output = cost
    train_model = theano.function(
        inputs=[epoch, index],
        outputs=output,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    keep = theano.function(
        [index],
        layer3.errorsFull(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        },
        on_unused_input='warn')

    timer = time.clock()
    print "finished reading", (timer - start_time0) / 60., "minutes "

    # TRAIN MODEL #
    print '... training'
    validation_frequency = n_train_batches
    best_params = None
    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    epochc = 0

    while (epochc < n_epochs):
        epochc = epochc + 1
        # Linearly decaying learning rate (note: the compiled update graph
        # captured the initial learning_rate; this rebinding only affects
        # Python-side state -- TODO confirm intended).
        learning_rate = learning_rate0 * (1.2 - ((1.0 * epochc) / n_epochs))
        for minibatch_index in xrange(n_train_batches):
            iter = (epochc - 1) * n_train_batches + minibatch_index
            cost_ij = train_model(epochc, minibatch_index)
            if (iter + 1) % validation_frequency == 0:
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)
                print(' %i) err %.2f ' % (epochc, this_validation_loss / 10)
                      ), L, nkerns, REGx, "|", Cx, Cx2, Cx3, batch_size
                if this_validation_loss < best_validation_loss or epochc % 30 == 0:
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_score = np.mean(test_losses)
                    print(
                        ('     epoch %i, minibatch %i/%i, test error of best '
                         'model %f %%') % (epochc, minibatch_index + 1,
                                           n_train_batches, test_score / 10))
    ############
    timel = time.clock()
    print "finished learning", (timel - timer) / 60., "minutes "
    # Prediction function: input is [tiled source row | batch of target
    # rows | tiled global row]; y is given but unused (hence the warn).
    ppm = theano.function(
        [index],
        layer3.pred_proba_mine(),
        givens={
            x:
            T.horizontal_stack(
                T.tile(inone2, (batch_size, 1)),
                out_x2[index * batch_size:(index + 1) * batch_size],
                T.tile(inone3, (batch_size, 1))),
            y:
            train_set_y[0 * (batch_size):(0 + 1) * (batch_size)]
        },
        on_unused_input='warn')

    NONZERO = (N * N - N)
    gc.collect()
    # Score every (i, pos) pair for each test matrix over Q shifted
    # windows, accumulating P(connected) into RES.
    RESList = [np.zeros((N, N)) for it in range(ile__)]
    for __net in range(ile__):
        TO = TOList[__net]
        ileO = ileList[__net]
        RES = RESList[__net]
        shift = 0.1
        DELTAshift = (ileO - L) / (Q - 1)
        print "DELTAshift:", DELTAshift
        for q in range(Q):
            dataO = []
            print(q + 1), "/", Q, "  ",
            out_x2.set_value(
                np.asarray(np.array(TO[:, shift:shift + L]),
                           dtype=theano.config.floatX))
            PARTIAL = np.zeros((N, N))
            inone3.set_value(
                np.asarray(np.array(TO[1000][shift:shift + L]).reshape(1, L),
                           dtype=theano.config.floatX))
            for i in range(N):
                inone2.set_value(
                    np.asarray(np.array(TO[i][shift:shift + L]).reshape(1, L),
                               dtype=theano.config.floatX))
                p = [ppm(ii) for ii in xrange(N / batch_size)]
                for pos in range(N):
                    if pos != i:
                        PARTIAL[i][pos] += p[pos / batch_size][pos %
                                                               batch_size][1]
            for i in range(N):
                for j in range(N):
                    RES[i][j] += PARTIAL[i][j]
            shift += DELTAshift
        print "Finished", __net
        # Scale scores to [0, 1] by the maximum accumulated probability.
        RESList[__net] = RES / np.max(RES)
        gc.collect()

    end_time = time.clock()
    print "finished predicting", (end_time - timel) / 60., "minutes ", str(
        nkerns), "using SEED = ", SEED
    print('The code for file ' + os.path.split(__file__)[1] +
          ' ran for %.2fm' % ((end_time - start_time0) / 60.))
    return RESList
Ejemplo n.º 19
0
def train_rep(
    learning_rate=0.002,
    L1_reg=0.0002,
    L2_reg=0.005,
    n_epochs=200,
    nkerns=None,
    batch_size=25,
):
    """Train a LeNet-style convolutional network on the "rep" dataset.

    Training and validation data are streamed file-by-file from gzipped HDF5
    chunks under ``../out/h5/``.  After every epoch the mean zero-one
    validation loss is printed and a pickled snapshot of all layer weights
    is written to ``./weights/``.

    :param learning_rate: SGD step size.
    :param L1_reg: coefficient of the L1 penalty on the layer weight matrices.
    :param L2_reg: coefficient of the squared-L2 weight penalty.
    :param n_epochs: maximum number of passes over the training files.
    :param nkerns: unused; accepted for interface compatibility and defaulted
        to ``[20, 50]`` (``None`` sentinel avoids a mutable default argument).
    :param batch_size: minibatch size.
    :return: nothing; progress and results are printed, weights are pickled.
    """
    if nkerns is None:
        nkerns = [20, 50]

    rng = numpy.random.RandomState(23455)

    train_dir = "../out/h5/"
    valid_dir = "../out/h5/"

    weights_dir = "./weights/"

    print("... load input data")
    filename = train_dir + "rep_train_data_1.gzip.h5"
    datasets = load_initial_data(filename)
    train_set_x, train_set_y, shared_train_set_y = datasets

    filename = valid_dir + "rep_valid_data_1.gzip.h5"
    datasets = load_initial_data(filename)
    valid_set_x, valid_set_y, shared_valid_set_y = datasets

    mydatasets = load_initial_test_data()
    test_set_x, test_set_y, shared_test_set_y, valid_ds = mydatasets

    # compute number of minibatches for training, validation and testing;
    # floor division (//) keeps these integral on Python 3 as well (identical
    # to / on Python 2 ints) -- they are used below as range() bounds
    n_all_train_batches = 30000
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_all_train_batches //= batch_size
    n_train_batches //= batch_size
    n_valid_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix("x")  # the data is presented as rasterized images
    y = T.ivector("y")  # the labels are presented as 1D vector of [int] labels

    # image size after each conv+pool stage: the convolution shrinks each
    # dimension by (filter - 1), then 2x2 max pooling halves it
    # (// keeps the shapes integral)
    layer0_w = 50
    layer0_h = 50
    layer1_w = (layer0_w - 4) // 2
    layer1_h = (layer0_h - 4) // 2
    layer2_w = (layer1_w - 2) // 2
    layer2_h = (layer1_h - 2) // 2
    layer3_w = (layer2_w - 2) // 2
    layer3_h = (layer2_h - 2) // 2

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print("... building the model")

    # image sizes
    batchsize = batch_size
    in_channels = 20
    in_width = 50
    in_height = 50
    # filter sizes
    flt_channels = 40
    flt_time = 20
    flt_width = 5
    flt_height = 5

    signals_shape = (batchsize, in_channels, in_height, in_width)
    filters_shape = (flt_channels, in_channels, flt_height, flt_width)

    layer0_input = x.reshape(signals_shape)

    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=signals_shape,
        filter_shape=filters_shape,
        poolsize=(2, 2),
    )

    # TODO: incase of flt_time < in_time the output dimension will be different
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, flt_channels, layer1_w, layer1_h),
        filter_shape=(60, flt_channels, 3, 3),
        poolsize=(2, 2),
    )

    layer2 = LeNetConvPoolLayer(
        rng,
        input=layer1.output,
        image_shape=(batch_size, 60, layer2_w, layer2_h),
        filter_shape=(90, 60, 3, 3),
        poolsize=(2, 2),
    )
    layer3_input = layer2.output.flatten(2)

    layer3 = HiddenLayer(
        rng,
        input=layer3_input,
        n_in=90 * layer3_w * layer3_h,
        n_out=500,
        activation=T.tanh,
    )

    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8)

    # NOTE(review): `classify` is compiled but never called in this function;
    # presumably kept for interactive use -- confirm before removing.
    classify = theano.function(
        [index],
        outputs=layer4.get_output_labels(y),
        givens={
            x: test_set_x[index * batch_size : (index + 1) * batch_size],
            y: test_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )

    # zero-one error on one validation minibatch
    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size : (index + 1) * batch_size],
            y: valid_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )

    # create a list of all model parameters to be fit by gradient descent
    params = (
        layer4.params + layer3.params + layer2.params + layer1.params + layer0.params
    )

    # symbolic Theano variable that represents the L1 regularization term
    # (params[0] of each layer is its weight matrix; biases are not penalized)
    L1 = (
        T.sum(abs(layer4.params[0]))
        + T.sum(abs(layer3.params[0]))
        + T.sum(abs(layer2.params[0]))
        + T.sum(abs(layer1.params[0]))
        + T.sum(abs(layer0.params[0]))
    )
    # symbolic Theano variable that represents the squared L2 term
    L2_sqr = (
        T.sum(layer4.params[0] ** 2)
        + T.sum(layer3.params[0] ** 2)
        + T.sum(layer2.params[0] ** 2)
        + T.sum(layer1.params[0] ** 2)
        + T.sum(layer0.params[0] ** 2)
    )
    # the loss: negative log-likelihood plus both regularizers
    cost = layer4.negative_log_likelihood(y) + L1_reg * L1 + L2_reg * L2_sqr

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # plain SGD update rule: param <- param - lr * grad
    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size : (index + 1) * batch_size],
            y: train_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )

    ###############
    # TRAIN MODEL #
    ###############
    print("... training")

    start_time = time.clock()

    epoch = 0
    done_looping = False
    cost_ij = 0
    train_files_num = 600
    val_files_num = 100

    startc = time.clock()
    while (epoch < n_epochs) and (not done_looping):
        endc = time.clock()
        print(("epoch %i, took %.2f minutes" % (epoch, (endc - startc) / 60.0)))
        startc = time.clock()
        epoch = epoch + 1
        for nTrainSet in range(1, train_files_num + 1):
            # load the next training chunk into the shared variables
            if nTrainSet % 50 == 0:
                print("training @ nTrainSet =  ", nTrainSet, ", cost = ", cost_ij)
            filename = train_dir + "rep_train_data_" + str(nTrainSet) + ".gzip.h5"
            datasets = load_next_data(filename)
            ns_train_set_x, ns_train_set_y = datasets
            train_set_x.set_value(ns_train_set_x, borrow=True)
            shared_train_set_y.set_value(
                numpy.asarray(ns_train_set_y, dtype=theano.config.floatX), borrow=True
            )
            # chunk sizes may differ, so recompute the batch count (// keeps
            # it an int for range())
            n_train_batches = train_set_x.get_value(borrow=True).shape[0]
            n_train_batches //= batch_size

            # train on every minibatch of this chunk
            for minibatch_index in range(n_train_batches):

                # training itself
                # --------------------------------------
                cost_ij = train_model(minibatch_index)
                # -------------------------

        # at the end of each epoch run validation over all validation files
        this_validation_loss = 0
        for nValSet in range(1, val_files_num + 1):
            filename = valid_dir + "rep_valid_data_" + str(nValSet) + ".gzip.h5"
            datasets = load_next_data(filename)
            ns_valid_set_x, ns_valid_set_y = datasets
            valid_set_x.set_value(ns_valid_set_x, borrow=True)
            shared_valid_set_y.set_value(
                numpy.asarray(ns_valid_set_y, dtype=theano.config.floatX), borrow=True
            )
            n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
            n_valid_batches //= batch_size

            # compute zero-one loss on validation set
            validation_losses = [validate_model(i) for i in range(n_valid_batches)]
            this_validation_loss += numpy.mean(validation_losses)
        this_validation_loss /= val_files_num
        # NOTE: minibatch_index / n_train_batches here refer to the last
        # training chunk processed in this epoch
        print((
            "epoch %i, minibatch %i/%i, validation error %f %%"
            % (
                epoch,
                minibatch_index + 1,
                n_train_batches,
                this_validation_loss * 100.0,
            )
        ))

        # save snapshots: pickle is a binary protocol, so the file is opened
        # with 'wb' via open() (the py2-only file() builtin is gone in py3),
        # and the context manager guarantees it is closed even on error
        print("saving weights state, epoch = ", epoch)
        with open(weights_dir + "weights_epoch" + str(epoch) + ".save", "wb") as f:
            for layer in (layer0, layer1, layer2, layer3, layer4):
                pickle.dump(layer.__getstate__(), f,
                            protocol=pickle.HIGHEST_PROTOCOL)

    end_time = time.clock()
    print("Optimization complete.")
    print((
        "The code for file "
        + os.path.split(__file__)[1]
        + " ran for %.2fm" % ((end_time - start_time) / 60.0)
    ), file=sys.stderr)
Ejemplo n.º 20
0
    def main_graph(self, trained_model, scope, emb_dim, gru, rnn_dim, rnn_num, drop_out=0.5, emb=None):
        if trained_model is not None:
            param_dic = {'nums_chars': self.nums_chars, 'nums_tags': self.nums_tags, 'crf': self.crf, 'emb_dim': emb_dim,
                         'gru': gru, 'rnn_dim': rnn_dim, 'rnn_num': rnn_num, 'drop_out': drop_out, 'buckets_char': self.buckets_char,
                         'ngram': self.ngram, 'is_space': self.is_space, 'sent_seg': self.sent_seg, 'emb_path': self.emb_path,
                         'tag_scheme': self.tag_scheme}
            #print param_dic
            f_model = open(trained_model, 'w')
            pickle.dump(param_dic, f_model)
            f_model.close()

        # define shared weights and variables

        dr = tf.placeholder(tf.float32, [], name='drop_out_holder')
        self.drop_out = dr
        self.drop_out_v = drop_out

        self.emb_layer = EmbeddingLayer(self.nums_chars + 20, emb_dim, weights=emb, name='emb_layer')

        if self.ngram is not None:
            ng_embs = [None for _ in range(len(self.ngram))]
            for i, n_gram in enumerate(self.ngram):
                self.gram_layers.append(EmbeddingLayer(n_gram + 5000 * (i + 2), emb_dim, weights=ng_embs[i], name= str(i + 2) + 'gram_layer'))

        with tf.variable_scope('BiRNN'):

            if gru:
                fw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
                bw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
            else:
                fw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)
                bw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)

            if rnn_num > 1:
                fw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([fw_rnn_cell]*rnn_num, state_is_tuple=True)
                bw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([bw_rnn_cell]*rnn_num, state_is_tuple=True)

        output_wrapper = TimeDistributed(HiddenLayer(rnn_dim * 2, self.nums_tags, activation='linear', name='hidden'), name='wrapper')

        #define model for each bucket
        for idx, bucket in enumerate(self.buckets_char):
            if idx == 1:
                scope.reuse_variables()
            t1 = time()

            input_v = tf.placeholder(tf.int32, [None, bucket], name='input_' + str(bucket))

            self.input_v.append([input_v])

            emb_set = []

            word_out = self.emb_layer(input_v)
            emb_set.append(word_out)

            if self.ngram is not None:
                for i in range(len(self.ngram)):
                    input_g = tf.placeholder(tf.int32, [None, bucket], name='input_g' + str(i) + str(bucket))
                    self.input_v[-1].append(input_g)
                    gram_out = self.gram_layers[i](input_g)
                    emb_set.append(gram_out)

            if len(emb_set) > 1:
                emb_out = tf.concat(2, emb_set)

            else:
                emb_out = emb_set[0]

            emb_out = DropoutLayer(dr)(emb_out)
            emb_out = tf.unpack(emb_out)

            rnn_out = BiLSTM(rnn_dim, fw_cell=fw_rnn_cell, bw_cell=bw_rnn_cell, p=dr, name='BiLSTM' + str(bucket), scope='BiRNN')(emb_out, input_v)

            output = output_wrapper(rnn_out)
            output_c = tf.pack(output, axis=1)

            self.output.append([output_c])

            self.output_.append([tf.placeholder(tf.int32, [None, bucket], name='tags' + str(bucket))])
            self.bucket_dit[bucket] = idx

            print 'Bucket %d, %f seconds' % (idx + 1, time() - t1)

        assert len(self.input_v) == len(self.output) and len(self.output) == len(self.output_) and len(self.output) == len(self.counts)

        self.params = tf.trainable_variables()

        self.saver = tf.train.Saver()
Ejemplo n.º 21
0
    def build_symbolic_graph(self):
        """Assemble the symbolic computation graph for this model.

        Creates the input variables (context ``x1``, partial trace ``x2``,
        target ``y``, full-trace reconstruction ``z``), then stacks two
        parallel hidden layers (one per input), a merge hidden layer, a
        reconstruction output layer and -- when ``self.n_out > 0`` -- a
        prediction output layer, collecting them in ``self.layers``.
        """
        # allocate symbolic variables (defaults to floatX)
        self.x1 = T.matrix('x1')  # context
        self.x2 = T.matrix('x2')  # partial trace
        if self.target_is_int:
            self.y = T.ivector('y')  # next pixel
        else:
            self.y = T.matrix('y')
        self.z = T.matrix('z')  # reconstruction of trace (full trace)

        # mask of outputs that are invalid given the partial trace
        bad_outputs = make_bad_outs(self.n_trace, self.x2)

        # assemble layers
        self.hidden_layers = []

        # two parallel first-stage layers: one over the context, one over
        # the partial trace
        hidden_layer_1a = HiddenLayer(self.rng,
                                      input=self.x1,
                                      n_in=self.n_context,
                                      n_out=self.n_h[0],
                                      dropout=self.dropout,
                                      activation=self.activations[0],
                                      params=self.params_init[0])
        hidden_layer_1b = HiddenLayer(self.rng,
                                      input=self.x2,
                                      n_in=self.n_trace,
                                      n_out=self.n_h[1],
                                      dropout=self.dropout,
                                      activation=self.activations[1],
                                      params=self.params_init[1])

        # merge the two branches feature-wise
        input_to_h2 = T.concatenate(
            [hidden_layer_1a.output, hidden_layer_1b.output], axis=1)

        hidden_layer_2 = HiddenLayer(self.rng,
                                     input=input_to_h2,
                                     n_in=self.n_h[0] + self.n_h[1],
                                     n_out=self.n_h[2],
                                     dropout=self.dropout,
                                     activation=self.activations[2],
                                     params=self.params_init[2])

        recon_layer = OutputLayer(self.rng,
                                  input=hidden_layer_2.output,
                                  n_in=self.n_h[2],
                                  n_out=self.n_recon,
                                  non_linearities=self.activations[3],
                                  # bug fix: was the bare name `params_init`,
                                  # which is undefined here (NameError); every
                                  # sibling layer reads self.params_init
                                  params=self.params_init[3])

        if not self.n_out == 0:
            pred_layer = OutputLayer(self.rng,
                                     input=recon_layer.output,
                                     n_in=self.n_recon,
                                     n_out=self.n_out,
                                     bad_output=bad_outputs,
                                     non_linearities=self.activations[4],
                                     params=self.params_init[4])

        if self.n_out == 0:
            self.layers = [
                hidden_layer_1a, hidden_layer_1b, hidden_layer_2, recon_layer
            ]
        else:
            self.layers = [
                hidden_layer_1a, hidden_layer_1b, hidden_layer_2, recon_layer,
                pred_layer
            ]
Ejemplo n.º 22
0
    def __init__(self, numpy_rng=None, theano_rng=None, n_ins=601,
                 n_outs=259, l1_reg=None, l2_reg=None,
                 hidden_layers_sizes=None,
                 hidden_activation='tanh', output_activation='sigmoid'):
        """Build a feed-forward DNN with an MSE fine-tuning cost.

        :param numpy_rng: numpy RandomState for weight initialisation
            (defaults to a fresh ``RandomState(2**30)`` per instance).
        :param theano_rng: Theano RandomStreams; created from numpy if None.
        :param n_ins: input feature dimension.
        :param n_outs: output dimension.
        :param l1_reg: L1 penalty coefficient on hidden weights, or None.
        :param l2_reg: L2 penalty coefficient on hidden weights, or None.
        :param hidden_layers_sizes: list of hidden-layer widths
            (defaults to ``[256] * 5``).
        :param hidden_activation: NOTE(review): accepted but currently
            ignored -- the hidden layers are hard-wired to T.tanh.
        :param output_activation: 'linear' or 'sigmoid'; anything else exits.
        """
        # Defaults are built per call.  The previous signature instantiated
        # RandomState(2**30) once at definition time, so every DNN shared
        # (and advanced) the same RNG state -- non-reproducible inits; the
        # list default was a shared mutable object.
        if numpy_rng is None:
            numpy_rng = numpy.random.RandomState(2 ** 30)
        if hidden_layers_sizes is None:
            hidden_layers_sizes = [256, 256, 256, 256, 256]

        print("DNN Initialisation")
        #logger = logging.getLogger("DNN initialization")

        self.sigmoid_layers = []
        self.params = []
        self.delta_params   = []
        self.n_layers = len(hidden_layers_sizes)

        self.n_ins = n_ins
        self.n_outs = n_outs
        #self.speaker_ID = []

        self.output_activation = output_activation

        self.l1_reg = l1_reg
        self.l2_reg = l2_reg
        #vctk_class = Code_01.VCTK_feat_collection()

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy.random.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')
        self.y = T.matrix('y')

        # stack the hidden layers, each feeding on the previous one's output
        for i in xrange(self.n_layers):
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.tanh)  ##T.nnet.sigmoid)  #

            self.sigmoid_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)

        # add final layer
        if self.output_activation == 'linear':
            self.final_layer = LinearLayer(rng = numpy_rng,
                                           input=self.sigmoid_layers[-1].output,
                                           n_in=hidden_layers_sizes[-1],
                                           n_out=n_outs)

        elif self.output_activation == 'sigmoid':
            self.final_layer = SigmoidLayer(
                 rng = numpy_rng,
                 input=self.sigmoid_layers[-1].output,
                 n_in=hidden_layers_sizes[-1],
                 n_out=n_outs, activation=T.nnet.sigmoid)
        else:
            print ("This output activation function: %s is not supported right now!" %(self.output_activation))
            sys.exit(1)

        self.params.extend(self.final_layer.params)
        self.delta_params.extend(self.final_layer.delta_params)

        ### MSE fine-tuning cost, averaged over the minibatch
        self.finetune_cost = T.mean(T.sum( (self.final_layer.output-self.y)*(self.final_layer.output-self.y), axis=1 ))

        self.errors = T.mean(T.sum( (self.final_layer.output-self.y)*(self.final_layer.output-self.y), axis=1 ))

        ### L1-norm on hidden weight matrices (params[i*2] is each layer's W)
        if self.l1_reg is not None:
            for i in xrange(self.n_layers):
                W = self.params[i * 2]
                self.finetune_cost += self.l1_reg * (abs(W).sum())

        ### L2-norm on hidden weight matrices
        if self.l2_reg is not None:
            for i in xrange(self.n_layers):
                W = self.params[i * 2]
                self.finetune_cost += self.l2_reg * T.sqr(W).sum()
Ejemplo n.º 23
0
    def __init__(self, nkerns=None, miniBatchSize=200):
        """Build a 4-conv-layer LeNet-style classifier for 95x95 patches.

        Image dimensions shrink 95 -> 46 -> 21 -> 9 -> 3 through the four
        conv+maxpool stages, followed by a fully connected layer and a
        logistic-regression classifier.

        :param nkerns: number of kernels in each conv layer
            (defaults to ``[48, 48, 48, 48]``; ``None`` sentinel avoids a
            mutable default argument).
        :param miniBatchSize: minibatch size the graph is built for.
        """
        if nkerns is None:
            nkerns = [48, 48, 48, 48]
        rng = numpy.random.RandomState(23455)
        nClasses = 2
        nMaxPool = 2
        nHidden = 200

        self.p = 95
        #self.x = T.tensor3('x')     # membrane data set
        self.x = T.tensor4('x')  # membrane mini-batch
        self.y = T.ivector('y')  # labels := 1D vector of [int] labels
        self.miniBatchSize = miniBatchSize

        # Reshape matrix of rasterized images # to a 4D tensor,
        # compatible with the LeNetConvPoolLayer
        #layer0_input = self.x.reshape((self.miniBatchSize, 1, self.p, self.p))
        layer0_input = self.x

        #--------------------------------------------------
        # LAYER 0
        # layer0 convolution+max pool reduces image dimensions by:
        # 95 -> 92 -> 46
        #--------------------------------------------------
        # // keeps the sizes integral on Python 3 (identical on Python 2)
        fs0 = 4  # filter size, layer 0
        os0 = (self.p - fs0 + 1) // nMaxPool  # image out size 0
        assert (os0 == 46)
        layer0 = LeNetConvPoolLayer(rng,
                                    input=layer0_input,
                                    image_shape=(self.miniBatchSize, 1, self.p,
                                                 self.p),
                                    filter_shape=(nkerns[0], 1, fs0, fs0),
                                    poolsize=(nMaxPool, nMaxPool))

        #--------------------------------------------------
        # LAYER 1
        # layer1 convolution+max pool reduces image dimensions by:
        # 46 -> 42 -> 21
        #--------------------------------------------------
        fs1 = 5  # filter size, layer 1
        os1 = (os0 - fs1 + 1) // nMaxPool  # image out size 1
        assert (os1 == 21)
        layer1 = LeNetConvPoolLayer(rng,
                                    input=layer0.output,
                                    image_shape=(self.miniBatchSize, nkerns[0],
                                                 os0, os0),
                                    filter_shape=(nkerns[1], nkerns[0], fs1,
                                                  fs1),
                                    poolsize=(nMaxPool, nMaxPool))

        #--------------------------------------------------
        # LAYER 2
        # layer2 convolution+max pool reduces image dimensions by:
        # 21 -> 18 -> 9
        #--------------------------------------------------
        fs2 = 4
        os2 = (os1 - fs2 + 1) // nMaxPool
        assert (os2 == 9)
        # bug fix: image_shape channels were nkerns[0]; layer1's output has
        # nkerns[1] channels (harmless only while all nkerns entries coincide)
        layer2 = LeNetConvPoolLayer(rng,
                                    input=layer1.output,
                                    image_shape=(self.miniBatchSize, nkerns[1],
                                                 os1, os1),
                                    filter_shape=(nkerns[2], nkerns[1], fs2,
                                                  fs2),
                                    poolsize=(nMaxPool, nMaxPool))

        #--------------------------------------------------
        # LAYER 3
        # layer3 convolution+max pool reduces image dimensions by:
        # 9 -> 6 -> 3
        #--------------------------------------------------
        fs3 = 4
        os3 = (os2 - fs3 + 1) // nMaxPool
        assert (os3 == 3)
        # bug fix: image_shape channels were nkerns[0]; layer2's output has
        # nkerns[2] channels
        layer3 = LeNetConvPoolLayer(rng,
                                    input=layer2.output,
                                    image_shape=(self.miniBatchSize, nkerns[2],
                                                 os2, os2),
                                    filter_shape=(nkerns[3], nkerns[2], fs3,
                                                  fs3),
                                    poolsize=(nMaxPool, nMaxPool))

        #--------------------------------------------------
        # LAYER 4
        # Fully connected sigmoidal layer, goes from
        # 3*3*48 ~ 450 -> 200
        #--------------------------------------------------
        layer4_input = layer3.output.flatten(2)
        layer4 = HiddenLayer(rng,
                             input=layer4_input,
                             n_in=nkerns[3] * os3 * os3,
                             n_out=nHidden,
                             activation=T.tanh)

        #--------------------------------------------------
        # LAYER 5
        # Classification via a logistic regression layer
        # 200 -> 2
        #--------------------------------------------------
        # classify the values of the fully-connected sigmoidal layer
        layer5 = LogisticRegression(input=layer4.output,
                                    n_in=nHidden,
                                    n_out=nClasses)

        self.layers = (layer0, layer1, layer2, layer3, layer4, layer5)
Ejemplo n.º 24
0
if __name__ == "__main__":
    # Hyper-parameters for a single-hidden-layer denoising autoencoder.
    n_vis = 784        # 28x28 MNIST images, flattened
    n_hidden = 2000
    batch_size = 128
    activation = T.nnet.sigmoid
    bce = T.nnet.binary_crossentropy

    data_dir = '/home/mmay/data/mnist'
    trX, _, teX, _ = load_mnist(data_dir)

    # Corrupt half of the input pixels with salt-and-pepper noise.
    augmenter = SaltAndPepper(low=0., high=1., p_corrupt=0.5)

    # Factor out trainer
    # Generalize to multiple layers
    layers = [
        InputLayer(n_vis, batch_size=batch_size, augmenter=augmenter),
        HiddenLayer(n_hidden, activation),
        HiddenLayer(n_vis, activation),
    ]

    lr_scheduler = ExponentialDecay(value=0.1, decay=0.99)
    trainer = Momentum(lr=lr_scheduler, m=0.9)

    model = AutoEncoder(n_vis=n_vis, layers=layers, trainer=trainer,
                        loss=bce, batch_size=batch_size, n_batches=32,
                        n_epochs=100, lr_decay=0.99)
    model.fit(trX, teX)

    # Pull out the learned weights and visualise test-set reconstructions.
    w1 = model.layers[1].W.get_value().T
    w2 = model.layers[2].W.get_value()
    pred = model.predict(teX)

    grayscale_grid_vis(pred[:100],
                       transform=lambda im: unit_scale(im.reshape(28, 28)),
                       show=True)
Ejemplo n.º 25
0
    def __init__(self,
                 nkerns=None,
                 miniBatchSize=200,
                 nHidden=200,
                 nClasses=2,
                 nMaxPool=2,
                 nChannels=1):
        """Build a 3-conv-layer LeNet-style classifier for 65x65 patches.

        Image dimensions shrink 65 -> 31 -> 14 -> 5 through the three
        conv+maxpool stages, followed by a fully connected layer and a
        logistic-regression classifier.

        nkerns : number of kernels per conv layer (defaults to [48, 48, 48];
                 a None sentinel avoids a mutable default argument)
        miniBatchSize : minibatch size the graph is built for
        nHidden : width of the fully connected layer
        nClasses : the number of target classes (e.g. 2 for binary classification)
        nMaxPool : number of pixels to max pool
        nChannels : number of input channels (e.g. 1 for single grayscale channel)
        """
        if nkerns is None:
            nkerns = [48, 48, 48]
        rng = numpy.random.RandomState(23455)

        self.p = 65
        self.miniBatchSize = miniBatchSize

        # Note: self.x and self.y will be re-bound to a subset of the
        # training/validation/test data dynamically by the update
        # stage of the appropriate function.
        self.x = T.tensor4('x')  # membrane mini-batch
        self.y = T.ivector('y')  # 1D vector of [int] labels

        # We now assume the input will already be reshaped to the
        # proper size (i.e. we don't need a theano resize op here).
        layer0_input = self.x

        #--------------------------------------------------
        # LAYER 0
        # layer0 convolution+max pool reduces image dimensions by:
        # 65 -> 62 -> 31
        #--------------------------------------------------
        # // keeps the sizes integral on Python 3 (identical on Python 2),
        # matching prepare_network() elsewhere in this file
        fs0 = 4  # conv. filter size, layer 0
        os0 = (self.p - fs0 + 1) // nMaxPool  # image out size 0
        assert (os0 == 31)
        layer0 = LeNetConvPoolLayer(rng,
                                    input=layer0_input,
                                    image_shape=(self.miniBatchSize, nChannels,
                                                 self.p, self.p),
                                    filter_shape=(nkerns[0], nChannels, fs0,
                                                  fs0),
                                    poolsize=(nMaxPool, nMaxPool))

        #--------------------------------------------------
        # LAYER 1
        # layer1 convolution+max pool reduces image dimensions by:
        # 31 -> 28 -> 14
        #--------------------------------------------------
        fs1 = 4  # filter size, layer 1
        os1 = (os0 - fs1 + 1) // nMaxPool  # image out size 1
        assert (os1 == 14)
        layer1 = LeNetConvPoolLayer(rng,
                                    input=layer0.output,
                                    image_shape=(self.miniBatchSize, nkerns[0],
                                                 os0, os0),
                                    filter_shape=(nkerns[1], nkerns[0], fs1,
                                                  fs1),
                                    poolsize=(nMaxPool, nMaxPool))

        #--------------------------------------------------
        # LAYER 2
        # layer2 convolution+max pool reduces image dimensions by:
        # 14 -> 10 -> 5
        #--------------------------------------------------
        fs2 = 5
        os2 = (os1 - fs2 + 1) // nMaxPool
        assert (os2 == 5)
        layer2 = LeNetConvPoolLayer(rng,
                                    input=layer1.output,
                                    image_shape=(self.miniBatchSize, nkerns[1],
                                                 os1, os1),
                                    filter_shape=(nkerns[2], nkerns[1], fs2,
                                                  fs2),
                                    poolsize=(nMaxPool, nMaxPool))

        #--------------------------------------------------
        # LAYER 3
        # Fully connected sigmoidal layer, goes from
        # 5*5*48  -> 200
        #--------------------------------------------------
        layer3_input = layer2.output.flatten(2)
        layer3 = HiddenLayer(rng,
                             input=layer3_input,
                             n_in=nkerns[2] * os2 * os2,
                             n_out=nHidden,
                             activation=T.tanh)

        #--------------------------------------------------
        # LAYER 4
        # Classification via a logistic regression layer
        # 200 -> 2
        #--------------------------------------------------
        # classify the values of the fully-connected sigmoidal layer
        layer4 = LogisticRegression(input=layer3.output,
                                    n_in=nHidden,
                                    n_out=nClasses)

        self.layers = (layer0, layer1, layer2, layer3, layer4)
def prepare_network():
    """Build the LeNet-style Theano classifier and load its trained weights.

    Constructs a 3-conv-pool + hidden + logistic-regression network over
    50x50 inputs, compiles a `classify` function over the test set, and
    restores the five layers' parameters from ``weights.save``.

    Returns:
        (test_set_x, test_set_y, shared_test_set_y, valid_ds, classify,
        batchsize) — where ``classify(index)`` maps a minibatch index to
        the output labels of the final layer.
    """
    rng = numpy.random.RandomState(23455)

    print('Preparing Theano model...')

    mydatasets = load_initial_test_data()
    test_set_x, test_set_y, shared_test_set_y, valid_ds = mydatasets
    # Number of test batches; kept for parity with the training script even
    # though it is not consumed below.
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]

    # allocate symbolic variables for the data
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    # image size: each conv (valid mode) shrinks by (filter-1), each pool halves
    layer0_w = 50
    layer0_h = 50
    layer1_w = (layer0_w - 4) // 2
    layer1_h = (layer0_h - 4) // 2
    layer2_w = (layer1_w - 2) // 2
    layer2_h = (layer1_h - 2) // 2
    layer3_w = (layer2_w - 2) // 2
    layer3_h = (layer2_h - 2) // 2

    ######################
    # BUILD NETWORK #
    ######################
    # image sizes
    batchsize = 1
    in_channels = 20
    in_width = 50
    in_height = 50
    # filter sizes
    flt_channels = 40
    flt_time = 20
    flt_width = 5
    flt_height = 5

    signals_shape = (batchsize, in_channels, in_height, in_width)
    filters_shape = (flt_channels, in_channels, flt_height, flt_width)

    layer0_input = x.reshape(signals_shape)

    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=signals_shape,
                                filter_shape=filters_shape,
                                poolsize=(2, 2))

    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batchsize, flt_channels, layer1_w,
                                             layer1_h),
                                filter_shape=(60, flt_channels, 3, 3),
                                poolsize=(2, 2))

    layer2 = LeNetConvPoolLayer(rng,
                                input=layer1.output,
                                image_shape=(batchsize, 60, layer2_w,
                                             layer2_h),
                                filter_shape=(90, 60, 3, 3),
                                poolsize=(2, 2))
    # flatten the final feature maps into one vector per example
    layer3_input = layer2.output.flatten(2)

    layer3 = HiddenLayer(rng,
                         input=layer3_input,
                         n_in=90 * layer3_w * layer3_h,
                         n_out=500,
                         activation=T.tanh)

    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8)

    # symbolic cost; not compiled here, retained for parity with training code
    cost = layer4.negative_log_likelihood(y)

    classify = theano.function(
        [index],
        outputs=layer4.get_output_labels(y),
        givens={
            x: test_set_x[index * batchsize:(index + 1) * batchsize],
            y: test_set_y[index * batchsize:(index + 1) * batchsize]
        })

    print('Loading network weights...')
    # NOTE(review): pickle deserialization executes arbitrary code — only
    # load weight files from trusted sources.
    weightFile = '../live_count/weights.save'
    layers = (layer0, layer1, layer2, layer3, layer4)
    # `with` guarantees the file is closed even if a load fails; one pickled
    # state object per layer, in layer order.
    with open(weightFile, 'rb') as f:
        loaded_objects = [pickle.load(f) for _ in range(len(layers))]
    for layer, state in zip(layers, loaded_objects):
        layer.__setstate__(state)

    return test_set_x, test_set_y, shared_test_set_y, valid_ds, classify, batchsize
Ejemplo n.º 27
0
    def main_graph(self, trained_model, scope, emb_dim, gru, rnn_dim, rnn_num, drop_out=0.5, rad_dim=30, emb=None, ng_embs=None, pixels=None, con_width=None, filters=None, pooling_size=None):
        """Build the tagging graph: character embeddings plus optional
        radical / n-gram / graphic-CNN features, fed through a bidirectional
        RNN into a per-tag linear layer — one unrolled model per
        sentence-length bucket.

        If ``trained_model`` is a path, the hyper-parameters are pickled
        there first so the graph can be re-instantiated at load time.

        NOTE(review): written against legacy APIs — Python 2 print
        statements, ``tf.pack``/``tf.unpack`` and axis-first ``tf.concat``
        (pre-TF-1.0), and pickling to a text-mode file.
        """
        if trained_model is not None:
            # Persist every hyper-parameter needed to rebuild this graph.
            param_dic = {}
            param_dic['nums_chars'] = self.nums_chars
            param_dic['nums_tags'] = self.nums_tags
            param_dic['tag_scheme'] = self.tag_scheme
            param_dic['graphic'] = self.graphic
            param_dic['pic_size'] = self.pic_size
            param_dic['word_vec'] = self.word_vec
            param_dic['radical'] = self.radical
            param_dic['crf'] = self.crf
            param_dic['emb_dim'] = emb_dim
            param_dic['gru'] = gru
            param_dic['rnn_dim'] = rnn_dim
            param_dic['rnn_num'] = rnn_num
            param_dic['drop_out'] = drop_out
            param_dic['filter_size'] = con_width
            param_dic['filters'] = filters
            param_dic['pooling_size'] = pooling_size
            param_dic['font'] = self.font
            param_dic['buckets_char'] = self.buckets_char
            param_dic['ngram'] = self.ngram
            #print param_dic
            f_model = open(trained_model, 'w')
            pickle.dump(param_dic, f_model)
            f_model.close()

        # define shared weights and variables

        # Dropout keep/drop rate is fed at run time through a placeholder so
        # training and inference can use different values.
        dr = tf.placeholder(tf.float32, [], name='drop_out_holder')
        self.drop_out = dr
        self.drop_out_v = drop_out

        if self.word_vec:
            # +500 reserves vocabulary slots beyond the known characters.
            self.emb_layer = EmbeddingLayer(self.nums_chars + 500, emb_dim, weights=emb, name='emb_layer')

        if self.radical:
            # 216 = size of the radical inventory — TODO confirm against data.
            self.radical_layer = EmbeddingLayer(216, rad_dim, name='radical_layer')

        if self.ngram is not None:
            if ng_embs is not None:
                assert len(ng_embs) == len(self.ngram)
            else:
                ng_embs = [None for _ in range(len(self.ngram))]
            # One embedding table per n-gram order (2-gram, 3-gram, ...).
            for i, n_gram in enumerate(self.ngram):
                self.gram_layers.append(EmbeddingLayer(n_gram + 1000 * (i + 2), emb_dim, weights=ng_embs[i], name= str(i + 2) + 'gram_layer'))

        # Shared (bucket-independent) CNN wrappers for the graphic branch.
        wrapper_conv_1, wrapper_mp_1, wrapper_conv_2, wrapper_mp_2, wrapper_dense, wrapper_dr = None, None, None, None, None, None

        if self.graphic:
            self.input_p = []
            assert pixels is not None and filters is not None and pooling_size is not None and con_width is not None

            self.pixels = pixels
            # Glyph images are square: side length from the flat pixel vector.
            pixel_dim = int(math.sqrt(len(pixels[0])))

            # Two conv+maxpool stages applied per time step, then a dense
            # projection to 100 dims and dropout.
            wrapper_conv_1 = TimeDistributed(Convolution(con_width, 1, filters, name='conv_1'), name='wrapper_c1')
            wrapper_mp_1 = TimeDistributed(Maxpooling(pooling_size, pooling_size, name='pooling_1'), name='wrapper_p1')

            p_size_1 = toolbox.down_pool(pixel_dim, pooling_size)

            wrapper_conv_2 = TimeDistributed(Convolution(con_width, filters, filters, name='conv_2'), name='wrapper_c2')
            wrapper_mp_2 = TimeDistributed(Maxpooling(pooling_size, pooling_size, name='pooling_2'), name='wrapper_p2')

            p_size_2 = toolbox.down_pool(p_size_1, pooling_size)

            wrapper_dense = TimeDistributed(HiddenLayer(p_size_2 * p_size_2 * filters, 100, activation='tanh', name='conv_dense'), name='wrapper_3')
            wrapper_dr = TimeDistributed(DropoutLayer(self.drop_out), name='wrapper_dr')

        with tf.variable_scope('BiRNN'):

            if gru:
                fw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
                bw_rnn_cell = tf.nn.rnn_cell.GRUCell(rnn_dim)
            else:
                fw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)
                bw_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_dim, state_is_tuple=True)

            if rnn_num > 1:
                # NOTE(review): reusing the same cell object per layer is the
                # legacy-TF idiom; newer TF requires distinct cell instances.
                fw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([fw_rnn_cell]*rnn_num, state_is_tuple=True)
                bw_rnn_cell = tf.nn.rnn_cell.MultiRNNCell([bw_rnn_cell]*rnn_num, state_is_tuple=True)

        # Linear projection from the concatenated fw/bw RNN states to tag scores.
        output_wrapper = TimeDistributed(HiddenLayer(rnn_dim * 2, self.nums_tags[0], activation='linear', name='hidden'), name='wrapper')

        #define model for each bucket
        for idx, bucket in enumerate(self.buckets_char):
            if idx == 1:
                # All buckets after the first share the same variables.
                scope.reuse_variables()
            t1 = time()

            input_v = tf.placeholder(tf.int32, [None, bucket], name='input_' + str(bucket))

            self.input_v.append([input_v])

            # Collect every feature stream for this bucket before concatenation.
            emb_set = []

            if self.word_vec:
                word_out = self.emb_layer(input_v)
                emb_set.append(word_out)

            if self.radical:
                input_r = tf.placeholder(tf.int32, [None, bucket], name='input_r' + str(bucket))

                self.input_v[-1].append(input_r)
                radical_out = self.radical_layer(input_r)
                emb_set.append(radical_out)

            if self.ngram is not None:
                for i in range(len(self.ngram)):
                    input_g = tf.placeholder(tf.int32, [None, bucket], name='input_g' + str(i) + str(bucket))
                    self.input_v[-1].append(input_g)
                    gram_out = self.gram_layers[i](input_g)
                    emb_set.append(gram_out)

            if self.graphic:
                # Per-character glyph pixels: run the shared CNN stack per
                # time step and append the dense features.
                input_p = tf.placeholder(tf.float32, [None, bucket, pixel_dim*pixel_dim])
                self.input_p.append(input_p)

                pix_out = tf.reshape(input_p, [-1, bucket, pixel_dim, pixel_dim, 1])
                pix_out = tf.unpack(pix_out, axis=1)

                conv_out_1 = wrapper_conv_1(pix_out)
                pooling_out_1 = wrapper_mp_1(conv_out_1)

                conv_out_2 = wrapper_conv_2(pooling_out_1)
                pooling_out_2 = wrapper_mp_2(conv_out_2)

                assert p_size_2 == pooling_out_2[0].get_shape().as_list()[1]
                pooling_out = tf.reshape(pooling_out_2, [-1, bucket, p_size_2 * p_size_2 * filters])
                pooling_out = tf.unpack(pooling_out, axis=1)

                graphic_out = wrapper_dense(pooling_out)
                graphic_out = wrapper_dr(graphic_out)

                emb_set.append(graphic_out)


            if len(emb_set) > 1:
                # Legacy axis-first concat along the feature dimension, then
                # unpack into a per-time-step list for the RNN.
                emb_out = tf.concat(2, emb_set)
                emb_out = tf.unpack(emb_out)

            else:
                emb_out = emb_set[0]

            rnn_out = BiLSTM(rnn_dim, fw_cell=fw_rnn_cell, bw_cell=bw_rnn_cell, p=dr, name='BiLSTM' + str(bucket), scope='BiRNN')(emb_out, input_v)

            output = output_wrapper(rnn_out)

            # Re-stack the per-step outputs into a [batch, bucket, tags] tensor.
            output_c = tf.pack(output, axis=1)

            self.output.append([output_c])

            self.output_.append([tf.placeholder(tf.int32, [None, bucket], name='tags' + str(bucket))])

            self.bucket_dit[bucket] = idx

            print 'Bucket %d, %f seconds' % (idx + 1, time() - t1)

        assert len(self.input_v) == len(self.output) and len(self.output) == len(self.output_) and len(self.output) == len(self.counts)

        self.params = tf.trainable_variables()

        self.saver = tf.train.Saver()
Ejemplo n.º 28
0
    def build_model(self, flag_preserve_params=False):
        """Assemble the one-hidden-layer MLP.

        Creates the symbolic inputs, the hidden and logistic-regression
        layers, the L1/L2 regularizers, the regularized cost and its
        gradients, and compiles fixed-batch-size prediction functions.

        ``flag_preserve_params`` is accepted for interface compatibility;
        it is not consulted in this method.
        """
        logging.info('... building the model')

        # Symbolic inputs: minibatch index, rasterized images, int labels.
        self.index = T.lscalar()
        self.x = T.matrix('x')
        self.y = T.ivector('y')

        # Hidden layer (tanh by default; any nonlinearity supplied via
        # self.hidden_activation works) feeding the classification layer.
        hidden = HiddenLayer(rng=self.rng,
                             input=self.x,
                             n_in=self.n_in,
                             n_out=self.n_hidden,
                             activation=self.hidden_activation)
        self.hiddenLayer = hidden

        # Logistic-regression output layer on top of the hidden units.
        logreg = LogisticRegression(
            input=hidden.output,
            n_in=self.n_hidden,
            n_out=self.n_out,
            activation=self.logreg_activation)
        self.logRegressionLayer = logreg

        # Regularizers over both weight matrices: L1 norm and squared L2 norm.
        self.L1 = abs(hidden.W).sum() + abs(logreg.W).sum()
        self.L2_sqr = (hidden.W ** 2).sum() + (logreg.W ** 2).sum()

        # The MLP's loss and error counters are those of its output layer.
        self.negative_log_likelihood = logreg.negative_log_likelihood
        self.errors = logreg.errors

        # All trainable parameters, hidden layer first.
        self.params = hidden.params + logreg.params

        # Regularized training cost and its gradients w.r.t. the parameters.
        self.cost = (self.negative_log_likelihood(self.y)
                     + self.alpha_l1 * self.L1
                     + self.alpha_l2 * self.L2_sqr)
        self.grads = T.grad(self.cost, self.params)

        # Compiled helpers for fixed-batch-size prediction.
        proba = logreg.p_y_given_x
        self.predict_proba_batch = theano.function([self.x], proba)
        self.predict_batch = theano.function(
            [self.x], T.argmax(proba, axis=1))
        self.predict_cost_batch = theano.function([self.x, self.y],
                                                  self.cost,
                                                  allow_input_downcast=True)
Ejemplo n.º 29
0
        self.params = []
        for l in self.layers:
            self.params += l.params


# Tensor variables for the message and key
# (msg_len, key_len, batch_size, skip_conv, HiddenLayer and
# StandardConvSetup are defined earlier in this file.)
msg_in = T.matrix('msg_in')
key = T.matrix('key')

# Alice's input is the concatenation of the message and the key
alice_in = T.concatenate([msg_in, key], axis=1)

# Alice's hidden layer
alice_hid = HiddenLayer(alice_in,
                        input_size=msg_len + key_len,
                        hidden_size=msg_len + key_len,
                        name='alice_to_hid',
                        act_fn='relu')
if skip_conv:
    # No convolution: project straight down to a message-sized
    # communication vector with a tanh dense layer.
    alice_conv = HiddenLayer(alice_hid,
                             input_size=msg_len + key_len,
                             hidden_size=msg_len,
                             name='alice_hid_to_comm',
                             act_fn='tanh')
    alice_comm = alice_conv.output
else:
    # Reshape the output of Alice's hidden layer for convolution
    # (batch, channels=1, length, width=1) — NCHW-style 4-D input.
    alice_conv_in = alice_hid.output.reshape(
        (batch_size, 1, msg_len + key_len, 1))
    # Alice's convolutional layers
    alice_conv = StandardConvSetup(alice_conv_in, 'alice')
Ejemplo n.º 30
0
        poolsize=(2, 2),
    )

    layer2 = LeNetConvPoolLayer(
        rng,
        input=layer1.output,
        image_shape=(batch_size, 60, layer2_w, layer2_h),
        filter_shape=(90, 60, 3, 3),
        poolsize=(2, 2),
    )
    layer3_input = layer2.output.flatten(2)

    layer3 = HiddenLayer(
        rng,
        input=layer3_input,
        n_in=90 * layer3_w * layer3_h,
        n_out=500,
        activation=T.tanh,
    )

    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=8)

    cost = layer4.negative_log_likelihood(y)

    classify = theano.function(
        [index],
        outputs=layer4.get_output_labels(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
        },
Ejemplo n.º 31
0
    def main_graph(self,
                   trained_model,
                   scope,
                   emb_dim,
                   cell,
                   rnn_dim,
                   rnn_num,
                   drop_out=0.5,
                   emb=None):
        """Build the multi-GPU tagging graph: character embeddings for a
        sentence pair, dropout, a shared bidirectional RNN, and a linear
        tag-scoring layer — one unrolled model per sentence-length bucket,
        with the batch split across ``self.num_gpus`` devices.

        If ``trained_model`` is a path, the hyper-parameters are pickled
        there first so the graph can be re-instantiated at load time.

        NOTE(review): Python 2 code (print statements, pickling to a
        text-mode file) targeting tf.contrib-era TensorFlow.
        """
        if trained_model is not None:
            # Persist every hyper-parameter needed to rebuild this graph.
            param_dic = {
                'nums_chars': self.nums_chars,
                'nums_tags': self.nums_tags,
                'crf': self.crf,
                'emb_dim': emb_dim,
                'cell': cell,
                'rnn_dim': rnn_dim,
                'rnn_num': rnn_num,
                'drop_out': drop_out,
                'buckets_char': self.buckets_char,
                'ngram': self.ngram,
                'is_space': self.is_space,
                'sent_seg': self.sent_seg,
                'emb_path': self.emb_path,
                'tag_scheme': self.tag_scheme
            }
            #print param_dic
            f_model = open(trained_model, 'w')
            pickle.dump(param_dic, f_model)
            f_model.close()

        # define shared weights and variables
        # Per-GPU batch size and dropout rate are fed at run time.
        batch_size_h = tf.placeholder(tf.int32, [], name='batch_size_holder')
        dr = tf.placeholder(tf.float32, [], name='drop_out_holder')
        self.batch_size_h = batch_size_h
        self.drop_out = dr
        self.drop_out_v = drop_out
        # pdb.set_trace()
        # +20 reserves vocabulary slots beyond the known characters.
        self.emb_layer = EmbeddingLayer(self.nums_chars + 20,
                                        emb_dim,
                                        weights=emb,
                                        name='emb_layer')

        if self.ngram is not None:
            # One embedding table per n-gram order (2-gram, 3-gram, ...);
            # no pre-trained n-gram weights are supported here.
            ng_embs = [None for _ in range(len(self.ngram))]
            for i, n_gram in enumerate(self.ngram):
                self.gram_layers.append(
                    EmbeddingLayer(n_gram + 5000 * (i + 2),
                                   emb_dim,
                                   weights=ng_embs[i],
                                   name=str(i + 2) + 'gram_layer'))

        with tf.variable_scope('BiRNN'):

            if cell == 'gru':
                fw_rnn_cell = tf.contrib.rnn.GRUCell(rnn_dim)  #forward
                bw_rnn_cell = tf.contrib.rnn.GRUCell(rnn_dim)  #backward
            else:
                fw_rnn_cell = tf.contrib.rnn.LSTMCell(rnn_dim,
                                                      state_is_tuple=True)
                bw_rnn_cell = tf.contrib.rnn.LSTMCell(rnn_dim,
                                                      state_is_tuple=True)

            if rnn_num > 1:
                # NOTE(review): reusing one cell object per layer is the
                # legacy idiom; newer TF requires distinct cell instances.
                fw_rnn_cell = tf.contrib.rnn.MultiRNNCell([fw_rnn_cell] *
                                                          rnn_num,
                                                          state_is_tuple=True)
                bw_rnn_cell = tf.contrib.rnn.MultiRNNCell([bw_rnn_cell] *
                                                          rnn_num,
                                                          state_is_tuple=True)

        # Linear projection from concatenated fw/bw RNN states to tag scores.
        output_wrapper = HiddenLayer(rnn_dim * 2,
                                     self.nums_tags,
                                     activation='linear',
                                     name='hidden')

        #define model for each bucket
        for idx, bucket in enumerate(self.buckets_char):
            if idx == 1:
                # All buckets after the first share the same variables.
                scope.reuse_variables()
            t1 = time()
            batch_size = self.real_batches[idx]

            # Two inputs per example: the sentence pair for this bucket.
            input_v1 = tf.placeholder(tf.int32, [None, bucket],
                                      name='input_1' + str(bucket))
            input_v2 = tf.placeholder(tf.int32, [None, bucket],
                                      name='input_2' + str(bucket))
            self.input_v1.append([input_v1])
            self.input_v2.append([input_v2])
            #output = None
            output = []
            # Shard the batch across GPUs: each device takes a contiguous
            # batch_size_h-sized slice of both inputs.
            for i in range(self.num_gpus):
                with tf.device('/gpu:{}'.format(i)):
                    input_1 = input_v1[i * batch_size_h:(i + 1) * batch_size_h]

                    input_2 = input_v2[i * batch_size_h:(i + 1) * batch_size_h]

                    emb_set1 = []
                    emb_set2 = []

                    # Character embeddings for both sentences (shared table).
                    word_out1 = self.emb_layer(input_1)
                    word_out2 = self.emb_layer(input_2)
                    emb_set1.append(word_out1)
                    emb_set2.append(word_out2)

                    # if self.ngram is not None:
                    # 	for i in range(len(self.ngram)):
                    # 		input_g = tf.placeholder(tf.int32, [None, bucket], name='input_g' + str(i) + str(bucket))
                    # 		self.input_v[-1].append(input_g)
                    # 		gram_out = self.gram_layers[i](input_g)
                    # 		emb_set.append(gram_out)

                    if len(emb_set1) > 1:
                        # Only reachable if extra feature streams are added
                        # above; concatenates along the feature axis.
                        emb_out1 = tf.concat(axis=2, values=emb_set1)
                        emb_out2 = tf.concat(axis=2, values=emb_set2)

                    else:
                        emb_out1 = emb_set1[0]
                        emb_out2 = emb_set2[0]

                    # Dropout on the embeddings before the recurrent layer.
                    emb_out1 = DropoutLayer(dr)(emb_out1)
                    emb_out2 = DropoutLayer(dr)(emb_out2)

                    rnn_out = BiLSTM(rnn_dim,
                                     fw_cell=fw_rnn_cell,
                                     bw_cell=bw_rnn_cell,
                                     p=dr,
                                     name='BiLSTM' + str(bucket),
                                     scope='BiRNN')(emb_out1, emb_out2,
                                                    input_v1)

                    output_g = output_wrapper(rnn_out)
                    # if output == None:
                    # output = output_g
                    # else:
                    # output = tf.concat([output,output_g],axis = 0)
                    #pdb.set_trace()
                    output.append(output_g)
            self.output.append([output])

            # Gold tags are bucket-1 long — presumably boundary tags between
            # adjacent characters; TODO confirm against the data pipeline.
            self.output_.append([
                tf.placeholder(tf.int32, [None, bucket - 1],
                               name='tags' + str(bucket))
            ])
            self.bucket_dit[bucket] = idx

            print 'Bucket %d, %f seconds' % (idx + 1, time() - t1)

        assert len(self.input_v1) == len(self.output)

        self.params = tf.trainable_variables()

        self.saver = tf.train.Saver()
Ejemplo n.º 32
0
                                              representationsize,
                                              filtersizeContext),
                                poolsize=(1, kmaxContext))
layers.append(cnnContext)
if "middleContext" in config:
    hidden_in = nkernsContext * kmaxContext
else:
    cnnEntities = LeNetConvPoolLayer(rng=rng,
                                     filter_shape=(nkernsEntities, 1,
                                                   representationsize,
                                                   filtersizeEntities),
                                     poolsize=(1, kmaxEntities))
    layers.append(cnnEntities)
    hidden_in = 2 * (2 * nkernsContext * kmaxContext +
                     nkernsEntities * kmaxEntities)
hiddenLayer = HiddenLayer(rng=rng, n_in=hidden_in, n_out=hiddenUnits)
layers.append(hiddenLayer)
hiddenLayerET = HiddenLayer(rng=rng,
                            n_in=2 * nkernsContext * kmaxContext +
                            nkernsEntities * kmaxEntities,
                            n_out=hiddenUnitsET)
layers.append(hiddenLayerET)
randomInit = False
if doCRF:
    randomInit = True
outputLayer = LogisticRegression(n_in=hiddenUnits,
                                 n_out=numClasses,
                                 rng=rng,
                                 randomInit=randomInit)
layers.append(outputLayer)
outputLayerET = LogisticRegression(n_in=hiddenUnitsET,