def build_model():
    # Residual variant: each stage projects with a 1x1 conv, then applies three
    # unrolled (1, 9) conv iterations whose weights are tied (convNb/convNc
    # reuse convNa.W), each summed back onto the projection before batch norm.
    l_in = nn.layers.InputLayer(input_dims)

    conv1 = Conv2DLayer(incoming=l_in, num_filters=128, filter_size=(1, 9), stride=1,
                        border_mode='same', W=nn.init.Normal(std=std), nonlinearity=None)
    print 'conv1', nn.layers.get_output_shape(conv1)
    bn1 = BatchNormLayer(incoming=conv1, epsilon=1e-10,
                         nonlinearity=nn.nonlinearities.very_leaky_rectify)
    print 'bn1', nn.layers.get_output_shape(bn1)
    pool1 = Pool2DLayer(incoming=bn1, pool_size=(1, 4), stride=(1, 4))
    print 'pool1', nn.layers.get_output_shape(pool1)
    drop1 = nn.layers.DropoutLayer(incoming=pool1, p=p1)
    print 'drop1', nn.layers.get_output_shape(drop1)

    # Stage 2
    conv2 = Conv2DLayer(incoming=drop1, num_filters=128, filter_size=(1, 1), stride=1,
                        border_mode='same', W=nn.init.Normal(std=std), nonlinearity=None)
    print 'conv2', nn.layers.get_output_shape(conv2)
    bn2 = BatchNormLayer(incoming=conv2, epsilon=1e-10,
                         nonlinearity=nn.nonlinearities.very_leaky_rectify)
    print 'bn2', nn.layers.get_output_shape(bn2)
    conv2a = Conv2DLayer(incoming=bn2, num_filters=128, filter_size=(1, 9), stride=1,
                         border_mode='same', W=nn.init.Normal(std=std), b=None,
                         nonlinearity=None)
    print 'conv2a', nn.layers.get_output_shape(conv2a)
    sum2a = SumLayer(incomings=[conv2, conv2a], coeffs=1)
    print 'sum2a', nn.layers.get_output_shape(sum2a)
    bn2a = BatchNormLayer(incoming=sum2a, epsilon=1e-10,
                          nonlinearity=nn.nonlinearities.rectify)
    print 'bn2a', nn.layers.get_output_shape(bn2a)
    conv2b = Conv2DLayer(incoming=bn2a, num_filters=128, filter_size=(1, 9), stride=1,
                         border_mode='same', W=conv2a.W, b=None, nonlinearity=None)
    print 'conv2b', nn.layers.get_output_shape(conv2b)
    sum2b = SumLayer(incomings=[conv2, conv2b], coeffs=1)
    print 'sum2b', nn.layers.get_output_shape(sum2b)
    bn2b = BatchNormLayer(incoming=sum2b, epsilon=1e-10,
                          nonlinearity=nn.nonlinearities.rectify)
    print 'bn2b', nn.layers.get_output_shape(bn2b)
    conv2c = Conv2DLayer(incoming=bn2b, num_filters=128, filter_size=(1, 9), stride=1,
                         border_mode='same', W=conv2a.W, b=None, nonlinearity=None)
    print 'conv2c', nn.layers.get_output_shape(conv2c)
    sum2c = SumLayer(incomings=[conv2, conv2c], coeffs=1)
    print 'sum2c', nn.layers.get_output_shape(sum2c)
    bn2c = BatchNormLayer(incoming=sum2c, epsilon=1e-10,
                          nonlinearity=nn.nonlinearities.rectify)
    print 'bn2c', nn.layers.get_output_shape(bn2c)
    pool2 = Pool2DLayer(incoming=bn2c, pool_size=(1, 4), stride=(1, 4))
    print 'pool2', nn.layers.get_output_shape(pool2)
    drop2 = nn.layers.DropoutLayer(incoming=pool2, p=p2)
    print 'drop2', nn.layers.get_output_shape(drop2)

    # Stage 3 (same pattern, weights tied to conv3a.W)
    conv3 = Conv2DLayer(incoming=drop2, num_filters=128, filter_size=(1, 1), stride=1,
                        border_mode='same', W=nn.init.Normal(std=std), nonlinearity=None)
    print 'conv3', nn.layers.get_output_shape(conv3)
    bn3 = BatchNormLayer(incoming=conv3, epsilon=1e-10,
                         nonlinearity=nn.nonlinearities.very_leaky_rectify)
    print 'bn3', nn.layers.get_output_shape(bn3)
    conv3a = Conv2DLayer(incoming=bn3, num_filters=128, filter_size=(1, 9), stride=1,
                         border_mode='same', W=nn.init.Normal(std=std), b=None,
                         nonlinearity=None)
    print 'conv3a', nn.layers.get_output_shape(conv3a)
    sum3a = SumLayer(incomings=[conv3, conv3a], coeffs=1)
    print 'sum3a', nn.layers.get_output_shape(sum3a)
    bn3a = BatchNormLayer(incoming=sum3a, epsilon=1e-10,
                          nonlinearity=nn.nonlinearities.rectify)
    print 'bn3a', nn.layers.get_output_shape(bn3a)
    conv3b = Conv2DLayer(incoming=bn3a, num_filters=128, filter_size=(1, 9), stride=1,
                         border_mode='same', W=conv3a.W, b=None, nonlinearity=None)
    print 'conv3b', nn.layers.get_output_shape(conv3b)
    sum3b = SumLayer(incomings=[conv3, conv3b], coeffs=1)
    print 'sum3b', nn.layers.get_output_shape(sum3b)
    bn3b = BatchNormLayer(incoming=sum3b, epsilon=1e-10,
                          nonlinearity=nn.nonlinearities.rectify)
    print 'bn3b', nn.layers.get_output_shape(bn3b)
    conv3c = Conv2DLayer(incoming=bn3b, num_filters=128, filter_size=(1, 9), stride=1,
                         border_mode='same', W=conv3a.W, b=None, nonlinearity=None)
    print 'conv3c', nn.layers.get_output_shape(conv3c)
    sum3c = SumLayer(incomings=[conv3, conv3c], coeffs=1)
    print 'sum3c', nn.layers.get_output_shape(sum3c)
    bn3c = BatchNormLayer(incoming=sum3c, epsilon=1e-10,
                          nonlinearity=nn.nonlinearities.rectify)
    print 'bn3c', nn.layers.get_output_shape(bn3c)
    pool3 = Pool2DLayer(incoming=bn3c, pool_size=(1, 4), stride=(1, 4))
    print 'pool3', nn.layers.get_output_shape(pool3)
    drop3 = nn.layers.DropoutLayer(incoming=pool3, p=p3)
    print 'drop3', nn.layers.get_output_shape(drop3)

    # Stage 4
    conv4 = Conv2DLayer(incoming=drop3, num_filters=128, filter_size=(1, 1), stride=1,
                        border_mode='same', W=nn.init.Normal(std=std), nonlinearity=None)
    print 'conv4', nn.layers.get_output_shape(conv4)
    bn4 = BatchNormLayer(incoming=conv4, epsilon=1e-10,
                         nonlinearity=nn.nonlinearities.very_leaky_rectify)
    print 'bn4', nn.layers.get_output_shape(bn4)
    conv4a = Conv2DLayer(incoming=bn4, num_filters=128, filter_size=(1, 9), stride=1,
                         border_mode='same', W=nn.init.Normal(std=std), b=None,
                         nonlinearity=None)
    print 'conv4a', nn.layers.get_output_shape(conv4a)
    sum4a = SumLayer(incomings=[conv4, conv4a], coeffs=1)
    print 'sum4a', nn.layers.get_output_shape(sum4a)
    bn4a = BatchNormLayer(incoming=sum4a, epsilon=1e-10,
                          nonlinearity=nn.nonlinearities.rectify)
    print 'bn4a', nn.layers.get_output_shape(bn4a)
    conv4b = Conv2DLayer(incoming=bn4a, num_filters=128, filter_size=(1, 9), stride=1,
                         border_mode='same', W=conv4a.W, b=None, nonlinearity=None)
    print 'conv4b', nn.layers.get_output_shape(conv4b)
    sum4b = SumLayer(incomings=[conv4, conv4b], coeffs=1)
    print 'sum4b', nn.layers.get_output_shape(sum4b)
    bn4b = BatchNormLayer(incoming=sum4b, epsilon=1e-10,
                          nonlinearity=nn.nonlinearities.rectify)
    print 'bn4b', nn.layers.get_output_shape(bn4b)
    conv4c = Conv2DLayer(incoming=bn4b, num_filters=128, filter_size=(1, 9), stride=1,
                         border_mode='same', W=conv4a.W, b=None, nonlinearity=None)
    print 'conv4c', nn.layers.get_output_shape(conv4c)
    sum4c = SumLayer(incomings=[conv4, conv4c], coeffs=1)
    print 'sum4c', nn.layers.get_output_shape(sum4c)
    bn4c = BatchNormLayer(incoming=sum4c, epsilon=1e-10,
                          nonlinearity=nn.nonlinearities.rectify)
    print 'bn4c', nn.layers.get_output_shape(bn4c)
    pool4 = Pool2DLayer(incoming=bn4c, pool_size=(1, 4), stride=(1, 4))
    print 'pool4', nn.layers.get_output_shape(pool4)
    drop4 = nn.layers.DropoutLayer(incoming=pool4, p=p4)
    print 'drop4', nn.layers.get_output_shape(drop4)

    # Stage 5 (no dropout before the output layer)
    conv5 = Conv2DLayer(incoming=drop4, num_filters=128, filter_size=(1, 1), stride=1,
                        border_mode='same', W=nn.init.Normal(std=std), nonlinearity=None)
    print 'conv5', nn.layers.get_output_shape(conv5)
    bn5 = BatchNormLayer(incoming=conv5, epsilon=1e-10,
                         nonlinearity=nn.nonlinearities.very_leaky_rectify)
    print 'bn5', nn.layers.get_output_shape(bn5)
    conv5a = Conv2DLayer(incoming=bn5, num_filters=128, filter_size=(1, 9), stride=1,
                         border_mode='same', W=nn.init.Normal(std=std), b=None,
                         nonlinearity=None)
    print 'conv5a', nn.layers.get_output_shape(conv5a)
    sum5a = SumLayer(incomings=[conv5, conv5a], coeffs=1)
    print 'sum5a', nn.layers.get_output_shape(sum5a)
    bn5a = BatchNormLayer(incoming=sum5a, epsilon=1e-10,
                          nonlinearity=nn.nonlinearities.rectify)
    print 'bn5a', nn.layers.get_output_shape(bn5a)
    conv5b = Conv2DLayer(incoming=bn5a, num_filters=128, filter_size=(1, 9), stride=1,
                         border_mode='same', W=conv5a.W, b=None, nonlinearity=None)
    print 'conv5b', nn.layers.get_output_shape(conv5b)
    sum5b = SumLayer(incomings=[conv5, conv5b], coeffs=1)
    print 'sum5b', nn.layers.get_output_shape(sum5b)
    bn5b = BatchNormLayer(incoming=sum5b, epsilon=1e-10,
                          nonlinearity=nn.nonlinearities.rectify)
    print 'bn5b', nn.layers.get_output_shape(bn5b)
    conv5c = Conv2DLayer(incoming=bn5b, num_filters=128, filter_size=(1, 9), stride=1,
                         border_mode='same', W=conv5a.W, b=None, nonlinearity=None)
    print 'conv5c', nn.layers.get_output_shape(conv5c)
    sum5c = SumLayer(incomings=[conv5, conv5c], coeffs=1)
    print 'sum5c', nn.layers.get_output_shape(sum5c)
    bn5c = BatchNormLayer(incoming=sum5c, epsilon=1e-10,
                          nonlinearity=nn.nonlinearities.rectify)
    print 'bn5c', nn.layers.get_output_shape(bn5c)
    pool5 = Pool2DLayer(incoming=bn5c, pool_size=(1, 4), stride=(1, 4))
    print 'pool5', nn.layers.get_output_shape(pool5)

    l_out = nn.layers.DenseLayer(incoming=pool5, num_units=num_events,
                                 W=nn.init.Normal(std=std),
                                 nonlinearity=nn.nonlinearities.sigmoid)
    print 'l_out', nn.layers.get_output_shape(l_out)
    return l_out
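# --- Usage sketch (not from the original source): compiling train/predict
# functions for the model above. Assumes `nn` aliases lasagne and theano is
# importable, and that the sigmoid multi-label outputs are trained with
# binary cross-entropy; swap in whatever objective the project actually uses.
def compile_functions(l_out, learning_rate=1e-3):
    import theano
    import theano.tensor as T
    x = nn.layers.get_all_layers(l_out)[0].input_var  # the InputLayer's variable
    t = T.matrix('targets')  # (batch, num_events) binary event labels
    train_out = nn.layers.get_output(l_out, deterministic=False)  # dropout on
    eval_out = nn.layers.get_output(l_out, deterministic=True)    # dropout off
    loss = nn.objectives.binary_crossentropy(train_out, t).mean()
    params = nn.layers.get_all_params(l_out, trainable=True)
    updates = nn.updates.adam(loss, params, learning_rate=learning_rate)
    return (theano.function([x, t], loss, updates=updates),
            theano.function([x], eval_out))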
def build_model():
    # Plain variant: nine identical conv -> batch-norm -> pool stages, with
    # dropout after every stage except the last, then a sigmoid output layer.
    layer = nn.layers.InputLayer(input_dims)
    for i in range(1, 10):
        layer = Conv2DLayer(incoming=layer, num_filters=64, filter_size=(1, 9), stride=1,
                            border_mode='same', W=nn.init.Normal(std=std), nonlinearity=None)
        print 'conv%d' % i, nn.layers.get_output_shape(layer)
        layer = BatchNormLayer(incoming=layer, epsilon=1e-10,
                               nonlinearity=nn.nonlinearities.leaky_rectify)
        print 'bn%d' % i, nn.layers.get_output_shape(layer)
        layer = Pool2DLayer(incoming=layer, pool_size=(1, 2), stride=(1, 2))
        print 'pool%d' % i, nn.layers.get_output_shape(layer)
        if i < 9:
            layer = nn.layers.DropoutLayer(incoming=layer, p=p)
            print 'drop%d' % i, nn.layers.get_output_shape(layer)
    l_out = nn.layers.DenseLayer(incoming=layer, num_units=num_events,
                                 W=nn.init.Normal(std=std),
                                 nonlinearity=nn.nonlinearities.sigmoid)
    print 'l_out', nn.layers.get_output_shape(l_out)
    return l_out
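# Back-of-the-envelope shape check (illustrative only): with 'same'
# convolutions the time axis is shrunk solely by the pools, so nine (1, 2)
# stages divide it by 2**9 = 512, versus 4**5 = 1024 for the five (1, 4)
# pools of the residual variant above.
def pooled_length(n_samples, pool=2, stages=9):
    for _ in range(stages):
        n_samples //= pool
    return n_samples

# e.g. pooled_length(4096) -> 8 time steps reaching the dense sigmoid layer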
def build_model():
    l_in = nn.layers.InputLayer(input_dims)
    # NOTE: pool0 is built and its shape printed, but nothing downstream
    # consumes it as written; conv1 takes l_in directly.
    pool0 = Pool2DLayer(incoming=l_in, pool_size=(1, 8), stride=(1, 8), mode='average')
    print 'pool0', nn.layers.get_output_shape(pool0)

    conv1 = Conv2DLayer(incoming=l_in, num_filters=8, filter_size=(1, 9), stride=1,
                        border_mode='same', W=nn.init.Normal(std=std), nonlinearity=None)
    print 'conv1', nn.layers.get_output_shape(conv1)
    bn1 = BatchNormLayer(incoming=conv1, epsilon=1e-10,
                         nonlinearity=nn.nonlinearities.leaky_rectify)
    print 'bn1', nn.layers.get_output_shape(bn1)
    pool1 = Pool2DLayer(incoming=bn1, pool_size=(1, 2), stride=(1, 2))
    print 'pool1', nn.layers.get_output_shape(pool1)
    drop1 = nn.layers.DropoutLayer(incoming=pool1, p=p)
    print 'drop1', nn.layers.get_output_shape(drop1)

    conv2 = Conv2DLayer(incoming=drop1, num_filters=16, filter_size=(1, 9), stride=1,
                        border_mode='same', W=nn.init.Normal(std=std), nonlinearity=None)
    print 'conv2', nn.layers.get_output_shape(conv2)
    bn2 = BatchNormLayer(incoming=conv2, epsilon=1e-10,
                         nonlinearity=nn.nonlinearities.leaky_rectify)
    print 'bn2', nn.layers.get_output_shape(bn2)
    pool2 = Pool2DLayer(incoming=bn2, pool_size=(1, 2), stride=(1, 2))
    print 'pool2', nn.layers.get_output_shape(pool2)
    drop2 = nn.layers.DropoutLayer(incoming=pool2, p=p)
    print 'drop2', nn.layers.get_output_shape(drop2)

    # Fully connected head; DenseLayer flattens the trailing axes of drop2.
    fc3 = nn.layers.DenseLayer(incoming=drop2, num_units=1024,
                               W=nn.init.Normal(std=std), nonlinearity=None)
    bn3 = BatchNormLayer(incoming=fc3, epsilon=1e-10,
                         nonlinearity=nn.nonlinearities.leaky_rectify)
    print 'bn3', nn.layers.get_output_shape(bn3)
    drop3 = nn.layers.DropoutLayer(incoming=bn3, p=p)
    fc4 = nn.layers.DenseLayer(incoming=drop3, num_units=1024,
                               W=nn.init.Normal(std=std), nonlinearity=None)
    bn4 = BatchNormLayer(incoming=fc4, epsilon=1e-10,
                         nonlinearity=nn.nonlinearities.leaky_rectify)
    print 'bn4', nn.layers.get_output_shape(bn4)
    drop4 = nn.layers.DropoutLayer(incoming=bn4, p=p)
    fc5 = nn.layers.DenseLayer(incoming=drop4, num_units=1024,
                               W=nn.init.Normal(std=std), nonlinearity=None)
    bn5 = BatchNormLayer(incoming=fc5, epsilon=1e-10,
                         nonlinearity=nn.nonlinearities.leaky_rectify)
    print 'bn5', nn.layers.get_output_shape(bn5)
    drop5 = nn.layers.DropoutLayer(incoming=bn5, p=p)

    l_out = nn.layers.DenseLayer(incoming=drop5, num_units=num_events,
                                 W=nn.init.Normal(std=std),
                                 nonlinearity=nn.nonlinearities.sigmoid)
    print 'l_out', nn.layers.get_output_shape(l_out)
    return l_out
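# Quick capacity check (a sketch, not in the original file): assumes the
# globals read by build_model() (input_dims, std, p, num_events) are set.
# count_params is a stock lasagne helper that totals the trainable weights,
# handy for comparing the three variants above.
l_out = build_model()
print 'trainable parameters:', nn.layers.count_params(l_out, trainable=True)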
def build_single_scale(l_in, param=None):
    print
    if param is None:
        param = {}

    # Each conv pulls its weights/biases out of `param` when present, so a
    # second call can share parameters with an earlier scale.
    conv1 = Conv2DLayer(
        incoming=l_in, num_filters=64, filter_size=(1, 7), stride=1, border_mode='same',
        W=param['conv1_w'] if 'conv1_w' in param else nn.init.Normal(std=std),
        b=param['conv1_b'] if 'conv1_b' in param else nn.init.Constant(0.),
        nonlinearity=None)
    print 'conv1', nn.layers.get_output_shape(conv1)
    bn1 = BatchNormLayer(incoming=conv1, epsilon=1e-10,
                         nonlinearity=nn.nonlinearities.leaky_rectify)
    print 'bn1', nn.layers.get_output_shape(bn1)
    pool1 = Pool2DLayer(incoming=bn1, pool_size=(1, 4), stride=(1, 4))
    print 'pool1', nn.layers.get_output_shape(pool1)
    drop1 = nn.layers.DropoutLayer(incoming=pool1, p=p1)
    print 'drop1', nn.layers.get_output_shape(drop1)

    conv2 = Conv2DLayer(
        incoming=drop1, num_filters=64, filter_size=(1, 7), stride=1, border_mode='same',
        W=param['conv2_w'] if 'conv2_w' in param else nn.init.Normal(std=std),
        b=param['conv2_b'] if 'conv2_b' in param else nn.init.Constant(0.),
        nonlinearity=None)
    print 'conv2', nn.layers.get_output_shape(conv2)
    bn2 = BatchNormLayer(incoming=conv2, epsilon=1e-10,
                         nonlinearity=nn.nonlinearities.leaky_rectify)
    print 'bn2', nn.layers.get_output_shape(bn2)
    conv2a = Conv2DLayer(
        incoming=bn2, num_filters=64, filter_size=(1, 7), stride=1, border_mode='same',
        W=param['conv2a_w'] if 'conv2a_w' in param else nn.init.Normal(std=std),
        b=param['conv2a_b'] if 'conv2a_b' in param else nn.init.Constant(0.),
        nonlinearity=None)
    print 'conv2a', nn.layers.get_output_shape(conv2a)
    bn2a = BatchNormLayer(incoming=conv2a, epsilon=1e-10,
                          nonlinearity=nn.nonlinearities.leaky_rectify)
    print 'bn2a', nn.layers.get_output_shape(bn2a)
    pool2 = Pool2DLayer(incoming=bn2a, pool_size=(1, 4), stride=(1, 4))
    print 'pool2', nn.layers.get_output_shape(pool2)
    drop2 = nn.layers.DropoutLayer(incoming=pool2, p=p2)
    print 'drop2', nn.layers.get_output_shape(drop2)

    conv3 = Conv2DLayer(
        incoming=drop2, num_filters=64, filter_size=(1, 7), stride=1, border_mode='same',
        W=param['conv3_w'] if 'conv3_w' in param else nn.init.Normal(std=std),
        b=param['conv3_b'] if 'conv3_b' in param else nn.init.Constant(0.),
        nonlinearity=None)
    print 'conv3', nn.layers.get_output_shape(conv3)
    bn3 = BatchNormLayer(incoming=conv3, epsilon=1e-10,
                         nonlinearity=nn.nonlinearities.leaky_rectify)
    print 'bn3', nn.layers.get_output_shape(bn3)
    conv3a = Conv2DLayer(
        incoming=bn3, num_filters=64, filter_size=(1, 7), stride=1, border_mode='same',
        W=param['conv3a_w'] if 'conv3a_w' in param else nn.init.Normal(std=std),
        b=param['conv3a_b'] if 'conv3a_b' in param else nn.init.Constant(0.),
        nonlinearity=None)
    print 'conv3a', nn.layers.get_output_shape(conv3a)
    bn3a = BatchNormLayer(incoming=conv3a, epsilon=1e-10,
                          nonlinearity=nn.nonlinearities.leaky_rectify)
    print 'bn3a', nn.layers.get_output_shape(bn3a)
    conv3b = Conv2DLayer(
        incoming=bn3a, num_filters=64, filter_size=(1, 7), stride=1, border_mode='same',
        W=param['conv3b_w'] if 'conv3b_w' in param else nn.init.Normal(std=std),
        b=param['conv3b_b'] if 'conv3b_b' in param else nn.init.Constant(0.),
        nonlinearity=None)
    print 'conv3b', nn.layers.get_output_shape(conv3b)
    bn3b = BatchNormLayer(incoming=conv3b, epsilon=1e-10,
                          nonlinearity=nn.nonlinearities.leaky_rectify)
    print 'bn3b', nn.layers.get_output_shape(bn3b)
    pool3 = Pool2DLayer(incoming=bn3b, pool_size=(1, 2), stride=(1, 2))
    print 'pool3', nn.layers.get_output_shape(pool3)
    drop3 = nn.layers.DropoutLayer(incoming=pool3, p=p3)
    print 'drop3', nn.layers.get_output_shape(drop3)

    conv4 = Conv2DLayer(
        incoming=drop3, num_filters=64, filter_size=(1, 7), stride=1, border_mode='same',
        W=param['conv4_w'] if 'conv4_w' in param else nn.init.Normal(std=std),
        b=param['conv4_b'] if 'conv4_b' in param else nn.init.Constant(0.),
        nonlinearity=None)
    print 'conv4', nn.layers.get_output_shape(conv4)
    bn4 = BatchNormLayer(incoming=conv4, epsilon=1e-10,
                         nonlinearity=nn.nonlinearities.leaky_rectify)
    print 'bn4', nn.layers.get_output_shape(bn4)
    conv4a = Conv2DLayer(
        incoming=bn4, num_filters=64, filter_size=(1, 7), stride=1, border_mode='same',
        W=param['conv4a_w'] if 'conv4a_w' in param else nn.init.Normal(std=std),
        b=param['conv4a_b'] if 'conv4a_b' in param else nn.init.Constant(0.),
        nonlinearity=None)
    print 'conv4a', nn.layers.get_output_shape(conv4a)
    bn4a = BatchNormLayer(incoming=conv4a, epsilon=1e-10,
                          nonlinearity=nn.nonlinearities.leaky_rectify)
    print 'bn4a', nn.layers.get_output_shape(bn4a)
    conv4b = Conv2DLayer(
        incoming=bn4a, num_filters=64, filter_size=(1, 7), stride=1, border_mode='same',
        W=param['conv4b_w'] if 'conv4b_w' in param else nn.init.Normal(std=std),
        b=param['conv4b_b'] if 'conv4b_b' in param else nn.init.Constant(0.),
        nonlinearity=None)
    print 'conv4b', nn.layers.get_output_shape(conv4b)
    bn4b = BatchNormLayer(incoming=conv4b, epsilon=1e-10,
                          nonlinearity=nn.nonlinearities.leaky_rectify)
    print 'bn4b', nn.layers.get_output_shape(bn4b)
    pool4 = Pool2DLayer(incoming=bn4b, pool_size=(1, 2), stride=(1, 2))
    print 'pool4', nn.layers.get_output_shape(pool4)
    drop4 = nn.layers.DropoutLayer(incoming=pool4, p=p4)
    print 'drop4', nn.layers.get_output_shape(drop4)

    print
    if not param:
        # First call: export this scale's parameters so later scales can share them.
        param['conv1_w'] = conv1.W
        param['conv2_w'] = conv2.W
        param['conv2a_w'] = conv2a.W
        param['conv3_w'] = conv3.W
        param['conv3a_w'] = conv3a.W
        param['conv3b_w'] = conv3b.W
        param['conv4_w'] = conv4.W
        param['conv4a_w'] = conv4a.W
        param['conv4b_w'] = conv4b.W
        param['conv1_b'] = conv1.b
        param['conv2_b'] = conv2.b
        param['conv2a_b'] = conv2a.b
        param['conv3_b'] = conv3.b
        param['conv3a_b'] = conv3a.b
        param['conv3b_b'] = conv3b.b
        param['conv4_b'] = conv4.b
        param['conv4a_b'] = conv4a.b
        param['conv4b_b'] = conv4b.b
        return drop4, param
    else:
        return drop4
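# --- Usage sketch for the weight-sharing contract (illustrative input layers,
# not from the original source; assumes an `input_dims` shape tuple like the
# build_model variants above). The first call creates fresh parameters and
# returns them; passing that dict into a second call reuses the same W/b
# objects, so both scales are processed by identical filters.
l_in_full = nn.layers.InputLayer(input_dims)    # full-resolution signal
l_in_coarse = nn.layers.InputLayer(input_dims)  # hypothetical second scale
feat_full, shared = build_single_scale(l_in_full)
feat_coarse = build_single_scale(l_in_coarse, param=shared)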
def __init__(self, nc, nf, kwargs):
    # `kwargs` is a plain dict of hyper-parameters (indexed below), not **kwargs.
    assert nf
    assert nc
    self.kwargs = extract_rnn_params(kwargs)
    for pname in RDNN.param_names:
        setattr(self, pname, kwargs[pname])
    self.lr = theano.shared(np.array(self.lr, dtype='float32'), allow_downcast=True)
    self.gclip = False if self.gclip == 0 else self.gclip  # mysteriously, we need this line
    self.activation = [self.activation] * len(self.n_hidden)
    self.deep_ltypes = [act_str.split('-')[1] for act_str in self.activation]
    self.opt = getattr(lasagne.updates, self.opt)
    ldepth = len(self.n_hidden)

    # network
    default_gate = lambda: lasagne.layers.Gate(W_in=lasagne.init.GlorotUniform(),
                                               W_hid=lasagne.init.GlorotUniform())
    forget_gate = lambda: lasagne.layers.Gate(W_in=lasagne.init.GlorotUniform(),
                                              W_hid=lasagne.init.GlorotUniform(),
                                              b=lasagne.init.Constant(self.fbias))
    # Alternative initialization, kept for reference:
    # default_gate = lambda: lasagne.layers.Gate(W_in=lasagne.init.Orthogonal(),
    #                                            W_hid=lasagne.init.Orthogonal())
    # forget_gate = lambda: lasagne.layers.Gate(W_in=lasagne.init.Orthogonal(),
    #                                           W_hid=lasagne.init.Orthogonal(),
    #                                           b=lasagne.init.Constant(self.fbias))

    l_in = lasagne.layers.InputLayer(shape=(None, None, nf))
    logging.debug('l_in: {}'.format(lasagne.layers.get_output_shape(l_in)))
    # symbolic references to the input_var shape
    N_BATCH_VAR, MAX_SEQ_LEN_VAR, _ = l_in.input_var.shape
    # l_mask = lasagne.layers.InputLayer(shape=(N_BATCH_VAR, MAX_SEQ_LEN_VAR))
    l_mask = lasagne.layers.InputLayer(shape=(None, None))
    logging.debug('l_mask: {}'.format(lasagne.layers.get_output_shape(l_mask)))

    curlayer = l_in
    if self.emb:
        # Linear embedding applied per time step via reshape -> dense -> reshape.
        l_reshape = lasagne.layers.ReshapeLayer(l_in, (-1, nf))
        logging.debug('l_reshape: {}'.format(lasagne.layers.get_output_shape(l_reshape)))
        l_emb = lasagne.layers.DenseLayer(l_reshape, num_units=self.emb,
                                          nonlinearity=None, b=None)
        logging.debug('l_emb: {}'.format(lasagne.layers.get_output_shape(l_emb)))
        l_emb = lasagne.layers.ReshapeLayer(l_emb, (N_BATCH_VAR, MAX_SEQ_LEN_VAR, self.emb))
        logging.debug('l_emb: {}'.format(lasagne.layers.get_output_shape(l_emb)))
        curlayer = l_emb
    if self.drates[0] > 0:
        l_in_drop = lasagne.layers.DropoutLayer(curlayer, p=self.drates[0])
        logging.debug('l_drop: {}'.format(lasagne.layers.get_output_shape(l_in_drop)))
        curlayer = l_in_drop

    self.layers = [curlayer]
    self.blayers = []
    for level, ltype, n_hidden in zip(range(1, ldepth + 1), self.deep_ltypes, self.n_hidden):
        prev_layer = self.layers[level - 1]
        if ltype in ['relu', 'lrelu', 'relu6', 'elu']:
            LayerType = lasagne.layers.RecurrentLayer
            if ltype == 'relu':
                nonlin = lasagne.nonlinearities.rectify
            elif ltype == 'lrelu':
                nonlin = lasagne.nonlinearities.leaky_rectify
            elif ltype == 'relu6':
                # elementwise clip at 6 (T.minimum; T.min would be a reduction)
                nonlin = lambda x: T.minimum(lasagne.nonlinearities.rectify(x), 6)
            elif ltype == 'elu':
                nonlin = lambda x: T.switch(x >= 0, x, T.exp(x) - 1)
            l_forward = LayerType(prev_layer, n_hidden, mask_input=l_mask,
                                  grad_clipping=self.gclip, gradient_steps=self.truncate,
                                  W_hid_to_hid=Identity(),
                                  W_in_to_hid=lasagne.init.GlorotUniform(gain='relu'),
                                  nonlinearity=nonlin)
            l_backward = LayerType(prev_layer, n_hidden, mask_input=l_mask,
                                   grad_clipping=self.gclip, gradient_steps=self.truncate,
                                   W_hid_to_hid=Identity(),
                                   W_in_to_hid=lasagne.init.GlorotUniform(gain='relu'),
                                   nonlinearity=nonlin, backwards=True)
        elif ltype == 'lstm':
            LayerType = lasagne.layers.LSTMLayer
            l_forward = LayerType(prev_layer, n_hidden, ingate=default_gate(),
                                  forgetgate=forget_gate(), outgate=default_gate(),
                                  mask_input=l_mask, grad_clipping=self.gclip,
                                  gradient_steps=self.truncate)
            l_backward = LayerType(prev_layer, n_hidden, ingate=default_gate(),
                                   forgetgate=forget_gate(), outgate=default_gate(),
                                   mask_input=l_mask, grad_clipping=self.gclip,
                                   gradient_steps=self.truncate, backwards=True)
        elif ltype == 'gru':
            LayerType = lasagne.layers.GRULayer
            l_forward = LayerType(prev_layer, n_hidden, mask_input=l_mask,
                                  grad_clipping=self.gclip, gradient_steps=self.truncate)
            l_backward = LayerType(prev_layer, n_hidden, mask_input=l_mask,
                                   grad_clipping=self.gclip, gradient_steps=self.truncate,
                                   backwards=True)
        logging.debug('l_forward: {}'.format(lasagne.layers.get_output_shape(l_forward)))
        logging.debug('l_backward: {}'.format(lasagne.layers.get_output_shape(l_backward)))

        if self.fbmerge == 'concat':
            l_fbmerge = lasagne.layers.ConcatLayer([l_forward, l_backward], axis=2)
        elif self.fbmerge == 'sum':
            l_fbmerge = lasagne.layers.ElemwiseSumLayer([l_forward, l_backward])
        logging.debug('l_fbmerge: {}'.format(lasagne.layers.get_output_shape(l_fbmerge)))

        if self.batch_norm:
            logging.info('using batch norm')
            l_fbmerge = BatchNormLayer(l_fbmerge, axes=(0, 1))
        if self.drates[level] > 0:
            l_fbmerge = lasagne.layers.DropoutLayer(l_fbmerge, p=self.drates[level])
        self.blayers.append((l_forward, l_backward))
        self.layers.append(l_fbmerge)

    l_fbmerge = (lasagne.layers.ConcatLayer([l_fbmerge, curlayer], axis=2)
                 if self.in2out else l_fbmerge)

    if self.recout == 1:
        logging.info('using recout:%d.' % self.recout)
        l_out = lasagne.layers.RecurrentLayer(
            l_fbmerge, num_units=nc, mask_input=l_mask, W_hid_to_hid=Identity(),
            W_in_to_hid=lasagne.init.GlorotUniform(), nonlinearity=log_softmax)
        # W_in_to_hid=lasagne.init.GlorotUniform(), nonlinearity=lasagne.nonlinearities.softmax) CHANGED
        logging.debug('l_out: {}'.format(lasagne.layers.get_output_shape(l_out)))
    elif self.recout == 2:
        logging.info('using recout:%d.' % self.recout)
        l_fout = lasagne.layers.RecurrentLayer(
            l_fbmerge, num_units=nc, mask_input=l_mask, W_hid_to_hid=Identity(),
            W_in_to_hid=lasagne.init.GlorotUniform(), nonlinearity=log_softmax)
        l_bout = lasagne.layers.RecurrentLayer(
            l_fbmerge, num_units=nc, mask_input=l_mask, W_hid_to_hid=Identity(),
            W_in_to_hid=lasagne.init.GlorotUniform(), nonlinearity=log_softmax,
            backwards=True)
        l_out = lasagne.layers.ElemwiseSumLayer([l_fout, l_bout], coeffs=0.5)
        # l_out = LogSoftMerge([l_fout, l_bout])
        logging.debug('l_out: {}'.format(lasagne.layers.get_output_shape(l_out)))
    else:
        l_reshape = lasagne.layers.ReshapeLayer(
            l_fbmerge, (-1, self.n_hidden[-1] * (2 if self.fbmerge == 'concat' else 1)))
        logging.debug('l_reshape: {}'.format(lasagne.layers.get_output_shape(l_reshape)))
        l_rec_out = lasagne.layers.DenseLayer(l_reshape, num_units=nc,
                                              nonlinearity=log_softmax)
        logging.debug('l_rec_out: {}'.format(lasagne.layers.get_output_shape(l_rec_out)))
        l_out = lasagne.layers.ReshapeLayer(l_rec_out, (N_BATCH_VAR, MAX_SEQ_LEN_VAR, nc))
        logging.debug('l_out: {}'.format(lasagne.layers.get_output_shape(l_out)))
        self.l_soft_out = l_rec_out

    self.output_layer = l_out

    target_output = T.tensor3('target_output')
    out_mask = T.tensor3('mask')

    # def cost(output):
    #     return -T.sum(out_mask * target_output * T.log(output)) / T.sum(out_mask)

    def cost(output):  # expects log-softmax output
        return -T.sum(out_mask * target_output * output) / T.sum(out_mask)

    cost_train = cost(lasagne.layers.get_output(l_out, deterministic=False))
    cost_eval = cost(lasagne.layers.get_output(l_out, deterministic=True))

    all_params = lasagne.layers.get_all_params(l_out, trainable=True)
    logging.debug(all_params)
    f_hid2hid = l_forward.get_params()[-1]
    b_hid2hid = l_backward.get_params()[-1]
    self.recout_hid2hid = lambda: (l_out.get_params() if self.recout == 0
                                   else (lambda: l_out.get_params()[-1].get_value()))

    grads = T.grad(cost_train, all_params)
    all_grads, total_norm = lasagne.updates.total_norm_constraint(grads, self.norm,
                                                                  return_norm=True)
    # all_grads.append(grads[-2])
    # all_grads.append(grads[-1])
    # If the global norm blows up to NaN/Inf, fall back to a small multiple of
    # the parameter values instead of the unusable gradients.
    all_grads = [T.switch(T.or_(T.isnan(total_norm), T.isinf(total_norm)), p * 0.01, g)
                 for g, p in zip(all_grads, all_params)]

    if self.gnoise:
        from theano.tensor.shared_randomstreams import RandomStreams
        srng = RandomStreams(seed=1234)
        e_prev = theano.shared(lasagne.utils.floatX(0.))
        nu = 0.01
        gamma = 0.55
        # annealed Gaussian gradient noise: std_t = nu / (1 + t) ** gamma
        gs = [g + srng.normal(T.shape(g), std=(nu / ((1 + e_prev) ** gamma)))
              for g in all_grads]
        updates = self.opt(gs, all_params, self.lr, self.eps)
        updates[e_prev] = e_prev + 1
    else:
        updates = self.opt(all_grads, all_params, self.lr, self.eps)

    logging.info("Compiling functions...")
    self.train_model = theano.function(
        inputs=[l_in.input_var, target_output, l_mask.input_var, out_mask],
        outputs=cost_train, updates=updates, allow_input_downcast=True)
    self.predict_model = theano.function(
        inputs=[l_in.input_var, target_output, l_mask.input_var, out_mask],
        outputs=[cost_eval, lasagne.layers.get_output(l_out, deterministic=True)])
    # aux
    self.train_model_debug = theano.function(
        inputs=[l_in.input_var, target_output, l_mask.input_var, out_mask],
        outputs=[cost_train]
        + lasagne.layers.get_output([l_out, l_fbmerge], deterministic=True)
        + [total_norm],
        updates=updates)
    self.compute_cost = theano.function(
        [l_in.input_var, target_output, l_mask.input_var, out_mask], cost_eval)
    self.compute_cost_train = theano.function(
        [l_in.input_var, target_output, l_mask.input_var, out_mask], cost_train)
    # self.info_model = theano.function([], recout_hid2hid)
    logging.info("Compiling done.")
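# --- Usage sketch (illustrative, not from the original source): the compiled
# functions take (X, Y, input mask, output mask). Shapes follow the graph
# above: X is (batch, max_seq_len, nf); Y and the output mask are
# (batch, max_seq_len, nc) with Y one-hot; the input mask is
# (batch, max_seq_len). `rdnn`, `nf`, and `nc` are hypothetical stand-ins.
import numpy as np

batch, seqlen = 32, 100
X = np.zeros((batch, seqlen, nf), dtype='float32')
Y = np.zeros((batch, seqlen, nc), dtype='float32')
in_mask = np.ones((batch, seqlen), dtype='float32')
out_mask = np.ones((batch, seqlen, nc), dtype='float32')
train_cost = rdnn.train_model(X, Y, in_mask, out_mask)          # one update step
eval_cost, log_probs = rdnn.predict_model(X, Y, in_mask, out_mask)  # log-softmax scores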