def vgg_train(weights=None):
    print("Compiling VGG Model...")
    model = Sequential()
    # input: 64x64 images with 3 channels -> (3, 64, 64) tensors.
    # this applies 32 convolution filters of size 3x3 each.
    model.add(Convolution2D(32, 3, 3, border_mode='valid',
                            input_shape=(3, 64, 64)))
    model.add(Activation('relu'))
    model.add(Convolution2D(32, 3, 3))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Convolution2D(64, 3, 3, border_mode='valid'))
    model.add(Activation('relu'))
    model.add(Convolution2D(64, 3, 3))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.50))

    model.add(Flatten())
    # Note: Keras does automatic shape inference.
    model.add(Dense(256, W_regularizer=WeightRegularizer(l1=1e-6, l2=1e-6)))
    model.add(Activation('relu'))
    model.add(Dropout(0.50))

    model.add(Dense(200, W_regularizer=WeightRegularizer(l1=1e-5, l2=1e-5)))
    model.add(Activation('softmax'))

    if weights is not None:
        model.load_weights(weights)

    return model
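# Usage sketch (an assumption, not part of the original source): compiling and
# fitting the model returned by vgg_train() on random data of the documented
# (3, 64, 64) input shape, with Keras 1.x-style arguments.
import numpy as np
from keras.optimizers import SGD

X_batch = np.random.rand(8, 3, 64, 64).astype('float32')  # hypothetical images
Y_batch = np.eye(200)[np.random.randint(0, 200, 8)]       # hypothetical one-hot labels
net = vgg_train()
net.compile(loss='categorical_crossentropy',
            optimizer=SGD(lr=0.01, momentum=0.9, nesterov=True),
            metrics=['accuracy'])
net.fit(X_batch, Y_batch, batch_size=8, nb_epoch=1)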
def VGG_16(weights_path=None):
    #pretrained_weights = get_weight_dict_from_path(weights_path)
    model = Sequential()
    model.add(ZeroPadding2D((1, 1), input_shape=(3, 64, 64)))
    model.add(Convolution2D(64, 3, 3, activation='relu'))  #, weights=pretrained_weights['layer_1'].values()))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(64, 3, 3, activation='relu'))  #, weights=pretrained_weights['layer_3'].values()))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(128, 3, 3, activation='relu'))  #, weights=pretrained_weights['layer_6'].values()))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(128, 3, 3, activation='relu'))  #, weights=pretrained_weights['layer_8'].values()))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(256, 3, 3, activation='relu'))  #, weights=pretrained_weights['layer_11'].values()))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(256, 3, 3, activation='relu'))  #, weights=pretrained_weights['layer_13'].values()))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(256, 3, 3, activation='relu'))  #, weights=pretrained_weights['layer_15'].values()))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))  #, weights=pretrained_weights['layer_18'].values()))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))  #, weights=pretrained_weights['layer_20'].values()))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))  #, weights=pretrained_weights['layer_22'].values()))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu',
                            W_regularizer=WeightRegularizer(l1=1e-7, l2=1e-7),
                            activity_regularizer=ActivityRegularizer(l1=1e-7, l2=1e-7)))  #, weights=pretrained_weights['layer_25'].values()))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu',
                            W_regularizer=WeightRegularizer(l1=1e-6, l2=1e-6),
                            activity_regularizer=ActivityRegularizer(l1=1e-6, l2=1e-6)))  #, weights=pretrained_weights['layer_27'].values()))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu',
                            W_regularizer=WeightRegularizer(l1=1e-5, l2=1e-5),
                            activity_regularizer=ActivityRegularizer(l1=1e-5, l2=1e-5)))  #, weights=pretrained_weights['layer_29'].values()))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(Flatten())
    model.add(Dense(4096, activation='relu',
                    W_regularizer=WeightRegularizer(l1=1e-4, l2=1e-4),
                    activity_regularizer=ActivityRegularizer(l1=1e-5, l2=1e-5)))
    model.add(Dropout(0.75))
    model.add(Dense(4096, activation='relu',
                    W_regularizer=WeightRegularizer(l1=1e-4, l2=1e-4),
                    activity_regularizer=ActivityRegularizer(l1=1e-5, l2=1e-5)))
    model.add(Dropout(0.75))
    #model.add(Dense(200, activation='softmax'))
    model.add(Dense(200))
    model.add(Activation('softmax'))

    if weights_path:
        model.load_weights(weights_path)
    #model.layers[-1] = Dense(200, activation="softmax")  # replace with our layer

    return model
def new_model(self, fresh=False, compile=True):
    self.model = Sequential()
    drop_cl = core.Dropout
    if self.l1 != 0 or self.l2 != 0:
        regularizer = WeightRegularizer(l1=self.l1, l2=self.l2)
    else:
        regularizer = None
    if self.enc_decs and not fresh:
        for (i, enc) in enumerate(ae.layers[0].encoder for ae in self.enc_decs):
            if self.drop_rate != 0:
                self.model.add(drop_cl(self.drop_rate,
                                       input_shape=(self.layer_sizes[i],)))
            if self.sigma_base != 0:
                self.model.add(noise.GaussianNoise(
                    self.sigma_base * (self.sigma_fact ** -i)))
            self.model.add(enc)
    else:
        for (i, (n_in, n_out)) in enumerate(zip(self.layer_sizes[:-1],
                                                self.layer_sizes[1:])):
            if self.drop_rate != 0:
                self.model.add(drop_cl(self.drop_rate, input_shape=(n_in,)))
            if self.sigma_base != 0:
                self.model.add(noise.GaussianNoise(
                    self.sigma_base * (self.sigma_fact ** -i)))
            self.model.add(core.Dense(input_dim=n_in,
                                      output_dim=n_out,
                                      activation='sigmoid',
                                      W_regularizer=regularizer))
    #TODO ?
    self.model.add(core.Dense(input_dim=self.layer_sizes[-1],
                              output_dim=len(phase_names),
                              activation='softmax',
                              W_regularizer=regularizer))
    if compile:
        self.model.compile(loss=self.cls_lss, optimizer=self.cls_opt)
def model(hidden_dim=512, input_dim=28 * 28, sigma_regularization=1e-3,
          mu_regularization=1e-5, k=10,
          activation=lambda x: K.relu(x, 1.0 / 5.5)):
    """Create two layer MLP with softmax output"""
    _x = Input(shape=(input_dim,))
    layer = lambda output_dim, activation: BayesianDense(
        output_dim,
        activation=activation,
        W_sigma_regularizer=VariationalRegularizer(weight=sigma_regularization),
        b_sigma_regularizer=VariationalRegularizer(weight=sigma_regularization),
        W_regularizer=WeightRegularizer(l1=mu_regularization))
    h1 = layer(hidden_dim, activation)
    h2 = layer(hidden_dim, activation)
    y = layer(k, 'softmax')
    _y = y(h2(h1(_x)))
    m = Model(_x, _y)
    m.compile(Adam(1e-3), loss='categorical_crossentropy')
    return m
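# Usage sketch (assumption): training the Bayesian MLP above on random
# MNIST-shaped data. BayesianDense and VariationalRegularizer come from the
# snippet's own (unshown) imports, so this only runs in its original context.
import numpy as np

x_train = np.random.rand(256, 28 * 28).astype('float32')  # hypothetical inputs
y_train = np.eye(10)[np.random.randint(0, 10, 256)]       # hypothetical one-hot labels
m = model(hidden_dim=512, input_dim=28 * 28, k=10)
m.fit(x_train, y_train, nb_epoch=2, batch_size=128)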
def new_model(self, fresh=False, compile=True, use_dropout=None, use_noise=None):
    self.model = Sequential()
    if use_dropout is not None:
        self.mod_use_drop = self.drop_rate > 0 and use_dropout
    if use_noise is not None:
        self.mod_use_noise = self.sigma_base > 0 and use_noise
    drop_cl = core.Dropout
    if self.l1 != 0 or self.l2 != 0:
        regularizer = WeightRegularizer(l1=self.l1, l2=self.l2)
    else:
        regularizer = None
    if self.enc_decs and not fresh:
        # The encoder may already have a noise and/or drop layer!
        # But we don't know what kind, so replace them.
        # The "if len..." indexing only works if dropout isn't used for
        # enc/decs without Gaussian noise.
        for (i, enc) in enumerate(
                layers[0 if len(layers) == 1 else 1]
                for ae in self.enc_decs
                for layers in (ae.layers[0].encoder.layers,)):
            #if self.sigma_base != 0:
            # Always add this even if 0 because the encdec might have had one
            self.model.add(noise.GaussianNoise(
                self.sigma_base * (self.sigma_fact ** -i),
                input_shape=(self.layer_sizes[i],)))
            self.model.add(enc)
            if self.drop_rate > 0 and self.mod_use_drop:
                self.model.add(drop_cl(self.drop_rate))
    else:
        for (i, (n_in, n_out)) in enumerate(
                zip(self.layer_sizes[:-1], self.layer_sizes[1:])):
            if self.sigma_base > 0 and self.mod_use_noise:
                self.model.add(noise.GaussianNoise(
                    self.sigma_base * (self.sigma_fact ** -i),
                    input_shape=(self.layer_sizes[i],)))
            self.model.add(core.Dense(input_dim=n_in,
                                      output_dim=n_out,
                                      activation='sigmoid',
                                      W_regularizer=regularizer))
            if self.drop_rate > 0 and self.mod_use_drop:
                self.model.add(drop_cl(self.drop_rate, input_shape=(n_in,)))
    #TODO ?
    self.model.add(core.Dense(input_dim=self.layer_sizes[-1],
                              output_dim=len(phase_names),
                              activation='softmax',
                              W_regularizer=regularizer))
    if compile:
        self.model.compile(loss=self.cls_lss, optimizer=self.cls_opt)
def make_net(num_classes):
    model = Sequential()
    model.add(ZeroPadding2D((2, 2), input_shape=(64, 64, 3), dim_ordering='tf'))
    model.add(Convolution2D(64, 5, 5, subsample=(2, 2),
                            W_regularizer=WeightRegularizer(l1=1e-1, l2=1e-1),
                            dim_ordering='tf'))
    model.add(BatchNormalization(axis=1))
    model.add(Activation('relu'))

    model.add(ZeroPadding2D((1, 1), dim_ordering='tf'))
    model.add(Convolution2D(64, 3, 3, subsample=(1, 1),
                            W_regularizer=WeightRegularizer(l1=1e-1, l2=1e-1),
                            dim_ordering='tf'))
    model.add(BatchNormalization(axis=1))
    model.add(Activation('relu'))

    model.add(ZeroPadding2D((1, 1), dim_ordering='tf'))
    model.add(Convolution2D(128, 3, 3, subsample=(2, 2), dim_ordering='tf'))
    model.add(BatchNormalization(axis=1))
    model.add(Activation('relu'))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(512, W_regularizer=WeightRegularizer(l1=1e-2, l2=1e-2)))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # model.add(Convolution2D(32, 3, 3, border_mode='same',
    #                         input_shape=(64, 64, 3), dim_ordering='tf'))
    #
    # model.add(Flatten())
    # model.add(Dense(num_classes))
    # model.add(Activation('softmax'))

    optim = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(optim, loss='categorical_crossentropy', metrics=['accuracy'])
    return model
def make_models(ins: int, outs: int):
    models = []
    for ii in range(outs):
        model = Sequential()
        model.add(Dense(8, input_dim=ins, activation="relu",
                        b_regularizer=WeightRegularizer(l2=1)))
        model.add(Dropout(.5))
        model.add(Dense(1,
                        W_regularizer=WeightRegularizer(l1=0.001, l2=0.001),
                        b_regularizer=WeightRegularizer(l2=1)))
        model.compile(loss='mse', optimizer=Adam(lr=0.00001),
                      metrics=['accuracy'])
        models.append(model)
    return models
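# Usage sketch (assumption): make_models() builds one single-output regressor
# per target column, so each model is fit on its own slice of the target matrix.
import numpy as np

X = np.random.rand(100, 16).astype('float32')  # hypothetical features
Y = np.random.rand(100, 3).astype('float32')   # hypothetical targets, one column per model
for ii, m in enumerate(make_models(ins=16, outs=3)):
    m.fit(X, Y[:, ii], nb_epoch=5, batch_size=32, verbose=0)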
def build_net(inshape, outshape):
    network = Sequential([
        Dense(input_dim=inshape[0], output_dim=120, activation="tanh",
              W_regularizer=WeightRegularizer(l2=0.0)),
        Dense(output_dim=outshape[0], activation="softmax")
    ])
    network.compile(SGD(lr=0.01, momentum=0.9),
                    loss="categorical_crossentropy",
                    metrics=["acc"])
    return network
def quality_assessment(nf, l2=0, input_size=512, n_blocks=4, lr=2e-4,
                       pooling='SWAP', pooling_wreg=1, pooling_breg=1e-1):
    """EyeQual implementation."""
    out_size = get_out_size(input_size, n_blocks)
    img, xi = micnn(nf, input_size=input_size, n_blocks=n_blocks)
    quality_map = Convolution(1, k=1, s=1, activation='sigmoid')(xi)
    if pooling == 'SWAP':
        out = Flatten()(quality_map)
        out = SWAP(1, activation='sigmoid',
                   W_regularizer=WeightRegularizer(l2=pooling_wreg),
                   init='one',
                   b_regularizer=WeightRegularizer(l2=pooling_breg),
                   name='pool')(out)
    elif pooling == 'WAP':
        out = WeightedAveragePooling((1, 1, out_size, out_size),
                                     name='pool')(quality_map)
    elif pooling == 'AP':
        out = AveragePooling2D((out_size, out_size))(quality_map)
        out = Flatten()(out)
    elif pooling == 'MP':
        out = MaxPooling2D((out_size, out_size))(quality_map)
        out = Flatten()(out)

    quality_model = Model(img, quality_map)
    model = Model(img, out)
    opt = Adam(lr=lr)
    model.compile(optimizer=opt, loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model, quality_model
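# Usage sketch (assumption): the pooling argument only changes how the sigmoid
# quality map is aggregated into a single score. micnn(), SWAP, and
# WeightedAveragePooling are the snippet's own custom pieces, so this only
# runs in its original context:
# model, quality_model = quality_assessment(nf=32, input_size=512, pooling='AP')
# quality_map = quality_model.predict(images)  # per-region quality scores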
def vgg_like(weights=None):
    '''
    VGG-like model with two convolution blocks and two dense layers,
    no pretraining.
    '''
    # Image dimension ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'th':
        keras.backend.set_image_dim_ordering('th')

    model = Sequential()
    # input: 64x64 images with 3 channels -> (3, 64, 64) tensors.
    # this applies 64 convolution filters of size 3x3 each.
    model.add(Convolution2D(64, 3, 3, border_mode='valid',
                            input_shape=(3, 64, 64)))
    model.add(Activation('relu'))
    model.add(Convolution2D(64, 3, 3))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    #model.add(Dropout(0.25))

    model.add(Convolution2D(64, 3, 3, border_mode='valid'))
    model.add(Activation('relu'))
    model.add(Convolution2D(64, 3, 3))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    #model.add(Dropout(0.50))

    model.add(Flatten())
    # Note: Keras does automatic shape inference.
    model.add(Dense(256, W_regularizer=WeightRegularizer(l1=1e-6, l2=1e-6)))
    model.add(Activation('relu'))
    #model.add(Dropout(0.50))

    model.add(Dense(200, W_regularizer=WeightRegularizer(l1=1e-5, l2=1e-5)))
    model.add(Activation('softmax'))

    if weights is not None:
        model.load_weights(weights)

    return model
def new_encdecs(self, compile=True, use_dropout=None, use_noise=None):
    self.enc_decs = []
    if use_dropout is not None:
        self.enc_use_drop = self.drop_rate > 0 and use_dropout
    if use_noise is not None:
        self.enc_use_noise = self.sigma_base > 0 and use_noise
    if self.l1 != 0 or self.l2 != 0:
        regularizer = WeightRegularizer(l1=self.l1, l2=self.l2)
    else:
        regularizer = None
    for (i, (n_in, n_out)) in enumerate(
            zip(self.layer_sizes[:-1], self.layer_sizes[1:])):
        ae = Sequential()
        enc_l = []
        if self.enc_use_noise:
            enc_l.append(noise.GaussianNoise(
                self.sigma_base * (self.sigma_fact ** -i),
                input_shape=(n_in,)))
        enc_l.append(core.Dense(input_dim=n_in,
                                output_dim=n_out,
                                activation='sigmoid',
                                W_regularizer=regularizer))
        if self.enc_use_drop:
            enc_l.append(core.Dropout(self.drop_rate))
        enc = containers.Sequential(enc_l)
        dec = containers.Sequential([
            core.Dense(input_dim=n_out, output_dim=n_in, activation='sigmoid')
        ])
        ae.add(core.AutoEncoder(encoder=enc, decoder=dec,
                                output_reconstruction=True))
        if compile:
            ae.compile(loss='mse', optimizer=self.enc_opt)
        self.enc_decs.append(ae)
def _build_nn(net_container, n_features):
    model = Sequential()

    # Change scale from (-1, 1) to (0, 1)
    model.add(Lambda(lambda x: (x + 1) / 2,
                     input_shape=(n_features,),
                     output_shape=(n_features,)))

    if net_container.weight_decay > 0.0:
        weight_regularizer = WeightRegularizer(net_container.weight_decay)
    else:
        weight_regularizer = None

    last_dim = n_features
    for lidx, n_nodes in enumerate(net_container.hidden_layers):
        # Layer, activation, and dropout, in that order.
        model.add(Dense(output_dim=n_nodes, input_dim=last_dim,
                        W_regularizer=weight_regularizer))
        model.add(Activation('sigmoid'))
        if net_container.dropout_prob > 0.0:
            model.add(Dropout(net_container.dropout_prob))
        last_dim = n_nodes

    model.add(Dense(output_dim=1, input_dim=last_dim, bias=False))
    model.add(Activation('linear'))

    if net_container.learning_rate is not None:
        optimizer = Trainer(lr=net_container.learning_rate)
    else:
        #optimizer = Trainer(lr=0.0001)
        optimizer = Trainer()

    model.compile(optimizer=optimizer, loss='mean_squared_error')
    net_container.model = model
def _reset(self):
    reg = WeightRegularizer()

    # a hack to make regularization variable
    reg.l1 = K.variable(0.0)
    reg.l2 = K.variable(0.0)

    data, nb_classes = self.data_mix()
    X, Y, Xv, Yv = data

    # input square image dimensions
    img_rows, img_cols = X.shape[-1], X.shape[-1]
    img_channels = X.shape[1]

    # save number of classes and instances
    self.nb_classes = nb_classes
    self.nb_inst = len(X)

    # convert class vectors to binary class matrices
    Y = np_utils.to_categorical(Y, nb_classes)
    Yv = np_utils.to_categorical(Yv, nb_classes)

    # here definition of the model happens
    model = Sequential()

    # double True for increased probability of conv layers
    if random.choice([True, True, False]):

        # Choose convolution #1
        self.convAsz = random.choice([32, 64, 128])

        model.add(Convolution2D(self.convAsz, 3, 3, border_mode='same',
                                input_shape=(img_channels, img_rows, img_cols),
                                W_regularizer=reg,
                                b_regularizer=reg))
        model.add(Activation('relu'))

        model.add(Convolution2D(self.convAsz, 3, 3,
                                W_regularizer=reg,
                                b_regularizer=reg))
        model.add(Activation('relu'))

        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))

        # Choose convolution size B (if needed)
        self.convBsz = random.choice([0, 32, 64])

        if self.convBsz > 0:
            model.add(Convolution2D(self.convBsz, 3, 3, border_mode='same',
                                    W_regularizer=reg,
                                    b_regularizer=reg))
            model.add(Activation('relu'))

            model.add(Convolution2D(self.convBsz, 3, 3,
                                    W_regularizer=reg,
                                    b_regularizer=reg))
            model.add(Activation('relu'))

            model.add(MaxPooling2D(pool_size=(2, 2)))
            model.add(Dropout(0.25))

        model.add(Flatten())
    else:
        model.add(Flatten(input_shape=(img_channels, img_rows, img_cols)))
        self.convAsz = 0
        self.convBsz = 0

    # choose fully connected layer size
    self.densesz = random.choice([256, 512, 762])

    model.add(Dense(self.densesz,
                    W_regularizer=reg,
                    b_regularizer=reg))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

    model.add(Dense(nb_classes,
                    W_regularizer=reg,
                    b_regularizer=reg))
    model.add(Activation('softmax'))

    # let's train the model using SGD + momentum (how original).
    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])

    X = X.astype('float32')
    Xv = Xv.astype('float32')
    X /= 255
    Xv /= 255

    self.data = (X, Y, Xv, Yv)
    self.model = model
    self.sgd = sgd

    # initial accuracy values
    self.best_val = 0.0
    self.previous_acc = 0.0

    self.reg = reg
    self.epoch_idx = 0

    return self._get_obs()
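# Note on the "hack to make regularization variable" above (the same pattern is
# exercised by the train_blueprint() snippet later in this section): because
# reg.l1 and reg.l2 are backend variables, their values can be changed between
# training steps without recompiling the model, e.g.:
# reg.l1.set_value(1e-5)
# reg.l2.set_value(1e-4)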
def pretrained(weights_path, freezeAndStack):
    model = Sequential()
    pretrained_weights = get_weight_dict_from_path(weights_path)

    # conv - spatial batch norm - relu #1
    model.add(ZeroPadding2D((2, 2), input_shape=(3, 64, 64)))
    model.add(Convolution2D(
        64, 5, 5, subsample=(2, 2),
        weights=[pretrained_weights['W1'], pretrained_weights['b1']],
        W_regularizer=WeightRegularizer(l1=1e-7, l2=1e-7)))
    model.add(BatchNormalization(
        epsilon=1e-06, mode=0, axis=1, momentum=0.9,
        weights=[pretrained_weights['gamma1'],
                 pretrained_weights['beta1'],
                 pretrained_weights['running_mean1'],
                 pretrained_weights['running_var1']]))
    model.add(Activation('relu'))
    print("added conv1")

    # conv - spatial batch norm - relu #2
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(
        64, 3, 3, subsample=(1, 1),
        weights=[pretrained_weights['W2'], pretrained_weights['b2']]))
    model.add(BatchNormalization(
        epsilon=1e-06, mode=0, axis=1, momentum=0.9,
        weights=[pretrained_weights['gamma2'],
                 pretrained_weights['beta2'],
                 pretrained_weights['running_mean2'],
                 pretrained_weights['running_var2']]))
    model.add(Activation('relu'))
    print("added conv2")

    # conv - spatial batch norm - relu #3
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(
        128, 3, 3, subsample=(2, 2),
        weights=[pretrained_weights['W3'], pretrained_weights['b3']]))
    model.add(BatchNormalization(
        epsilon=1e-06, mode=0, axis=1, momentum=0.9,
        weights=[pretrained_weights['gamma3'],
                 pretrained_weights['beta3'],
                 pretrained_weights['running_mean3'],
                 pretrained_weights['running_var3']]))
    model.add(Activation('relu'))
    print("added conv3")

    model.add(Dropout(0.25))
    #print("added dropout")

    # conv - spatial batch norm - relu #4
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(
        128, 3, 3, subsample=(1, 1),
        weights=[pretrained_weights['W4'], pretrained_weights['b4']]))
    model.add(BatchNormalization(
        epsilon=1e-06, mode=0, axis=1, momentum=0.9,
        weights=[pretrained_weights['gamma4'],
                 pretrained_weights['beta4'],
                 pretrained_weights['running_mean4'],
                 pretrained_weights['running_var4']]))
    model.add(Activation('relu'))
    print("added conv4")

    # conv - spatial batch norm - relu #5
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(
        256, 3, 3, subsample=(2, 2),
        weights=[pretrained_weights['W5'], pretrained_weights['b5']]))
    model.add(BatchNormalization(
        epsilon=1e-06, mode=0, axis=1, momentum=0.9,
        weights=[pretrained_weights['gamma5'],
                 pretrained_weights['beta5'],
                 pretrained_weights['running_mean5'],
                 pretrained_weights['running_var5']]))
    model.add(Activation('relu'))
    print("added conv5")

    # conv - spatial batch norm - relu #6
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(
        256, 3, 3, subsample=(1, 1),
        weights=[pretrained_weights['W6'], pretrained_weights['b6']]))
    model.add(BatchNormalization(
        epsilon=1e-06, mode=0, axis=1, momentum=0.9,
        weights=[pretrained_weights['gamma6'],
                 pretrained_weights['beta6'],
                 pretrained_weights['running_mean6'],
                 pretrained_weights['running_var6']]))
    model.add(Activation('relu'))
    print("added conv6")

    model.add(Dropout(0.25))
    #print("added dropout")

    # conv - spatial batch norm - relu #7
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(
        512, 3, 3, subsample=(2, 2),
        weights=[pretrained_weights['W7'], pretrained_weights['b7']]))
    model.add(BatchNormalization(
        epsilon=1e-06, mode=0, axis=1, momentum=0.9,
        weights=[pretrained_weights['gamma7'],
                 pretrained_weights['beta7'],
                 pretrained_weights['running_mean7'],
                 pretrained_weights['running_var7']]))
    model.add(Activation('relu'))
    print("added conv7")

    # conv - spatial batch norm - relu #8
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(
        512, 3, 3, subsample=(1, 1),
        weights=[pretrained_weights['W8'], pretrained_weights['b8']]))
    model.add(BatchNormalization(
        epsilon=1e-06, mode=0, axis=1, momentum=0.9,
        weights=[pretrained_weights['gamma8'],
                 pretrained_weights['beta8'],
                 pretrained_weights['running_mean8'],
                 pretrained_weights['running_var8']]))
    model.add(Activation('relu'))
    print("added conv8")

    # conv - spatial batch norm - relu #9
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(
        1024, 3, 3, subsample=(2, 2),
        weights=[pretrained_weights['W9'], pretrained_weights['b9']]))
    model.add(BatchNormalization(
        epsilon=1e-06, mode=0, axis=1, momentum=0.9,
        weights=[pretrained_weights['gamma9'],
                 pretrained_weights['beta9'],
                 pretrained_weights['running_mean9'],
                 pretrained_weights['running_var9']]))
    model.add(Activation('relu'))
    print("added conv9")

    model.add(Dropout(0.50))
    #print("added dropout")

    # affine - spatial batch norm - relu #10
    model.add(Flatten())
    model.add(Dense(
        512,
        weights=[np.transpose(np.asarray(pretrained_weights['W10'])),
                 pretrained_weights['b10']],
        W_regularizer=WeightRegularizer(l1=1e-4, l2=1e-4)))
    model.add(BatchNormalization(
        epsilon=1e-06, mode=0, axis=1, momentum=0.9,
        weights=[pretrained_weights['gamma10'],
                 pretrained_weights['beta10'],
                 pretrained_weights['running_mean10'],
                 pretrained_weights['running_var10']]))
    model.add(Activation('relu'))
    print("added affine!")

    model.add(Dropout(0.75))
    #print("added dropout!")

    # affine layer w/ softmax activation added
    # pretrained weights assume only 100 outputs; we need to train this layer
    # from scratch... meh
    model.add(Dense(200, activation='softmax',
                    W_regularizer=WeightRegularizer(l1=1e-4, l2=1e-4)))
    print("added final affine")

    if freezeAndStack:
        for layer in model.layers:
            layer.trainable = False
        model.layers[1].trainable = True

    return model
# NEURAL NETWORK

# Convert Y to dummy
import matplotlib.pyplot as plt  # needed for the plots below
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, Convolution2D
from keras.regularizers import WeightRegularizer

data = training_data
X = data[0]
Y = np_utils.to_categorical(data[1])
Xvalid = validation_data[0]
Yvalid = np_utils.to_categorical(validation_data[1])

input_dim = X[0].shape[0]
output_dim = 10

W_regularizer = WeightRegularizer(l1=0., l2=0.)
model = Sequential()
model.add(Dense(30,
                input_dim=input_dim,
                name='hidden_layer',
                activation='sigmoid',
                W_regularizer=W_regularizer))
model.add(Dense(output_dim, name='output_layer', activation='softmax'))
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
print(model.summary())

# fit
res = model.fit(X, Y, batch_size=10, nb_epoch=10,
                validation_data=(Xvalid, Yvalid))

plt.figure()
plt.plot(res.epoch, res.history['loss'])
plt.figure()
plt.plot(res.epoch, res.history['acc'])
plt.plot(res.epoch, res.history['val_acc'])
def pretrained_finetune(weights_path, freezeAndStack):
    model = Sequential()
    if freezeAndStack:
        model.trainLayersIndividually = 1

    # conv - spatial batch norm - relu #1
    model.add(ZeroPadding2D((2, 2), input_shape=(3, 64, 64)))
    model.add(Convolution2D(64, 5, 5, subsample=(2, 2),
                            W_regularizer=WeightRegularizer(l1=1e-7, l2=1e-7)))
    model.add(BatchNormalization(epsilon=1e-06, mode=0, axis=1, momentum=0.9))
    model.add(Activation('relu'))
    print("added conv1")

    # conv - spatial batch norm - relu #2
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(64, 3, 3, subsample=(1, 1)))
    model.add(BatchNormalization(epsilon=1e-06, mode=0, axis=1, momentum=0.9))
    model.add(Activation('relu'))
    print("added conv2")

    # conv - spatial batch norm - relu #3
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(128, 3, 3, subsample=(2, 2)))
    model.add(BatchNormalization(epsilon=1e-06, mode=0, axis=1, momentum=0.9))
    model.add(Activation('relu'))
    model.add(Dropout(0.25))
    print("added conv3")

    # conv - spatial batch norm - relu #4
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(128, 3, 3, subsample=(1, 1)))
    model.add(BatchNormalization(epsilon=1e-06, mode=0, axis=1, momentum=0.9))
    model.add(Activation('relu'))
    print("added conv4")

    # conv - spatial batch norm - relu #5
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(256, 3, 3, subsample=(2, 2)))
    model.add(BatchNormalization(epsilon=1e-06, mode=0, axis=1, momentum=0.9))
    model.add(Activation('relu'))
    print("added conv5")

    # conv - spatial batch norm - relu #6
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(256, 3, 3, subsample=(1, 1)))
    model.add(BatchNormalization(epsilon=1e-06, mode=0, axis=1, momentum=0.9))
    model.add(Activation('relu'))
    model.add(Dropout(0.25))
    print("added conv6")

    # conv - spatial batch norm - relu #7
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, subsample=(2, 2)))
    model.add(BatchNormalization(epsilon=1e-06, mode=0, axis=1, momentum=0.9))
    model.add(Activation('relu'))
    print("added conv7")

    # conv - spatial batch norm - relu #8
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, subsample=(1, 1)))
    model.add(BatchNormalization(epsilon=1e-06, mode=0, axis=1, momentum=0.9))
    model.add(Activation('relu'))
    print("added conv8")

    # conv - spatial batch norm - relu #9
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(1024, 3, 3, subsample=(2, 2)))
    model.add(BatchNormalization(epsilon=1e-06, mode=0, axis=1, momentum=0.9))
    model.add(Activation('relu'))
    print("added conv9")

    model.add(Dropout(0.25))

    # affine - spatial batch norm - relu #10
    model.add(Flatten())
    model.add(Dense(512, W_regularizer=WeightRegularizer(l1=1e-5, l2=1e-5)))
    model.add(BatchNormalization(epsilon=1e-06, mode=0, axis=1, momentum=0.9))
    model.add(Activation('relu'))
    print("added affine!")

    model.add(Dropout(0.5))

    # affine layer w/ softmax activation added
    # pretrained weights assume only 100 outputs; we need to train this layer
    # from scratch
    model.add(Dense(200, activation='softmax',
                    W_regularizer=WeightRegularizer(l1=1e-5, l2=1e-5)))
    print("added final affine")

    if freezeAndStack:
        for layer in model.layers:
            layer.trainable = False
        model.layers[1].trainable = True

    model.load_weights(weights_path)
    return model
def train_blueprint(self, lr, decay, momentum, batch_size, l1, l2, convs, fcs):
    X, Y, Xv, Yv = self.data
    nb_classes = self.nb_classes

    reg = WeightRegularizer()

    # a hack to make regularization variable
    reg.l1 = K.variable(0.0)
    reg.l2 = K.variable(0.0)

    # input square image dimensions
    img_rows, img_cols = X.shape[-1], X.shape[-1]
    img_channels = X.shape[1]

    # convert class vectors to binary class matrices
    Y = np_utils.to_categorical(Y, nb_classes)
    Yv = np_utils.to_categorical(Yv, nb_classes)

    # here definition of the model happens
    model = Sequential()

    has_convs = False

    # create all convolutional layers
    for val, use in convs:
        # Size of convolutional layer
        cnvSz = int(val * 127) + 1

        if use < 0.5:
            continue
        has_convs = True

        model.add(Convolution2D(cnvSz, 3, 3, border_mode='same',
                                input_shape=(img_channels, img_rows, img_cols),
                                W_regularizer=reg,
                                b_regularizer=reg))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        # model.add(Dropout(0.25))

    if has_convs:
        model.add(Flatten())
    else:
        # avoid exceptions on no convs
        model.add(Flatten(input_shape=(img_channels, img_rows, img_cols)))

    # create all fully connected layers
    for val, use in fcs:
        if use < 0.5:
            continue

        # choose fully connected layer size
        densesz = int(1023 * val) + 1

        model.add(Dense(densesz,
                        W_regularizer=reg,
                        b_regularizer=reg))
        model.add(Activation('relu'))
        # model.add(Dropout(0.5))

    model.add(Dense(nb_classes,
                    W_regularizer=reg,
                    b_regularizer=reg))
    model.add(Activation('softmax'))

    # let's train the model using SGD + momentum (how original).
    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])

    X = X.astype('float32')
    Xv = Xv.astype('float32')
    X /= 255
    Xv /= 255

    # set parameters of training step
    sgd.lr.set_value(lr)
    sgd.decay.set_value(decay)
    sgd.momentum.set_value(momentum)

    reg.l1.set_value(l1)
    reg.l2.set_value(l2)

    # train model for one epoch_idx
    H = model.fit(X, Y,
                  batch_size=int(batch_size),
                  nb_epoch=10,
                  shuffle=True)

    diverged = math.isnan(H.history['loss'][-1])
    acc = 0.0
    if not diverged:
        _, acc = model.evaluate(Xv, Yv)

    return diverged, acc
def Convolution(f, k=3, s=2, border_mode='same', l2=.0, **kwargs):
    """Convenience method for Convolutions."""
    return Convolution2D(f, k, k,
                         border_mode=border_mode,
                         subsample=(s, s),
                         W_regularizer=WeightRegularizer(l2=l2),
                         **kwargs)
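# Usage sketch (assumption): with its defaults, the Convolution() wrapper above
# yields a 3x3, stride-2, 'same'-padded Convolution2D with an optional L2
# weight penalty; quality_assessment() above uses it for its 1x1 scoring layer.
from keras.layers import Input

x = Input(shape=(3, 64, 64))
h = Convolution(32)(x)                     # 3x3 kernel, stride 2, no decay
h = Convolution(64, k=5, s=1, l2=1e-4)(h)  # 5x5 kernel, stride 1, L2 = 1e-4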
from keras.layers.advanced_activations import ELU, LeakyReLU
from keras.layers import Input, merge, Convolution1D, MaxPooling1D
from keras.layers import UpSampling1D, Dense, Flatten
from keras.layers.normalization import BatchNormalization
from keras.layers.core import Activation
from keras.layers.core import Dropout
from keras.regularizers import WeightRegularizer

x1, x2, x3, x4 = load("data/splits.npz")

#TODO: investigate pooling layers and residual layers

T = X.shape[1]
inputs = Input(shape=(T, 16), name='eeg_input')

l2_regularizer = WeightRegularizer(l2=.17)

# 4 channels of output, window size 8
down_conv1 = Convolution1D(4, 8,
                           init='he_normal',
                           bias=True,
                           border_mode='same',
                           #W_regularizer=l2_regularizer
                           )(inputs)
down_conv1 = MaxPooling1D(2, 2)(down_conv1)
down_conv1 = BatchNormalization()(down_conv1)
down_conv1 = Activation('sigmoid')(down_conv1)
down_conv1 = Dropout(.1)(down_conv1)
emb1 = Embedding(len(vocab) + 2,
                 emb_dim,
                 input_length=max_len,
                 trainable=True,
                 mask_zero=True,
                 weights=[vectors],
                 dropout=0.5,
                 W_constraint=maxnorm(2))(inlayer1)
#emb1drop = Dropout(0.5)(inlayer1)  # for keras 2: dropout not as argument to emb layer

lstm1 = Bidirectional(
    LSTM(300,
         activation="tanh",
         input_dim=emb_dim,
         return_sequences=True,
         dropout_W=0.5,
         # for keras 2: W_reg...=l2() instead
         W_regularizer=WeightRegularizer(l2=0.0000025)),
    merge_mode='sum')(emb1)

attention1 = F.FarATTN(name="M_ATTN1")(lstm1)
att1drop = Dropout(0.5)(attention1)
output = Dense(y.shape[1], activation='softmax')(att1drop)
opt = optimizers.Adam(lr=0.0001)

## Training and evaluation
test_senses = resources.read_senses("conll16st-zh-01-08-2016-test/",
                                    ignore_types=["Explicit", "AltLex"])
#test_senses = resources.read_senses("conll16st-en-03-29-16-test/", ignore_types=["Explicit", "AltLex"])
test_labels = [[label2id[s] for s in ss] for ss in test_senses]
    # continue
    for c in target_chars:
        if c == next_chars[i]:
            y[i, char_indices[c]] = 1 - softness
        else:
            y[i, char_indices[c]] = softness / (len(target_chars) - 1)
    #print(X[i,:,:])
    #print(y[i,:])

# build the model:
print('Build model...')
model = Sequential()
#model.add(LSTM(1000, consume_less='gpu', init='glorot_uniform', return_sequences=True, dropout_W=0.4, input_shape=(maxlen, num_features)))
#model.add(BatchNormalization(axis=1))
model.add(LSTM(100, consume_less='gpu', init='glorot_uniform',
               return_sequences=False,
               W_regularizer=WeightRegularizer(l2=0.0005, l1=0.0001),
               input_shape=(maxlen, num_features)))
model.add(BatchNormalization(axis=1))
#model.add(LSTM(100, consume_less='gpu', init='glorot_uniform', return_sequences=True, W_regularizer=WeightRegularizer(l2=0.0005, l1=0.0001), input_shape=(maxlen, num_features)))
#model.add(BatchNormalization(axis=1))
#model.add(LSTM(100, consume_less='gpu', init='glorot_uniform', return_sequences=True, W_regularizer=WeightRegularizer(l2=0.0005, l1=0.0001), input_shape=(maxlen, num_features)))
#model.add(BatchNormalization(axis=1))
#model.add(LSTM(100, consume_less='gpu', init='glorot_uniform', return_sequences=False, W_regularizer=WeightRegularizer(l2=0.0005, l1=0.0001), input_shape=(maxlen, num_features)))
#model.add(BatchNormalization(axis=1))
#model.add(LSTM(100, consume_less='gpu', init='glorot_uniform', return_sequences=False, W_regularizer=WeightRegularizer(l1=0.00005), input_shape=(maxlen, num_features)))
#model.add(Dense(len(target_chars), activation='relu', init='glorot_uniform', W_regularizer=l1))
model.add(Dense(len(target_chars), activation='softmax', init='glorot_uniform'))

opt = Nadam(lr=0.0002, beta_1=0.9, beta_2=0.999, epsilon=1e-08,
            schedule_decay=0.004, clipvalue=2)
model.compile(loss='categorical_crossentropy', optimizer=opt)
early_stopping = EarlyStopping(monitor='val_loss', patience=31)
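# Worked example of the soft-target encoding above (illustrative numbers):
# with softness=0.1 and 5 target characters, the true character receives
# probability 1 - 0.1 = 0.9 and each of the other four receives
# 0.1 / 4 = 0.025, so each row of y still sums to 1.0.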
def init_reg(l1, l2):
    if l1 == 0 and l2 == 0:
        return None
    return WeightRegularizer(l1=l1, l2=l2)
def get_regularizer(l1=0.01, l2=0.01):
    return WeightRegularizer(l1=l1, l2=l2)
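# Usage sketch (assumption): both helpers above wrap WeightRegularizer;
# init_reg() additionally returns None when no penalty is requested, which
# Keras treats as "no regularizer".
from keras.layers import Dense

layer_a = Dense(64, W_regularizer=init_reg(l1=0.0, l2=1e-4))  # L2 penalty only
layer_b = Dense(64, W_regularizer=get_regularizer())          # defaults: l1 = l2 = 0.01
assert init_reg(0, 0) is None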
def create_model(model_para=[6, 12, 18, 18, 64, 0.01, 0.4, (5, 5, 5)],
                 foldname=os.path.abspath('.')):
    # input image dimensions
    # img_rows, img_cols, img_depth = 38, 39, 48
    img_rows, img_cols, img_depth = 49, 39, 38
    # number of convolutional filters to use
    conv_l2 = 0.008
    full_l2 = 0.3
    # convolution kernel size
    kernel_size = (3, 3, 3)
    # size of pooling area for max pooling
    pool_size = (2, 2, 2)
    drop_out = (model_para[5], model_para[6])
    act_function = 'tanh'
    full_connect = model_para[4]
    nb_filters = (model_para[0], model_para[1], model_para[2], model_para[3])
    # nb_filters = (5, 10, 15, 15)
    l1_regularizer = 0.01
    l2_regularizer = full_l2
    nb_classes = 2
    input_shape = (1, img_rows, img_cols, img_depth)
    wr = WeightRegularizer(l1=l1_regularizer, l2=l2_regularizer)

    # create cnn model
    model = Sequential()
    model.add(Convolution3D(nb_filters[0],
                            kernel_size[0], kernel_size[1], kernel_size[2],
                            W_regularizer=l2(conv_l2),
                            activation=act_function,
                            input_shape=input_shape))
    model.add(MaxPooling3D(pool_size=pool_size))
    model.add(Dropout(drop_out[0]))

    model.add(Convolution3D(nb_filters[1],
                            kernel_size[0], kernel_size[1], kernel_size[2],
                            W_regularizer=l2(conv_l2),
                            activation=act_function))
    model.add(MaxPooling3D(pool_size=pool_size))
    model.add(Dropout(drop_out[0]))

    model.add(Convolution3D(nb_filters[2],
                            kernel_size[0], kernel_size[1], kernel_size[2],
                            W_regularizer=l2(conv_l2),
                            activation=act_function))
    model.add(MaxPooling3D(pool_size=pool_size))
    model.add(Dropout(drop_out[0]))

    model.add(Convolution3D(nb_filters[3],
                            kernel_size[0], kernel_size[1], kernel_size[2],
                            W_regularizer=l2(conv_l2),
                            activation=act_function))
    # model.add(MaxPooling3D(pool_size=pool_size))
    model.add(Dropout(drop_out[1] / 2))

    model.add(Flatten())
    model.add(Dense(full_connect, W_regularizer=wr, activation=act_function))
    model.add(Dropout(drop_out[1]))
    model.add(Dense(nb_classes, activation=act_function))
    model.add(Activation('softmax'))
    # model.add(Activation(act_function))
    model.summary()

    ADA = Adadelta(lr=2.0, rho=0.95)
    model.compile(loss='categorical_crossentropy',
                  optimizer=ADA,
                  metrics=['accuracy'])

    ## save parameters of cnn model to .txt
    sname = 'model_parameter.txt'
    full_namem = os.path.join(foldname, sname)
    fm = open(full_namem, 'wb')
    fm.write('************CNN model parameter************ ' + '\n')
    fm.write('Number of Convolution layer : ' + str(len(nb_filters)) + '\n')
    fm.write('Input shape : ' + str(input_shape) + '\n')
    fm.write('Number of kernels per layer : ' + str(nb_filters) + '\n')
    fm.write('Kernel size per layer : ' + str(kernel_size) + '\n')
    fm.write('Pool size per layer : ' + str(pool_size) + '\n')
    fm.write('Activation function per layer : ' + act_function + '\n')
    # fm.write('Dropout rate : ' + str(drop_out) + '\n')
    fm.write('Number of full-connect layer : ' + str(full_connect) + '\n')
    fm.write('Coefficient of L1 regularizer : ' + str(l1_regularizer) + '\n')
    fm.write('Coefficient of L2 regularizer : ' + str(l2_regularizer) + '\n')
    fm.write('Output : ' + str(nb_classes) + ' classes' + '\n')
    fm.close()
    return model
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255

# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, num_classes)
Y_test = np_utils.to_categorical(y_test, num_classes)

for loss_function in loss_functions:
    # for num_classes in num_classes_arr:  # num classes loop
    model = Sequential()

    # conv - spatial batch norm - relu #1
    model.add(ZeroPadding2D((2, 2), input_shape=(3, 64, 64)))
    model.add(Convolution2D(64, 5, 5, subsample=(2, 2),
                            W_regularizer=WeightRegularizer(l1=1e-7, l2=1e-7)))
    model.add(BatchNormalization(epsilon=1e-06, mode=0, axis=1, momentum=0.9))
    model.add(Activation('relu'))

    # conv - spatial batch norm - relu #2
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(64, 3, 3, subsample=(1, 1)))
    model.add(BatchNormalization(epsilon=1e-06, mode=0, axis=1, momentum=0.9))
    model.add(Activation('relu'))

    # conv - spatial batch norm - relu #3
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(128, 3, 3, subsample=(2, 2)))
    model.add(BatchNormalization(epsilon=1e-06, mode=0, axis=1, momentum=0.9))
    model.add(Activation('relu'))
    model.add(Dropout(0.25))
def main():
    argparser = argparse.ArgumentParser(
        description="Run span prediction model for fill-the-gap questions")
    argparser.add_argument("--data_path", type=str, default='../data/',
                           help="path to Omnibus-Gr04/Omnibus-Gr04/Barron's data. "
                                "If this doesn't work specify globally in loaders.py")
    argparser.add_argument("--output_file", type=str,
                           default='barrons_predictions-1.txt',
                           help="File with predicted examples, one per line.")
    argparser.add_argument("--barrons_file", type=str,
                           help="Filepath of Barrons-1.sentences.txt")
    argparser.add_argument("--evaluate", type=bool, default=True,
                           help="run per-sentence evaluation")
    argparser.add_argument("--barrons", type=bool, default=True,
                           help="generate examples for Barron's statements.")
    argparser.add_argument("--neural", type=bool, default=False,
                           help="additional intermediate layer")
    argparser.add_argument("--plot", type=bool, default=False,
                           help="whether to create plot with feature importances")
    args = argparser.parse_args()

    # load fill-the-gap data
    training_examples = load_questions(args.data_path)
    training_statements, training_spans = zip(*training_examples)

    #'Barrons-4thGrade.sentences-d1/Barrons-1.sentences.txt'
    barrons_statements = read_barrons(args.barrons_file)

    # getting parses. Assume CoreNLP is installed.
    # Parse tree annotation must be run before, both for training and new data.
    write_sentences(training_statements, filename='training.txt')
    write_sentences(barrons_statements, filename='barrons.txt')
    barron_trees = load_parses(filename='barrons.txt.json')
    training_trees = load_parses(filename='training.txt.json')

    # general characteristics of sentences/spans
    max_span_length = max([span[1] - span[0] for span in training_spans])  # 11
    num_sentence_words = max([len(stmnt) for stmnt in training_statements])  # 46

    ### build span dataset: {spans (their feature repr.)} --> {True/False}
    pos_tags = get_pos_set(list(training_statements) + barrons_statements)

    # list of legal constituent labels
    constituents = set()
    for tree in training_trees + barron_trees:
        for subtree in tree.subtrees(lambda t: t.height() > 1):
            constituents.add(subtree.label())
    constituents = sorted(list(constituents))

    #### load training data
    all_examples, all_labels, examples_per_sentence, _ = \
        create_training_examples(training_statements, training_trees,
                                 training_spans, True, max_span_length,
                                 num_sentence_words, args, pos_tags,
                                 constituents)

    # shuffle training data order
    n_examples, n_features = all_examples.shape
    perm = np.random.permutation(n_examples)
    all_examples = all_examples[perm, :]
    all_labels = all_labels[perm]
    print(n_examples, 'examples; ', n_features, 'features')

    # split into training and (preliminary) validation part
    cut = 180
    x_train = all_examples[:cut, :]
    x_test = all_examples[cut:, :]
    y_train = all_labels[:cut]
    y_test = all_labels[cut:]
    examples_per_sentence_train = examples_per_sentence[:cut]
    examples_per_sentence_test = examples_per_sentence[cut:]

    ##### define keras model
    model = Sequential()
    regul = 0.01

    # extra neural layer.
    if args.neural:
        n_latent = 5
        model.add(Dense(output_dim=n_latent,
                        input_dim=n_features,
                        activation='tanh',
                        W_regularizer=WeightRegularizer(l1=regul)))
        model.add(Dense(output_dim=1,
                        input_dim=n_latent,
                        activation='sigmoid',
                        W_regularizer=WeightRegularizer(l1=regul)))

    # sigmoid model
    if not args.neural:
        model.add(Dense(output_dim=1,
                        input_dim=n_features,
                        activation='sigmoid',
                        W_regularizer=WeightRegularizer(l2=regul)))

    model.compile(loss='binary_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])

    # training
    model.fit(x_train, y_train, nb_epoch=200, batch_size=8)

    # per-span evaluation
    loss_and_metrics = model.evaluate(x_train, y_train, batch_size=100)
    loss_and_metrics_test = model.evaluate(x_test, y_test, batch_size=100)
    print('Training performance (sigmoid):', loss_and_metrics)
    print('Validation performance (sigmoid):', loss_and_metrics_test)

    # per-sentence evaluation.
    # evaluate on all legal spans for a given sentence:
    if args.evaluate:
        accuracies = []
        lengths = []
        for sentence_spans in examples_per_sentence_train:
            lengths.append(len(sentence_spans))
            scores_this_sentence = []
            correct_index = -1
            for i, (features, truth) in enumerate(sentence_spans):
                x_input = np.reshape(features, [1, n_features])
                #y_input = np.atleast_1d(truth)
                score = model.predict_proba(x_input, verbose=False)
                scores_this_sentence.append(score)
                if truth:
                    correct_index = i
                #loss, accuracy = model.evaluate(x_input, y_input, batch_size=1)
                #sentence_accuracies.append(accuracy)
            prediction = np.argmax(scores_this_sentence)
            accurate = (prediction == correct_index)
            accuracies.append(accurate)
        print(np.mean(accuracies), 'Training accuracy (sentence level)')
        print('Average legal spans per sentence:', np.mean(lengths))

        # same as above, but for evaluation examples. Too lazy to properly factor out.
        accuracies = []
        lengths = []
        for sentence_spans in examples_per_sentence_test:
            lengths.append(len(sentence_spans))
            scores_this_sentence = []
            correct_index = -1
            for i, (features, truth) in enumerate(sentence_spans):
                x_input = np.reshape(features, [1, n_features])
                #y_input = np.atleast_1d(truth)
                score = model.predict_proba(x_input, verbose=False)
                scores_this_sentence.append(score)
                if truth:
                    correct_index = i
                #loss, accuracy = model.evaluate(x_input, y_input, batch_size=1)
                #sentence_accuracies.append(accuracy)
            prediction = np.argmax(scores_this_sentence)
            accurate = (prediction == correct_index)
            accuracies.append(accurate)
        print(np.mean(accuracies), 'Validation accuracy (sentence level)')

    # weight interpretation/plot
    if args.plot:
        weights = model.layers[0].get_weights()[0]
        #important_features = np.where(np.abs(weights) > 0.05)[0]
        feature_names = [
            "f_bias",
            "f_span_match",
            "f_length",
            "f_science_token",
            "f_avg_word_frequency",
            "f_stop_word_begin",
            "f_max_token_length",
            "f_science_token_count",
        ]
        feature_names += pos_tags
        feature_names += ["begin_" + x for x in pos_tags]
        feature_names += ["end_" + x for x in pos_tags]
        feature_names += constituents
        feature_names += ["big_" + x for x in constituents]
        #order = np.argsort(np.abs(weights))
        plt.stem(weights)
        plt.xticks(range(0, len(feature_names)), feature_names,
                   rotation='vertical')
        plt.grid()
        plt.show()

    # generating predictions for Barron's
    if args.barrons:
        barron_statements = [tree.leaves() for tree in barron_trees]
        for statement in barron_statements:
            try:
                statement[statement.index('-LRB-')] = '('
            except ValueError:
                pass
            try:
                statement[statement.index('-RRB-')] = ')'
            except ValueError:
                pass

        # compute features for Barron's
        _, _, barron_span_features, span_indexes = \
            create_training_examples(barron_statements, barron_trees, False,
                                     False, max_span_length,
                                     num_sentence_words, args, pos_tags,
                                     constituents)

        # identify span for each sentence with highest score
        predicted_spans = []
        for i_sent, sentence_spans in enumerate(barron_span_features):
            scores_this_sentence = []
            for features in sentence_spans:
                x_input = np.reshape(features, [1, n_features])
                #y_input = np.atleast_1d(truth)
                score = model.predict_proba(x_input, verbose=False)
                scores_this_sentence.append(score)
            predicted_span = span_indexes[i_sent][np.argmax(scores_this_sentence)]
            predicted_spans.append(predicted_span)

        # write predictions to file
        with open(args.output_file, 'w') as writefile:
            for i in range(0, len(barron_statements)):
                printstring = barron_statements[i]  # full sentence without gap.
                gap = predicted_spans[i]
                gap_tokens = printstring[gap[0]:gap[1]]
                printstring[gap[0]:gap[1]] = ['_____']  # *len(gap_tokens)
                printstring = " ".join(printstring) + '\t'
                printgap = " ".join(gap_tokens) + '\n'
                writefile.write(printstring + printgap)