def build(self, hp):
    maxnorm = hp.Choice('maxnorm', values=self.hyperparam['maxnorm'])
    resnet_model = Resnet(input_shape=(self.seqlen, self.channels), norm_max=maxnorm)
    if self.pretrained_weights is not None:
        resnet_model.set_weights(self.pretrained_weights)
    inp = Input(shape=(self.seqlen, self.channels))
    enc_inp = resnet_model(inp)
    dense_units = hp.Int('preclassification',
                         min_value=self.hyperparam['dense_units']['min'],
                         max_value=self.hyperparam['dense_units']['max'],
                         step=self.hyperparam['dense_units']['step'])
    dense_out = Dense(units=dense_units, activation='relu',
                      kernel_constraint=MaxNorm(maxnorm, axis=[0, 1]),
                      bias_constraint=MaxNorm(maxnorm, axis=0),
                      kernel_initializer=glorot_uniform(seed=0))(enc_inp)
    dense_out = Dropout(rate=hp.Choice('dropout', values=self.hyperparam['dropout']))(dense_out)
    output = Dense(self.num_classes, activation='softmax',
                   kernel_constraint=MaxNorm(maxnorm, axis=[0, 1]),
                   bias_constraint=MaxNorm(maxnorm, axis=0),
                   kernel_initializer=glorot_uniform(seed=0))(dense_out)
    model = Model(inputs=inp, outputs=output)
    model.compile(optimizer=Adam(lr=hp.Choice('lr', values=self.hyperparam['lr'])),
                  loss=focal_loss(),
                  metrics=['accuracy', macro_f1])
    return model
def main(argv):
    infile = argv[0]
    outfile = argv[1]
    seqlen = 1500
    channels = 6
    maxnorm = 20.0

    # Create model
    resnet_model = Resnet(input_shape=(seqlen, channels), norm_max=maxnorm)
    samp1 = Input(shape=(seqlen, channels))
    enc_samp1 = resnet_model(samp1)
    samp2 = Input(shape=(seqlen, channels))
    enc_samp2 = resnet_model(samp2)
    diff_layer = Lambda(lambda tensors: K.abs(tensors[0] - tensors[1]))
    diff_enc = diff_layer([enc_samp1, enc_samp2])
    dense_out = Dense(50, activation='relu',
                      kernel_constraint=MaxNorm(maxnorm, axis=[0, 1]),
                      bias_constraint=MaxNorm(maxnorm, axis=0),
                      kernel_initializer=glorot_uniform(seed=0))(diff_enc)
    dense_out = Dropout(rate=0.2)(dense_out)
    output = Dense(1, activation='sigmoid',
                   kernel_constraint=MaxNorm(maxnorm, axis=[0, 1]),
                   bias_constraint=MaxNorm(maxnorm, axis=0),
                   kernel_initializer=glorot_uniform(seed=0))(dense_out)
    model = Model(inputs=[samp1, samp2], outputs=output)
    model.load_weights(infile)

    # Copy the shared encoder weights out of the siamese model and save them
    for layer in model.layers:
        if layer.name == "model":
            resnet_model.set_weights(layer.get_weights())
    resnet_model.save_weights(outfile)
def create_model(beta_1=0.8,
                 beta_2=0.999,
                 learning_rate=0.01,
                 drop_rate_input=0.2,
                 drop_rate_hidden=0.3,
                 weight_constraint=3,
                 units=80,
                 seed=config.SEED):
    model = Sequential()
    model.add(Dropout(drop_rate_input, seed=seed))
    model.add(
        Dense(units=units, activation='relu',
              kernel_constraint=MaxNorm(max_value=weight_constraint, axis=0)))
    model.add(Dropout(drop_rate_hidden, seed=seed))
    # Integer division: Dense expects an integer unit count
    model.add(
        Dense(units=units // 2, activation='relu',
              kernel_constraint=MaxNorm(max_value=weight_constraint, axis=0)))
    model.add(Dropout(drop_rate_hidden, seed=seed))
    model.add(Dense(units=1, activation='sigmoid'))  # For a binary classification problem
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate,
                                         beta_1=beta_1,
                                         beta_2=beta_2,
                                         epsilon=1e-07,
                                         amsgrad=False,
                                         name='Adam')
    model.compile(loss='binary_crossentropy', metrics=["accuracy"], optimizer=optimizer)
    return model
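# A minimal usage sketch for create_model above, assuming the surrounding module already
# imports tensorflow/Keras and defines config.SEED. The synthetic 36-feature data, the
# explicit seed, and the epoch/batch settings below are illustrative placeholders, not
# values taken from the original code.
import numpy as np

X_demo = np.random.rand(256, 36).astype('float32')
y_demo = np.random.randint(0, 2, size=(256, 1))

demo_model = create_model(units=80, weight_constraint=3, seed=0)
demo_model.fit(X_demo, y_demo, epochs=2, batch_size=32, validation_split=0.2, verbose=0)
print(demo_model.evaluate(X_demo, y_demo, verbose=0))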
def mf(n_person=100, n_item=3000, para_dim=5):
    """
    Input: dimensions of person-item matrix
    Output: neural network with multiple inputs and embedding layers
    """
    # MaxNorm is a weight constraint, not a layer, so it is attached to each Embedding
    # via embeddings_constraint rather than being called on the output tensor.
    p = Input(shape=[1], name='person')
    p_e = Embedding(n_person, para_dim,
                    embeddings_initializer='RandomNormal',
                    embeddings_constraint=MaxNorm(max_value=5 * np.sqrt(para_dim)),
                    name='person_embedding')(p)

    i = Input(shape=[1], name='item')
    i_e = Embedding(n_item, para_dim,
                    embeddings_initializer='RandomNormal',
                    embeddings_constraint=MaxNorm(max_value=5 * np.sqrt(para_dim)),
                    name='item_embedding')(i)

    d = Input(shape=[1], name='residual')
    # Note: as in the original, the residual embedding is indexed by the item input.
    d_e = Embedding(n_item, 1,
                    embeddings_initializer='RandomNormal',
                    embeddings_constraint=MaxNorm(max_value=5 * np.sqrt(para_dim)),
                    name='res_embed')(i)

    output = Dot(axes=-1, name='dotProduct')([p_e, i_e]) + d_e
    # print(output.shape)
    output = Flatten(name='output')(output)
    main_output = Activation('sigmoid')(output)
    # print(main_output.shape)
    model = Model([p, i, d], main_output)
    return model
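# A rough sketch of training the mf model above on synthetic person/item interactions.
# The binary labels, optimizer, batch size, and variable names are illustrative
# assumptions; the dummy 'residual' array is passed only because the model declares it
# as a third input.
import numpy as np

irt_model = mf(n_person=100, n_item=3000, para_dim=5)
irt_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

persons = np.random.randint(0, 100, size=(1024, 1))
items = np.random.randint(0, 3000, size=(1024, 1))
residuals = np.zeros((1024, 1))  # placeholder for the declared residual input
labels = np.random.randint(0, 2, size=(1024, 1))

irt_model.fit([persons, items, residuals], labels, epochs=2, batch_size=64, verbose=0)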
def identity_block(inputs, filters, ksz, stage, block, norm_max=1.0):
    """
    Identity block for ResNet

    Parameters
    __________
    inputs : input tensor of shape (batch_size, num_steps_prev, num_ch_prev)
    filters : list of 3 integers defining num of conv filters in main path
    ksz : filter width of middle convolutional layer
    stage : integer used to name layers
    block : string used to name layers
    norm_max : maximum norm for constraint

    Returns
    _______
    x : output of the identity block, tensor of shape (batch_size, num_steps, num_ch)
    """
    conv_base_name = 'res' + str(stage) + block + '_branch'
    bn_base_name = 'bn' + str(stage) + block + '_branch'

    F1, F2, F3 = filters

    # Shortcut path
    x_shortcut = inputs

    # Main path
    x = Conv1D(filters=F1, kernel_size=1, strides=1, padding='valid',
               name=conv_base_name + '2a', use_bias=False,
               kernel_constraint=MaxNorm(norm_max, axis=[0, 1, 2]),
               kernel_initializer=glorot_uniform(seed=0))(inputs)
    x = LeakyReLU(alpha=0.1)(x)
    x = BatchNormalization(axis=-1, momentum=0.9, name=bn_base_name + '2a',
                           gamma_constraint=MaxNorm(norm_max, axis=0),
                           beta_constraint=MaxNorm(norm_max, axis=0))(x)

    x = Conv1D(filters=F2, kernel_size=ksz, strides=1, padding='same',
               name=conv_base_name + '2b', use_bias=False,
               kernel_constraint=MaxNorm(norm_max, axis=[0, 1, 2]),
               kernel_initializer=glorot_uniform(seed=0))(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = BatchNormalization(axis=-1, momentum=0.9, name=bn_base_name + '2b',
                           gamma_constraint=MaxNorm(norm_max, axis=0),
                           beta_constraint=MaxNorm(norm_max, axis=0))(x)

    x = Conv1D(filters=F3, kernel_size=1, strides=1, padding='valid',
               name=conv_base_name + '2c', use_bias=False,
               kernel_constraint=MaxNorm(norm_max, axis=[0, 1, 2]),
               kernel_initializer=glorot_uniform(seed=0))(x)
    x = Add()([x, x_shortcut])
    x = LeakyReLU(alpha=0.1)(x)
    x = BatchNormalization(axis=-1, momentum=0.9, name=bn_base_name + '2c',
                           gamma_constraint=MaxNorm(norm_max, axis=0),
                           beta_constraint=MaxNorm(norm_max, axis=0))(x)

    return x
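# A minimal sanity-check sketch for identity_block, assuming Input and Model are imported.
# Because the block adds its output back onto the shortcut, the last filter count (here 32)
# must match the number of input channels; the (500, 32) input shape is illustrative.
blk_in = Input(shape=(500, 32))
blk_out = identity_block(blk_in, filters=[16, 16, 32], ksz=3, stage=9, block='demo', norm_max=1.0)
print(Model(blk_in, blk_out).output_shape)  # expected: (None, 500, 32)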
def wrap_model():
    model = Sequential()

    # add first embedding layer with pretrained wikipedia weights
    model.add(
        Embedding(
            embedding_matrix.shape[0],
            embedding_matrix.shape[1],
            weights=[embedding_matrix],
            input_length=max_sequence_length,
            trainable=False
        )
    )

    # add LSTM layer
    model.add(
        LSTM(
            lstm_out_width,
            input_shape=(max_sequence_length, ),
            go_backwards=backwards,
            dropout=dropout,
            recurrent_dropout=dropout,
            return_sequences=True,
            kernel_constraint=MaxNorm(),
        )
    )

    model.add(
        MaxPooling1D(
            pool_size=lstm_pool_size,
        )
    )

    model.add(
        Flatten()
    )

    # Add output layer
    model.add(
        Dense(
            1,
            activation='sigmoid'
        )
    )

    if optimizer == "rmsprop":
        optimizer_fn = RMSprop(lr=0.01 * learn_rate_mult)
    else:
        optimizer_fn = Adam(lr=0.1 * learn_rate_mult)

    # Compile model
    model.compile(
        loss='binary_crossentropy', optimizer=optimizer_fn, metrics=['acc'])

    if verbose == 1:
        model.summary()

    return model
def train():
    # load dataset
    (train_X, train_Y), (test_X, test_Y) = cifar10.load_data()
    train_X = train_X.astype('float32')
    test_X = test_X.astype('float32')
    train_X = train_X / 255.0
    test_X = test_X / 255.0
    train_Y = to_categorical(train_Y)
    test_Y = to_categorical(test_Y)
    num_classes = test_Y.shape[1]

    # model
    model = Sequential()
    model.add(Conv2D(32, (3, 3), input_shape=(32, 32, 3), padding='same',
                     activation='relu', kernel_constraint=MaxNorm(3)))
    model.add(Dropout(0.2))
    model.add(Conv2D(32, (3, 3), activation='relu', padding='same',
                     kernel_constraint=MaxNorm(3)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(512, activation='relu', kernel_constraint=MaxNorm(3)))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    sgd = SGD(lr=0.01, momentum=0.9, decay=(0.01 / 25))
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    model.fit(train_X, train_Y, validation_data=(test_X, test_Y), epochs=15, batch_size=32)
    _, acc = model.evaluate(test_X, test_Y)
    print(acc * 100)
    model.save("model1_cifar_10epoch.h5")
def create_resnet_model(seqlen, num_channels, maxnorm, modeldir, dense_units, num_classes):
    resnet_model = Resnet(input_shape=(seqlen, num_channels), norm_max=maxnorm)
    # Load weights from pretrained model
    resnet_model.load_weights(os.path.join(modeldir, 'pretrained_resnet.h5'))
    samp = Input(shape=(seqlen, num_channels))
    enc_samp = resnet_model(samp)
    dense_out = Dense(dense_units, activation='relu',
                      kernel_constraint=MaxNorm(maxnorm, axis=[0, 1]),
                      bias_constraint=MaxNorm(maxnorm, axis=0),
                      kernel_initializer=glorot_uniform(seed=0),
                      name='FC1')(enc_samp)
    dense_out = Dropout(rate=0.2)(dense_out)
    output = Dense(num_classes, activation='softmax',
                   kernel_constraint=MaxNorm(maxnorm, axis=[0, 1]),
                   bias_constraint=MaxNorm(maxnorm, axis=0),
                   kernel_initializer=glorot_uniform(seed=0),
                   name='output')(dense_out)
    model = Model(inputs=samp, outputs=output)
    return model
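# A hedged sketch of how create_resnet_model might be used for fine-tuning. The sequence
# length, channel count, class count, model directory, and the choice to freeze the
# pretrained encoder are illustrative assumptions, not requirements from the function above.
clf = create_resnet_model(seqlen=1500, num_channels=6, maxnorm=20.0,
                          modeldir='models', dense_units=50, num_classes=3)

# Optionally freeze everything except the new classification head so only it is trained.
for layer in clf.layers:
    if layer.name not in ('FC1', 'output'):
        layer.trainable = False

clf.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
clf.summary()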
def analyze_train(train_csv_filename, test_csv_filename):
    train_dataset = np.loadtxt(train_csv_filename, delimiter=',')
    test_dataset = np.loadtxt(test_csv_filename, delimiter=',')

    # split into input (X) and output (Y) variables
    x_train = train_dataset[:, 2:]
    y_train = train_dataset[:, 1]
    x_test = test_dataset[:, 2:]
    y_test = test_dataset[:, 1]

    model = Sequential()
    # BEST RESULTS
    model.add(Dense(16, input_dim=36, activation='relu', kernel_constraint=MaxNorm(3)))
    model.add(Dropout(0.3))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    # es_callback = EarlyStopping(monitor='val_loss', patience=5)
    # history = model.fit(x_train, y_train, epochs=75, batch_size=128, validation_split=0.2, verbose=1, callbacks=[es_callback])
    history = model.fit(x_train, y_train, epochs=250, batch_size=32, validation_split=0.2, verbose=1)
    print(model.evaluate(x_test, y_test))

    # graph training vs. validation accuracy over epochs
    plt.figure(1)
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')

    # graph training vs. validation loss over epochs
    plt.figure(2)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

    return model
def Conv1DTranspose(inputs, filters, ksz, s=2, padding='same', norm_max=1.0):
    """
    1D Transposed convolution for FCN

    Parameters
    __________
    inputs : input tensor of shape (batch_size, num_steps_prev, num_ch_prev)
    filters : integer defining num of conv filters
    ksz : filter width of convolutional layer
    s : integer specifying stride
    padding : padding for the convolutional layer
    norm_max : maximum norm for constraint

    Returns
    _______
    x : output tensor of shape (batch_size, num_steps, num_ch)
    """
    x = Lambda(lambda x: K.expand_dims(x, axis=2))(inputs)
    x = Conv2DTranspose(filters=filters, kernel_size=(ksz, 1), strides=(s, 1),
                        padding=padding, name='conv_transpose', use_bias=False,
                        kernel_constraint=MaxNorm(norm_max, axis=[0, 1, 2, 3]),
                        kernel_initializer=glorot_uniform(seed=0))(x)
    x = Lambda(lambda x: K.squeeze(x, axis=2))(x)
    return x
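# A small shape-check sketch for the Conv1DTranspose helper above, assuming the usual
# Keras imports (Input, Model) are in scope. The 100-step, 8-channel input is an arbitrary
# example; with padding='same' and s=4 the time axis should grow from 100 to 400 steps.
probe_in = Input(shape=(100, 8))
probe_out = Conv1DTranspose(probe_in, filters=16, ksz=5, s=4, norm_max=1.0)
probe_model = Model(inputs=probe_in, outputs=probe_out)
print(probe_model.output_shape)  # expected: (None, 400, 16)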
# print(y_train.shape)
# print(y_test.shape)

# Defining the model
input_shape = img_data[0].shape
print(input_shape)

model = Sequential()
model.add(
    Conv2D(32, (3, 3), input_shape=input_shape, padding='same',
           activation='relu', kernel_constraint=MaxNorm(3)))
model.add(Dropout(0.2))
model.add(
    Conv2D(32, (3, 3), activation='relu', padding='same',
           kernel_constraint=MaxNorm(3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(512, activation='relu', kernel_constraint=MaxNorm(3)))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Compile model
epochs = 25
lrate = 0.01
def main(argv): indir = args.indir outdir = args.outdir if not os.path.exists(outdir): os.makedirs(outdir) dirstr = 'lr{:.4f}-maxnorm{:.2f}-batchsize{:d}'.format( args.lr, args.maxnorm, args.batchsize) resultdir = os.path.join(outdir, dirstr) if not os.path.exists(resultdir): os.makedirs(resultdir) # Hyperparameters lr = args.lr # learning rate num_epochs = args.num_epochs batch_size = args.batchsize # Read train data ftrain = h5py.File(os.path.join(args.indir, 'train_dataset.h5'), 'r') train_samples1 = ftrain['samp1'] train_samples2 = ftrain['samp2'] train_labels = np.array(ftrain['label'], dtype=np.int32) [num_train, seqlen, channels] = train_samples1.shape # Read validation data fval = h5py.File(os.path.join(args.indir, 'val_dataset.h5'), 'r') val_samples1 = fval['samp1'] val_samples2 = fval['samp2'] val_labels = np.array(fval['label'], dtype=np.int32) [num_val, seqlen, channels] = val_samples1.shape # Read test data ftest = h5py.File(os.path.join(args.indir, 'test_dataset.h5'), 'r') test_samples1 = ftest['samp1'] test_samples2 = ftest['samp2'] test_labels = np.array(ftest['label'], dtype=np.int32) [num_test, seqlen, channels] = test_samples1.shape # Data generators for train/val/test train_gen = DataGenerator(train_samples1, train_samples2, train_labels,\ batch_size=batch_size, seqlen=seqlen, channels=channels,\ shuffle=True, balance=True, augment=False, aug_factor=0.25) val_gen = DataGenerator(val_samples1, val_samples2, val_labels,\ batch_size=batch_size, seqlen=seqlen, channels=channels) test_gen = DataGenerator(test_samples1, test_samples2, test_labels,\ batch_size=batch_size, seqlen=seqlen, channels=channels) # Create model resnet_model = Resnet(input_shape=(seqlen, channels), norm_max=args.maxnorm) samp1 = Input(shape=(seqlen, channels)) enc_samp1 = resnet_model(samp1) samp2 = Input(shape=(seqlen, channels)) enc_samp2 = resnet_model(samp2) diff_layer = Lambda(lambda tensors: K.abs(tensors[0] - tensors[1])) diff_enc = diff_layer([enc_samp1, enc_samp2]) dense_out = Dense(50, activation='relu', kernel_constraint=MaxNorm(args.maxnorm, axis=[0, 1]), bias_constraint=MaxNorm(args.maxnorm, axis=0), kernel_initializer=glorot_uniform(seed=0))(diff_enc) dense_out = Dropout(rate=0.2)(dense_out) output = Dense(1, activation='sigmoid', kernel_constraint=MaxNorm(args.maxnorm, axis=[0, 1]), bias_constraint=MaxNorm(args.maxnorm, axis=0), kernel_initializer=glorot_uniform(seed=0))(dense_out) model = Model(inputs=[samp1, samp2], outputs=output) model.compile(optimizer=Adam(lr=lr), loss=BinaryCrossentropy(), metrics=['accuracy']) # Train model # Use early stopping and model checkpoints to handle overfitting and save best model model_checkpt = ModelCheckpoint(os.path.join(resultdir,'{epoch:02d}-{val_accuracy:.4f}.h5'),\ monitor='val_accuracy')#,\ #mode='max', save_best_only=True) batch_renorm_cb = BatchRenormScheduler( len(train_gen)) # Implement batchrenorm after 1st epoch history = model.fit(train_gen, epochs=num_epochs, validation_data=val_gen, verbose=1, shuffle=False, callbacks=[batch_renorm_cb, model_checkpt], workers=2, max_queue_size=20, use_multiprocessing=False) # Plot training history plot_results(history.history['loss'], history.history['val_loss'],\ os.path.join(resultdir,'loss.jpg'), metric='Loss') plot_results(history.history['accuracy'], history.history['val_accuracy'],\ os.path.join(resultdir,'accuracy.jpg'), metric='Accuracy') # Predict probability on validation data using best model best_model_file, epoch, val_accuracy = get_best_model(resultdir) print( 'Predicting with model saved 
at Epoch={:d} with val_accuracy={:0.4f}'.format(epoch, val_accuracy))
model.load_weights(os.path.join(resultdir, best_model_file))
probs = model.predict(test_gen)
# The network has a single sigmoid output, so threshold the probability at 0.5
# instead of taking argmax (which always returns 0 for a one-column array).
y_pred = (probs > 0.5).astype(np.int32).squeeze()
y_true = test_labels
test_acc = accuracy_score(y_true, y_pred)
print('Test accuracy = {:0.2f}'.format(test_acc * 100.0))
def unet(data_shape=(None, None, None), channels_in=1, channels_out=1, starting_filter_number=32, kernel_size=(3, 3, 3), num_conv_per_pool=2, num_repeat_bottom_conv=0, pool_number=4, pool_size=(2, 2, 2), expansion_rate=2, dropout_type='standard', dropout_rate=0.25, dropout_power=1 / 4, dropblock_size=5, add_conv_layers=4, add_conv_filter_number=32, add_conv_dropout_rate=None, final_activation='linear', gn_type='groups', gn_param=32, weight_constraint=None): if len(data_shape) == 1: print('using 1D operations') Conv = Conv1D UpSampling = UpSampling1D MaxPool = MaxPool1D if dropout_type == 'spatial': DRPT = SpatialDropout1D print('using spatial dropout') elif (dropout_type == 'dropblock') or (dropout_type == 'block'): DRPT = DB1D(block_size=dropblock_size) print('using dropblock with blocksize:', dropblock_size) else: DRPT = Dropout print('using standard dropout') elif len(data_shape) == 2: print('using 2D operations') Conv = Conv2D UpSampling = UpSampling2D MaxPool = MaxPool2D if dropout_type == 'spatial': DRPT = SpatialDropout2D print('using spatial dropout') elif (dropout_type == 'dropblock') or (dropout_type == 'block'): DRPT = DB2D(block_size=dropblock_size) print('using dropblock with blocksize:', dropblock_size) else: DRPT = Dropout print('using standard dropout') elif len(data_shape) == 3: print('using 3D operations') Conv = Conv3D UpSampling = UpSampling3D MaxPool = MaxPool3D if dropout_type == 'spatial': DRPT = SpatialDropout3D print('using spatial dropout') elif (dropout_type == 'dropblock') or (dropout_type == 'block'): DRPT = DB3D(block_size=dropblock_size) print('using dropblock with blocksize:', dropblock_size) else: DRPT = Dropout print('using standard dropout') else: print('Error: data_shape not compatible') return None if (weight_constraint == 'ws') or (weight_constraint == 'weightstandardization'): print('using weight standardization') wsconstraint = WeightStandardization(mean=0, std=1) elif (weight_constraint == 'maxnorm') or (weight_constraint == 'MaxNorm'): print('using MaxNorm') wsconstraint = MaxNorm(max_value=1, axis=[ii for ii in range(len(data_shape) + 1)]) elif (weight_constraint == 'unitnorm') or (weight_constraint == 'UnitNorm'): print('using UnitNorm') wsconstraint = UnitNorm(axis=[0, 1, 2]) else: print('excluding weight constraints') wsconstraint = None layer_conv = {} layer_nonconv = {} number_of_layers_half = pool_number + 1 number_of_filters_max = np.round( (expansion_rate**(number_of_layers_half - 1)) * starting_filter_number) #first half of U layer_nonconv[0] = Input(data_shape + (channels_in, )) print() print('Input:', layer_nonconv[0].shape) print() for layer_number in range(1, number_of_layers_half): number_of_filters_current = np.round( (expansion_rate**(layer_number - 1)) * starting_filter_number) drop_rate_layer = dropout_rate * (np.power( (number_of_filters_current / number_of_filters_max), dropout_power)) if isinstance(pool_size, (list, )): poolsize = pool_size[layer_number - 1] else: poolsize = pool_size if isinstance(kernel_size, (list, )): kernelsize = kernel_size[layer_number - 1] else: kernelsize = kernel_size if gn_type == 'channels': groups = int( np.clip(number_of_filters_current / gn_param, 1, number_of_filters_current)) else: groups = int(np.clip(gn_param, 1, number_of_filters_current)) layer_conv[layer_number] = DRPT(rate=drop_rate_layer)( GroupNormalization(groups=groups)(Conv( filters=number_of_filters_current, kernel_size=kernelsize, padding='same', activation='relu', kernel_constraint=wsconstraint)(layer_nonconv[layer_number - 1]))) 
for _ in range(1, num_conv_per_pool): layer_conv[layer_number] = DRPT(rate=drop_rate_layer)( GroupNormalization(groups=groups)(Conv( filters=number_of_filters_current, kernel_size=kernelsize, padding='same', activation='relu', kernel_constraint=wsconstraint)(layer_conv[layer_number]))) print('{:<30}'.format(str(layer_conv[layer_number].shape)), '\tgroups:', groups, '\tkernel:', kernelsize, '\tdroprate:', drop_rate_layer) layer_nonconv[layer_number] = Concatenate(axis=-1)([ MaxPool(pool_size=poolsize)(layer_conv[layer_number]), DRPT(rate=drop_rate_layer)(GroupNormalization(groups=groups)(Conv( filters=number_of_filters_current, kernel_size=poolsize, strides=poolsize, padding='valid', activation='relu', kernel_constraint=wsconstraint)(layer_conv[layer_number]))) ]) #center of U if isinstance(kernel_size, (list, )): kernelsize = kernel_size[number_of_layers_half - 1] else: kernelsize = kernel_size if gn_type == 'channels': groups = int( np.clip( np.round((expansion_rate**(number_of_layers_half - 1)) * starting_filter_number) / gn_param, 1, np.round((expansion_rate**(number_of_layers_half - 1)) * starting_filter_number))) else: groups = int( np.clip( gn_param, 1, np.round((expansion_rate**(number_of_layers_half - 1)) * starting_filter_number))) layer_conv[number_of_layers_half] = DRPT(rate=dropout_rate)( GroupNormalization(groups=groups)(Conv( filters=np.round((expansion_rate**(number_of_layers_half - 1)) * starting_filter_number), kernel_size=kernelsize, padding='same', activation='relu', kernel_constraint=wsconstraint)( layer_nonconv[number_of_layers_half - 1]))) for _ in range(1, (num_repeat_bottom_conv + 1) * num_conv_per_pool): layer_conv[number_of_layers_half] = DRPT(rate=dropout_rate)( GroupNormalization(groups=groups)(Conv( filters=np.round( (expansion_rate**(number_of_layers_half - 1)) * starting_filter_number), kernel_size=kernelsize, padding='same', activation='relu', kernel_constraint=wsconstraint)( layer_conv[number_of_layers_half]))) print('{:<30}'.format(str(layer_conv[number_of_layers_half].shape)), '\tgroups:', groups, '\tkernel:', kernelsize, '\tdroprate:', dropout_rate) #second half of U for layer_number in range(number_of_layers_half + 1, 2 * number_of_layers_half): number_of_filters_current = np.round( (expansion_rate**(2 * number_of_layers_half - layer_number - 1)) * starting_filter_number) drop_rate_layer = dropout_rate * (np.power( (number_of_filters_current / number_of_filters_max), dropout_power)) if isinstance(pool_size, (list, )): poolsize = pool_size[2 * number_of_layers_half - layer_number - 1] else: poolsize = pool_size if isinstance(kernel_size, (list, )): kernelsize = kernel_size[2 * number_of_layers_half - layer_number - 1] else: kernelsize = kernel_size if gn_type == 'channels': groups = int( np.clip(number_of_filters_current / gn_param, 1, number_of_filters_current)) else: groups = int(np.clip(gn_param, 1, number_of_filters_current)) layer_nonconv[layer_number] = Concatenate(axis=-1)([ DRPT(rate=drop_rate_layer)(GroupNormalization(groups=groups)(Conv( filters=number_of_filters_current, kernel_size=kernelsize, padding='same', activation='relu', kernel_constraint=wsconstraint)(UpSampling(size=poolsize)( layer_conv[layer_number - 1])))), layer_conv[2 * number_of_layers_half - layer_number] ]) layer_conv[layer_number] = DRPT(rate=drop_rate_layer)( GroupNormalization(groups=groups)(Conv( filters=number_of_filters_current, kernel_size=kernelsize, padding='same', activation='relu', kernel_constraint=wsconstraint)(layer_nonconv[layer_number]))) for _ in range(1, 
num_conv_per_pool): layer_conv[layer_number] = DRPT(rate=drop_rate_layer)( GroupNormalization(groups=groups)(Conv( filters=number_of_filters_current, kernel_size=kernelsize, padding='same', activation='relu', kernel_constraint=wsconstraint)(layer_conv[layer_number]))) print('{:<30}'.format(str(layer_conv[layer_number].shape)), '\tgroups:', groups, '\tkernel:', kernelsize, '\tdroprate:', drop_rate_layer) #Add CNN with output if add_conv_layers > 0: print() print('Adding ' + str(add_conv_layers) + ' CNN layers to U-net') number_of_filters_current = add_conv_filter_number if isinstance(kernel_size, (list, )): kernelsize = kernel_size[0] else: kernelsize = kernel_size if gn_type == 'channels': groups = int( np.clip(number_of_filters_current / gn_param, 1, number_of_filters_current)) else: groups = int(np.clip(gn_param, 1, number_of_filters_current)) if add_conv_dropout_rate is None: drop_rate_layer = dropout_rate * (np.power( (number_of_filters_current / number_of_filters_max), dropout_power)) else: drop_rate_layer = add_conv_dropout_rate for layer_CNN_number in range(add_conv_layers): layer_conv[2 * number_of_layers_half + layer_CNN_number] = DRPT( rate=drop_rate_layer)(GroupNormalization(groups=groups)(Conv( number_of_filters_current, kernel_size=kernelsize, padding='same', activation='relu', kernel_constraint=wsconstraint)( layer_conv[2 * number_of_layers_half + layer_CNN_number - 1]))) print( '{:<30}'.format( str(layer_conv[2 * number_of_layers_half + layer_CNN_number].shape)), '\tgroups:', groups, '\tkernel:', kernelsize, '\tdroprate:', drop_rate_layer) if isinstance(kernel_size, (list, )): kernelsize = kernel_size[0] else: kernelsize = kernel_size layer_conv[2 * number_of_layers_half + add_conv_layers] = Conv( channels_out, kernel_size=kernelsize, padding='same', activation=final_activation)(layer_conv[2 * number_of_layers_half + add_conv_layers - 1]) print() print('Output:', layer_conv[2 * number_of_layers_half + add_conv_layers].shape) print() #build and compile U model = Model( inputs=[layer_nonconv[0]], outputs=[layer_conv[2 * number_of_layers_half + add_conv_layers]], name='unet') print('Successfully built ' + str(len(data_shape)) + 'D U-net model') return model
def FCN(input_shape, max_seqlen, num_classes=2, norm_max=1.0): """ Generate a fully convolutional neural network (FCN) model. Parameters ---------- input_shape : tuple defining shape of the input dataset: (num_timesteps, num_channels) num_classes : integer defining number of classes for classification task norm_max : maximum norm for constraint Returns ------- model : Keras model """ outputdim = num_classes # number of classes inputs = Input(shape = input_shape) # Zero padding pad_wd = (max_seqlen - input_shape[0])//2 x = ZeroPadding1D((pad_wd,pad_wd))(inputs) # Stage 1 x = Conv1D(filters=32, kernel_size=7, strides=2, padding='valid', use_bias=False, kernel_constraint=MaxNorm(norm_max, axis=[0,1,2]), name = 'conv1', kernel_initializer=glorot_uniform(seed=0))(x) x = LeakyReLU(alpha=0.1)(x) x = BatchNormalization(axis=-1, momentum=0.9, name='bn_conv1', gamma_constraint=MaxNorm(norm_max,axis=0), beta_constraint=MaxNorm(norm_max,axis=0))(x) # Stage 2 x = conv_block(x, ksz=3, filters=[16,16,32], stage=2, block='a', s=2, norm_max=norm_max) x = identity_block(x, ksz=3, filters=[16,16,32], stage=2, block='b', norm_max=norm_max) x = identity_block(x, ksz=3, filters=[16,16,32], stage=2, block='c', norm_max=norm_max) # # Stage 3 # x = conv_block(x, ksz=3, filters=[64,64,128], stage=3, block='a', s=2) # x = identity_block(x, ksz=3, filters=[64,64,128], stage=3, block='b') # x = identity_block(x, ksz=3, filters=[64,64,128], stage=3, block='c') # x = identity_block(x, ksz=3, filters=[64,64,128], stage=3, block='d') # # Stage 4 # x = conv_block(x, ksz=3, filters=[128,128,256], stage=4, block='a', s=2) # x = identity_block(x, ksz=3, filters=[128,128,256], stage=4, block='b') # x = identity_block(x, ksz=3, filters=[128,128,256], stage=4, block='c') # x = identity_block(x, ksz=3, filters=[128,128,256], stage=4, block='d') # x = identity_block(x, ksz=3, filters=[128,128,256], stage=4, block='e') # # Stage 5 # x = conv_block(x, ksz=3, filters=[256,256,512], stage=5, block='a', s=2) # x = identity_block(x, ksz=3, filters=[256,256,512], stage=5, block='b') # x = identity_block(x, ksz=3, filters=[256,256,512], stage=5, block='c') # x = identity_block(x, ksz=3, filters=[256,256,512], stage=5, block='d') # x = identity_block(x, ksz=3, filters=[256,256,512], stage=5, block='e') # x = identity_block(x, ksz=3, filters=[256,256,512], stage=5, block='f') # Output stage x = Conv1DTranspose(x, filters=64, ksz=5, s=4, norm_max=norm_max) x = GlobalAveragePooling1D()(x) outputs = Dense(num_classes, activation='softmax', name='Dense', kernel_constraint=MaxNorm(norm_max,axis=[0,1]), bias_constraint=MaxNorm(norm_max,axis=0), kernel_initializer=glorot_uniform(seed=0))(x) model = Model(inputs=inputs, outputs=outputs) return model
y_pred = rf.predict(X_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
print(accuracy_score(y_test, y_pred))

## Print the feature importance by random forest
fi = pd.DataFrame({'feature': list(X_train.columns),
                   'importance': rf.feature_importances_}).\
    sort_values('importance', ascending=False)
fi.head()

### Next we compare with a NN; since it's just tabular data, we don't need many layers, which also helps prevent overfitting.
model = Sequential()
model.add(
    Dense(64, input_dim=46, activation='relu', kernel_constraint=MaxNorm(3)))
model.add(Dropout(rate=0.2))
model.add(Dense(8, activation='relu', kernel_constraint=MaxNorm(3)))
model.add(Dropout(rate=0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss="binary_crossentropy", optimizer='adam', metrics=['accuracy'])

history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, batch_size=8)
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
if USE_LSTM: model.add( Bidirectional(LSTM(2048, input_shape=(54, 2048), return_sequences=True), merge_mode='concat') ) # 54 timesteps, 2048 feature length per timestep model.add( SeqSelfAttention(attention_width=15, attention_activation=ACTIVATION_FUNC) ) # attention width 15 * 200 ms = 3 seconds else: model.add(Dense(2048, input_dim=2048)) model.add(Activation(ACTIVATION_FUNC)) model.add(Dropout(DROPOUT_RATE)) model.add(Dense(2048, kernel_constraint=MaxNorm(3))) model.add(Activation(ACTIVATION_FUNC)) model.add(Dropout(DROPOUT_RATE)) model.add(Dense(2048, kernel_constraint=MaxNorm(3))) model.add(Activation(ACTIVATION_FUNC)) model.add(Dropout(DROPOUT_RATE)) model.add(Dense(1024, kernel_constraint=MaxNorm(3))) model.add(Activation(ACTIVATION_FUNC)) model.add(Dropout(DROPOUT_RATE)) model.add(Dense(512, kernel_constraint=MaxNorm(3))) model.add(Activation(ACTIVATION_FUNC)) model.add(Dropout(DROPOUT_RATE)) model.add(Dense(256, kernel_constraint=MaxNorm(3))) elif FEATURE_TYPE == 'feature-streams': if USE_LSTM: model.add(
def Resnet(input_shape, norm_max=1.0):
    """
    Generate a ResNet feature-encoder model.

    Parameters
    ----------
    input_shape : tuple defining shape of the input dataset: (num_timesteps, num_channels)
    norm_max : maximum norm for constraint

    Returns
    -------
    model : Keras model
    """
    inputs = Input(shape=input_shape)

    # Stage 1
    x = Conv1D(filters=32, kernel_size=7, strides=2, padding='valid', use_bias=False,
               kernel_constraint=MaxNorm(norm_max, axis=[0, 1, 2]),
               name='conv1', kernel_initializer=glorot_uniform(seed=0))(inputs)
    x = LeakyReLU(alpha=0.1)(x)
    x = BatchNormalization(axis=-1, momentum=0.9, name='bn_conv1',
                           gamma_constraint=MaxNorm(norm_max, axis=0),
                           beta_constraint=MaxNorm(norm_max, axis=0))(x)

    # Stage 2
    x = conv_block(x, ksz=3, filters=[16, 16, 32], stage=2, block='a', s=2, norm_max=norm_max)
    x = identity_block(x, ksz=3, filters=[16, 16, 32], stage=2, block='b', norm_max=norm_max)
    x = identity_block(x, ksz=3, filters=[16, 16, 32], stage=2, block='c', norm_max=norm_max)

    # # Stage 3
    # x = conv_block(x, ksz=3, filters=[32,32,64], stage=3, block='a', s=2, norm_max=norm_max)
    # x = identity_block(x, ksz=3, filters=[32,32,64], stage=3, block='b', norm_max=norm_max)
    # x = identity_block(x, ksz=3, filters=[32,32,64], stage=3, block='c', norm_max=norm_max)
    # x = identity_block(x, ksz=3, filters=[32,32,64], stage=3, block='d', norm_max=norm_max)

    # # Stage 4
    # x = conv_block(x, ksz=3, filters=[128,128,256], stage=4, block='a', s=2)
    # x = identity_block(x, ksz=3, filters=[128,128,256], stage=4, block='b')
    # x = identity_block(x, ksz=3, filters=[128,128,256], stage=4, block='c')
    # x = identity_block(x, ksz=3, filters=[128,128,256], stage=4, block='d')
    # x = identity_block(x, ksz=3, filters=[128,128,256], stage=4, block='e')

    # # Stage 5
    # x = conv_block(x, ksz=3, filters=[256,256,512], stage=5, block='a', s=2)
    # x = identity_block(x, ksz=3, filters=[256,256,512], stage=5, block='b')
    # x = identity_block(x, ksz=3, filters=[256,256,512], stage=5, block='c')
    # x = identity_block(x, ksz=3, filters=[256,256,512], stage=5, block='d')
    # x = identity_block(x, ksz=3, filters=[256,256,512], stage=5, block='e')
    # x = identity_block(x, ksz=3, filters=[256,256,512], stage=5, block='f')

    # Output stage
    x = MaxPooling1D(pool_size=2)(x)
    outputs = Flatten(name='last_layer')(x)

    model = Model(inputs=inputs, outputs=outputs)
    return model
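# A quick smoke-test sketch for the Resnet encoder above, assuming the conv_block and
# identity_block helpers it calls are in scope (identity_block appears earlier in this
# collection; conv_block is not shown). The (1500, 6) input shape mirrors the sequence
# length and channel count used elsewhere in these scripts and is only an example.
encoder = Resnet(input_shape=(1500, 6), norm_max=20.0)
encoder.summary()
print('Encoded feature size:', encoder.output_shape[-1])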
def flor(input_size, output_size, learning_rate=5e-4): """Gated Convolucional Recurrent Neural Network by Flor.""" input_data = Input(name="input", shape=input_size) cnn = Conv2D(filters=16, kernel_size=(3, 3), strides=(2, 2), padding="same")(input_data) cnn = PReLU(shared_axes=[1, 2])(cnn) cnn = BatchNormalization(renorm=True)(cnn) cnn = FullGatedConv2D(filters=16, kernel_size=(3, 3), padding="same")(cnn) cnn = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 2), padding="same")(cnn) cnn = PReLU(shared_axes=[1, 2])(cnn) cnn = BatchNormalization(renorm=True)(cnn) cnn = FullGatedConv2D(filters=32, kernel_size=(3, 3), padding="same")(cnn) cnn = Conv2D(filters=40, kernel_size=(2, 4), strides=(2, 2), padding="same")(cnn) cnn = PReLU(shared_axes=[1, 2])(cnn) cnn = BatchNormalization(renorm=True)(cnn) cnn = FullGatedConv2D(filters=40, kernel_size=(3, 3), padding="same", kernel_constraint=MaxNorm(4, [0, 1, 2]))(cnn) cnn = Dropout(rate=0.2)(cnn) cnn = Conv2D(filters=48, kernel_size=(3, 3), strides=(1, 2), padding="same")(cnn) cnn = PReLU(shared_axes=[1, 2])(cnn) cnn = BatchNormalization(renorm=True)(cnn) cnn = FullGatedConv2D(filters=48, kernel_size=(3, 3), padding="same", kernel_constraint=MaxNorm(4, [0, 1, 2]))(cnn) cnn = Dropout(rate=0.2)(cnn) cnn = Conv2D(filters=56, kernel_size=(2, 4), strides=(2, 2), padding="same")(cnn) cnn = PReLU(shared_axes=[1, 2])(cnn) cnn = BatchNormalization(renorm=True)(cnn) cnn = FullGatedConv2D(filters=56, kernel_size=(3, 3), padding="same", kernel_constraint=MaxNorm(4, [0, 1, 2]))(cnn) cnn = Dropout(rate=0.2)(cnn) cnn = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding="same")(cnn) cnn = PReLU(shared_axes=[1, 2])(cnn) cnn = BatchNormalization(renorm=True)(cnn) cnn = MaxPooling2D(pool_size=(1, 2), strides=(1, 2), padding="valid")(cnn) shape = cnn.get_shape() blstm = Reshape((shape[1], shape[2] * shape[3]))(cnn) blstm = Bidirectional(LSTM(units=128, return_sequences=True, dropout=0.5))(blstm) blstm = Dense(units=128)(blstm) blstm = Bidirectional(LSTM(units=128, return_sequences=True, dropout=0.5))(blstm) blstm = Dense(units=output_size)(blstm) output_data = Activation(activation="softmax")(blstm) optimizer = RMSprop(learning_rate=learning_rate) return (input_data, output_data, optimizer)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, InputLayer, Dropout, Flatten, Reshape, BatchNormalization, Conv2D, MaxPooling2D, AveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.constraints import MaxNorm

# model architecture
model = Sequential()
model.add(InputLayer(input_shape=(X_train.shape[1], ), name='x_input'))
model.add(Reshape((int(X_train.shape[1] / 13), 13, 1),
                  input_shape=(X_train.shape[1], )))
model.add(Conv2D(10, kernel_size=5, activation='relu', padding='same',
                 kernel_constraint=MaxNorm(3)))
model.add(AveragePooling2D(pool_size=2, padding='same'))
model.add(Conv2D(5, kernel_size=5, activation='relu', padding='same',
                 kernel_constraint=MaxNorm(3)))
model.add(AveragePooling2D(pool_size=2, padding='same'))
model.add(Flatten())
model.add(Dense(classes, activation='softmax', name='y_pred',
                kernel_constraint=MaxNorm(3)))

# this controls the learning rate
opt = Adam(lr=0.005, beta_1=0.9, beta_2=0.999)

# train the neural network
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
model.fit(X_train, Y_train, batch_size=32, epochs=9,
          validation_data=(X_test, Y_test), verbose=2)
def train_neural_network(self): from tensorflow.keras.preprocessing.image import ImageDataGenerator self.train_generator = ImageDataGenerator( rescale=0.02, shear_range=0.01, zoom_range=0.02, horizontal_flip=False).flow_from_directory( directory=os.path.join(SCRAPER_DIR, TRAIN_FOLDER), target_size=(img_height, img_width), batch_size=128, class_mode='binary', color_mode='rgb') self.validation_generator = ImageDataGenerator( rescale=0.01, shear_range=0.05, zoom_range=0.05, horizontal_flip=False).flow_from_directory( directory=os.path.join(SCRAPER_DIR, VALIDATE_FOLDER), target_size=(img_height, img_width), batch_size=128, class_mode='binary', color_mode='rgb') num_classes = 52 input_shape = (50, 15, 3) epochs = 20 from tensorflow.keras.callbacks import TensorBoard from tensorflow.keras.constraints import MaxNorm from tensorflow.keras.layers import Conv2D, MaxPooling2D from tensorflow.keras.layers import Dropout, Flatten, Dense from tensorflow.keras.models import Sequential model = Sequential() model.add( Conv2D(64, (3, 3), input_shape=input_shape, activation='relu', padding='same')) model.add(Dropout(0.2)) model.add(Conv2D(64, (2, 2), activation='relu', padding='same')) model.add(MaxPooling2D(pool_size=(2, 2))) model.add(Conv2D(128, (3, 3), activation='relu', padding='same')) model.add(Dropout(0.2)) model.add(Conv2D(128, (3, 3), activation='relu', padding='same')) model.add(MaxPooling2D(pool_size=(2, 2))) model.add(Conv2D(256, (3, 3), activation='relu', padding='same')) model.add(Dropout(0.2)) model.add(Conv2D(256, (3, 3), activation='relu', padding='same')) model.add(MaxPooling2D(pool_size=(2, 2))) model.add(Flatten()) model.add(Dropout(0.2)) model.add(Dense(2048, activation='relu', kernel_constraint=MaxNorm(3))) model.add(Dropout(0.2)) model.add(Dense(1024, activation='relu', kernel_constraint=MaxNorm(3))) model.add(Dropout(0.2)) model.add(Dense(num_classes, activation='softmax')) from tensorflow.keras.losses import sparse_categorical_crossentropy from tensorflow.keras import optimizers model.compile(loss=sparse_categorical_crossentropy, optimizer=optimizers.Adam(), metrics=['accuracy']) log.info(model.summary()) from tensorflow.keras.callbacks import EarlyStopping early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=1, verbose=1, mode='auto') tb = TensorBoard(log_dir='c:/tensorboard/pb', histogram_freq=1, write_graph=True, write_images=True, embeddings_freq=1, embeddings_layer_names=False, embeddings_metadata=False) model.fit(self.train_generator, epochs=epochs, verbose=1, validation_data=self.validation_generator, callbacks=[early_stop]) self.model = model score = model.evaluate(self.validation_generator, steps=52) print('Validation loss:', score[0]) print('Validation accuracy:', score[1])
x_input = Input(batch_shape=(None, x_train.shape[1]))
e_layer = Embedding(input_dim=vocab_size, output_dim=EMBEDDING_DIM)(x_input)
e_layer = Dropout(rate=0.5)(e_layer)

conv_feature_maps = []
for ks in [3, 4, 5]:
    r_layer = Conv1D(filters=HIDDEN_DIM, kernel_size=ks, padding='valid',
                     activation='relu')(e_layer)
    max_pool = GlobalMaxPooling1D()(r_layer)
    flatten = Flatten()(max_pool)
    conv_feature_maps.append(flatten)

r_layer = Concatenate()(conv_feature_maps)
r_layer = Dropout(rate=0.5)(r_layer)
y_output = Dense(250, activation='relu', kernel_constraint=MaxNorm(max_value=3.))(r_layer)
# Feed the hidden layer into the final sigmoid unit (the original applied it to r_layer,
# which skipped the 250-unit layer entirely).
y_output = Dense(1, activation='sigmoid', kernel_constraint=MaxNorm(max_value=3.))(y_output)

model = Model(x_input, y_output)
model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0001))
model.summary()

# training
hist = model.fit(x_train, y_train, validation_data=(x_test, y_test),
                 batch_size=256, epochs=30)
def _build_model(self): """ Builds a model based on the architecture defined in the configs The input received is already padded from the data processing module for variable sequence length. Making is used to keep track of padded elements in the tensor. Keras layers such as Cropping1D and Concatenate do not use masking, hence custom layer RemoveMask is used to strip masking information from the outputs for such layers. Architecture Logic for Multi Step Forecast -> Append the output of previous forecast step to the next one 1. Concatenate last time step aux features with outputs as outputs only contain financial fields 2. Concatenate the above output to the inputs and strip the first element in the sequence to keep the input shape consistent 3. Repeat 1,2 for subsequent outputs :return: compiled keras model which outputs ((output_1, mask_1), (output_2, mask_2), ...) where _1 refers to the forecast step. For example _1 : 12 month forecast, _2 : 24 month forecast and so on """ outputs = [] # Masking information is only used by certain layers such as LSTM. Hence two copies of inputs are used, one for # propagating the mask and second for storing inputs which are used in operations such as Cropping1D and # concatenate. inputs = x = keras.Input(shape=(self.seq_len * self.n_inputs), name='input_financials') prev_input = inputs # last_time_step_aux = self.get_last_time_step_aux(x) hidden_layer_count = 0 output_count = 0 initializer = self.initializer.get_initializer() # TODO: make activation generic so as to use 'relu', 'leakyrelu', 'tanh' from config for i in range(self.n_layers): hidden_layer_count += 1 x = layers.Dense(self.n_hidden_units, kernel_initializer=initializer, kernel_regularizer=tf.keras.regularizers.l2( self.config.l2_alpha), kernel_constraint=MaxNorm(self.config.max_norm), use_bias=False, name='dense_%i' % hidden_layer_count)(x) x = layers.BatchNormalization()(x) x = layers.ReLU()(x) x = layers.Dropout(rate=self.config.dropout)( x, training=self.config.train) output_count += 1 cur_output = layers.Dense(self.n_outputs, name='OUTPUT_%i' % output_count)(x) outputs.append(cur_output) for fcst_step in range(1, self.forecast_steps): print("Multi-step forecast not implemented for MLP") raise NotImplementedError # # output_count, lstm_count keep track of layer ids. output_count and fcst_step are not the same as one # # fcst_step could have multiple outputs. 
# output_count += 1 # cur_output = outputs[-1] # last_time_step_fin = self.get_last_time_step(cur_output, output_count) # # Combine latest prediction with last available aux features to make the input shape compatible # last_time_step = layers.concatenate([last_time_step_fin, last_time_step_aux], axis=2, # name='concat_fin_aux_%i' % fcst_step) # # combine latest prediction with input sequence # cur_input = layers.concatenate([prev_input, last_time_step], axis=1, # name='combine_input_w_last_pred_%i' % fcst_step) # cur_input = layers.Cropping1D(cropping=(1, 0), name='updated_input_w_last_pred_%i' % fcst_step)(cur_input) # prev_input = cur_input # # Add layer for intermediary prediction # hidden_layer_count += 1 # dense_intm = layers.Dense(self.n_hidden_units, # kernel_initializer=initializer, # kernel_regularizer = tf.keras.regularizers.l2(self.config.l2_alpha), # use_bias=False, # name='dense_%i' % hidden_layer_count)(cur_input) # dense_intm = layers.BatchNormalization()(dense_intm) # dense_intm = layers.ReLU()(dense_intm) # dense_intm = layers.Dropout(rate=self.config.dropout)(dense_intm, training=self.config.train) # outputs.append(layers.Dense(self.n_outputs, name='OUTPUT_%i' % output_count)(dense_intm)) model = keras.Model(inputs=inputs, outputs=outputs) return model
def _build_model(self): """ Builds a rnn uq range estimate model based on the architecture defined in the configs The input received is already padded from the data processing module for variable sequence length. Making is used to keep track of padded elements in the tensor. Keras layers such as Cropping1D and Concatenate do not use masking, hence custom layer RemoveMask is used to strip masking information from the outputs for such layers. Architecture Logic for Multi Step Forecast -> Append the output of previous forecast step to the next one 1. Concatenate last time step aux features with outputs as outputs only contain financial fields 2. Concatenate the above output to the inputs and strip the first element in the sequence to keep the input shape consistent 3. Repeat 1,2 for subsequent outputs :return: compiled keras model which outputs (output_1, output_2, ...) where _1 refers to the forecast step. For example _1 : 12 month forecast, _2 : 24 month forecast and so on """ outputs = [] # Masking information is only used by certain layers such as LSTM. Hence two copies of inputs are used, one for # propagating the mask and second for storing inputs which are used in operations such as Cropping1D and # concatenate. inputs = x = keras.Input(shape=(self.seq_len, self.n_inputs), name='input_financials') prev_input = inputs last_time_step_aux = self.get_last_time_step_aux(x) lstm_count = 0 output_count = 0 initializer = self.initializer.get_initializer() for i in range(self.n_layers): lstm_count += 1 if self.config.rnn_cell == 'lstm': x = layers.LSTM( self.n_hidden_units, kernel_initializer=initializer, kernel_regularizer=tf.keras.regularizers.l2( self.config.l2_alpha), recurrent_regularizer=tf.keras.regularizers.l2( self.config.recurrent_l2_alpha), return_sequences=True, kernel_constraint=MaxNorm(self.config.max_norm), recurrent_dropout=self.config.recurrent_dropout, name='lstm_%i' % lstm_count)(x, training=True) x = layers.BatchNormalization()(x) x = layers.Dropout(rate=self.config.dropout)(x, training=True) elif self.config.rnn_cell == 'gru': x = layers.GRU(self.n_hidden_units, kernel_initializer=initializer, kernel_regularizer=tf.keras.regularizers.l2( self.config.l2_alpha), recurrent_regularizer=tf.keras.regularizers.l2( self.config.recurrent_l2_alpha), return_sequences=True, kernel_constraint=MaxNorm(self.config.max_norm), recurrent_dropout=self.config.recurrent_dropout, name='gru_%i' % lstm_count)(x, training=True) x = layers.BatchNormalization()(x) x = layers.Dropout(rate=self.config.dropout)(x, training=True) else: raise NotImplementedError output_count += 1 # outputs for target values cur_output_tar = layers.Dense(self.n_outputs, name='OUTPUT_TARGET_%i' % output_count)(x) # outputs for variances of the target values cur_output_var = layers.Dense(self.n_outputs, name='OUTPUT_VARIANCE_%i' % output_count)(x) cur_output_var = SoftPlus()(cur_output_var) outputs.append(cur_output_tar) outputs.append(cur_output_var) for fcst_step in range(1, self.forecast_steps): # output_count, lstm_count keep track of layer ids. output_count and fcst_step are not the same as one # fcst_step could have multiple outputs. 
output_count += 1 cur_output = outputs[-2] # last target output last_time_step_fin = self.get_last_time_step( cur_output, output_count) # Combine latest prediction with last available aux features to make the input shape compatible last_time_step = layers.concatenate( [last_time_step_fin, last_time_step_aux], axis=2, name='concat_fin_aux_%i' % fcst_step) # combine latest prediction with input sequence cur_input = layers.concatenate( [prev_input, last_time_step], axis=1, name='combine_input_w_last_pred_%i' % fcst_step) cur_input = layers.Cropping1D(cropping=(1, 0), name='updated_input_w_last_pred_%i' % fcst_step)(cur_input) prev_input = cur_input # Add LSTM layer for intermediary prediction lstm_count += 1 if self.config.rnn_cell == 'lstm': intm = layers.LSTM( self.n_hidden_units, return_sequences=True, kernel_initializer=initializer, kernel_regularizer=tf.keras.regularizers.l2( self.config.l2_alpha), recurrent_regularizer=tf.keras.regularizers.l2( self.config.recurrent_l2_alpha), kernel_constraint=MaxNorm(self.config.max_norm), recurrent_dropout=self.config.recurrent_dropout, name='lstm_%i' % lstm_count)(cur_input, training=True) intm = layers.BatchNormalization()(intm) intm = layers.Dropout(rate=self.config.dropout)(intm, training=True) elif self.config.rnn_cell == 'gru': intm = layers.GRU( self.n_hidden_units, return_sequences=True, kernel_initializer=initializer, kernel_regularizer=tf.keras.regularizers.l2( self.config.l2_alpha), recurrent_regularizer=tf.keras.regularizers.l2( self.config.recurrent_l2_alpha), kernel_constraint=MaxNorm(self.config.max_norm), recurrent_dropout=self.config.recurrent_dropout, name='gru_%i' % lstm_count)(cur_input, training=True) intm = layers.BatchNormalization()(intm) intm = layers.Dropout(rate=self.config.dropout)(intm, training=True) else: raise NotImplementedError outputs.append( layers.Dense(self.n_outputs, name='OUTPUT_TARGET_%i' % output_count)(intm)) intm_var = layers.Dense(self.n_outputs, name='OUTPUT_VARIANCE_%i' % output_count)(intm) outputs.append(SoftPlus()(intm_var)) model = keras.Model(inputs=inputs, outputs=outputs) return model
dropout = 0.3
neurons = 500
red_rate = 0.3
constraint = 4
mom = 0.9
learning_rate = 0.01
seed = 1

set_seed(seed)

model = Sequential()
model.add(
    Dense(reduce_width(neurons, red_rate, 0),
          input_shape=(input_dim, ),
          kernel_constraint=MaxNorm(constraint)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(dropout))
model.add(
    Dense(reduce_width(neurons, red_rate, 1),
          kernel_constraint=MaxNorm(constraint)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(dropout))
model.add(
    Dense(reduce_width(neurons, red_rate, 2),
          kernel_constraint=MaxNorm(constraint)))
model.add(BatchNormalization())
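# The snippet above relies on a reduce_width helper that is not shown. A plausible,
# purely hypothetical definition consistent with its call sites - shrinking the layer
# width by red_rate at each depth - might look like this:
def reduce_width(neurons, red_rate, depth):
    """Return the layer width after shrinking `neurons` by `red_rate`, `depth` times."""
    return max(1, int(neurons * (1 - red_rate) ** depth))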
def flor(input_size, d_model, learning_rate):
    """
    Gated Convolutional Recurrent Neural Network by Flor et al.
    """
    input_data = Input(name="input", shape=input_size)

    cnn = Conv2D(filters=16, kernel_size=(3, 3), strides=(2, 2), padding="same",
                 kernel_initializer="he_uniform")(input_data)
    cnn = PReLU(shared_axes=[1, 2])(cnn)
    cnn = BatchNormalization(renorm=True)(cnn)
    cnn = FullGatedConv2D(filters=16, kernel_size=(3, 3), padding="same")(cnn)

    cnn = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding="same",
                 kernel_initializer="he_uniform")(cnn)
    cnn = PReLU(shared_axes=[1, 2])(cnn)
    cnn = BatchNormalization(renorm=True)(cnn)
    cnn = FullGatedConv2D(filters=32, kernel_size=(3, 3), padding="same")(cnn)

    cnn = Conv2D(filters=40, kernel_size=(2, 4), strides=(2, 4), padding="same",
                 kernel_initializer="he_uniform")(cnn)
    cnn = PReLU(shared_axes=[1, 2])(cnn)
    cnn = BatchNormalization(renorm=True)(cnn)
    cnn = FullGatedConv2D(filters=40, kernel_size=(3, 3), padding="same",
                          kernel_constraint=MaxNorm(4, [0, 1, 2]))(cnn)
    cnn = Dropout(rate=0.2)(cnn)

    cnn = Conv2D(filters=48, kernel_size=(3, 3), strides=(1, 1), padding="same",
                 kernel_initializer="he_uniform")(cnn)
    cnn = PReLU(shared_axes=[1, 2])(cnn)
    cnn = BatchNormalization(renorm=True)(cnn)
    cnn = FullGatedConv2D(filters=48, kernel_size=(3, 3), padding="same",
                          kernel_constraint=MaxNorm(4, [0, 1, 2]))(cnn)
    cnn = Dropout(rate=0.2)(cnn)

    cnn = Conv2D(filters=56, kernel_size=(2, 4), strides=(2, 4), padding="same",
                 kernel_initializer="he_uniform")(cnn)
    cnn = PReLU(shared_axes=[1, 2])(cnn)
    cnn = BatchNormalization(renorm=True)(cnn)
    cnn = FullGatedConv2D(filters=56, kernel_size=(3, 3), padding="same",
                          kernel_constraint=MaxNorm(4, [0, 1, 2]))(cnn)
    cnn = Dropout(rate=0.2)(cnn)

    cnn = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding="same",
                 kernel_initializer="he_uniform")(cnn)
    cnn = PReLU(shared_axes=[1, 2])(cnn)
    cnn = BatchNormalization(renorm=True)(cnn)
    cnn = MaxPooling2D(pool_size=(1, 2), strides=(1, 2), padding="valid")(cnn)

    shape = cnn.get_shape()
    nb_units = shape[2] * shape[3]

    bgru = Reshape((shape[1], nb_units))(cnn)
    bgru = Bidirectional(GRU(units=nb_units, return_sequences=True, dropout=0.5))(bgru)
    bgru = Dense(units=nb_units * 2)(bgru)
    bgru = Bidirectional(GRU(units=nb_units, return_sequences=True, dropout=0.5))(bgru)
    output_data = Dense(units=d_model, activation="softmax")(bgru)

    if learning_rate is None:
        learning_rate = 5e-4

    optimizer = RMSprop(learning_rate=learning_rate)

    return (input_data, output_data, optimizer)
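# A hedged sketch of assembling the (inputs, outputs, optimizer) tuple returned by flor
# into a trainable Keras model. The input size, the charset size passed as d_model, and
# the loss choice are assumptions about the surrounding project, not facts from the
# snippet above; handwriting-recognition pipelines of this kind usually wire in a CTC
# loss, and a standard categorical loss is used here only as a placeholder.
inputs, outputs, optimizer = flor(input_size=(1024, 128, 1), d_model=98, learning_rate=5e-4)
htr_model = Model(inputs=inputs, outputs=outputs)
htr_model.compile(optimizer=optimizer, loss='categorical_crossentropy')
htr_model.summary()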
def architecture(self, input_size, d_model): input_data = Input(name="input", shape=input_size) cnn = Reshape((input_size[0] // 2, input_size[1] // 2, input_size[2] * 4))(input_data) cnn = Conv2D(filters=16, kernel_size=(3, 3), strides=(1, 2), padding="same", kernel_initializer="he_uniform")(cnn) cnn = PReLU(shared_axes=[1, 2])(cnn) cnn = BatchNormalization(renorm=True)(cnn) cnn = FullGatedConv2D(filters=16, kernel_size=(3, 3), padding="same")(cnn) cnn = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding="same", kernel_initializer="he_uniform")(cnn) cnn = PReLU(shared_axes=[1, 2])(cnn) cnn = BatchNormalization(renorm=True)(cnn) cnn = FullGatedConv2D(filters=32, kernel_size=(3, 3), padding="same")(cnn) cnn = Conv2D(filters=40, kernel_size=(2, 4), strides=(2, 4), padding="same", kernel_initializer="he_uniform")(cnn) cnn = PReLU(shared_axes=[1, 2])(cnn) cnn = BatchNormalization(renorm=True)(cnn) cnn = FullGatedConv2D(filters=40, kernel_size=(3, 3), padding="same", kernel_constraint=MaxNorm(4, [0, 1, 2]))(cnn) cnn = Dropout(rate=0.2)(cnn) cnn = Conv2D(filters=48, kernel_size=(3, 3), strides=(1, 1), padding="same", kernel_initializer="he_uniform")(cnn) cnn = PReLU(shared_axes=[1, 2])(cnn) cnn = BatchNormalization(renorm=True)(cnn) cnn = FullGatedConv2D(filters=48, kernel_size=(3, 3), padding="same", kernel_constraint=MaxNorm(4, [0, 1, 2]))(cnn) cnn = Dropout(rate=0.2)(cnn) cnn = Conv2D(filters=56, kernel_size=(2, 4), strides=(2, 4), padding="same", kernel_initializer="he_uniform")(cnn) cnn = PReLU(shared_axes=[1, 2])(cnn) cnn = BatchNormalization(renorm=True)(cnn) cnn = FullGatedConv2D(filters=56, kernel_size=(3, 3), padding="same", kernel_constraint=MaxNorm(4, [0, 1, 2]))(cnn) cnn = Dropout(rate=0.2)(cnn) cnn = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding="same", kernel_initializer="he_uniform")(cnn) cnn = PReLU(shared_axes=[1, 2])(cnn) cnn = BatchNormalization(renorm=True)(cnn) shape = cnn.get_shape() bgru = Reshape((shape[1], shape[2] * shape[3]))(cnn) bgru = Bidirectional(GRU(units=128, return_sequences=True, dropout=0.5))(bgru) bgru = Dense(units=256)(bgru) bgru = Bidirectional(GRU(units=128, return_sequences=True, dropout=0.5))(bgru) bgru = Dense(units=256)(bgru) bgru = Bidirectional(GRU(units=128, return_sequences=True, dropout=0.5))(bgru) output_data = Dense(units=d_model, activation="softmax")(bgru) return (input_data, output_data)