def LSTM_critic(useMask=True,
                hiddenDim=100,
                LSTMactiv='tanh',
                depth=1,
                useLSTMfc=True,
                LSTMfcDim=16,
                LSTMfcActiv='relu',
                oneHotWordDim=wordsVocabSize,
                useOneHotWordFc=False,
                oneHotWordFcDim=16,
                oneHotWordFcActiv='relu',
                outputHDim=64,
                outputActiv='relu',
                lr=5e-4):
    """Build and compile the LSTM critic model.

    The model has two inputs: a sequence of shape
    ``(framesPerWord, nOfMouthPixels)`` that is encoded by a stack of LSTM
    layers, and a word vector of length ``oneHotWordDim``.  The two feature
    vectors are concatenated, passed through one hidden Dense layer and
    reduced to a single sigmoid output.  The model is compiled with Adam and
    binary cross-entropy.

    Args:
        useMask: Put a ``Masking(mask_value=0.)`` layer before the LSTM.
        hiddenDim: Number of units in every LSTM layer.
        LSTMactiv: Activation of the LSTM layers.
        depth: Number of stacked LSTM layers (values below 1 behave like 1).
        useLSTMfc: Add a Dense layer on top of the LSTM encoding.
        LSTMfcDim: Units of that Dense layer.
        LSTMfcActiv: Activation of that Dense layer.
        oneHotWordDim: Length of the word input vector.
        useOneHotWordFc: Add a Dense layer on top of the word input.
        oneHotWordFcDim: Units of that Dense layer.
        oneHotWordFcActiv: Activation of that Dense layer.
        outputHDim: Units of the hidden Dense layer before the output.
        outputActiv: Activation of that hidden layer.
        lr: Adam learning rate.

    Returns:
        Tuple ``(criticModel, fileNamePre)``: the compiled Keras model and a
        file-name prefix that encodes the chosen hyper-parameters.
    """
    # Manual seeds
    os.environ['PYTHONHASHSEED'] = '0'  # Necessary for python3
    np.random.seed(29)
    rn.seed(29)
    tf.set_random_seed(29)

    # Input
    vidInput = Input(shape=(framesPerWord, nOfMouthPixels,))

    # Mask
    if useMask:
        LSTMinput = Masking(mask_value=0.)(vidInput)
    else:
        LSTMinput = vidInput

    # (Deep) LSTM: every layer but the last returns the full sequence so the
    # next LSTM can consume it; the last one returns only its final state.
    numLayers = max(depth, 1)
    encoded = LSTMinput
    for layerIdx in range(numLayers):
        isLastLayer = layerIdx == numLayers - 1
        encoded = LSTM(hiddenDim,
                       activation=LSTMactiv,
                       return_sequences=not isLastLayer)(encoded)

    # LSTM Fc
    if useLSTMfc:
        vidFeatures = Dense(LSTMfcDim, activation=LSTMfcActiv)(encoded)
    else:
        vidFeatures = encoded

    # Predicted Word input
    oneHotWordInput = Input(shape=(oneHotWordDim,))

    # OHWfc
    if useOneHotWordFc:
        oneHotWordFeatures = Dense(
            oneHotWordFcDim, activation=oneHotWordFcActiv)(oneHotWordInput)
    else:
        oneHotWordFeatures = oneHotWordInput

    # Full feature.  Use the (optionally transformed) word features here;
    # concatenating the raw input would leave the OHWfc layer disconnected
    # whenever useOneHotWordFc is True.
    fullFeature = concatenate([vidFeatures, oneHotWordFeatures])

    # Output
    y = Dense(outputHDim, activation=outputActiv)(fullFeature)
    myOutput = Dense(1, activation='sigmoid')(y)

    # Model
    criticModel = Model(inputs=[vidInput, oneHotWordInput], outputs=myOutput)

    adam = Adam(lr=lr)
    criticModel.compile(optimizer=adam,
                        loss='binary_crossentropy',
                        metrics=['accuracy'])
    criticModel.summary()

    # NOTE(review): the prefix always contains "-Mask", even when useMask is
    # False; left unchanged so existing file names keep matching.
    fileNamePre = 'LSTMCritic-revSeq-Mask-LSTMh' + str(hiddenDim) \
        + '-LSTMactiv' + str(LSTMactiv) + '-depth' + str(depth)
    if useLSTMfc:
        fileNamePre += '-LSTMfc' + str(LSTMfcDim)
    fileNamePre += '-OHWord' + str(oneHotWordDim)
    if useOneHotWordFc:
        fileNamePre += '-OHWordFc' + str(oneHotWordFcDim)
    fileNamePre += '-out' + str(outputHDim) \
        + '-Adam-%1.e' % lr
    print(fileNamePre)

    return criticModel, fileNamePre
# Script fragment: load the training data and start wiring a masked LSTM
# encoder for the "method_a" input.
project = 'manual-ant-1.8.2'
x_train, y_train = preprocess.get_xy_train(TRAIN_SET_DIR + '/train',
                                           tokenizer=tokenizer,
                                           mn_maxlen=MAX_SEQUENCE_LENGTH,
                                           embedding_matrix=embedding_matrix)
#x_test, y_test = preprocess.get_xy_test(test_path,project,tokenizer=tokenizer, maxlen=MAX_SEQUENCE_LENGTH,embedding_matrix=embedding_matrix)

print('Training model.')

# The Embedding layer below is commented out; instead the model input is
# declared with shape (MAX_SEQUENCE_LENGTH, EMBEDDING_DIM).
# presumably get_xy_train already returns embedded sequences -- TODO confirm
method_a = Input(shape=(MAX_SEQUENCE_LENGTH, EMBEDDING_DIM), name='method_a')
# Second input of width 12; it is declared here but not used in this fragment.
metric_a = Input(shape=(12, ), name='metric_a')
#embedding_layer = Embedding(len(all_word_index) + 1,EMBEDDING_DIM,input_length=MAX_SEQUENCE_LENGTH,weights=[embedding_matrix],trainable=False)
# Timesteps whose values are all 0 are masked out before the LSTM.
masking_layer = Masking(mask_value=0,
                        input_shape=(MAX_SEQUENCE_LENGTH, EMBEDDING_DIM))
# NOTE(review): `output_dim` and `init` look like Keras 1 argument names --
# confirm the Keras version before renaming them.
lstm_share = LSTM(output_dim=2, activation='sigmoid', init='uniform')
#dropout_share = GaussianNoise(0.2)
#dense_share = Dense(8,activation='tanh',init='uniform')
#dense_share3 = Dense(64,activation='tanh',init='uniform')
#bn_share = BatchNormalization(epsilon=0.001, mode=0, axis=-1, momentum=0.9, weights=None, beta_init='zero', gamma_init='one')

# Apply the layers: mask first, then the LSTM.
embedding_a = masking_layer(method_a)
lstm_a = lstm_share(embedding_a)
#dropout_a = dropout_share(lstm_a)
#bn_a = bn_share(lstm_a)
#encoded_a = dense_share(dropout_a)
#decoded_a = dense_share3(encoded_a)
#merged_vector = keras.layers.dot([encoded_a,encoded_b],normalize=True,axes=-1)
#dense_vector = Dense(16,activation='tanh')(merged_vector)
def __init__(self, dim, batch_norm, dropout, rec_dropout, header, task, mode,
             target_repl=False, deep_supervision=False, num_classes=1,
             depth=1, input_dim=76, size_coef=4, **kwargs):
    """Build the channel-wise LSTM network.

    The columns listed in ``header`` are grouped into channels, every channel
    is sliced out of the masked input and processed by its own LSTM stack,
    the per-channel outputs are concatenated and fed through further LSTM
    layers and a final Dense output.

    Args:
        dim: Number of LSTM units per channel.
        batch_norm: Stored on the instance; not used while building.
        dropout: Input dropout of the LSTMs; also applied after the last
            LSTM when greater than 0.
        rec_dropout: Recurrent dropout of the LSTMs.
        header: Sequence of column names.  A name of the form
            ``"channel->suffix"`` belongs to ``channel``; names containing
            ``"mask->"`` do not define a channel.
        task: One of ``'decomp'``, ``'ihm'``, ``'ph'`` (sigmoid output) or
            ``'los'`` (relu when ``num_classes == 1``, otherwise softmax).
        mode: ``'train'`` enables sequence outputs and the extra mask input.
        target_repl: Emit both the last-step and the per-step predictions.
        deep_supervision: Use unidirectional LSTMs and, in train mode, an
            additional mask input ``M``.
        num_classes: Width of the output layer.
        depth: Number of LSTM layers per channel; ``depth - 1`` further
            layers follow the concatenation.
        input_dim: Feature dimension of input ``X``.
        size_coef: Multiplier applied to ``dim`` after the concatenation.
        **kwargs: Ignored; their keys are only printed.

    Raises:
        ValueError: If ``task`` is not one of the supported values.
    """
    self.dim = dim
    self.batch_norm = batch_norm
    self.dropout = dropout
    self.rec_dropout = rec_dropout
    self.depth = depth
    self.size_coef = size_coef

    if task in ['decomp', 'ihm', 'ph']:
        final_activation = 'sigmoid'
    elif task in ['los']:
        if num_classes == 1:
            final_activation = 'relu'
        else:
            final_activation = 'softmax'
    else:
        # Was `return ValueError(...)`: returning a value from __init__ makes
        # Python raise an unrelated TypeError instead of this message.
        raise ValueError("Wrong value for task")

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("==> not used params in network class: {}".format(kwargs.keys()))

    # Parse channels
    channel_names = set()
    for ch in header:
        if ch.find("mask->") != -1:
            continue
        pos = ch.find("->")
        if pos != -1:
            channel_names.add(ch[:pos])
        else:
            channel_names.add(ch)
    channel_names = sorted(channel_names)
    print("==> found {} channels: {}".format(len(channel_names),
                                            channel_names))

    # Each channel is the list of column indices whose name contains the
    # channel name (substring match, as before).  A real list is built so
    # the result does not depend on `filter` being eager.
    channels = [
        [i for i, column in enumerate(header) if column.find(ch) != -1]
        for ch in channel_names
    ]

    # Input layers and masking
    X = Input(shape=(None, input_dim), name='X')
    inputs = [X]
    mX = Masking()(X)

    if deep_supervision and mode == 'train':
        M = Input(shape=(None,), name='M')
        inputs.append(M)

    # Configurations: deep supervision disables the bidirectional wrappers.
    is_bidirectional = not deep_supervision

    # Preprocess each channel
    cX = [Slice(ch)(mX) for ch in channels]

    pX = []  # LSTM processed version of cX
    for x in cX:
        p = x
        for i in range(depth):
            num_units = dim
            if is_bidirectional:
                # Halve the units so the concatenated forward/backward
                # output has `dim` features.
                num_units = num_units // 2

            lstm = LSTM(units=num_units,
                        activation='tanh',
                        return_sequences=True,
                        dropout=dropout,
                        recurrent_dropout=rec_dropout)

            if is_bidirectional:
                p = Bidirectional(lstm)(p)
            else:
                p = lstm(p)
        pX.append(p)

    # Concatenate processed channels
    Z = Concatenate(axis=2)(pX)

    # Main part of the network
    for i in range(depth - 1):
        num_units = int(size_coef * dim)
        if is_bidirectional:
            num_units = num_units // 2

        lstm = LSTM(units=num_units,
                    activation='tanh',
                    return_sequences=True,
                    dropout=dropout,
                    recurrent_dropout=rec_dropout)

        if is_bidirectional:
            Z = Bidirectional(lstm)(Z)
        else:
            Z = lstm(Z)

    # Output module of the network
    return_sequences = (target_repl or deep_supervision)
    return_sequences = return_sequences and (mode == 'train')
    L = LSTM(units=int(size_coef * dim),
             activation='tanh',
             return_sequences=return_sequences,
             dropout=dropout,
             recurrent_dropout=rec_dropout)(Z)

    if dropout > 0:
        L = Dropout(dropout)(L)

    if target_repl:
        y = TimeDistributed(Dense(num_classes, activation=final_activation),
                            name='seq')(L)
        y_last = LastTimestep(name='single')(y)
        outputs = [y_last, y]
    elif deep_supervision and mode == 'train':
        y = TimeDistributed(Dense(num_classes,
                                  activation=final_activation))(L)
        y = ExtendMask()([y, M])  # this way we extend mask of y to M
        outputs = [y]
    else:
        y = Dense(num_classes, activation=final_activation)(L)
        outputs = [y]

    super(Network, self).__init__(inputs=inputs, outputs=outputs)
from keras.layers.wrappers import Bidirectional
from attention_decoder import AttentionDecoder
import processing

# NOTE(review): the bot token is hard-coded in the source; consider loading
# it from the environment or a config file kept out of version control.
TOKEN = "553617004:AAGFq_FMPlojaJcdn4dzrWgUmfnUU3gOyTs"
# Base URL of the Telegram Bot API for this token.
URL = "https://api.telegram.org/bot{}/".format(TOKEN)
chats = []

# Define the model: masking -> bidirectional LSTM -> attention decoder.
MAX_LENGTH = 150
NR_WORDPIECE = 512
LATENT_DIM = 256
model = Sequential()
# Timesteps that are entirely 0. are masked out.
model.add(Masking(mask_value=0., input_shape=(MAX_LENGTH, NR_WORDPIECE)))
model.add(Bidirectional(LSTM(LATENT_DIM, return_sequences=True)))
# The decoder gets LATENT_DIM * 2 units, the width of the concatenated
# forward and backward LSTM outputs.
model.add(AttentionDecoder(LATENT_DIM * 2, NR_WORDPIECE))
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['acc'])

# Check for weights, and if not found raise an error (module import fails).
if os.path.isfile('../res/model.h5'):
    model.load_weights('../res/model.h5')
    print("Modelweights loaded")
else:
    raise FileNotFoundError("Could not find model weights!!!")


def get_url(url):
def buildKerasModel(self, use_sourcelang=False, use_image=True):
    '''
    Define the exact structure of the model.

    An image description generation model: masked text input is embedded
    and projected to the hidden size, optionally summed with projected
    image features and/or source-language features, and fed to a recurrent
    layer (GRU when ``self.gru`` is truthy, otherwise LSTM) followed by a
    per-timestep softmax over the vocabulary.

    Args:
        use_sourcelang: Add the 'src' input of width ``self.hsn_size``.
        use_image: Add the 'img' input of width 4096.

    Returns:
        The compiled Keras model.  If ``self.weights`` is not None the
        weights file in that directory is backed up and loaded.
    '''
    logger.info('Building Keras model...')

    text_input = Input(shape=(self.max_t, self.vocab_size), name='text')
    text_mask = Masking(mask_value=0., name='text_mask')(text_input)

    # Word embeddings
    wemb = TimeDistributed(Dense(output_dim=self.embed_size,
                                 input_dim=self.vocab_size,
                                 W_regularizer=l2(self.l2reg)),
                           name="w_embed")(text_mask)
    drop_wemb = Dropout(self.dropin, name="wemb_drop")(wemb)

    # Embed -> Hidden
    # NOTE(review): input_dim is vocab_size although this layer consumes the
    # embed_size-wide embedding; kept unchanged -- confirm it is ignored.
    emb_to_hidden = TimeDistributed(Dense(output_dim=self.hidden_size,
                                          input_dim=self.vocab_size,
                                          W_regularizer=l2(self.l2reg)),
                                    name='wemb_to_hidden')(drop_wemb)

    if use_image:
        # Image 'embedding'
        logger.info('Using image features: %s', use_image)
        img_input = Input(shape=(self.max_t, 4096), name='img')
        img_emb = TimeDistributed(Dense(output_dim=self.hidden_size,
                                        input_dim=4096,
                                        W_regularizer=l2(self.l2reg)),
                                  name='img_emb')(img_input)
        img_drop = Dropout(self.dropin, name='img_embed_drop')(img_emb)

    if use_sourcelang:
        logger.info('Using source features: %s', use_sourcelang)
        logger.info('Size of source feature vectors: %d', self.hsn_size)
        src_input = Input(shape=(self.max_t, self.hsn_size), name='src')
        src_relu = Activation('relu', name='src_relu')(src_input)
        src_embed = TimeDistributed(Dense(output_dim=self.hidden_size,
                                          input_dim=self.hsn_size,
                                          W_regularizer=l2(self.l2reg)),
                                    name="src_embed")(src_relu)
        src_drop = Dropout(self.dropin, name="src_drop")(src_embed)

    # Input nodes for the recurrent layer
    rnn_input_dim = self.hidden_size
    recurrent_inputs = [emb_to_hidden]
    recurrent_inputs_names = ['emb_to_hidden']
    inputs = [text_input]
    if use_image:
        recurrent_inputs.append(img_drop)
        recurrent_inputs_names.append('img_drop')
        inputs.append(img_input)
    if use_sourcelang:
        recurrent_inputs.append(src_drop)
        recurrent_inputs_names.append('src_drop')
        inputs.append(src_input)

    if len(recurrent_inputs) > 1:
        merged_input = Merge(mode='sum')(recurrent_inputs)
    else:
        # Text-only model: nothing to sum.  The original left
        # recurrent_inputs undefined here and crashed with a NameError.
        merged_input = emb_to_hidden

    # Recurrent layer
    if self.gru:
        logger.info("Building a GRU with recurrent inputs %s",
                    recurrent_inputs_names)
        rnn = GRU(output_dim=self.hidden_size,
                  input_dim=rnn_input_dim,
                  return_sequences=True,
                  W_regularizer=l2(self.l2reg),
                  U_regularizer=l2(self.l2reg),
                  name='rnn')(merged_input)
    else:
        logger.info("Building an LSTM with recurrent inputs %s",
                    recurrent_inputs_names)
        rnn = LSTM(output_dim=self.hidden_size,
                   input_dim=rnn_input_dim,
                   return_sequences=True,
                   W_regularizer=l2(self.l2reg),
                   U_regularizer=l2(self.l2reg),
                   name='rnn')(merged_input)

    output = TimeDistributed(Dense(output_dim=self.vocab_size,
                                   input_dim=self.hidden_size,
                                   W_regularizer=l2(self.l2reg),
                                   activation='softmax'),
                             name='output')(rnn)

    # Build the model before choosing the optimiser: it used to be created
    # only in the 'adam' branch, so every other optimiser hit an unbound
    # `model`.
    model = Model(input=inputs, output=output)

    if self.optimiser == 'adam':
        # allow user-defined hyper-parameters for ADAM because it is
        # our preferred optimiser
        optimiser = Adam(lr=self.lr, beta_1=self.beta1,
                         beta_2=self.beta2, epsilon=self.epsilon,
                         clipnorm=self.clipnorm)
    else:
        optimiser = self.optimiser
    model.compile(optimiser, {'output': 'categorical_crossentropy'})

    if self.weights is not None:
        logger.info("... with weights defined in %s", self.weights)
        # Initialise the weights of the model; keep a backup copy first.
        shutil.copyfile("%s/weights.hdf5" % self.weights,
                        "%s/weights.hdf5.bak" % self.weights)
        model.load_weights("%s/weights.hdf5" % self.weights)

    #plot(model, to_file="model.png")

    return model
def LSTM(self, features, labels, speaker_id, model_type):
    """Train and evaluate an LSTM classifier with leave-one-group-out CV.

    For every split produced by ``LeaveOneGroupOut`` over ``speaker_id`` a
    fresh model is trained on the training part and evaluated on the
    held-out part.

    Args:
        features: Array indexed as ``features[samples]``; its second and
            third dimensions define the model's input shape.
        labels: Integer class labels (the output layer has 4 units).
        speaker_id: Group label per sample, used for the splits.
        model_type: ``'unidirectional'`` for plain LSTM layers or
            ``'bidirectional'`` for ``Bidirectional(LSTM)`` layers.  Any
            other value prints an error and stops the loop, returning what
            has been collected so far.

    Returns:
        Tuple of three dicts keyed by split index: held-out ground truth,
        predicted class indices and predicted class probabilities.
    """
    logo = LeaveOneGroupOut()

    # Results for the LSTM model, keyed by split index.
    LSTM_predict_probability = {}  # saves the softmax output
    LSTM_test_GT = {}  # saves the test set GTs
    LSTM__modelpred = {}  # saves the final outputs
    LSTM_con = {}  # saves the confusion matrix per speaker
    LSTM_UWR = {}  # saves the unweighted recall per speaker

    splits = logo.split(features, labels, speaker_id)
    for speaker, (train, test) in enumerate(splits):
        label_train = np_utils.to_categorical(labels[train])

        # Set callback functions to early stop training
        callbacks = [EarlyStopping(monitor='val_loss', patience=10)]

        model = Sequential()
        model.add(
            Masking(mask_value=0.,
                    input_shape=(features.shape[1], features.shape[2])))

        # Inside this method `LSTM` resolves to the module-level layer
        # class, not to this method.
        # The two branches were swapped in the original: 'unidirectional'
        # built Bidirectional layers and 'bidirectional' built plain ones.
        if model_type == 'unidirectional':
            model.add(LSTM(128, return_sequences=True))
            model.add(Dropout(0.5))
            model.add(LSTM(128))
        elif model_type == 'bidirectional':
            model.add(Bidirectional(LSTM(128, return_sequences=True)))
            model.add(Dropout(0.5))
            model.add(Bidirectional(LSTM(128)))
        else:
            print('error in model type')
            break

        model.add(Dropout(0.5))
        model.add(Dense(256, activation='relu'))
        model.add(Dropout(0.5))
        # Output layer: 4-way softmax.
        model.add(Dense(4, activation='softmax'))

        adam = keras.optimizers.Adam(lr=0.0001,
                                     beta_1=0.9,
                                     beta_2=0.999,
                                     epsilon=None,
                                     decay=0.0,
                                     amsgrad=False)
        model.compile(loss='categorical_crossentropy',
                      optimizer=adam,
                      metrics=['accuracy'])
        model.fit(features[train],
                  label_train,
                  epochs=50,
                  validation_split=0.20,
                  callbacks=callbacks,
                  batch_size=128,
                  verbose=1)

        X_pred = model.predict(features[test, :])
        Y_pred = np.argmax(X_pred, axis=1)

        LSTM_predict_probability[speaker] = X_pred
        LSTM_test_GT[speaker] = labels[test]
        # The confusion matrix and macro recall are computed per split but
        # are not part of the return value.
        LSTM_con[speaker] = confusion_matrix(labels[test], Y_pred)
        LSTM_UWR[speaker] = recall_score(labels[test],
                                         Y_pred,
                                         average='macro')
        LSTM__modelpred[speaker] = Y_pred

    return LSTM_test_GT, LSTM__modelpred, LSTM_predict_probability
def _build_network(self, vocab_size, maxlen, emb_weights=[], hidden_units=256, trainable=False):
    """Assemble and compile the convolutional text classifier.

    Layers, in order: masking, embedding (freshly initialised with 20
    dimensions when ``emb_weights`` is empty, otherwise initialised from
    ``emb_weights``), a reshape that appends a channel axis, two
    conv / max-pool / dropout stages, a hidden Dense layer with dropout and
    a 2-unit softmax.  Compiled with Adam(lr=0.001) and categorical
    cross-entropy.

    Args:
        vocab_size: Input dimension of the embedding.
        maxlen: Input sequence length.
        emb_weights: Optional embedding matrix; its second dimension is the
            embedding size.
        hidden_units: Base width; the layers use 1/8, 1/4 and 1/2 of it.
        trainable: Whether the embedding is trainable.

    Returns:
        The compiled model.
    """
    print('Build model...')

    net = Sequential()
    net.add(Masking(mask_value=0, input_shape=(maxlen, )))

    # Embedding: random 20-d vectors unless a weight matrix was supplied.
    have_pretrained = len(emb_weights) != 0
    if have_pretrained:
        embedding = Embedding(vocab_size,
                              emb_weights.shape[1],
                              input_length=maxlen,
                              weights=[emb_weights],
                              trainable=trainable)
    else:
        embedding = Embedding(vocab_size,
                              20,
                              input_length=maxlen,
                              embeddings_initializer='he_normal',
                              trainable=trainable,
                              mask_zero=True)
    net.add(embedding)

    # Append a trailing channel axis for the 2-D convolutions.
    current_shape = net.output_shape
    net.add(Reshape((current_shape[1], current_shape[2], 1)))

    # Two conv stages: (divisor of hidden_units, kernel height).
    for divisor, kernel_height in ((8, 5), (4, 3)):
        net.add(
            Convolution2D(int(hidden_units / divisor), (kernel_height, 1),
                          kernel_initializer='he_normal',
                          padding='valid',
                          activation='relu'))
        net.add(MaxPooling2D((2, 1)))
        net.add(Dropout(0.5))

    net.add(
        Dense(int(hidden_units / 2),
              kernel_initializer='he_normal',
              activation='relu'))
    net.add(Dropout(0.5))
    net.add(Dense(2, activation='softmax'))

    net.compile(loss='categorical_crossentropy',
                optimizer=Adam(lr=0.001),
                metrics=['accuracy'])

    print('No of parameter:', net.count_params())
    print(net.summary())
    return net
def make_model(input_dim, embed_dim, char_hidden_dim, word_hidden_dim,
               output_dim, max_chars, max_len):
    """Create a two-level (character, then word) BiLSTM model.

    Arguments:
        input_dim {int} -- Input dimension of the character embedding
        embed_dim {int} -- Size of the character embeddings
        char_hidden_dim {int} -- Units of the char-level LSTM (per direction)
        word_hidden_dim {int} -- Units of the word-level LSTM (per direction)
        output_dim {int} -- Width of the softmax output
        max_chars {int} -- Characters per token
        max_len {int} -- Tokens per sequence

    Returns:
        keras.models.Model -- uncompiled model with two inputs: the
        flattened character ids of shape (max_chars * max_len,) and a word
        mask of shape (max_len, 2 * char_hidden_dim)
    """
    dropout_prob = 0.
    char_regularizer = None
    word_regularizer = None

    inputs = Input(shape=(max_chars * max_len, ))
    word_mask = Input(shape=(max_len, 2 * char_hidden_dim))

    # Character embeddings; mask_zero=True so id 0 is treated as masked.
    embed = Embedding(input_dim,
                      embed_dim,
                      embeddings_initializer='uniform',
                      mask_zero=True)(inputs)

    # Regroup to rank 3: (-1, max_chars, embed_dim), one row per token.
    def to_char_sequences(x):
        return K.reshape(x, shape=[-1, max_chars, embed_dim])

    reshaped = Lambda(to_char_sequences)(embed)

    # Char-level BiLSTM; forward and backward states are concatenated.
    char_hidden = Bidirectional(LSTM(units=char_hidden_dim,
                                     kernel_regularizer=char_regularizer),
                                merge_mode='concat')(reshaped)

    # Regroup to rank 3: (-1, max_len, 2 * char_hidden_dim), one row per
    # sequence of tokens.
    def to_word_sequences(x):
        return K.reshape(x, shape=[-1, max_len, 2 * char_hidden_dim])

    reshaped_2 = Lambda(to_word_sequences)(char_hidden)

    # Word-level mask: multiply by word_mask, then let Masking skip the
    # timesteps that are all zero.
    masked = Multiply()([reshaped_2, word_mask])
    masked = Masking(mask_value=0.)(masked)

    # Word-level BiLSTM returning one vector per token.
    word_hidden = Bidirectional(LSTM(units=word_hidden_dim,
                                     kernel_regularizer=word_regularizer,
                                     return_sequences=True),
                                merge_mode='concat')(masked)

    dropped = Dropout(dropout_prob)(word_hidden)

    # Project every token representation to the output space.
    output_probs = Dense(output_dim, activation='softmax')(dropped)

    model = Model(inputs=[inputs, word_mask], outputs=output_probs)

    # Layer-wise summary.
    print(model.summary())

    return model
def trainRNN(X_train, X_test, y_train, y_test, w_train, w_test, sequence,
             collection, unit_type, n_units, combinedDim, epochs, batchSize,
             dropout, optimizer, activation, initializer, learningRate=0.01,
             decay=0.0, momentum=0.0, nesterov=False, mergeModels=False,
             multiclass=False):
    """Build, train and test a recurrent network over one or more sequences.

    Every entry of ``sequence`` becomes a channel: Input -> Masking(-999) ->
    LSTM or GRU -> Dropout.  With ``mergeModels`` a dense branch over the
    standardised ``X_train`` is concatenated to the channels.

    Args:
        X_train, X_test: Flat feature matrices; only used with mergeModels.
        y_train, y_test: Targets.
        w_train: Sample weights for training.
        w_test: Unused.
        sequence: List of dicts with keys 'name', 'X_train' and 'X_test';
            the keys 'input' and 'channel' are added to each dict.
        collection: Unused.
        unit_type: 'lstm' or 'gru' (case-insensitive).
        n_units: Units of the recurrent layers and of the dense branch.
        combinedDim: Unused.
        epochs, batchSize: Training settings.
        dropout: Dropout rate.
        optimizer: Optimizer passed to ``compile``.
        activation, initializer: Settings of the dense branch.
        learningRate, decay, momentum, nesterov: Unused.
        mergeModels: Add the dense branch over X_train / X_test.
        multiclass: Softmax over ``len(np.bincount(y_train))`` classes
            instead of a single sigmoid unit.

    Returns:
        Tuple ``(combined_rnn, history, y_predicted)``.  ``history`` is None
        when training was interrupted before ``fit`` returned.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Performing a Deep Recurrent Neural Net!")

    if isinstance(sequence, list):
        for seq in sequence:
            print('Prepare channel for {} collection...'.format(seq['name']))
            SHAPE = seq['X_train'].shape[1:]
            seq['input'] = Input(SHAPE)
            seq['channel'] = Masking(mask_value=-999,
                                     name=seq['name'] + '_masking')(seq['input'])
            if unit_type.lower() == 'lstm':
                seq['channel'] = LSTM(n_units,
                                      name=seq['name'] + '_lstm')(seq['channel'])
            if unit_type.lower() == 'gru':
                seq['channel'] = GRU(n_units,
                                     name=seq['name'] + '_gru')(seq['channel'])
            seq['channel'] = Dropout(dropout,
                                     name=seq['name'] + '_dropout')(seq['channel'])

    if mergeModels:
        print('Going to merge sequence model with common NN!')

        print('Standardize training set...')
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        model_inputs = Input(shape=(X_train.shape[1], ))
        layer = Dense(n_units,
                      activation=activation,
                      kernel_initializer=initializer)(model_inputs)
        layer = BatchNormalization()(layer)
        layer = Dropout(dropout)(layer)

    has_many_sequences = len(sequence) > 1

    if mergeModels:
        combined = concatenate([c['channel'] for c in sequence] + [layer])
    elif has_many_sequences:
        combined = concatenate([c['channel'] for c in sequence])
    else:
        combined = sequence[0]['channel']

    #for layer in combinedDim:
    #    combined = Dense(layer, activation = activation)(combined)
    #    combined = Dropout(dropout)(combined)

    if multiclass:
        # Was assigned to `combined_output` (missing "s"), which left
        # `combined_outputs` undefined below.
        combined_outputs = Dense(len(np.bincount(y_train)),
                                 activation='softmax')(combined)
        loss = 'categorical_crossentropy'
    else:
        combined_outputs = Dense(1, activation='sigmoid')(combined)
        loss = 'binary_crossentropy'

    if mergeModels:
        combined_rnn = Model(
            inputs=[seq['input'] for seq in sequence] + [model_inputs],
            outputs=combined_outputs)
    elif has_many_sequences:
        combined_rnn = Model(inputs=[seq['input'] for seq in sequence],
                             outputs=combined_outputs)
    else:
        combined_rnn = Model(inputs=sequence[0]['input'],
                             outputs=combined_outputs)

    combined_rnn.summary()
    combined_rnn.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])

    print('Training...')
    # NOTE(review): compute_class_weight returns an array; confirm that the
    # Keras version in use accepts it for `class_weight`.
    class_weight = compute_class_weight('balanced', np.unique(y_train),
                                        y_train)

    train_inputs = [seq['X_train'] for seq in sequence]
    if mergeModels:
        train_inputs = train_inputs + [X_train]
        monitor = 'loss'
    else:
        monitor = 'acc'

    # Pre-set so that an interrupted fit() does not leave `history` unbound
    # at the return statement.
    history = None
    try:
        history = combined_rnn.fit(
            train_inputs,
            y_train,
            class_weight=class_weight,
            sample_weight=w_train,
            epochs=epochs,
            batch_size=batchSize,
            callbacks=[EarlyStopping(verbose=True, patience=10,
                                     monitor=monitor)])
        #ModelCheckpoint('./models/combinedrnn_tutorial-progress', monitor='val_loss', verbose=True, save_best_only=True)
    except KeyboardInterrupt:
        print('Training ended early.')

    print('Testing...')
    if mergeModels:
        test_inputs = [seq['X_test'] for seq in sequence] + [X_test]
        score = combined_rnn.evaluate(test_inputs, y_test,
                                      batch_size=batchSize)
        y_predicted = combined_rnn.predict(test_inputs, batch_size=batchSize)
    elif has_many_sequences:
        # Was `len(seq) > 1`, which measured the last per-sequence dict
        # instead of the number of sequences.
        test_inputs = [seq['X_test'] for seq in sequence]
        score = combined_rnn.evaluate(test_inputs, y_test)
        y_predicted = combined_rnn.predict(test_inputs, batch_size=batchSize)
    else:
        score = combined_rnn.evaluate(sequence[0]['X_test'], y_test)
        y_predicted = combined_rnn.predict(sequence[0]['X_test'],
                                           batch_size=batchSize)

    #print("\n%s: %.2f%%" % (combined_rnn.metrics_names[0], score[0]*100))
    #print("\n%s: %.2f%%" % (combined_rnn.metrics_names[1], score[1]*100))

    print("RNN finished!")

    return combined_rnn, history, y_predicted
# Script fragment (Python 2): for every (max tracks, epochs) combination,
# load the saved weights if the file exists and evaluate the model on X, y.
maxtracks = [5, 10, 20, 50]
nepochs = [1, 5, 10, 20]

for mt in maxtracks:
    # Per-`mt` result lists; `aucs` is not filled in the part visible here.
    scores = []
    aucs = []
    epochs = []
    for ne in nepochs:
        filename = output_dir + 'model_ntrk%(mt)d_nepochs%(ne)d'%{'mt':mt,'ne':ne}
        print filename
        # Skip combinations for which no weights file was saved.
        if not os.path.isfile(filename):
            print 'not a file'
            continue
        epochs.append(ne)

        # Rebuild the architecture so the saved weights can be loaded.
        # NOTE(review): `output_dim` / `inner_activation` look like Keras 1
        # argument names -- confirm the Keras version in use.
        model = Sequential()
        model.add(Masking(mask_value=0.0, input_shape=(max_len, len(vars))))
        model.add(LSTM(output_dim=16, activation='sigmoid', inner_activation='hard_sigmoid'))
        model.add(Dropout(0.2))
        model.add(Dense(1))
        model.add(Activation('sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

        model.load_weights(filename)
        print 'Weights loaded'

        score = model.evaluate(X, y, batch_size=16)
        # Second entry of the evaluate() result; with metrics=['accuracy']
        # this is presumably the accuracy -- TODO confirm.
        scores.append(score[1])
        # NOTE(review): the '%f' is never applied; this prints the literal
        # text 'score %f' followed by the value of `score`.
        print 'score %f', score

        # ROC curve and area under it from the predicted probabilities.
        proba = model.predict_proba(X, batch_size=16)
        fpr,tpr,thres = roc_curve(y, proba)
        area = auc(fpr, tpr)
def test_merge():
    """Exercise the functional `merge` function and the `Merge` layer.

    Covers the string modes, lambda / function modes with an explicit
    output shape, modes with an output mask, and modes with extra
    arguments.  Most sections compare the predicted output shape with
    `get_output_shape_for` and rebuild the model from its config.
    """
    from keras.layers import Input, merge, Merge, Masking
    from keras.models import Model

    # modes exercised by the loop below: 'sum', 'mul', 'concat', 'ave', 'max'.
    input_shapes = [(3, 2), (3, 2)]
    inputs = [np.random.random(shape) for shape in input_shapes]

    # test functional API
    for mode in ['sum', 'mul', 'concat', 'ave', 'max']:
        print(mode)
        input_a = Input(shape=input_shapes[0][1:])
        input_b = Input(shape=input_shapes[1][1:])
        merged = merge([input_a, input_b], mode=mode)
        model = Model([input_a, input_b], merged)
        model.compile('rmsprop', 'mse')

        expected_output_shape = model.get_output_shape_for(input_shapes)
        actual_output_shape = model.predict(inputs).shape
        assert expected_output_shape == actual_output_shape

        # the model must be rebuildable from its config
        config = model.get_config()
        model = Model.from_config(config)
        model.compile('rmsprop', 'mse')

        # test Merge (#2460): the layer class called on tensors
        merged = Merge(mode=mode)([input_a, input_b])
        model = Model([input_a, input_b], merged)
        model.compile('rmsprop', 'mse')

        expected_output_shape = model.get_output_shape_for(input_shapes)
        actual_output_shape = model.predict(inputs).shape
        assert expected_output_shape == actual_output_shape

    # test lambda with output_shape lambda
    input_a = Input(shape=input_shapes[0][1:])
    input_b = Input(shape=input_shapes[1][1:])
    merged = merge([input_a, input_b],
                   mode=lambda tup: K.concatenate([tup[0], tup[1]]),
                   output_shape=lambda tup: tup[0][:-1] + (tup[0][-1] + tup[1][-1], ))
    model = Model([input_a, input_b], merged)
    expected_output_shape = model.get_output_shape_for(input_shapes)
    actual_output_shape = model.predict(inputs).shape
    assert expected_output_shape == actual_output_shape

    config = model.get_config()
    model = Model.from_config(config)
    model.compile('rmsprop', 'mse')

    # test function with output_shape function
    def fn_mode(tup):
        # concatenate the two inputs along axis 1
        x, y = tup
        return K.concatenate([x, y], axis=1)

    def fn_output_shape(tup):
        # axis-1 sizes add up; all other dimensions come from the first input
        s1, s2 = tup
        return (s1[0], s1[1] + s2[1]) + s1[2:]

    input_a = Input(shape=input_shapes[0][1:])
    input_b = Input(shape=input_shapes[1][1:])
    merged = merge([input_a, input_b],
                   mode=fn_mode,
                   output_shape=fn_output_shape)
    model = Model([input_a, input_b], merged)
    expected_output_shape = model.get_output_shape_for(input_shapes)
    actual_output_shape = model.predict(inputs).shape
    assert expected_output_shape == actual_output_shape

    config = model.get_config()
    model = Model.from_config(config)
    model.compile('rmsprop', 'mse')

    # test function with output_mask function
    # time dimension is required for masking
    input_shapes = [(4, 3, 2), (4, 3, 2)]
    inputs = [np.random.random(shape) for shape in input_shapes]

    def fn_output_mask(tup):
        # concatenate the two input masks
        x_mask, y_mask = tup
        return K.concatenate([x_mask, y_mask])

    input_a = Input(shape=input_shapes[0][1:])
    input_b = Input(shape=input_shapes[1][1:])
    a = Masking()(input_a)
    b = Masking()(input_b)
    merged = merge([a, b],
                   mode=fn_mode,
                   output_shape=fn_output_shape,
                   output_mask=fn_output_mask)
    model = Model([input_a, input_b], merged)
    expected_output_shape = model.get_output_shape_for(input_shapes)
    actual_output_shape = model.predict(inputs).shape
    assert expected_output_shape == actual_output_shape

    config = model.get_config()
    model = Model.from_config(config)
    model.compile('rmsprop', 'mse')

    # feed an all-zeros and an all-ones mask through the merge layer's
    # output mask and compare with their numpy concatenation
    mask_inputs = (np.zeros(input_shapes[0][:-1]),
                   np.ones(input_shapes[1][:-1]))
    expected_mask_output = np.concatenate(mask_inputs, axis=-1)
    mask_input_placeholders = [
        K.placeholder(shape=input_shape[:-1]) for input_shape in input_shapes
    ]
    mask_output = model.layers[-1]._output_mask(mask_input_placeholders)
    assert np.all(
        K.function(mask_input_placeholders, [mask_output])(mask_inputs)[0] ==
        expected_mask_output)

    # test lambda with output_mask lambda
    input_a = Input(shape=input_shapes[0][1:])
    input_b = Input(shape=input_shapes[1][1:])
    a = Masking()(input_a)
    b = Masking()(input_b)
    merged = merge([a, b],
                   mode=lambda tup: K.concatenate([tup[0], tup[1]], axis=1),
                   output_shape=lambda tup: (tup[0][0], tup[0][1] + tup[1][1]) + tup[0][2:],
                   output_mask=lambda tup: K.concatenate([tup[0], tup[1]]))
    model = Model([input_a, input_b], merged)
    expected_output_shape = model.get_output_shape_for(input_shapes)
    actual_output_shape = model.predict(inputs).shape
    assert expected_output_shape == actual_output_shape

    config = model.get_config()
    model = Model.from_config(config)
    model.compile('rmsprop', 'mse')

    # reuses the placeholders and expected mask built in the previous section
    mask_output = model.layers[-1]._output_mask(mask_input_placeholders)
    assert np.all(
        K.function(mask_input_placeholders, [mask_output])(mask_inputs)[0] ==
        expected_mask_output)

    # test with arguments
    input_shapes = [(3, 2), (3, 2)]
    inputs = [np.random.random(shape) for shape in input_shapes]

    def fn_mode(tup, a, b):
        # weighted sum; `a` and `b` are supplied through `arguments`
        x, y = tup
        return x * a + y * b

    input_a = Input(shape=input_shapes[0][1:])
    input_b = Input(shape=input_shapes[1][1:])
    merged = merge([input_a, input_b],
                   mode=fn_mode,
                   output_shape=lambda s: s[0],
                   arguments={
                       'a': 0.7,
                       'b': 0.3
                   })
    model = Model([input_a, input_b], merged)
    output = model.predict(inputs)

    # the model rebuilt from config must give the same predictions
    config = model.get_config()
    model = Model.from_config(config)

    assert np.all(model.predict(inputs) == output)
def deeplearning():
    """Train, evaluate and save a bidirectional GRU classifier.

    Reads the module-level data sets ``xunlian`` / ``xunlian_label``
    (passed to ``fit``), ``ceshi`` / ``ceshi_label`` (passed as
    ``validation_data``) and ``yanzheng`` / ``yanzheng_label`` (used for the
    final evaluation).  Prints the 4-class accuracy and a 2-class accuracy
    obtained by grouping classes {0, 1} and {2, 3}, then saves the model to
    ``./GRU/gru.h5``.
    """
    # Parameter settings
    BATCH_SIZE = 100  # sentences per training batch
    TIME_STEPS = 30  # word vectors per sentence
    INPUT_SIZE = 200  # length of each word vector
    OUTPUT_SIZE = 4  # width of the label
    LR = 0.001

    # One-hot encode the sample labels
    y_train = np_utils.to_categorical(xunlian_label, num_classes=OUTPUT_SIZE)
    y_test = np_utils.to_categorical(ceshi_label, num_classes=OUTPUT_SIZE)
    y_validation = np_utils.to_categorical(yanzheng_label,
                                           num_classes=OUTPUT_SIZE)

    # Pad / truncate every sentence to the same number of time steps
    x_train = pad_sequences(xunlian,
                            maxlen=TIME_STEPS,
                            padding='post',
                            dtype='float')
    x_test = pad_sequences(ceshi,
                           maxlen=TIME_STEPS,
                           padding='post',
                           dtype='float')
    x_validation = pad_sequences(yanzheng,
                                 maxlen=TIME_STEPS,
                                 padding='post',
                                 dtype='float')

    # Model construction
    # (author's note: tried RNN, LSTM and GRU; GRU worked comparatively best)
    model = Sequential()
    # mask_value drops the all-zero padding vectors from the input, which
    # gives the GRU variable-length input
    model.add(Masking(mask_value=0, input_shape=(TIME_STEPS, INPUT_SIZE)))
    # (author's note: common sizes are 32, 64, 128, 256; 32 was reported as
    # working well, but 64 is what is configured here)
    model.add(Bidirectional(GRU(64)))
    # dropout on the GRU output, to reduce over-fitting
    model.add(Dropout(0.5))
    model.add(Dense(OUTPUT_SIZE))
    model.add(Activation('softmax'))
    adam = Adam(LR)
    model.compile(optimizer=adam,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Run the model.  `epochs` is the Keras 2 name of the former `nb_epoch`;
    # this block already relies on Keras 2 (`num_classes=` above).
    result = model.fit(x_train,
                       y_train,
                       batch_size=BATCH_SIZE,
                       epochs=5,
                       verbose=1,
                       validation_data=(x_test, y_test))

    # Evaluation
    score, acc = model.evaluate(x_validation,
                                y_validation,
                                batch_size=BATCH_SIZE,
                                verbose=1)
    # The message used to say five-class although OUTPUT_SIZE is 4.
    print('四分类下的准确率{}'.format(acc))

    # The four-class accuracy is not high, so also look at the two-class
    # case: classes 0-1 versus classes 2-3.
    validation_label = model.predict_classes(x_validation,
                                             batch_size=BATCH_SIZE)
    # A prediction counts as correct when it falls on the same side of the
    # 1 | 2 boundary as the true label.
    num = sum(1 for predicted, actual in zip(validation_label, yanzheng_label)
              if (predicted <= 1) == (actual <= 1))
    # float() keeps the ratio from truncating to 0 under Python 2.
    print('二分法下的准确率{}'.format(float(num) / len(x_validation)))
    # (author's note: four-class is not very accurate, but judging positive
    # versus negative market sentiment works reasonably well)

    model.save('./GRU/gru.h5')
# Script fragment: save the word2vec model, then define and compile a
# two-layer bidirectional LSTM that emits a 5-way softmax per timestep.
word2vec.save('word2vec_words_final.model')
word2vec.init_sims(replace=True)  # normalise in advance so the word vectors are not affected by scale

print(u'正在进行第一次训练......')
# The string statement below is the author's note (in Chinese): train with
# the latest Keras using GPU acceleration (a GTX 960); Bidirectional was at
# that time only available in the GitHub version.
''' 用最新版本的Keras训练模型,使用GPU加速(我的是GTX 960) 其中Bidirectional函数目前要在github版本才有 '''
from keras.layers import Dense, LSTM, Lambda, TimeDistributed, Input, Masking, Bidirectional
from keras.models import Model
from keras.utils import np_utils
from keras import regularizers
# from keras.regularizers import activity_regularizer  # activity_l1: the L1 term makes the output sparser

sequence = Input(shape=(maxlen, word_size))
# Timesteps that are entirely 0. are masked out.
mask = Masking(mask_value=0.)(sequence)
# merge_mode='sum' adds the forward and backward outputs, so the widths
# stay 64 and 32.
blstm = Bidirectional(LSTM(64, return_sequences=True), merge_mode='sum')(mask)
blstm = Bidirectional(LSTM(32, return_sequences=True), merge_mode='sum')(blstm)
output = TimeDistributed(
    Dense(5, activation='softmax',
          activity_regularizer=regularizers.l1(0.01)))(blstm)
# NOTE(review): `input=` / `output=` look like the Keras 1 keyword names --
# confirm the Keras version in use.
model = Model(input=sequence, output=output)
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
# The string statement below is the author's note (in Chinese): gen_matrix
# builds training samples from the tokenised list; gen_target converts the
# output sequence into one-hot targets; sequences longer than maxlen are
# truncated and shorter ones are zero-padded.
''' gen_matrix实现从分词后的list来输出训练样本 gen_target实现将输出序列转换为one hot形式的目标 超过maxlen则截断,不足补0 '''
gen_matrix = lambda z: np.vstack(
def create_model_hierarchy(cls,
                           bottom_item_list,
                           emb_wgts_bottom_items_dict,
                           layer_nums=3,
                           rnn_state_size=[],
                           bottom_emb_item_len=3,
                           flag_embedding_trainable=1,
                           seq_len=39,
                           batch_size=20,
                           mode_attention=1,
                           drop_out_r=0.,
                           att_layer_cnt=2,
                           bhDwellAtt=0,
                           rnn_type="WGRU",
                           RNN_norm="GRU",
                           flagCid3RNNNorm=False):
    """Build the three-level (micro -> sku -> cid3) recurrent Keras model.

    One int32 input of batch shape ``(batch_size, seq_len)`` and one
    ``Embedding`` are created per entry of ``bottom_item_list``. Per the
    original comment the expected order is sku, bh, cid3, gap, dwell. The
    embeddings are initialised from ``emb_wgts_bottom_items_dict`` and the
    first one is always frozen. Three recurrent layers named
    ``rnn_out_micro``, ``rnn_out_sku`` and ``rnn_out_cid3`` are stacked, and
    each level is projected to an embedding-sized output.

    Args:
        cls: Unused; first positional parameter of the method.
        bottom_item_list: Keys into ``emb_wgts_bottom_items_dict``; the first
            five inputs become the model inputs.
        emb_wgts_bottom_items_dict: Maps each key to a 2-D weight matrix
            ``(vocabulary size, embedding length)``.
        layer_nums: Only used to index the recurrent outputs (1..3).
        rnn_state_size: State size of each recurrent layer (three entries are
            read). The default is never mutated here.
        bottom_emb_item_len: Selects which embeddings feed the first layer.
        flag_embedding_trainable: ``1`` makes all embeddings but the first
            trainable.
        seq_len: Sequence length of every input.
        batch_size: Fixed batch size of every input.
        mode_attention: ``0`` uses the last time step of each level, ``2``
            uses an attention-weighted sum.
        drop_out_r: Dropout rate of the recurrent layers.
        att_layer_cnt: ``2`` adds a hidden layer to the attention scorer.
        bhDwellAtt: ``1`` feeds the bh / dwell embeddings to the attention
            scorers of the micro / sku levels.
        rnn_type: One of "BLSTM", "BLSTM2", "TimeLSTM", "WGRU", "LSTM", "GRU".
        RNN_norm: "LSTM" or anything else (GRU) for the plain cid3 layer.
        flagCid3RNNNorm: Use a plain LSTM/GRU for the cid3 level.

    Returns:
        A Keras ``Model`` with outputs ``[out_micro, out_sku, out_cid3]``.

    Raises:
        ValueError: If ``mode_attention`` is not 0 or 2 (previously this
            surfaced as a NameError after the whole graph was built), or if
            ``rnn_type`` is not recognised.
    """
    if mode_attention not in (0, 2):
        raise ValueError(
            "mode_attention must be 0 or 2, got %r" % (mode_attention, ))

    c_mask_value = 0.
    # Large negative score that removes masked positions from the softmax.
    # The original `-2 ^ 31` is a bitwise XOR and evaluates to -31.
    att_zero_value = -2 ** 31

    def take_last_step(x):
        return x[:, -1, :]

    flag_concate_sku_cid = True
    use_bh_dwell_att = (bhDwellAtt == 1)
    bottom_item_len = len(bottom_item_list)
    inputs = [None] * bottom_item_len
    word_num = [None] * bottom_item_len
    emb_len = [None] * bottom_item_len
    embedding_bottom_item = [None] * bottom_item_len
    embed = [None] * bottom_item_len
    layer_nums_max = 3
    rnn_embed = [None] * layer_nums_max
    rnn = [None] * layer_nums_max
    rnn_output = [None] * layer_nums_max
    embeddings_trainable = (flag_embedding_trainable == 1)

    ## Embedding layer
    # Embedding sku, bh, cid3, gap, dwell: 0, 1, 2, 3, 4
    for i, bottom_item in enumerate(bottom_item_list):
        inputs[i] = Input(batch_shape=(batch_size, seq_len), dtype='int32')
        init_weights = emb_wgts_bottom_items_dict[bottom_item]
        word_num[i], emb_len[i] = init_weights.shape
        # Prints the same text under Python 2 and Python 3.
        print("%s %s" % (word_num[i], emb_len[i]))
        # The first (sku) embedding is always frozen.
        embedding_bottom_item[i] = Embedding(
            word_num[i],
            emb_len[i],
            input_length=seq_len,
            trainable=(embeddings_trainable and i != 0))
        embed[i] = embedding_bottom_item[i](inputs[i])
        embedding_bottom_item[i].set_weights([init_weights])

    # cal mask
    # NOTE(review): `sku` and `cid3` are slices of the symbolic input tensors,
    # so these comparisons run once at graph-construction time rather than
    # per batch - confirm the resulting masks are what is intended. The cid3
    # mask also tests `sku[k] == 0`, not `cid3[k] == 0`; kept as written.
    mask_sku = np.zeros((batch_size, seq_len))
    mask_cid3 = np.zeros((batch_size, seq_len))
    for j in range(batch_size):
        sku = inputs[0][j, :]
        cid3 = inputs[2][j, :]
        for k in range(seq_len - 1):
            if (sku[k] == 0 or sku[k] == sku[k + 1]):
                mask_sku[j][k] = 1
            if (sku[k] == 0 or cid3[k] == cid3[k + 1]):
                mask_cid3[j][k] = 1

    # Mask functions: overwrite the flagged (sample, step) positions.
    def f_mask_sku(x):
        x_new = x
        for j in range(batch_size):
            for k in range(seq_len):
                if (mask_sku[j][k] == 1):
                    x_new = T.set_subtensor(x_new[j, k, :], c_mask_value)
        return x_new

    def f_mask_cid3(x):
        x_new = x
        for j in range(batch_size):
            for k in range(seq_len):
                if (mask_cid3[j][k] == 1):
                    x_new = T.set_subtensor(x_new[j, k, :], c_mask_value)
        return x_new

    def f_mask_att_sku(x):
        x_new = x
        for j in range(batch_size):
            for k in range(seq_len):
                if (mask_sku[j][k] == 1):
                    x_new = T.set_subtensor(x_new[j, k], att_zero_value)
        return x_new

    def f_mask_att_cid3(x):
        x_new = x
        for j in range(batch_size):
            for k in range(seq_len):
                if (mask_cid3[j][k] == 1):
                    x_new = T.set_subtensor(x_new[j, k], att_zero_value)
        return x_new

    def K_dot(arr):
        axes = [1, 1]
        x, y = arr[0], arr[1]
        return K.batch_dot(x, y, axes=axes)

    def K_squeeze(x):
        return K.squeeze(x, axis=-1)

    # NOTE(review): this layer is never applied; it is still constructed so
    # the auto-generated names of the later Lambda layers stay unchanged.
    Lambda_sequeeze = Lambda(lambda x: K_squeeze(x))

    ## RNN layer
    layer_names = ("rnn_out_micro", "rnn_out_sku", "rnn_out_cid3")
    common = dict(stateful=False, return_sequences=True, dropout=drop_out_r)
    # The custom cells build the cid3 layer themselves unless a plain
    # LSTM/GRU is requested for it via flagCid3RNNNorm.
    n_custom_layers = 2 if flagCid3RNNNorm else 3
    if rnn_type in ("BLSTM", "BLSTM2", "TimeLSTM"):
        # "TimeLSTM" is a BLSTM with weight_dim=0 on every level.
        cell = BLSTM2 if rnn_type == "BLSTM2" else BLSTM
        if rnn_type == "TimeLSTM":
            weight_dims = (0, 0, 0)
        else:
            weight_dims = (emb_len[1], emb_len[4], 0)
        for idx in range(n_custom_layers):
            rnn[idx] = cell(rnn_state_size[idx],
                            interval_dim=emb_len[3],
                            weight_dim=weight_dims[idx],
                            name=layer_names[idx],
                            **common)
    elif rnn_type == "WGRU":
        # NOTE(review): the sku layer is later fed embed[4] but is sized with
        # emb_len[3]; kept as in the original - confirm both lengths match.
        weight_dims = (emb_len[1], emb_len[3], emb_len[3])
        for idx in range(n_custom_layers):
            # The cid3 layer used to be passed `tateful=False` (typo).
            rnn[idx] = WGRU(rnn_state_size[idx],
                            weight_dim=weight_dims[idx],
                            name=layer_names[idx],
                            **common)
    elif rnn_type in ("LSTM", "GRU"):
        # NOTE(review): only two layers are built here; the cid3 layer exists
        # only when flagCid3RNNNorm is set.
        plain_cell = LSTM if rnn_type == "LSTM" else GRU
        for idx in range(2):
            rnn[idx] = plain_cell(rnn_state_size[idx],
                                  name=layer_names[idx],
                                  **common)
    else:
        # Previously only printed, then failed later calling a None layer.
        raise ValueError("%s is not valid RNN!" % rnn_type)

    if flagCid3RNNNorm:
        rnn_cid3 = LSTM if RNN_norm == "LSTM" else GRU
        rnn[2] = rnn_cid3(rnn_state_size[2], name=layer_names[2], **common)

    # rnn embed 0: which embeddings feed the micro level
    if bottom_emb_item_len == 1:
        rnn_embed[0] = embed[0]
    else:
        embed_ids = {
            5: (0, 1, 2, 3, 4),
            4: (0, 1, 2, 3),
            3: (0, 1, 2),
            2: (0, 2),
        }.get(bottom_emb_item_len, (0, 1, 2, 3))
        rnn_embed[0] = Concatenate(axis=-1)([embed[n] for n in embed_ids])
    # add interval, weight features expected by the custom cells
    if rnn_type == "WGRU":
        rnn_embed[0] = Concatenate(axis=-1)([rnn_embed[0], embed[1]])
    if rnn_type in ("BLSTM", "BLSTM2"):
        rnn_embed[0] = Concatenate(axis=-1)(
            [rnn_embed[0], embed[3], embed[1]])
    if rnn_type == "TimeLSTM":
        rnn_embed[0] = Concatenate(axis=-1)([rnn_embed[0], embed[3]])
    # rnn micro
    rnn_output[0] = rnn[0](rnn_embed[0])

    # rnn sku
    if flag_concate_sku_cid:
        rnn_embed[1] = Concatenate(axis=-1)([embed[0], rnn_output[0]])
    else:
        rnn_embed[1] = rnn_output[0]
    if rnn_type == "WGRU":
        rnn_embed[1] = Concatenate(axis=-1)([rnn_embed[1], embed[4]])
    if rnn_type in ("BLSTM", "BLSTM2"):
        rnn_embed[1] = Concatenate(axis=-1)(
            [rnn_embed[1], embed[3], embed[4]])
    if rnn_type == "TimeLSTM":
        rnn_embed[1] = Concatenate(axis=-1)([rnn_embed[1], embed[3]])
    # mask sku: zero the flagged steps so Masking skips them
    rnn_embed[1] = Lambda(f_mask_sku)(rnn_embed[1])
    rnn_embed[1] = Masking(mask_value=c_mask_value)(rnn_embed[1])
    rnn_output[1] = rnn[1](rnn_embed[1])

    # rnn cid3
    if flag_concate_sku_cid:
        rnn_embed[2] = Concatenate()([embed[2], rnn_output[1]])
    else:
        rnn_embed[2] = rnn_output[1]
    if not flagCid3RNNNorm:
        rnn_embed[2] = Concatenate(axis=-1)([rnn_embed[2], embed[3]])
    # mask cid3
    rnn_embed[2] = Lambda(f_mask_cid3)(rnn_embed[2])
    rnn_embed[2] = Masking(mask_value=c_mask_value)(rnn_embed[2])
    rnn_output[2] = rnn[2](rnn_embed[2])

    # rnn final output (kept from the original; not used below)
    rnn_out_final = rnn_output[layer_nums - 1]
    rnn_out_micro = rnn_output[0]
    rnn_out_sku = rnn_output[1]
    rnn_out_cid3 = rnn_output[2]

    # predict sku, cid3
    if mode_attention == 0:
        # Last time step of every level, projected to the embedding length.
        att_out_micro = Lambda(
            take_last_step,
            output_shape=(rnn_state_size[0], ))(rnn_out_micro)
        out_micro = Dense(emb_len[0], activation="tanh")(att_out_micro)
        att_out_sku = Lambda(
            take_last_step, output_shape=(rnn_state_size[1], ))(rnn_out_sku)
        out_sku = Dense(emb_len[0], activation="tanh")(att_out_sku)
        att_out_cid3 = Lambda(
            take_last_step,
            output_shape=(rnn_state_size[2], ))(rnn_out_cid3)
        out_cid3 = Dense(emb_len[2], activation="tanh")(att_out_cid3)

    if mode_attention == 2:

        def attention_pool(h, state_size, tag, side_embed=None, mask_fn=None):
            """Return the attention-weighted sum of the states `h`."""
            h_last = Lambda(take_last_step,
                            output_shape=(state_size, ),
                            name="rnn_out_%s_last" % tag)(h)
            h_rep = RepeatVector(seq_len)(h_last)
            parts = [h, h_rep]
            if side_embed is not None:
                parts.append(side_embed)
            h_cat = Concatenate(axis=-1)(parts)
            if att_layer_cnt == 2:
                hidden = TimeDistributed(
                    Dense(ATT_NET_HIDSIZE, activation='tanh'))(h_cat)
                score = TimeDistributed(Dense(1, activation='tanh'))(hidden)
            else:
                score = TimeDistributed(Dense(1, activation='tanh'))(h_cat)
            # Identity Lambda kept from the original.
            score = Lambda(lambda x: x, output_shape=lambda s: s)(score)
            att = Flatten()(score)
            if mask_fn is not None:
                att = Lambda(mask_fn)(att)
            weights = Softmax(axis=-1, name="att_%s" % tag)(att)
            return Lambda(K_dot,
                          output_shape=(state_size, ),
                          name="out_%s_pre" % tag)([h, weights])

        # Layers are created in the same order as the original code.
        m_att_out = attention_pool(
            rnn_out_micro, rnn_state_size[0], "micro",
            side_embed=embed[1] if use_bh_dwell_att else None)
        # trans to sku emb len
        out_micro = Dense(emb_len[0], activation="tanh")(m_att_out)
        s_att_out = attention_pool(
            rnn_out_sku, rnn_state_size[1], "sku",
            side_embed=embed[4] if use_bh_dwell_att else None,
            mask_fn=f_mask_att_sku)
        c_att_out = attention_pool(
            rnn_out_cid3, rnn_state_size[2], "cid3",
            mask_fn=f_mask_att_cid3)
        out_cid3 = Dense(emb_len[2], activation="tanh")(c_att_out)
        out_sku = Dense(emb_len[0], activation="tanh")(s_att_out)

    # model
    model = Model(
        inputs=[inputs[0], inputs[1], inputs[2], inputs[3], inputs[4]],
        outputs=[out_micro, out_sku, out_cid3])
    return model
for j, char in enumerate(x_sentence): x_encoder[i, j, char_indices[char]] = 1 # encoderへの入力をone-hot表現で表す for j, char in enumerate(t_sentence): x_decoder[i, j, char_indices[char]] = 1 # decoderへの入力をone-hot表現で表す if j > 0: # 正解は入力より1つ前の時刻のものにする t_decoder[i, j - 1, char_indices[char]] = 1 #print(x_encoder.shape) #Model構築 batch_size = 32 epochs = 3000 n_mid = 256 # 中間層のニューロン数 encoder_input = Input(shape=(None, n_char)) encoder_mask = Masking(mask_value=0) # 全ての要素が0であるベクトルの入力は無視する encoder_masked = encoder_mask(encoder_input) encoder_lstm = GRU(n_mid, dropout=0.2, recurrent_dropout=0.2, return_state=True) # dropoutを設定し、ニューロンをランダムに無効にする encoder_output, encoder_state_h = encoder_lstm(encoder_masked) decoder_input = Input(shape=(None, n_char)) decoder_mask = Masking(mask_value=0) # 全ての要素が0であるベクトルの入力は無視する decoder_masked = decoder_mask(decoder_input) decoder_lstm = GRU(n_mid, dropout=0.2, recurrent_dropout=0.2, return_sequences=True, return_state=True) # dropoutを設定
def bgru(x_train,
         x_val,
         x_test,
         y_train,
         y_val,
         y_test,
         out_dir,
         name='bgru_model',
         hidden_units=10,
         layers=1,
         max_epochs=1000,
         batch_size=32,
         patience=20,
         dropout=0.0,
         recurrent_dropout=0.0):
    """Train a bidirectional GRU for protein secondary structure prediction.

    The network is a zero-masked input followed by ``layers`` bidirectional
    GRU layers and a per-timestep softmax. Training uses early stopping on
    the validation data; the best checkpoint is written to
    ``<out_dir>/<name>.h5`` and reloaded before evaluation.

    Args:
        x_train, y_train: 3-D training inputs and targets; the sequence
            length, feature count and class count are read from their shapes.
        x_val, y_val: Validation data for early stopping and checkpointing.
        x_test, y_test: Parallel sequences of test sets; accuracy is printed
            for each pair (the original code assumed exactly three).
        out_dir: Directory that receives the checkpoint file.
        name: Base name of the checkpoint file.
        hidden_units: Units per GRU direction.
        layers: Number of stacked bidirectional GRU layers (at least one is
            always built).
        max_epochs: Upper bound on training epochs.
        batch_size: Training batch size.
        patience: Early-stopping patience in epochs.
        dropout: Input dropout of each GRU.
        recurrent_dropout: Recurrent dropout of each GRU.

    Returns:
        The best model as reloaded from the checkpoint file.
    """
    max_seq_len = x_train.shape[1]
    num_features = x_train.shape[2]
    num_classes = y_train.shape[2]

    # Build Keras model. The Masking layer declares the input shape, so the
    # GRU layers need no `input_shape` of their own.
    model = Sequential()
    model.add(Masking(mask_value=0, input_shape=(max_seq_len, num_features)))
    # One layer is always added, plus (layers - 1) more when layers > 1.
    for _ in range(max(layers, 1)):
        model.add(
            Bidirectional(
                GRU(hidden_units,
                    return_sequences=True,
                    dropout=dropout,
                    recurrent_dropout=recurrent_dropout)))
    model.add(TimeDistributed(Dense(num_classes)))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    # summary() prints by itself; wrapping it in print() only added "None".
    model.summary()

    # Train model. Use early-stopping on validation data to determine when
    # to stop training.
    model_path = os.path.join(out_dir, name + '.h5')
    checkpointer = ModelCheckpoint(model_path, save_best_only=True)
    model.fit(x_train,
              y_train,
              epochs=max_epochs,
              batch_size=batch_size,
              verbose=1,
              validation_data=(x_val, y_val),
              callbacks=[EarlyStopping(patience=patience), checkpointer])

    # Best model is not necessarily the current instance because
    # patience != 0, so reload the checkpoint.
    model = load_model(model_path)

    y_train_pred = model.predict(x_train)
    print('Train accuracy: {:.2f}%'.format(
        calculate_accuracy(y_train, y_train_pred) * 100.0))

    # Test set accuracy, one line per test set.
    for x_set, y_set in zip(x_test, y_test):
        y_set_pred = model.predict(x_set)
        print('Test accuracy: {:.2f}%'.format(
            calculate_accuracy(y_set, y_set_pred) * 100.0))

    return model
#LOAD DATA train_data_padded = np.load("/storage/hpc_lkpiel/data/fbank_train_data_padded.npy", encoding="bytes") val_data_padded = np.load("/storage/hpc_lkpiel/data/fbank_val_data_padded.npy", encoding="bytes") test_data_padded = np.load("/storage/hpc_lkpiel/data/fbank_test_data_padded.npy", encoding="bytes") print ("DATA LOADED") ################################################################################################ reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.7, patience=2, min_lr=0.0001, verbose=1) model_14 = Sequential([ Masking(mask_value=0., input_shape=(1107,20)), Bidirectional(LSTM(64, return_sequences=True)), Bidirectional(LSTM(64, return_sequences=True)), AttentionWithContext(), Dense(3, activation='softmax') ]) print ("model_14 BUILT") model_14.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) print ("model_14 COMPILED") checkpoint = ModelCheckpoint(filepath='/models/model_14.hdf5', monitor='val_loss', save_best_only=True) history = model_14.fit(x=train_data_padded,
from keras.optimizers import Adam

max_features = 20000
maxlen = 10  # cut texts after this number of words (among top max_features most common words)

# Toy data: two sequences of 2-D frames (4 and 2 frames) with one two-element
# label each.
a = [[[1, 1], [2, 2], [3, 3], [4, 4]], [[1, 1], [2, 2]]]
label = [[0, 1], [1, 0]]

print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(a, maxlen=maxlen)
print(x_train)
print('x_train shape:', x_train.shape)

print('Build model...')
# Zero-masked (10, 2) input -> bidirectional LSTM returning every step ->
# bidirectional LSTM returning its final state -> dropout -> 2-way softmax.
frame_input = Input(shape=(10, 2))
mask_frame_input = Masking(mask_value=0.)(frame_input)
frame_l1 = Bidirectional(
    LSTM(16,
         return_sequences=True,
         recurrent_dropout=0.25,
         name='LSTM_audio_1'))(mask_frame_input)
frame_l2 = Bidirectional(LSTM(16, recurrent_dropout=0.25,
                              name='LSTM_audio_2'))(frame_l1)
dropout_word = Dropout(0.5)(frame_l2)
audio_prediction = Dense(2, activation='softmax')(dropout_word)

audio_model = Model(inputs=frame_input, outputs=audio_prediction)
# Second model on the same input that exposes the first layer's output.
inter_audio = Model(inputs=frame_input, outputs=frame_l1)

adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
# NOTE(review): the call below is cut off in this file.
audio_model.compile(loss='binary_crossentropy',
# # no embedding # model = Sequential() # #model.add(InputLayer(input_shape=(max_sequence_len,len(idx),))) # #model.add(Embedding(len(idx),16)) # model.add(Masking(input_shape=(max_sequence_len,len(idx)))) # #model.add(Dropout(0.5)) # model.add(Bidirectional(LSTM(12, input_shape=(max_sequence_len,len(idx)),recurrent_dropout=0.0,dropout=0.0,return_sequences=True))) # model.add(Dropout(0.5)) # model.add(TimeDistributed(Dense(len(idx_label),activation='softmax'))) # model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy']) # embedded tokens tokens_input = Input(shape=(max_sequence_len,)) features_input = Input(shape=(max_sequence_len, len(idx))) tokens_masked = Masking(0) (tokens_input) features_masked = Masking(0) (features_input) tokens_embed = Embedding(int((2/3)*len(idx_t)), 10, input_length=max_sequence_len) (tokens_masked) tokens_embed = Dropout(0.5) (tokens_embed) features_merged = Concatenate(axis=-1) ([features_masked, tokens_embed]) # features_input = Input(shape=(max_sequence_len, len(idx))) # chars_input = Input(shape=(max_sequence_len, max_carray_len)) # chars_masked = Masking(0) (chars_input) # features_masked = Masking(0) (features_input) # chars_embed = Bidirectional(LSTM(5, return_sequences=True)) (chars_masked) # # chars_embed = Dropout(0.5) (chars_embed) # features_merged = Concatenate(axis=-1) ([features_masked, chars_embed]) h = Bidirectional(LSTM(12, return_sequences=True)) (features_merged) # h = Dropout(0.5) (h)
def model_architecture(self, input_shape, output_shape):
    """Build a Keras model and return a compiled model.

    Args:
        input_shape: Shape of one input example. Used as given when
            ``output_shape`` has one entry; otherwise only
            ``input_shape[1]`` is used and the sequence length is left
            as ``None``.
        output_shape: Shape of one label; its length (1 or 2) selects which
            RNN outputs feed the loss and its last entry is the output size.

    Returns:
        A compiled ``Sequential`` model (categorical cross-entropy, SGD).

    Raises:
        ValueError: If ``output_shape`` does not have 1 or 2 entries.
    """
    from keras.layers import (
        Masking, LSTM, Dense, TimeDistributed, Activation)
    from keras.models import Sequential

    # Build Model
    model = Sequential()

    # the shape of the y vector of the labels,
    # determines which output from rnn will be used
    # to calculate the loss
    if len(output_shape) == 1:
        # y is (num examples, num features) so
        # only the last output from the rnn is used to
        # calculate the loss
        model.add(Masking(mask_value=-1, input_shape=input_shape))
        model.add(LSTM(self.rnn_size))
        model.add(Dense(input_dim=self.rnn_size, units=output_shape[-1]))
    elif len(output_shape) == 2:
        # y is (num examples, max_dialogue_len, num features) so
        # all the outputs from the rnn are used to
        # calculate the loss, therefore a sequence is returned and
        # time distributed layer is used

        # the first value in input_shape is max dialogue_len,
        # it is set to None, to allow dynamic_rnn creation
        # during prediction
        model.add(
            Masking(mask_value=-1, input_shape=(None, input_shape[1])))
        model.add(LSTM(self.rnn_size, return_sequences=True))
        model.add(TimeDistributed(Dense(units=output_shape[-1])))
    else:
        # The original literals joined up as "becauselength"; a space was
        # missing between the two pieces.
        raise ValueError("Cannot construct the model because "
                         "length of output_shape = {} "
                         "should be 1 or 2."
                         "".format(len(output_shape)))

    model.add(Activation('softmax'))
    #model.add(Activation('relu'))

    # model.compile(loss='categorical_crossentropy',
    #               optimizer='adam',
    #               metrics=['accuracy'])
    model.compile(loss='categorical_crossentropy',
                  optimizer='SGD',
                  metrics=['accuracy'])

    # summary() returns None and prints its lines, so collect them through
    # print_fn and send the text to the debug log instead of logging "None".
    summary_lines = []
    model.summary(print_fn=summary_lines.append)
    logger.debug("\n".join(summary_lines))
    return model


# class nuRobotPolicy(KerasPolicy):
#     def model_architecture(self, num_features, num_actions, max_history_len):
#         """Build a Keras model and return a compiled model."""
#         from keras.layers import LSTM, Activation, Masking, Dense
#         from keras.models import Sequential
#         n_hidden = 32  # size of hidden layer in LSTM
#         # Build Model
#         batch_shape = (None, max_history_len, num_features)
#         model = Sequential()
#         model.add(Masking(-1, batch_input_shape=batch_shape))
#         model.add(LSTM(n_hidden, batch_input_shape=batch_shape))
#         model.add(Dense(input_dim=n_hidden, output_dim=num_actions))
#         model.add(Activation('softmax'))
#         model.compile(loss='categorical_crossentropy',
#                       optimizer='adam',
#                       metrics=['accuracy'])
#         logger.debug(model.summary())
#         return model
# Split the targets at the (1 - valSplit) fraction of the padded chunks:
# the first part is for training, the remainder for validation.
trainY = Y[:int((1-valSplit)*len(audioMfccChunksPadded))]
valY = Y[int((1-valSplit)*len(audioMfccChunksPadded)):len(audioMfccChunksPadded)]

##############################################################################
# MODEL
##############################################################################

depth = 2

# Input
myInput = Input(shape=(maxInputLen, inputDim,))

# Masking: time steps equal to 0. are skipped by the LSTM stack
LSTMinput = Masking(mask_value=0.)(myInput)

# If depth > 1: every layer but the last returns the full sequence so the
# next LSTM can consume it; the last layer returns only its final output.
if depth > 1:
    # First layer
    encoded = LSTM(hiddenDim, activation=LSTMactiv, return_sequences=True)(LSTMinput)
    # Middle layers (none when depth == 2)
    for d in range(depth - 2):
        encoded = LSTM(hiddenDim, activation=LSTMactiv, return_sequences=True)(encoded)
    # Last layer
    encoded = LSTM(hiddenDim, activation=LSTMactiv)(encoded)
# If depth = 1
else:
    encoded = LSTM(hiddenDim, activation=LSTMactiv)(LSTMinput)
def get_text_model(self):
    """Build the text model: a per-utterance CNN followed by a BiLSTM tagger.

    Sets ``epochs``, ``batch_size``, ``embedding_dim``, ``vocabulary_size``,
    ``filter_sizes`` and ``num_filters`` on ``self``. Every sentence of the
    input is embedded with the frozen matrix ``self.data.W``, passed through
    three convolution + max-pool branches (one per filter size) and a shared
    dense layer; the stacked sentence vectors are masked and fed to two
    bidirectional LSTMs with a per-step softmax over ``self.classes``.

    Returns:
        An uncompiled Keras ``Model``.
    """
    # Modality specific hyperparameters
    self.epochs = 100
    self.batch_size = 50

    # Modality specific parameters
    self.embedding_dim = self.data.W.shape[1]

    # For text model
    self.vocabulary_size = self.data.W.shape[0]
    self.filter_sizes = [3,4,5]
    self.num_filters = 512

    print("Creating Model...")

    sentence_length = self.train_x.shape[2]
    branch_count = len(self.filter_sizes)  # always 3, see filter_sizes above

    # Sentence-representation layers, shared by every position of the input.
    embedding = Embedding(input_dim=self.vocabulary_size,
                          output_dim=self.embedding_dim,
                          weights=[self.data.W],
                          input_length=sentence_length,
                          trainable=False)
    conv_layers = [
        Conv2D(self.num_filters,
               kernel_size=(width, self.embedding_dim),
               padding='valid',
               kernel_initializer='normal',
               activation='relu') for width in self.filter_sizes
    ]
    pool_layers = [
        MaxPool2D(pool_size=(sentence_length - width + 1, 1),
                  strides=(1,1),
                  padding='valid') for width in self.filter_sizes
    ]
    dense_func = Dense(100, activation='tanh', name="dense")
    # NOTE(review): the two layers below are never applied to a tensor; they
    # are still constructed so Keras' auto-generated layer names stay the same.
    dense_final = Dense(units=self.classes, activation='softmax')
    reshape_func = Reshape((sentence_length, self.embedding_dim, 1))

    def slicer(x, index):
        # Select one position along axis 1.
        return x[:,K.constant(index, dtype='int32'),:]

    def slicer_output_shape(input_shape):
        dims = list(input_shape)
        assert len(dims) == 3  # batch, seq_len, sent_len
        return (dims[0], dims[2])

    def reshaper(x):
        # Append a trailing channel axis for Conv2D.
        return K.expand_dims(x, axis=3)

    def flattener(x):
        return K.reshape(x, [-1, x.shape[1]*x.shape[3]])

    def flattener_output_shape(input_shape):
        dims = list(input_shape)
        return (dims[0], branch_count*dims[3])

    def stack(x):
        return K.stack(x, axis=1)

    inputs = Input(shape=(self.sequence_length, sentence_length), dtype='int32')

    sentence_vectors = []
    for position in range(self.sequence_length):
        # Batch, word_indices
        word_ids = Lambda(slicer,
                          output_shape=slicer_output_shape,
                          arguments={"index":position})(inputs)

        # cnn-sent
        embedded = Lambda(reshaper)(embedding(word_ids))
        pooled = Concatenate(axis=1)([
            pool(conv(embedded))
            for conv, pool in zip(conv_layers, pool_layers)
        ])
        flat = Lambda(flattener,
                      output_shape=flattener_output_shape,)(pooled)
        sentence_vectors.append(Dropout(0.5)(dense_func(flat)))

    cnn_outputs = Lambda(stack)(sentence_vectors)

    masked = Masking(mask_value =0)(cnn_outputs)
    lstm = Bidirectional(LSTM(300, activation='relu',
                              return_sequences = True, dropout=0.3))(masked)
    lstm = Bidirectional(LSTM(300, activation='relu',
                              return_sequences = True, dropout=0.3),
                         name="utter")(lstm)
    output = TimeDistributed(Dense(self.classes,activation='softmax'))(lstm)

    return Model(inputs, output)
def buildKerasModel(self, use_sourcelang=False, use_image=True):
    '''
    Define the exact structure of your model here. We create an image
    description generation model by merging the VGG image features with
    a word embedding model, with an LSTM over the sequences.

    The order in which these appear below (text, image) is _IMMUTABLE_.
    (Needs to match up with input to model.fit.)

    Args:
        use_sourcelang: Add the 'source' input and merge its embedding into
            the recurrent layer's input.
        use_image: Only logged. NOTE(review): the 'img' input is added
            regardless of this flag - confirm whether that is intended.

    Returns:
        The compiled Keras ``Graph`` model, with weights loaded from
        ``self.weights`` when that is set.
    '''
    logger.info('Building Keras model...')
    logger.info('Using image features: %s', use_image)
    logger.info('Using source language features: %s', use_sourcelang)

    model = Graph()
    model.add_input('text', input_shape=(self.max_t, self.vocab_size))
    model.add_node(Masking(mask_value=0.), input='text', name='text_mask')

    # Word embeddings
    model.add_node(TimeDistributedDense(output_dim=self.embed_size,
                                        input_dim=self.vocab_size,
                                        W_regularizer=l2(self.l2reg)),
                   name="w_embed", input='text_mask')
    model.add_node(Dropout(self.dropin),
                   name="w_embed_drop",
                   input="w_embed")

    # Embed -> Hidden
    model.add_node(TimeDistributedDense(output_dim=self.hidden_size,
                                        input_dim=self.embed_size,
                                        W_regularizer=l2(self.l2reg)),
                   name='embed_to_hidden', input='w_embed_drop')

    # Source language input
    if use_sourcelang:
        model.add_input('source', input_shape=(self.max_t, self.hsn_size))
        model.add_node(Masking(mask_value=0.),
                       input='source',
                       name='source_mask')
        model.add_node(TimeDistributedDense(output_dim=self.hidden_size,
                                            input_dim=self.hsn_size,
                                            W_regularizer=l2(self.l2reg)),
                       name="s_embed",
                       input="source_mask")
        model.add_node(Dropout(self.dropin),
                       name="s_embed_drop",
                       input="s_embed")

    # Recurrent layer
    rnn_class = GRU if self.gru else LSTM
    rnn_layer = rnn_class(output_dim=self.hidden_size,
                          input_dim=self.hidden_size,
                          return_sequences=True)
    if use_sourcelang:
        # Several upstream nodes must go through `inputs=`; `input=` takes a
        # single node name and fails when given a list. The two streams are
        # summed so the merged width stays hidden_size, matching the
        # recurrent layer's input_dim.
        model.add_node(rnn_layer,
                       name='rnn',
                       inputs=['embed_to_hidden', 's_embed_drop'],
                       merge_mode='sum')
    else:
        model.add_node(rnn_layer, name='rnn', input='embed_to_hidden')

    # Image 'embedding'
    model.add_input('img', input_shape=(self.max_t, 4096))
    model.add_node(Masking(mask_value=0.),
                   input='img', name='img_mask')
    model.add_node(TimeDistributedDense(output_dim=self.hidden_size,
                                        input_dim=4096,
                                        W_regularizer=l2(self.l2reg)),
                   name='i_embed', input='img_mask')
    model.add_node(Dropout(self.dropin),
                   name='i_embed_drop',
                   input='i_embed')

    # Multimodal layer outside the recurrent layer
    model.add_node(TimeDistributedDense(output_dim=self.hidden_size,
                                        input_dim=self.hidden_size,
                                        W_regularizer=l2(self.l2reg)),
                   name='m_layer',
                   inputs=['rnn', 'i_embed_drop', 'embed_to_hidden'],
                   merge_mode='sum')

    model.add_node(TimeDistributedDense(output_dim=self.vocab_size,
                                        input_dim=self.hidden_size,
                                        W_regularizer=l2(self.l2reg),
                                        activation='softmax'),
                   name='output',
                   input='m_layer',
                   create_output=True)

    if self.optimiser == 'adam':
        # allow user-defined hyper-parameters for ADAM because it is
        # our preferred optimiser
        optimiser = Adam(lr=self.lr, beta_1=self.beta1,
                         beta_2=self.beta2, epsilon=self.epsilon,
                         clipnorm=self.clipnorm)
        model.compile(optimiser, {'output': 'categorical_crossentropy'})
    else:
        model.compile(self.optimiser, {'output': 'categorical_crossentropy'})

    if self.weights is not None:
        logger.info("... with weights defined in %s", self.weights)
        # Initialise the weights of the model. The weights file is copied
        # to a .bak file before it is loaded.
        shutil.copyfile("%s/weights.hdf5" % self.weights,
                        "%s/weights.hdf5.bak" % self.weights)
        model.load_weights("%s/weights.hdf5" % self.weights)

    #plot(model, to_file="model.png")

    return model
def my_timedistributed_cnn_model(input_shape,
                                 conv_f_1,
                                 conv_f_2,
                                 conv_f_3,
                                 cnn_dense_fc_1,
                                 masking=False):
    """Build a time-distributed CNN feature extractor as a Sequential model.

    Three blocks of Conv2D(3x3, 'same', L2 1e-4) -> BatchNormalization ->
    ReLU -> MaxPooling2D(3x3, stride 2, 'same') are applied to every time
    step, followed by a per-step Flatten and a Dense layer of
    ``cnn_dense_fc_1`` units (no activation).

    Args:
        input_shape: Input shape, declared on the Masking layer when
            ``masking`` is true and on the first convolution otherwise.
        conv_f_1, conv_f_2, conv_f_3: Filter counts of the three blocks.
        cnn_dense_fc_1: Units of the final per-step dense layer.
        masking: Prepend a ``Masking(mask_value=0.0)`` layer.

    Returns:
        The uncompiled ``Sequential`` model.
    """

    def new_conv(n_filters, **extra):
        return Conv2D(filters=n_filters,
                      kernel_size=(3, 3),
                      padding='same',
                      kernel_regularizer=l2(1.e-4),
                      **extra)

    def add_norm_relu_pool(net):
        net.add(TimeDistributed(BatchNormalization()))
        net.add(TimeDistributed(Activation('relu')))
        net.add(
            TimeDistributed(
                MaxPooling2D(pool_size=(3, 3),
                             strides=(2, 2),
                             padding='same')))

    net = Sequential()

    # Block 1: the only convolution without its own activation.
    if masking:
        net.add(Masking(mask_value=0.0, input_shape=input_shape))
        net.add(TimeDistributed(new_conv(conv_f_1)))
    else:
        net.add(TimeDistributed(new_conv(conv_f_1), input_shape=input_shape))
    add_norm_relu_pool(net)

    # Blocks 2 and 3.
    # NOTE(review): these convolutions apply ReLU themselves and again after
    # batch normalisation, unlike block 1 - kept exactly as in the original.
    for n_filters in (conv_f_2, conv_f_3):
        net.add(TimeDistributed(new_conv(n_filters, activation='relu')))
        add_norm_relu_pool(net)

    net.add(TimeDistributed(Flatten()))
    net.add(
        TimeDistributed(Dense(cnn_dense_fc_1, kernel_regularizer=l2(1.e-4))))
    return net
def __init__(self, dim, batch_norm, dropout, rec_dropout, task,
             target_repl=False, deep_supervision=False, num_classes=1,
             depth=1, input_dim=76, **kwargs):
    """Build the self-attention + GRU network and initialise the Keras model.

    The masked input goes through a multiplicative ``SeqSelfAttention``
    layer and one GRU; the head depends on ``target_repl`` and
    ``deep_supervision``.

    Args:
        dim: Number of GRU units.
        batch_norm: Stored on the instance only.
        dropout: GRU input dropout; when > 0 a Dropout layer is also added.
        rec_dropout: GRU recurrent dropout.
        task: 'decomp', 'ihm' or 'ph' (sigmoid head) or 'los' (relu head for
            one class, softmax otherwise).
        target_repl: Predict at every step and output ``[last step, all]``.
        deep_supervision: Predict at every step; adds the extra input 'M',
            which is combined with the predictions by ``ExtendMask``.
        num_classes: Size of the output layer.
        depth: Stored on the instance only (the stacked-layer loop is
            disabled).
        input_dim: Number of features per time step.
        **kwargs: Ignored; their keys are printed.

    Raises:
        ValueError: If ``task`` is not one of the supported names.
    """
    print("==> not used params in network class:", kwargs.keys())

    self.dim = dim
    self.batch_norm = batch_norm
    self.dropout = dropout
    self.rec_dropout = rec_dropout
    self.depth = depth

    # Pick the activation of the output layer.
    if task in ('decomp', 'ihm', 'ph'):
        final_activation = 'sigmoid'
    elif task in ('los',):
        final_activation = 'relu' if num_classes == 1 else 'softmax'
    else:
        raise ValueError("Wrong value for task")

    # Input layers and masking
    features_in = Input(shape=(None, input_dim), name='X')
    print("X-> " , features_in)
    inputs = [features_in]
    print("input->", inputs)
    masked = Masking()(features_in)

    mask_in = None
    if deep_supervision:
        mask_in = Input(shape=(None,), name='M')
        inputs.append(mask_in)

    # Main part of the network: the stacked-LSTM loop is disabled; the bare
    # string below holds it and is kept verbatim.
    ''' for i in range(depth - 1): num_units = dim if is_bidirectional: num_units = num_units // 2 lstm = LSTM(units=num_units, activation='tanh', return_sequences=True, recurrent_dropout=rec_dropout, dropout=dropout) if is_bidirectional: mX = Bidirectional(lstm)(mX) else: mX = lstm(mX) '''

    # Output module of the network
    per_step = (target_repl or deep_supervision)
    # Disabled LSTM alternative to the GRU below, kept verbatim.
    ''' L = LSTM(units=dim, activation='tanh', return_sequences=return_sequences, dropout=dropout, recurrent_dropout=rec_dropout)(mX) '''
    attention = SeqSelfAttention(
        attention_width=48,
        attention_type=SeqSelfAttention.ATTENTION_TYPE_MUL,
        attention_activation='sigmoid',
        kernel_regularizer=keras.regularizers.l2(1e-6),
        use_attention_bias=False,
        name='Attention',
    )
    recurrent = GRU(units=dim,
                    activation='tanh',
                    return_sequences=per_step,
                    dropout=dropout,
                    recurrent_dropout=rec_dropout)
    hidden = recurrent(attention(masked))

    if dropout > 0:
        hidden = Dropout(dropout)(hidden)

    if target_repl:
        seq_pred = TimeDistributed(
            Dense(num_classes, activation=final_activation),
            name='seq')(hidden)
        last_pred = LastTimestep(name='single')(seq_pred)
        outputs = [last_pred, seq_pred]
    elif deep_supervision:
        seq_pred = TimeDistributed(
            Dense(num_classes, activation=final_activation))(hidden)
        # this way we extend mask of y to M
        outputs = [ExtendMask()([seq_pred, mask_in])]
    else:
        outputs = [Dense(num_classes, activation=final_activation)(hidden)]

    super(Network, self).__init__(inputs=inputs, outputs=outputs)
print('train shape:', train_samples)
print('dev shape:', dev_samples)
print()

# Load the SENNA word embeddings and add two rows: an all-zero row in front
# and a uniformly random row in [-1, 1) at the end.
word_embedding = pd.read_csv('../preprocessing/senna/embeddings.txt', delimiter=' ', header=None)
word_embedding = word_embedding.values
word_embedding = np.concatenate([np.zeros((1,emb_length)),word_embedding, np.random.uniform(-1,1,(1,emb_length))])

# Inputs: word indices plus per-step POS and chunk feature vectors.
embed_index_input = Input(shape=(step_length,))
embedding = Embedding(emb_vocab+2, emb_length, weights=[word_embedding], mask_zero=True, input_length=step_length)(embed_index_input)
pos_input = Input(shape=(step_length, pos_length))
chunk_input = Input(shape=(step_length, chunk_length))

# NOTE(review): `merge(..., mode='concat')` and `Model(input=, output=)` are
# Keras 1 APIs - confirm the targeted Keras version.
senna_pos_chunk_merge = merge([embedding, pos_input, chunk_input], mode='concat')
input_mask = Masking(mask_value=0)(senna_pos_chunk_merge)
dp_1 = Dropout(0.5)(input_mask)
# Two stacked bidirectional LSTMs, both returning the full sequence,
# followed by a per-step softmax.
hidden_1 = Bidirectional(LSTM(64, return_sequences=True))(dp_1)
hidden_2 = Bidirectional(LSTM(32, return_sequences=True))(hidden_1)
dp_2 = Dropout(0.5)(hidden_2)
output = TimeDistributed(Dense(output_length, activation='softmax'))(dp_2)

model = Model(input=[embed_index_input,pos_input,chunk_input], output=output)
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])
print(model.summary())

# Batches per epoch, rounded up so the last partial batch is included.
number_of_train_batches = int(math.ceil(float(train_samples)/batch_size))
def __init__(self, dim, batch_norm, dropout, rec_dropout, task,
             target_repl=False, deep_supervision=False, num_classes=1,
             depth=1, input_dim=376, **kwargs):
    """Build a masked (optionally stacked, bidirectional) LSTM network.

    Args:
        dim: number of units of the final LSTM; each stacked layer uses
            ``dim`` units, or ``dim // 2`` per direction when bidirectional.
        batch_norm: stored on the instance; not used while building here.
        dropout: input dropout of every LSTM; when > 0 an extra
            ``Dropout`` layer with the same rate follows the last LSTM.
        rec_dropout: recurrent dropout of every LSTM.
        task: one of 'decomp', 'ihm', 'ph' (sigmoid output) or 'los'
            (relu output when ``num_classes == 1``, softmax otherwise).
        target_repl: makes the last LSTM return the full sequence.
        deep_supervision: makes the last LSTM return the full sequence,
            disables the bidirectional wrapper and registers an extra
            input 'M' (no layer built in this method consumes it).
        num_classes: width of the output layer.
        depth: total number of LSTM layers (``depth - 1`` stacked layers
            plus the final one).
        input_dim: number of features per timestep.
        **kwargs: ignored; their keys are printed.

    Raises:
        ValueError: if ``task`` is not one of the supported names.
    """
    print("==> not used params in network class:", kwargs.keys())

    self.output_dim = dim
    self.batch_norm = batch_norm
    self.dropout = dropout
    self.rec_dropout = rec_dropout
    self.depth = depth

    # Pick the activation of the output layer from the task.
    if task in ['decomp', 'ihm', 'ph']:
        final_activation = 'sigmoid'
    elif task in ['los']:
        final_activation = 'relu' if num_classes == 1 else 'softmax'
    else:
        # Must be raised, not returned: a non-None return value from
        # __init__ surfaces as an unrelated TypeError and lets the build
        # continue with final_activation unbound.
        raise ValueError("Wrong value for task")

    # Input layers and masking
    X = Input(shape=(None, input_dim), name='X')
    inputs = [X]
    mX = Masking()(X)

    if deep_supervision:
        M = Input(shape=(None, ), name='M')
        inputs.append(M)

    # Stacked layers are bidirectional unless deep supervision is on.
    is_bidirectional = not deep_supervision

    # Each direction gets half of the units so the concatenated output of
    # a bidirectional layer has (about) `dim` features.
    num_units = dim // 2 if is_bidirectional else dim

    # Main part of the network: depth - 1 sequence-returning LSTM layers.
    for _ in range(depth - 1):
        lstm = LSTM(num_units,
                    activation='tanh',
                    return_sequences=True,
                    dropout_U=rec_dropout,
                    dropout_W=dropout)
        if is_bidirectional:
            mX = Bidirectional(lstm)(mX)
        else:
            mX = lstm(mX)

    # Output module of the network
    return_sequences = (target_repl or deep_supervision)
    L = LSTM(dim,
             activation='tanh',
             return_sequences=return_sequences,
             dropout_W=dropout,
             dropout_U=rec_dropout)(mX)

    if dropout > 0:
        L = Dropout(dropout)(L)

    y = Dense(num_classes, activation=final_activation)(L)
    outputs = [y]

    super(Network, self).__init__(inputs, outputs)
def get_model(args):
    """Build the two-stream (CNN + MLP) cross-connected LSTM model.

    The CNN stream processes a sequence of frames, the MLP stream a
    sequence of feature vectors. After each of the two stages the streams
    exchange features ("cross-connections") and each receives a shortcut
    computed from the other stream's input. The merged per-timestep
    features feed an LSTM followed by a softmax classifier.

    Args:
        args: object with a ``dataset`` attribute; only 'avletters'
            (case-insensitive) is accepted.

    Returns:
        The uncompiled Keras ``Model`` taking ``[inputCNN, inputMLP]``.
    """
    # Dataset config
    assert args.dataset.lower() == 'avletters'
    settings = data_constants['avletters']
    cnn_input_shape = settings['lstm_inputCNNshape']
    mlp_input_shape = settings['lstm_inputMLPshape']
    num_classes = settings['nb_classes']

    # ---- CNN stream, stage 1 -------------------------------------------
    # Frames are flattened so Masking can act per timestep, then restored
    # to 80x60x1 images.
    cnn_input = Input(shape=cnn_input_shape)
    frames = TimeDistributed(Flatten())(cnn_input)
    frames = Masking(mask_value=0.)(frames)
    frames = TimeDistributed(Reshape((80, 60, 1)))(frames)
    frames = BatchNormalization(axis=1)(frames)
    conv1 = TimeDistributed(
        Convolution2D(8, 3, 3, border_mode='same', activation='relu'),
        name='conv11')(frames)
    pool1 = TimeDistributed(
        MaxPooling2D((2, 2), strides=(2, 2)), name='maxpool1')(conv1)

    # ---- MLP stream, stage 1 -------------------------------------------
    mlp_input = Input(shape=mlp_input_shape)
    mlp_masked = Masking(mask_value=0.,
                         input_shape=mlp_input_shape)(mlp_input)
    fc1 = TimeDistributed(Dense(32, activation='relu'),
                          name='fc1')(mlp_masked)

    # ---- Cross-connection 1: CNN -> MLP --------------------------------
    cnn_to_mlp1 = TimeDistributed(
        Convolution2D(8, 1, 1, border_mode='same'))(pool1)
    cnn_to_mlp1 = TimeDistributed(PReLU())(cnn_to_mlp1)
    cnn_to_mlp1 = TimeDistributed(Flatten())(cnn_to_mlp1)
    cnn_to_mlp1 = TimeDistributed(Dense(32))(cnn_to_mlp1)
    cnn_to_mlp1 = TimeDistributed(PReLU())(cnn_to_mlp1)

    # Shortcut 1 (residual connection): CNN input -> MLP
    img_shortcut1 = TimeDistributed(
        MaxPooling2D((4, 4), strides=(4, 4)))(frames)
    img_shortcut1 = TimeDistributed(Flatten())(img_shortcut1)
    img_shortcut1 = TimeDistributed(Dense(32))(img_shortcut1)
    img_shortcut1 = TimeDistributed(PReLU())(img_shortcut1)

    # ---- Cross-connection 1: MLP -> CNN --------------------------------
    # A 25x15 map is up-sampled by a transposed convolution, presumably to
    # the size of the pooled feature map so it can be merged with it.
    mlp_to_cnn1 = TimeDistributed(Dense(25 * 15))(fc1)
    mlp_to_cnn1 = TimeDistributed(PReLU())(mlp_to_cnn1)
    mlp_to_cnn1 = TimeDistributed(Reshape((25, 15, 1)))(mlp_to_cnn1)
    mlp_to_cnn1 = TimeDistributed(
        Conv2DTranspose(8, (16, 16), padding='valid'))(mlp_to_cnn1)
    mlp_to_cnn1 = TimeDistributed(PReLU())(mlp_to_cnn1)

    # Shortcut 1 (residual connection): MLP input -> CNN
    vec_shortcut1 = TimeDistributed(Dense(25 * 15))(mlp_masked)
    vec_shortcut1 = TimeDistributed(PReLU())(vec_shortcut1)
    vec_shortcut1 = TimeDistributed(Reshape((25, 15, 1)))(vec_shortcut1)
    vec_shortcut1 = TimeDistributed(
        Conv2DTranspose(8, (16, 16), padding='valid'))(vec_shortcut1)
    vec_shortcut1 = TimeDistributed(PReLU())(vec_shortcut1)

    # ---- CNN stream, stage 2 (after cross-connections 1) ---------------
    pool1_res = add([pool1, vec_shortcut1])
    cnn_stage2_in = concatenate([pool1_res, mlp_to_cnn1])
    conv2 = TimeDistributed(
        Convolution2D(16, 3, 3, border_mode='same', activation='relu'),
        name='conv21')(cnn_stage2_in)
    pool2 = TimeDistributed(
        MaxPooling2D((2, 2), strides=(2, 2)), name='maxpool2')(conv2)

    # ---- MLP stream, stage 2 (after cross-connections 1) ---------------
    fc1_res = add([fc1, img_shortcut1])
    mlp_stage2_in = concatenate([fc1_res, cnn_to_mlp1])
    fc2 = TimeDistributed(Dense(32, activation='relu'),
                          name='fc2')(mlp_stage2_in)

    # ---- Cross-connection 2: CNN -> MLP --------------------------------
    cnn_to_mlp2 = TimeDistributed(
        Convolution2D(16, 1, 1, border_mode='same'))(pool2)
    cnn_to_mlp2 = TimeDistributed(PReLU())(cnn_to_mlp2)
    cnn_to_mlp2 = TimeDistributed(Flatten())(cnn_to_mlp2)
    cnn_to_mlp2 = TimeDistributed(Dense(64))(cnn_to_mlp2)
    cnn_to_mlp2 = TimeDistributed(PReLU())(cnn_to_mlp2)

    # Shortcut 2 (residual connection): CNN input -> MLP
    # NOTE(review): strides are (8, 4) here while the first shortcut uses a
    # square stride -- confirm this asymmetry is intended.
    img_shortcut2 = TimeDistributed(
        MaxPooling2D((8, 8), strides=(8, 4)))(frames)
    img_shortcut2 = TimeDistributed(Flatten())(img_shortcut2)
    img_shortcut2 = TimeDistributed(Dense(32))(img_shortcut2)
    img_shortcut2 = TimeDistributed(PReLU())(img_shortcut2)

    # ---- Cross-connection 2: MLP -> CNN --------------------------------
    mlp_to_cnn2 = TimeDistributed(Dense(13 * 8))(fc2)
    mlp_to_cnn2 = TimeDistributed(PReLU())(mlp_to_cnn2)
    mlp_to_cnn2 = TimeDistributed(Reshape((13, 8, 1)))(mlp_to_cnn2)
    mlp_to_cnn2 = TimeDistributed(
        Conv2DTranspose(16, (8, 8), padding='valid'))(mlp_to_cnn2)
    mlp_to_cnn2 = TimeDistributed(PReLU())(mlp_to_cnn2)

    # Shortcut 2 (residual connection): MLP input -> CNN
    vec_shortcut2 = TimeDistributed(Dense(13 * 8))(mlp_masked)
    vec_shortcut2 = TimeDistributed(PReLU())(vec_shortcut2)
    vec_shortcut2 = TimeDistributed(Reshape((13, 8, 1)))(vec_shortcut2)
    vec_shortcut2 = TimeDistributed(
        Conv2DTranspose(16, (8, 8), padding='valid'))(vec_shortcut2)
    vec_shortcut2 = TimeDistributed(PReLU())(vec_shortcut2)

    # ---- CNN stream head (after cross-connections 2) -------------------
    pool2_res = add([pool2, vec_shortcut2])
    cnn_head_in = concatenate([pool2_res, mlp_to_cnn2])
    cnn_flat = TimeDistributed(Flatten(), name='flatten1')(cnn_head_in)
    cnn_features = TimeDistributed(Dense(64, activation='relu'),
                                   name='fcCNN')(cnn_flat)

    # ---- Merge both streams and classify the sequence ------------------
    fc2_res = add([fc2, img_shortcut2])
    fused = concatenate([cnn_features, fc2_res, cnn_to_mlp2])
    fused = BatchNormalization(axis=1, name='mergebn')(fused)
    fused = Dropout(0.5, name='mergedrop')(fused)

    sequence_summary = LSTM(64)(fused)
    predictions = Dense(num_classes, activation='softmax')(sequence_summary)

    # Return the model object
    return Model(input=[cnn_input, mlp_input], output=predictions)
predictfrom_ind, predictfrom_resp = predictfrom[:, :, :-1], predictfrom[:, :, -1] print("Shape of the array used for prediction:", predictfrom_ind.shape) # The shape IS THE SAME as seasonML1! #%% Design and train the LSTM model: # Design LSTM neural network # Define the network using the Sequential Keras API model = Sequential() # Inform algorithm that -1 represents non-values model.add( Masking(mask_value=-1, input_shape=(train_ind.shape[1], train_ind.shape[2]))) # Define as LSTM with 9 neurons - not optimized - use 9 because I have 9 statistical categories model.add(LSTM(9)) # I'm not even sure why I need this part, but it doesn't work without it... model.add(Dense(train_ind.shape[1])) # Define a loss function and the Adam optimization algorithm model.compile(loss='mean_squared_error', optimizer='adam') # train network history = model.fit(train_ind, train_resp, epochs=40, batch_size=25,
def negative_samples(input_length, input_dim, output_length, output_dim,
                     hidden_dim, ns_amount, learning_rate, drop_rate):
    """Build and compile a query/response matching model with negatives.

    A bidirectional GRU encodes the query; a second bidirectional GRU is
    shared by the correct response and the ``ns_amount`` negative
    responses. Each response is first combined with its weight by
    ``adding_weight`` (defined elsewhere). Query and response vectors are
    compared with a normalized dot product.

    Args:
        input_length: number of timesteps of the query.
        input_dim: feature size of each query timestep.
        output_length: number of timesteps of every response.
        output_dim: feature size of each response timestep.
        hidden_dim: number of units of both GRUs.
        ns_amount: number of negative responses per sample.
        learning_rate: learning rate passed to the optimizer.
        drop_rate: dropout rate applied to every encoded vector.

    Returns:
        A compiled Keras model. Its inputs are, in order: query, correct
        response, stacked negative responses, weight of the correct
        response, weights of the negatives. Its output is the similarity
        between the query and the correct response. The compiled loss
        ignores ``y_true``/``y_pred`` and returns the tensor that
        ``loss_c`` (defined elsewhere) computes from all similarities.
    """
    query_in = Input(shape=(input_length, input_dim))
    positive_in = Input(shape=(output_length, output_dim))
    positive_weight_in = Input(shape=(1, ))
    negative_weights_in = Input(shape=(1, ns_amount))

    # One weight tensor per negative sample (split along the last axis).
    negative_weight_parts = []
    if ns_amount != 0:
        negative_weight_parts = Lambda(
            lambda x: tf.split(x, num_or_size_splits=ns_amount, axis=2)
        )(negative_weights_in)

    weighted_positive = adding_weight(output_length, output_dim)(
        [positive_in, positive_weight_in])

    # All negatives arrive stacked on a trailing axis; split them apart.
    negatives_in = Input(shape=(output_length, output_dim, ns_amount))
    negative_parts = []
    if ns_amount != 0:
        negative_parts = Lambda(
            lambda x: tf.split(x, num_or_size_splits=ns_amount, axis=3)
        )(negatives_in)

    if ns_amount == 1:
        # With a single split the Lambda result is not a list here, so it
        # is wrapped to keep the indexing below uniform.
        negative_parts = [negative_parts]
        negative_weight_parts = [negative_weight_parts]

    # Drop the split axis, apply the weight and mask every negative.
    # The two lists are overwritten in place, as in the original code.
    masked_negatives = []
    for idx in range(ns_amount):
        negative_parts[idx] = Reshape(
            (output_length, output_dim))(negative_parts[idx])
        negative_weight_parts[idx] = Reshape(
            (1, ))(negative_weight_parts[idx])
        weighted_negative = adding_weight(output_length, output_dim)(
            [negative_parts[idx], negative_weight_parts[idx]])
        masked_negatives.append(
            Masking(mask_value=0.,
                    input_shape=(output_length,
                                 output_dim))(weighted_negative))

    query_masked = Masking(mask_value=0.,
                           input_shape=(input_length, input_dim))(query_in)
    positive_masked = Masking(
        mask_value=0.,
        input_shape=(output_length, output_dim))(weighted_positive)

    # Query encoder.
    encoder = Bidirectional(GRU(hidden_dim),
                            merge_mode="ave",
                            name="bidirectional1")
    query_vec = encoder(query_masked)
    query_vec = Dropout(rate=drop_rate, name="dropout1")(query_vec)

    # Response encoder, shared by the correct and the negative responses.
    decoder = Bidirectional(GRU(hidden_dim),
                            merge_mode="ave",
                            name="bidirectional2")
    positive_vec = decoder(positive_masked)
    positive_vec = Dropout(rate=drop_rate, name="dropout2")(positive_vec)

    negative_vecs = []
    for idx in range(ns_amount):
        negative_vec = decoder(masked_negatives[idx])
        negative_vec = Dropout(rate=drop_rate)(negative_vec)
        negative_vecs.append(negative_vec)

    # Similarity of the query with the correct response first, then with
    # every negative response in order.
    similarities = [
        Dot(axes=1, normalize=True)([query_vec, positive_vec])
    ]
    for idx in range(ns_amount):
        similarities.append(
            Dot(axes=1, normalize=True)([query_vec, negative_vecs[idx]]))

    loss_data = Lambda(lambda x: loss_c(x))(similarities)

    model = Model(
        [query_in, positive_in, negatives_in,
         positive_weight_in, negative_weights_in],
        similarities[0])
    ada = adam(lr=learning_rate)
    # The loss closes over the tensor built above; the arguments Keras
    # passes to the loss function are deliberately unused.
    model.compile(optimizer=ada, loss=lambda y_true, y_pred: loss_data)
    return model