# NOTE(review): the next three statements continue the body of a windowing
# loop whose header (and the definition of `seq_in`) lies above this excerpt.
seq_out = raw_text[i + seq_length]
dataX.append([chars_to_int[ch] for ch in seq_in])
dataY.append(chars_to_int[seq_out])

n_patterns = len(dataX)
print('Total patterns: {}'.format(n_patterns))

# Reshape the encoded windows to (patterns, seq_length, 1) and divide by the
# vocabulary size in a single expression.
X = np.reshape(dataX, (n_patterns, seq_length, 1)) / float(n_vocab)

# One-hot encode the target characters.
y = np_utils.to_categorical(dataY)

# Single recurrent layer followed by dropout and a softmax over the classes.
model = Sequential([
    CuDNNLSTM(256, input_shape=(X.shape[1], X.shape[2])),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam')

# Checkpoint callback: keep only the weights with the lowest training loss.
filepath = "weights-improvement-latest.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    verbose=1,
    save_best_only=True,
    mode='min',
)
callbacks_list = [checkpoint]

# Restore the weights stored at `filepath` (no training happens in this excerpt).
model.load_weights(filepath)
def get_model(config):
    """Build and compile a four-headed recurrent regression ensemble.

    One shared identity-initialised embedding feeds two towers: the first
    starts with a bidirectional LSTM, the second with a bidirectional GRU.
    Each tower branches into an LSTM and a GRU second layer, and each of those
    into a stride-2 ``valid`` Conv1D.  Pooled and attention-weighted summaries
    of every level are concatenated per branch and mapped to one scalar.

    Args:
        config: object exposing ``strmaxlen``, ``max_features``,
            ``prob_dropout``, ``prob_dropout2``, ``cell_size_l1``,
            ``cell_size_l2``, ``filter_size`` and ``kernel_size``.

    Returns:
        A compiled Keras ``Model`` with five outputs
        ``[out_LL, out_LG, out_GL, out_GG, out_avg]``; the averaged output is
        weighted 0.1 in the loss, the four heads 1.0 each.
    """
    conv_stride = 2
    # Number of time steps a 'valid' convolution produces:
    # floor((steps - kernel) / stride) + 1.  The previous hard-coded
    # int(strmaxlen / 2 - 1) equals this only for some kernel sizes / lengths.
    # NOTE(review): Attention's argument is taken to be the step count because
    # it is given config.strmaxlen for the un-strided sequences - confirm.
    conv_steps = (config.strmaxlen - config.kernel_size) // conv_stride + 1

    inp = Input(shape=(config.strmaxlen, ), name='input')
    emb = Embedding(config.max_features, config.max_features,
                    embeddings_initializer='identity', trainable=True)(inp)

    # ---- Tower 1: first layer is a bidirectional LSTM --------------------
    emb1 = SpatialDropout1D(config.prob_dropout)(emb)

    l1_L = Bidirectional(CuDNNLSTM(config.cell_size_l1, return_sequences=True))(emb1)
    l2_LL = Bidirectional(CuDNNLSTM(config.cell_size_l2, return_sequences=True))(l1_L)
    l2_LG = Bidirectional(CuDNNGRU(config.cell_size_l2, return_sequences=True))(l1_L)
    l3_LLC = Conv1D(config.filter_size, kernel_size=config.kernel_size,
                    strides=conv_stride, padding="valid",
                    kernel_initializer="he_uniform")(l2_LL)
    l3_LGC = Conv1D(config.filter_size, kernel_size=config.kernel_size,
                    strides=conv_stride, padding="valid",
                    kernel_initializer="he_uniform")(l2_LG)

    avg_pool_L = GlobalAveragePooling1D()(l1_L)
    max_pool_L = GlobalMaxPooling1D()(l1_L)

    avg_pool_LL = GlobalAveragePooling1D()(l2_LL)
    max_pool_LL = GlobalMaxPooling1D()(l2_LL)
    avg_pool_LG = GlobalAveragePooling1D()(l2_LG)
    max_pool_LG = GlobalMaxPooling1D()(l2_LG)

    attention_LLA = Attention(config.strmaxlen)(l2_LL)
    attention_LGA = Attention(config.strmaxlen)(l2_LG)

    avg_pool_LLC = GlobalAveragePooling1D()(l3_LLC)
    max_pool_LLC = GlobalMaxPooling1D()(l3_LLC)
    avg_pool_LGC = GlobalAveragePooling1D()(l3_LGC)
    max_pool_LGC = GlobalMaxPooling1D()(l3_LGC)

    attention_LLCA = Attention(conv_steps)(l3_LLC)
    attention_LGCA = Attention(conv_steps)(l3_LGC)

    conc_LLC = concatenate([avg_pool_L, max_pool_L, avg_pool_LL, max_pool_LL,
                            avg_pool_LLC, max_pool_LLC, attention_LLA, attention_LLCA])
    conc_LGC = concatenate([avg_pool_L, max_pool_L, avg_pool_LG, max_pool_LG,
                            avg_pool_LGC, max_pool_LGC, attention_LGA, attention_LGCA])

    out_LL = Dropout(config.prob_dropout2)(conc_LLC)
    out_LG = Dropout(config.prob_dropout2)(conc_LGC)
    out_LL = Dense(1)(out_LL)
    out_LG = Dense(1)(out_LG)

    # ---- Tower 2: first layer is a bidirectional GRU ---------------------
    # Shares `emb` with tower 1 but uses its own spatial-dropout layer.
    emb2 = SpatialDropout1D(config.prob_dropout)(emb)

    l1_G = Bidirectional(CuDNNGRU(config.cell_size_l1, return_sequences=True))(emb2)
    l2_GL = Bidirectional(CuDNNLSTM(config.cell_size_l2, return_sequences=True))(l1_G)
    l2_GG = Bidirectional(CuDNNGRU(config.cell_size_l2, return_sequences=True))(l1_G)
    l3_GLC = Conv1D(config.filter_size, kernel_size=config.kernel_size,
                    strides=conv_stride, padding="valid",
                    kernel_initializer="he_uniform")(l2_GL)
    l3_GGC = Conv1D(config.filter_size, kernel_size=config.kernel_size,
                    strides=conv_stride, padding="valid",
                    kernel_initializer="he_uniform")(l2_GG)

    avg_pool_G = GlobalAveragePooling1D()(l1_G)
    max_pool_G = GlobalMaxPooling1D()(l1_G)

    avg_pool_GL = GlobalAveragePooling1D()(l2_GL)
    max_pool_GL = GlobalMaxPooling1D()(l2_GL)
    avg_pool_GG = GlobalAveragePooling1D()(l2_GG)
    max_pool_GG = GlobalMaxPooling1D()(l2_GG)

    attention_GLA = Attention(config.strmaxlen)(l2_GL)
    attention_GGA = Attention(config.strmaxlen)(l2_GG)

    avg_pool_GLC = GlobalAveragePooling1D()(l3_GLC)
    max_pool_GLC = GlobalMaxPooling1D()(l3_GLC)
    avg_pool_GGC = GlobalAveragePooling1D()(l3_GGC)
    max_pool_GGC = GlobalMaxPooling1D()(l3_GGC)

    attention_GLCA = Attention(conv_steps)(l3_GLC)
    attention_GGCA = Attention(conv_steps)(l3_GGC)

    conc_GLC = concatenate([avg_pool_G, max_pool_G, avg_pool_GL, max_pool_GL,
                            avg_pool_GLC, max_pool_GLC, attention_GLA, attention_GLCA])
    conc_GGC = concatenate([avg_pool_G, max_pool_G, avg_pool_GG, max_pool_GG,
                            avg_pool_GGC, max_pool_GGC, attention_GGA, attention_GGCA])

    out_GL = Dropout(config.prob_dropout2)(conc_GLC)
    out_GG = Dropout(config.prob_dropout2)(conc_GGC)
    out_GL = Dense(1)(out_GL)
    out_GG = Dense(1)(out_GG)

    # ---- Ensemble output --------------------------------------------------
    out_avg = average([out_LL, out_LG, out_GL, out_GG])

    model_avg = Model(inputs=inp, outputs=[out_LL, out_LG, out_GL, out_GG, out_avg])
    model_avg.compile(loss='mean_squared_error', optimizer='adam',
                      loss_weights=[1., 1., 1., 1., 0.1],
                      metrics=['mean_squared_error', 'accuracy'])

    return model_avg
# Frozen embedding layer initialised from the pre-computed matrix `wv_matrix`.
wv_layer = Embedding(
    nb_words,
    WV_DIM,
    mask_zero=False,
    weights=[wv_matrix],
    input_length=MAX_SEQUENCE_LENGTH,
    trainable=False,
)

# Integer token ids in, embedded sequence out.
comment_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
embedded_sequences = wv_layer(comment_input)

# Bidirectional LSTM encoder; only the final state is returned.
embedded_sequences = SpatialDropout1D(0.2)(embedded_sequences)
x = Bidirectional(CuDNNLSTM(64, return_sequences=False))(embedded_sequences)

# Classification head: six independent sigmoid outputs.
x = Dropout(0.2)(x)
x = BatchNormalization()(x)
preds = Dense(6, activation='sigmoid')(x)

# Assemble, compile and fit; 10% of the data is held out for validation.
model = Model(inputs=[comment_input], outputs=preds)
model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(lr=0.001, clipnorm=.25, beta_1=0.7, beta_2=0.99),
    metrics=[],
)
hist = model.fit(
    [data],
    y,
    validation_split=0.1,
    epochs=10,
    batch_size=256,
    shuffle=True,
)
# Training hyper-parameters.
opt = 'RMSprop'
lf = 'binary_crossentropy'
ep = 1000
bs = 32
val_split = 0.2
es = EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=10,
    verbose=0,
    mode='auto',
    baseline=None,
    restore_best_weights=True,
)

# Network: input -> CuDNN LSTM -> dropout -> dense output.
regressor = Sequential()
for layer in (
    InputLayer(input_shape=(lookback, features)),
    CuDNNLSTM(units=input_units, return_sequences=False),
    Dropout(dp),
    Dense(units=output_units, activation=act),
):
    regressor.add(layer)

regressor.compile(optimizer=opt, loss=lf)

# ------------ Main loop: one iteration per entry of `return_window` -------------
for i in range(0, len(return_window)):
    # Candidate columns: those flagged 1 in the row of `binary_matrix` that
    # belongs to this period.
    vec0 = list(np.where(binary_matrix[(749 + i * test), :] == 1)[0])
    # Keep a candidate only if neither its [0:750) slice nor its [750:1000)
    # slice is entirely NaN.
    vec = [
        u for u in vec0
        if not all(np.isnan(return_window[i, 0:750, u, 0]))
        and not all(np.isnan(return_window[i, 750:1000, u, 0]))
    ]
    # Training set
max_length = max(len(txt) for txt in x_data)
words_size = len(char2id_dict)

# -------------------------------#
#   Build the neural network
# -------------------------------#
# Variable-length sequences of `words_size`-dimensional vectors go through two
# stacked CuDNN LSTMs (dropout 0.6 after each) into a softmax over the
# vocabulary.
inputs = Input(shape=(None, words_size))
x = CuDNNLSTM(UNITS, return_sequences=True)(inputs)
x = Dropout(0.6)(x)
x = CuDNNLSTM(UNITS)(x)
x = Dropout(0.6)(x)
x = Dense(words_size, activation='softmax')(x)

model = Model(inputs, x)
# Restore previously saved weights.
model.load_weights("logs/loss4.419-val_loss4.009.h5")
def get_model(img_w, img_h):
    """Build the CNN + bidirectional-LSTM text recogniser trained with CTC.

    Args:
        img_w: first dimension of the single-channel input image.
        img_h: second dimension of the single-channel input image.

    Returns:
        ``(model, tiger_train)``: the compiled training model, whose only
        output is the CTC loss, and the ``TextImageGenerator`` that supplied
        the vocabulary size and maximum label length.
    """
    batch_size = 128
    rnn_size = 384

    tiger_train = TextImageGenerator(train_data_path=TRAIN_DATA_PATH,
                                     test_data_path=TEST_DATA_PATH,
                                     batch_size=batch_size)
    tiger_train.build_data()

    inp = Input(name='the_input', shape=(img_w, img_h, 1))

    # Stem convolution.
    feat = Conv2D(kernel_size=3, filters=16, strides=1, padding='same',
                  kernel_regularizer=regularizers.l2(0.01))(inp)
    feat = BatchNormalization()(feat)
    feat = Activation(relu)(feat)

    # Three stages of two `block`s each; the later stages open with an
    # upscaling block.  Every stage ends with BN -> ReLU -> average pooling.
    for width, widen in ((16, False), (32, True), (48, True)):
        entry = block(width, upscale=True) if widen else block(width)
        feat = entry(feat)
        feat = block(width)(feat)
        feat = BatchNormalization()(feat)
        feat = Activation(relu)(feat)
        feat = AveragePooling2D()(feat)

    # Print the convolutional front-end on its own.
    Model(inputs=inp, outputs=feat).summary()

    # Collapse the feature map into a sequence of 62 steps with 10 * 48
    # features each (sizes are fixed, so they constrain img_w / img_h).
    inner = Reshape(target_shape=(62, 10 * 48), name='reshape')(feat)

    def bi_lstm(layer_name):
        # One bidirectional CuDNN LSTM returning the full sequence.
        return Bidirectional(CuDNNLSTM(
            rnn_size,
            return_sequences=True,
            kernel_initializer=keras.initializers.he_uniform(seed=None),
            name=layer_name))

    # First recurrent level: two parallel layers, summed.
    rec_1a = bi_lstm('gru1')(inner)
    rec_1b = bi_lstm('gru1_b')(inner)
    level_1 = Add()([rec_1a, rec_1b])

    # Second recurrent level: two parallel layers, concatenated.
    rec_2a = bi_lstm('gru2')(level_1)
    rec_2b = bi_lstm('gru2_b')(level_1)
    level_2 = concatenate([rec_2a, rec_2b])

    top = bi_lstm('gru3')(level_2)

    # Per-time-step distribution over the vocabulary.
    logits = TimeDistributed(Dense(
        tiger_train.vocab_size,
        kernel_initializer=keras.initializers.he_uniform(seed=None),
        name='dense2'
    ))(top)
    y_pred = Activation('softmax', name='softmax')(logits)

    # Extra inputs consumed by the CTC loss layer.
    labels = Input(name='the_labels', shape=[tiger_train.max_text_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(
        [y_pred, labels, input_length, label_length])

    model = Model(inputs=[inp, labels, input_length, label_length], outputs=loss_out)
    # The 'ctc' layer already outputs the loss, so the Keras loss just passes
    # y_pred through.
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred},
                  optimizer='adam', metrics=['acc'])

    return model, tiger_train
def run_lstm_model(scenario=None):
    """
    Train (or load) an LSTM predictor for one scenario and score it.

    Encode the sequence of memory addresses to a sequence of integers.
    We can do it either using Keras, or by implementing our own conversion
    function (see "encode_mem_accesses").
    For the Keras approach, see documentation in the source code here:
    https://github.com/keras-team/keras-preprocessing/blob/master/keras_preprocessing/text.py

    Args:
        scenario: mapping holding every configuration key read below.  Despite
            the ``None`` default a mapping is required; ``None`` fails on the
            first lookup.

    Returns:
        * ``"online_retraining no function yet"`` when
          ``scenario['online_retraining']`` is truthy;
        * ``0`` when ``scenario['on_the_fly_testing']`` is truthy;
        * otherwise ``(train_accuracy, accuracy, misc_stats)``, where
          ``train_accuracy`` is ``-1`` if a saved model was loaded instead of
          trained.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; confirm it against the original layout.
    """
    use_manual_encoding = scenario['use_manual_encoding']
    app_name = scenario['app_name']
    decompose_timeseries = scenario['decompose_timeseries']
    decomposition_frequency = scenario['decomposition_frequency']
    test_ratio = scenario['test_ratio']
    on_the_fly_testing = scenario['on_the_fly_testing']
    plot_timeseries = scenario['plot_timeseries']
    look_back = scenario['look_back_window']
    scenario_name = scenario['scenario_name']
    vocabulary_maximum_size = scenario['vocabulary_maximum_size']
    vocabulary_mimimum_word_frequency_quantile = scenario['vocabulary_mimimum_word_frequency_quantile']
    model_diffs = scenario['model_diffs']
    lstm_batch_size = scenario['lstm_batch_size']
    lstm_epochs = scenario['lstm_epochs']
    verbosity = scenario['verbosity']
    dropout_ratio = scenario['dropout_ratio']
    lstm_size = scenario['lstm_size']
    embedding_size = scenario['embedding_size']
    prediction_batch_size = scenario['prediction_batch_size']
    online_retraining = scenario['online_retraining']
    online_learning_accuracy_threshold = scenario['online_learning_accuracy_threshold']
    online_retraining_periods = scenario['online_retraining_periods']
    online_retraining_period_size = scenario['online_retraining_period_size']
    number_of_rows_to_model = scenario['number_of_rows_to_model']
    model_type = scenario['model_type']
    loss_function = scenario['loss_function']
    activation_fuction = scenario['activation_function']
    # Output compression: predict a bit vector instead of a one-hot word.
    convert_output_to_binary = scenario['convert_output_to_binary']  # this is used for FPGA implementation.
    bit_size = scenario['bit_size']
    load_existing_pickles = scenario['load_existing_pickles']
    # Input encoding / compression.
    encode_inputs = scenario['encode_inputs']
    # Cache-related settings.
    CACHE_SIZES = scenario['CACHE_SIZES']
    CACHE_BLOCK_SIZES = scenario['CACHE_BLOCK_SIZES']
    CACHE_REPLACEMENT_ALGOS = scenario['CACHE_REPLACEMENT_ALGOS']
    unique_key = scenario['id']

    misc_stats = {}  # miscellaneous statistics
    start_time = time.time()

    if online_retraining:
        return "online_retraining no function yet"
    else:
        # =====================================================================
        # Data preparation
        # =====================================================================
        encoded_final, sequences, final_vocab_size, tokenizer, tokenizer2, max_test_accuracy, max_length, dummy_word, \
            dummy_word_index, dummy_index, vocab_size_raw, dataset = dataset_creator(scenario)

        # =====================================================================
        # Neural Network Configuration
        # =====================================================================
        if convert_output_to_binary:
            model = Sequential()
            # With encoded inputs every token expands to `bit_size` positions
            # whose values index an embedding table of `bit_size` rows.
            model.add(Embedding(bit_size if encode_inputs else final_vocab_size, embedding_size,
                                input_length=(max_length - 1) * (bit_size if encode_inputs else 1)))
            if USE_GPU:
                print("uG2")
                model.add(CuDNNLSTM(lstm_size))
            else:
                print("nG2")
                model.add(LSTM(lstm_size))
            model.add(Dropout(dropout_ratio))
            # The size of this layer should align with the size of the bit
            # representation of the output.
            model.add(Dense(bit_size, activation=activation_fuction))
            model.compile(loss=loss_function, optimizer='adam', metrics=['accuracy'])
        else:
            model = Sequential()
            model.add(Embedding(final_vocab_size, embedding_size, input_length=max_length - 1))
            if USE_GPU:
                model.add(CuDNNLSTM(lstm_size))
            else:
                model.add(LSTM(lstm_size))
            model.add(Dropout(dropout_ratio))
            model.add(Dense(final_vocab_size, activation=activation_fuction))
            model.compile(loss=loss_function, optimizer='adam', metrics=['accuracy'])

        if verbosity > 0:
            print(model.summary())
            SVG(model_to_dot(model, show_shapes=True, show_layer_names=False).create(prog='dot', format='svg'))
            plot_model(model, to_file=NOTEBOOK_PLOTS_DIRECTORY + 'model_for_%s.png' % scenario_name,
                       show_shapes=True, show_layer_names=False)

        # =====================================================================
        # Model training/testing
        # =====================================================================
        # Every row of `sequences` is a window: all but the last column are
        # the inputs, the last column is the target.
        X, y = sequences[:, :-1], sequences[:, -1]

        if convert_output_to_binary:
            y = convert_to_binary(data=y, bit_size=bit_size)  # converts diffs to bit representation
            if encode_inputs:
                X = convert_to_binary(data=X, bit_size=bit_size, multidimensional_input=True)
        else:
            # Vectorize the output y (one hot encoding).
            y = to_categorical(y, num_classes=final_vocab_size)

        if on_the_fly_testing:
            return 0
        else:
            # Chronological split: shuffle=False keeps the order intact.
            X_train, X_test = train_test_split(X, test_size=test_ratio, shuffle=False)
            y_train, y_test = train_test_split(y, test_size=test_ratio, shuffle=False)
            y_train_raw, y_test_raw = train_test_split(dataset, test_size=test_ratio, shuffle=False)

            # =================================================================
            # IMPORTANT: The code below modifies the dummy word mappings to be
            # forcing a false positive to be counted.
            if max_test_accuracy < 1:
                print("Overwritting Ignored Words...")
                print(dummy_word_index)
                if convert_output_to_binary:
                    # Loop-invariant: encode the dummy word once, not per row.
                    dummy_bits = convert_to_binary(data=[dummy_word_index], bit_size=bit_size)[0]
                    y_test = np.array([[0 for _ in row] if np.array_equal(row, dummy_bits) else row
                                       for row in y_test])
                else:
                    y_test = np.array([[0 for _ in row] if argmax(row) == dummy_word_index else row
                                       for row in y_test])
                print("Overwritting Ignored Words Completted")
            # =================================================================

            model_file_name = NOTEBOOK_PICKLES_DIRECTORY + P_model_name + ".h5"
            if load_existing_pickles and os.path.isfile(model_file_name):
                model = load_model(model_file_name)
                train_history = None
                train_accuracy = -1
            else:
                train_history = model.fit(X_train, y_train, epochs=lstm_epochs, verbose=verbosity, shuffle=False,
                                          batch_size=lstm_batch_size)
                model.save(model_file_name)
                # Older Keras reports the metric as 'acc', newer releases as
                # 'accuracy'; accept either so this does not raise KeyError.
                history = train_history.history
                train_accuracy = (history['acc'] if 'acc' in history else history['accuracy'])[-1]

            if convert_output_to_binary:
                # Threshold the per-bit outputs at 0.5.
                y_pred = model.predict(X_test)
                y_pred[y_pred >= 0.5] = 1
                y_pred[y_pred < 0.5] = 0

                # Pack each 16-bit row into one uint16 (two bytes, byte order
                # swapped).  The builtin `bool` replaces the `np.bool` alias,
                # which no longer exists in current NumPy releases.
                packed_test = np.packbits(np.array(y_test, dtype=bool).reshape(-1, 2, 8)[:, ::-1]).view(np.uint16)
                packed_pred = np.packbits(np.array(y_pred, dtype=bool).reshape(-1, 2, 8)[:, ::-1]).view(np.uint16)
                np.savetxt('%s/y_test_%s.txt' % (NOTEBOOK_DATA_DIRECTORY, scenario_name), packed_test,
                           delimiter=',', fmt='%10.5f')
                np.savetxt('%s/y_pred_%s.txt' % (NOTEBOOK_DATA_DIRECTORY, scenario_name), packed_pred,
                           delimiter=',', fmt='%10.5f')

                # =============================================================
                # Reverse transforms: token index -> word.
                reverse_word_map = {index: word for word, index in tokenizer2.word_index.items()}

                def sequence_to_text(list_of_indices):
                    # Look every index up in the reverse map; unknown -> None.
                    return [reverse_word_map.get(letter) for letter in list_of_indices]

                original_testing_diffs = list(map(sequence_to_text, [packed_test]))
                original_predictions_diffs = list(map(sequence_to_text, [packed_pred]))
                np.savetxt('%s/y_test_actual_mem2_%s_%s.txt' % (NOTEBOOK_DATA_DIRECTORY, scenario_name, unique_key),
                           np.array(original_testing_diffs), delimiter=',\n', fmt='%s')
                np.savetxt(
                    '%s/y_test_predicted_mem2_%s_%s.txt' % (NOTEBOOK_DATA_DIRECTORY, scenario_name, unique_key),
                    np.array(original_predictions_diffs), delimiter=',\n', fmt='%s')

                def signed_diff(word):
                    # First character is a sign flag (1 means negative); the
                    # magnitude starts at the third character.
                    return (-1 if int(word[0]) == 1 else 1) * int(word[2:])

                # Decode both words of a pair, or mark the pair (None, None)
                # when either one is unknown or the dummy word.
                paired = [
                    (signed_diff(actual_word), signed_diff(predicted_word))
                    if predicted_word is not None and actual_word is not None
                    and predicted_word != dummy_word and actual_word != dummy_word
                    else (None, None)
                    for actual_word, predicted_word
                    in zip(original_testing_diffs[0], original_predictions_diffs[0])
                ]
                original_testing_diffs, original_predictions_diffs = zip(*paired)

                # Rebuild absolute addresses: hex value of raw entry i + 1
                # plus the decoded diff; "-1" marks skipped pairs.
                raw_addresses = list(y_test_raw)
                actual_memory_address = []
                predicted_memory_address = []
                for i, (act, pred) in enumerate(zip(original_testing_diffs, original_predictions_diffs)):
                    # The raw address is parsed only when a diff exists, so
                    # skipped pairs never index `raw_addresses`.
                    actual_memory_address.append(
                        hex(int(raw_addresses[i + 1], 16) + act) if act is not None else "-1")
                    predicted_memory_address.append(
                        hex(int(raw_addresses[i + 1], 16) + pred) if pred is not None else "-1")

                np.savetxt('%s/y_test_actual_mem_%s_%s.txt' % (NOTEBOOK_DATA_DIRECTORY, scenario_name, unique_key),
                           np.array(actual_memory_address), delimiter=',', fmt='%s')
                np.savetxt(
                    '%s/y_test_predicted_mem_%s_%s.txt' % (NOTEBOOK_DATA_DIRECTORY, scenario_name, unique_key),
                    np.array(predicted_memory_address), delimiter=',', fmt='%s')
                # =============================================================

                # Exact-match accuracy over whole bit vectors.
                accuracy = accuracy_score(np.array(y_test), np.array(y_pred))
                test_history = [0, accuracy]  # for backwards compatibility

            # NOTE(review): in the code visible here `accuracy` and
            # `test_history` are only assigned on the binary-output path.
            misc_stats['execution_time'] = time.time() - start_time
            misc_stats['vocab_size_raw'] = vocab_size_raw
            misc_stats['final_vocab_size'] = final_vocab_size
            misc_stats['params'] = model.count_params()
            misc_stats['max_test_accuracy'] = max_test_accuracy

            np.savetxt('%s/accuracy_%s_%s.txt' % (NOTEBOOK_REPORT_DIRECTORY, scenario_name, unique_key),
                       np.array([accuracy]), delimiter=',', fmt='%10.5f')

            plot_train_test_model_performance(train_history, test_history, app_name=app_name,
                                              scenario_name=scenario_name)
            print("Train Accuracy %f, Test Accuracy %f" % (train_accuracy, accuracy))
            return train_accuracy, accuracy, misc_stats
def Selector(dataframe,Threshold,target,Corr_Thresh,split,timesteps):
    """Train four forecasters on one series and return the name of the best.

    The candidates are a CuDNN LSTM, a 1-D CNN, and the same two architectures
    trained as the generator of a generator/discriminator stack.  Each one is
    scored with ``evaluate`` on the held-out tail of the data and the key with
    the smallest score wins.

    Args:
        dataframe, Threshold, target, Corr_Thresh: forwarded unchanged to
            ``DataProcessor``; ``target`` is also the name of the column to
            predict in the processed frame.
        split: fraction of rows (from the start) used for training.
        timesteps: number of past rows in each input window.

    Returns:
        One of ``'LSTM'``, ``'CNN'``, ``'GAN-LSTM'``, ``'GAN-CNN'``.
    """
    data = DataProcessor(dataframe, Threshold, target, Corr_Thresh)
    training_upbound = math.ceil(split * data.shape[0])
    target = list(data.columns).index(target)  # column position from here on
    lookback = timesteps
    features = data.shape[1]
    Model_Array = dict()

    def make_windows(scaled):
        # Sliding windows of `lookback` rows; the label is the target column
        # of the row that follows each window.
        rows = range(lookback, scaled.shape[0])
        windows = [scaled[i - lookback:i, :] for i in rows]
        labels = [scaled[i, target] for i in rows]
        return np.array(windows), np.array(labels)

    # Train data and scaling.  `sc` scales all columns; `sc_predict` is fitted
    # on the target column only so predictions can be mapped back.
    training_data = data.iloc[:training_upbound, :]
    sc = MinMaxScaler(feature_range=(0, 1))
    sc_predict = MinMaxScaler(feature_range=(0, 1))
    training_data_scaled = sc.fit_transform(training_data)
    sc_predict.fit(training_data.iloc[:, target].values.reshape(-1, 1))
    X_train, Y_train = make_windows(training_data_scaled)

    # Test data and scaling.  The test rows start at `training_upbound`
    # (previously `training_upbound + 1`, which silently dropped one row) and
    # are prefixed with the last `lookback` training rows so the first test
    # row already has a full window.
    dataset_total = pd.concat(
        [training_data.iloc[-lookback:, :], data.iloc[training_upbound:, :]], axis=0)
    X_test, Y_test = make_windows(sc.transform(dataset_total.copy()))

    print("LSTM is being trained and tested now\n")
    # Local names avoid shadowing any `LSTM` layer class imported by the file.
    lstm_model = Sequential()
    lstm_model.add(CuDNNLSTM(units=200, input_shape=(lookback, features)))
    lstm_model.add(Dense(units=1, activation='linear'))
    lstm_model.compile(optimizer='adadelta', loss="mean_absolute_error")
    lstm_model.fit(X_train, Y_train, epochs=500, batch_size=16, verbose=1)
    Model_Array['LSTM'] = lstm_model.evaluate(X_test, Y_test)
    # NOTE(review): the inverse-transformed predictions below are computed but
    # not used by this function.
    predicted_LSTM = sc_predict.inverse_transform(lstm_model.predict(X_test))

    print("CNN is being trained and tested now\n")
    cnn_model = Sequential()
    cnn_model.add(Conv1D(filters=128, kernel_size=2, activation='relu', input_shape=(lookback, features)))
    cnn_model.add(Conv1D(filters=128, kernel_size=2, activation='relu'))
    cnn_model.add(MaxPooling1D(2))
    cnn_model.add(Conv1D(filters=128, kernel_size=1, activation='relu'))
    cnn_model.add(Conv1D(filters=128, kernel_size=1, activation='relu'))
    cnn_model.add(Flatten())
    cnn_model.add(Dense(50, activation='relu'))
    cnn_model.add(Dense(1))
    cnn_model.compile(optimizer='adam', loss='mae')
    cnn_model.fit(X_train, Y_train, epochs=500, verbose=1, batch_size=16)
    Model_Array['CNN'] = cnn_model.evaluate(X_test, Y_test)
    predicted_CNN = sc_predict.inverse_transform(cnn_model.predict(X_test))

    def generator():
        # LSTM generator: same architecture as the plain LSTM model.
        gen = Sequential()
        gen.add(CuDNNLSTM(200, input_shape=(lookback, features)))
        gen.add(Dense(1, activation='linear'))
        return gen

    def new_generator():
        # CNN generator: same architecture as the plain CNN model.
        gcnn = Sequential()
        gcnn.add(Conv1D(filters=128, kernel_size=2, activation='relu', input_shape=(lookback, features)))
        gcnn.add(Conv1D(filters=128, kernel_size=2, activation='relu'))
        gcnn.add(MaxPooling1D(2))
        gcnn.add(Conv1D(filters=128, kernel_size=1, activation='relu'))
        gcnn.add(Conv1D(filters=128, kernel_size=1, activation='relu'))
        gcnn.add(Flatten())
        gcnn.add(Dense(50, activation='relu'))
        gcnn.add(Dense(1))
        return gcnn

    def discriminator():
        # Small dense network mapping one scalar to one scalar.
        model = Sequential()
        model.add(Dense((10), input_shape=(1,)))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dense(int((10) / 2)))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dense(1, activation='linear'))
        return model

    def stacked_generator_discriminator(D, G):
        # Generator followed by the discriminator, with D marked non-trainable.
        D.trainable = False
        model = Sequential()
        model.add(G)
        model.add(D)
        return model

    Generator = generator()
    Generator.compile(loss='mae', optimizer="adam")

    Generator_CNN = new_generator()
    Generator_CNN.compile(loss='mae', optimizer="adam")

    Discriminator = discriminator()
    Discriminator.compile(loss='mse', optimizer="adam")

    stacked = stacked_generator_discriminator(Discriminator, Generator)
    stacked.compile(loss='mae', optimizer='adam')

    stacked_CNN = stacked_generator_discriminator(Discriminator, Generator_CNN)
    stacked_CNN.compile(loss='mae', optimizer='adam')

    epochs = 6000
    batch = 16
    half_batch = batch // 2

    def train_gan(gen_model, stacked_model):
        # Alternate one discriminator step and one stacked-model step per epoch.
        # Seed by *calling* np.random.seed; the former `np.random.seed=1`
        # replaced the function with an int and seeded nothing.  (A local
        # `PYTHONHASHSEED=0` was dropped too: assigning that name inside the
        # function does not affect the interpreter's hash seed.)
        np.random.seed(1)
        for count in range(epochs):
            # Integer upper bound so a full half batch always fits.
            random_index = np.random.randint(0, len(X_train) - half_batch)
            real_targets = Y_train[random_index:random_index + half_batch]
            gen_data = gen_model.predict(X_train[random_index:random_index + half_batch])
            gen_data = gen_data.reshape((half_batch,))

            # The discriminator is fitted to reproduce its input: real
            # targets followed by generated values, used as both x and y.
            x_combined_batch = np.concatenate((real_targets, gen_data))
            y_combined_batch = np.concatenate((real_targets, gen_data))
            d_loss = Discriminator.train_on_batch(x_combined_batch, y_combined_batch)

            g_loss = stacked_model.train_on_batch(X_train[random_index:random_index + batch],
                                                  Y_train[random_index:random_index + batch])
            logger.info('epoch: {}, [Discriminator: {}], [Generator: {}]'.format(count, d_loss, g_loss))

    print("GAN - LSTM is being Trained and Tested now\n")
    train_gan(Generator, stacked)
    Model_Array['GAN-LSTM'] = Generator.evaluate(X_test, Y_test)
    predicted_GAN = sc_predict.inverse_transform(Generator.predict(X_test))

    # This message previously repeated "GAN - LSTM" for the CNN generator.
    print("GAN - CNN is being Trained and Tested now\n")
    train_gan(Generator_CNN, stacked_CNN)
    Model_Array['GAN-CNN'] = Generator_CNN.evaluate(X_test, Y_test)
    predicted_GAN_CNN = sc_predict.inverse_transform(Generator_CNN.predict(X_test))

    print(Model_Array)
    # Lowest evaluation loss wins.
    best_model = min(Model_Array, key=Model_Array.get)

    print("\n")
    print("#############################################")
    print("Best Model with the Current Data -->",best_model)
    return best_model
X = X / float(n_vocab)

# One-hot encode the output targets.
y = np_utils.to_categorical(data_y)

# =============================================================================
# Define LSTM parameters
# NOTE -- IF YOU DO NOT HAVE TENSORFLOW-GPU installed with CUDA this will fail
# =============================================================================
LSTM_layer_num = 4  # number of LSTM layers
layer_size = [256, 256, 256, 256]  # number of nodes in each layer

# Every recurrent layer returns its full sequence; only the first one
# declares the input shape.
recurrent_stack = [
    CuDNNLSTM(layer_size[0],
              input_shape=(X.shape[1], X.shape[2]),
              return_sequences=True)
]
recurrent_stack += [
    CuDNNLSTM(layer_size[i], return_sequences=True)
    for i in range(1, LSTM_layer_num)
]

# Flatten the sequence output and classify with a softmax layer.
head = [Flatten(), Dense(y.shape[1]), Activation('softmax')]

model = Sequential()
for layer in recurrent_stack + head:
    model.add(layer)

model.compile(loss='categorical_crossentropy', optimizer='adam')
model.summary()

# =============================================================================
# Set up checkpoints at each epoch: save out any improvement to the loss function
def DeepConvLSTM_Model(x_train,
                       y_train,
                       x_val,
                       y_val,
                       x_test,
                       labels,
                       lr=0.01,
                       batch_size=20000,
                       epochs=100,
                       model_filename="model_deepConvLSTM.h5",
                       submit_filename="submission_deepConvLSTM.csv"):
    """Fit a Conv1D x3 -> CuDNNLSTM -> Dense classifier and return its history.

    Args:
        x_train, x_val, x_test: arrays reshaped here to
            ``(samples, img_width, img_height)`` using the module-level
            ``img_width`` / ``img_height``; ``x_test`` is reshaped but not
            used afterwards.
        y_train, y_val: targets passed to ``fit`` / ``validation_data``.
        labels: number of units of the softmax output layer.
        lr: learning rate for Adam.
        batch_size, epochs: forwarded to ``fit``.
        model_filename, submit_filename: currently unused.

    Returns:
        The object returned by ``model.fit``.
    """
    print(
        "-------------------------------------------------------------------------------------\n"
    )
    print("[+] DeepConvLSTM Model.\n")

    def as_sequences(batch):
        # (samples, ...) -> (samples, img_width, img_height)
        return np.reshape(batch, (batch.shape[0], img_width, img_height))

    x_train = as_sequences(x_train)
    x_val = as_sequences(x_val)
    x_test = as_sequences(x_test)

    # Settings shared by all three convolutional layers.
    conv_kwargs = dict(kernel_size=5,
                       activation='relu',
                       padding='same',
                       kernel_initializer='normal')

    model = Sequential()
    # Only the first convolution declares the input shape.
    model.add(Conv1D(32,
                     input_shape=(x_train.shape[1], x_train.shape[2]),
                     **conv_kwargs))
    model.add(BatchNormalization())
    for _ in range(2):
        model.add(Conv1D(32, **conv_kwargs))
        model.add(BatchNormalization())

    # Recurrent summary of the convolved sequence, then the dense classifier.
    model.add(CuDNNLSTM(256, kernel_initializer='normal'))
    model.add(Dense(256, activation='relu', kernel_initializer='normal'))
    model.add(Dense(labels, activation='softmax', kernel_initializer='normal'))

    model.compile(optimizer=Adam(lr=lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    print(model.summary())

    train_history = model.fit(x=x_train,
                              y=y_train,
                              validation_data=(x_val, y_val),
                              epochs=epochs,
                              batch_size=batch_size)

    return train_history
def define_keras_rnn_model(layer_type, bidirectional, rnn_size, feature_dim,
                           output_dim):
    """Build and compile a two-layer recurrent model trained with CTC loss.

    Args:
        layer_type: One of 'simplernn', 'lstm', 'gru', 'cudnnlstm',
            'cudnngru'.
        bidirectional: When truthy, each recurrent layer is wrapped in
            ``Bidirectional(..., merge_mode='concat')``.
        rnn_size: Number of units of each recurrent layer.
        feature_dim: Size of the last axis of the 'the_input' tensor.
        output_dim: Number of units of the time-distributed softmax layer.

    Returns:
        A compiled Keras model with inputs
        ``[the_input, the_labels, input_length, label_length]`` whose single
        output is the CTC loss computed in a Lambda layer, or ``None`` when
        ``layer_type`` is not one of the supported values.
    """
    import keras
    from keras import backend as K
    from keras.models import Model as KerasModel
    from keras.layers import LSTM, Input, Lambda, Bidirectional, TimeDistributed, Dense, SimpleRNN, GRU
    from keras.layers import CuDNNLSTM, CuDNNGRU

    # Layer class and constructor keyword arguments for every supported type.
    # Both stacked layers of a model share these settings and differ only in
    # their name.
    layer_specs = {
        'simplernn': (SimpleRNN,
                      dict(activation='tanh',
                           use_bias=True,
                           kernel_initializer='glorot_uniform',
                           recurrent_initializer='orthogonal',
                           bias_initializer='RandomNormal',
                           dropout=0.0,
                           recurrent_dropout=0.0,
                           return_sequences=True,
                           return_state=False,
                           go_backwards=False,
                           stateful=False,
                           unroll=False)),
        'lstm': (LSTM,
                 dict(return_sequences=True,
                      kernel_initializer='he_normal',
                      bias_initializer='RandomNormal',
                      unit_forget_bias=False,
                      activation='tanh',
                      recurrent_activation='sigmoid')),
        'cudnnlstm': (CuDNNLSTM,
                      dict(kernel_initializer='glorot_uniform',
                           recurrent_initializer='orthogonal',
                           bias_initializer='RandomNormal',
                           unit_forget_bias=True,
                           return_sequences=True,
                           return_state=False,
                           stateful=False)),
        'gru': (GRU,
                dict(activation='tanh',
                     recurrent_activation='sigmoid',
                     use_bias=True,
                     kernel_initializer='glorot_uniform',
                     recurrent_initializer='orthogonal',
                     bias_initializer='RandomNormal',
                     dropout=0.0,
                     recurrent_dropout=0.0,
                     implementation=1,
                     return_sequences=True,
                     return_state=False,
                     go_backwards=False,
                     stateful=False,
                     unroll=False,
                     reset_after=False)),  # set to True for CUDNN implementation
        'cudnngru': (CuDNNGRU,
                     dict(kernel_initializer='glorot_uniform',
                          recurrent_initializer='orthogonal',
                          bias_initializer='zeros',
                          return_sequences=True,
                          return_state=False,
                          stateful=False)),
    }

    if layer_type not in layer_specs:
        return None

    # define CTC function for Keras implementation
    def ctc_lambda_func(args):
        y_pred, labels, input_seq_len, label_seq_len = args
        # the 2 is critical here since the first couple outputs of the RNN
        # tend to be garbage:
        y_pred = y_pred[:, 2:, :]
        return K.ctc_batch_cost(labels, y_pred, input_seq_len - 2,
                                label_seq_len)

    layer_cls, layer_kwargs = layer_specs[layer_type]

    def recurrent_layer(name):
        """Return one recurrent layer, wrapped in Bidirectional if requested."""
        layer = layer_cls(rnn_size, name=name, **layer_kwargs)
        if bidirectional:
            return Bidirectional(layer, merge_mode='concat')
        return layer

    # start building the Keras model
    input_data = Input(name='the_input', shape=(None, feature_dim))
    out_layer1 = recurrent_layer('birnn1')(input_data)
    out_layer2 = recurrent_layer('birnn2')(out_layer1)

    y_pred = TimeDistributed(Dense(output_dim,
                                   name="y_pred",
                                   kernel_initializer='he_normal',
                                   bias_initializer='RandomNormal',
                                   activation="softmax"),
                             name="out")(out_layer2)

    # Input of labels and other CTC requirements
    labels = Input(name='the_labels', shape=[
        None,
    ], dtype='int32')
    input_length = Input(name='input_length', shape=[1], dtype='int32')
    label_length = Input(name='label_length', shape=[1], dtype='int32')

    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1, ), name='ctc')(
        [y_pred, labels, input_length, label_length])

    model = KerasModel(
        inputs=[input_data, labels, input_length, label_length],
        outputs=[loss_out])

    adam = keras.optimizers.Adam(lr=1e-3,
                                 clipvalue=10000,
                                 clipnorm=5.,
                                 epsilon=1e-8)

    # The 'ctc' Lambda output already is the loss, so the Keras loss function
    # simply passes the prediction through.
    model.compile(optimizer=adam, loss={'ctc': lambda y_true, y_pred: y_pred})
    return model
name='particles_input_bnorm')(input_particles) h = Conv1D(32, 2, activation='relu', name='particles_conv0', kernel_initializer='lecun_uniform', padding='same')(h) h = BatchNormalization(momentum=0.6, name='particles_conv0_bnorm')(h) h = Conv1D(16, 4, activation='relu', name='particles_conv1', kernel_initializer='lecun_uniform', padding='same')(h) h = BatchNormalization(momentum=0.6, name='particles_conv1_bnorm')(h) h = CuDNNLSTM(100, name='particles_lstm')(h) h = Dropout(0.1)(h) h = BatchNormalization(momentum=0.6, name='particles_lstm_norm')(h) h = Dense(100, activation='relu', name='particles_lstm_dense', kernel_initializer='lecun_uniform')(h) particles_final = BatchNormalization(momentum=0.6, name='particles_lstm_dense_norm')(h) # merge everything to_merge = [particles_final, input_mass, input_pt] h = concatenate(to_merge) for i in xrange(1, 5): h = Dense(50, activation='relu', name='final_dense%i' % i)(h)
def sequence_attention_model(opt):
    '''
    Implementation of the sequence attention (Read2Phenotype) model.

    Reads from ``opt``: SEQLEN, BASENUM, if_cnn, n_cnn_layer, n_cnn_filters,
    cnn_window, if_lstm, device, n_lstm_node, att_n_layer, att_n_node,
    fc_n_layer, fc_n_node, drop_out_rate, Ty, opt_lr and opt_decay.

    Returns a compiled Keras model: a single sigmoid unit with binary
    cross-entropy when ``opt.Ty == 2``, otherwise ``opt.Ty`` softmax units
    with categorical cross-entropy.
    '''
    # Define model
    X = Input(shape=(opt.SEQLEN, opt.BASENUM))

    ## CONV Layers
    # With if_cnn == 0 the input goes straight to the recurrent stage;
    # otherwise it passes through the conv blocks and then the residual blocks.
    X_cnn = X
    if opt.if_cnn != 0:
        # cnn
        for i in range(opt.n_cnn_layer):
            X_cnn = conv_net_block(X_cnn, opt.n_cnn_filters, opt.cnn_window,
                                   'convblock_{}'.format(str(i)))
        # res_net
        for i in range(opt.n_cnn_layer):
            X_cnn = res_net_block(X_cnn, opt.n_cnn_filters, opt.cnn_window,
                                  'resblock_{}'.format(str(i)))

    ## RNN Layers
    use_gpu = opt.device == "gpu"
    if opt.if_lstm == 0:
        H_lstm = X_cnn
    elif opt.if_lstm == 1:
        rnn_cls = CuDNNLSTM if use_gpu else LSTM
        H_lstm = rnn_cls(opt.n_lstm_node, return_sequences=True,
                         name='LSTM')(X_cnn)
    else:
        if use_gpu:
            # CuDNNLSTM has fixed activations and does not accept the
            # `activation` / `recurrent_activation` keyword arguments;
            # passing them makes layer construction fail.
            inner = CuDNNLSTM(opt.n_lstm_node, return_sequences=True)
        else:
            inner = LSTM(opt.n_lstm_node,
                         return_sequences=True,
                         activation='tanh',
                         recurrent_activation='sigmoid')
        H_lstm = Bidirectional(inner, merge_mode='sum', name='LSTM')(X_cnn)
    # NOTE(review): applied to the output of every RNN configuration above;
    # confirm against the original indentation of this line.
    H_lstm = Activation('tanh')(H_lstm)

    ## ATT Layers
    r_emb = attention_layer(H_lstm,
                            opt.att_n_layer,
                            opt.att_n_node,
                            block_name='att')

    # additional fully connected
    r_emb = fully_connected(r_emb,
                            opt.fc_n_layer,
                            opt.fc_n_node,
                            opt.drop_out_rate,
                            block_name='fc')

    # Output head and matching loss.
    if opt.Ty == 2:
        out = Dense(1, activation='sigmoid', name='final_dense')(r_emb)
        loss = 'binary_crossentropy'
    else:
        out = Dense(opt.Ty, activation='softmax', name='final_dense')(r_emb)
        loss = 'categorical_crossentropy'

    model = Model(inputs=X, outputs=out)

    # Compile model
    model.compile(optimizer=Adam(lr=opt.opt_lr,
                                 beta_1=0.9,
                                 beta_2=0.999,
                                 decay=opt.opt_decay),
                  metrics=['accuracy'],
                  loss=loss)
    return model
def create_model(max_story_len, max_question_len, vocab_size):
    """Build and compile the story/question answering network.

    Args:
        max_story_len: Length of the story input sequence.
        max_question_len: Length of the question input sequence; also used
            as the embedding width of the second story encoder.
        vocab_size: Vocabulary size, used for every embedding and for the
            size of the softmax output.

    Returns:
        The compiled Keras model taking ``[input_sequence, question]``.
    """
    # initialise input_sequence and question input layers
    input_sequence = Input((max_story_len, ))
    question = Input((max_question_len, ))

    # Input Encoder m: embeds the story into a sequence of 64-d vectors.
    # output: (samples, story_maxlen, embedding_dim)
    input_encoder_m = Sequential()
    input_encoder_m.add(Embedding(input_dim=vocab_size, output_dim=64))
    input_encoder_m.add(Dropout(0.3))

    # Input Encoder c: embeds the story into vectors of size query_maxlen.
    # output: (samples, story_maxlen, query_maxlen)
    input_encoder_c = Sequential()
    input_encoder_c.add(
        Embedding(input_dim=vocab_size, output_dim=max_question_len))
    input_encoder_c.add(Dropout(0.3))

    # Question Encoder: embeds the question into a sequence of vectors.
    # output: (samples, query_maxlen, embedding_dim)
    question_encoder = Sequential()
    question_encoder.add(
        Embedding(input_dim=vocab_size,
                  output_dim=64,
                  input_length=max_question_len))
    question_encoder.add(Dropout(0.3))

    # Encode input sequence and questions to sequences of dense vectors
    input_encoded_m = input_encoder_m(input_sequence)
    input_encoded_c = input_encoder_c(input_sequence)
    question_encoded = question_encoder(question)

    # pi = Softmax(uTmi)
    # shape: `(samples, story_maxlen, query_maxlen)`
    match = dot([input_encoded_m, question_encoded], axes=(2, 2))
    match = Activation('softmax')(match)

    # o = Sum(pici)
    # add the match matrix with the second input vector sequence
    response = add([match,
                    input_encoded_c])  # (samples, story_maxlen, query_maxlen)
    response = Permute(
        (2, 1))(response)  # (samples, query_maxlen, story_maxlen)

    # ^a = Softmax(W(o + u))
    # concatenate the match matrix with the question vector sequence
    answer = concatenate([response, question_encoded])

    # Reduce with LSTM
    answer = CuDNNLSTM(32)(answer)  # (samples, 32)

    # Regularization with Dropout
    answer = Dropout(0.5)(answer)
    answer = Dense(vocab_size)(answer)  # (samples, vocab_size)

    # we output a probability distribution over the vocabulary
    answer = Activation('softmax')(answer)

    # build the final model
    model = Model([input_sequence, question], answer)
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # summary() prints the table itself and returns None, so wrapping it in
    # print() only appended a stray "None" line.
    model.summary()
    return model
def get_mobilenet_segm_depth(seq_length, sine_steering, freeze, asd):
    """Assemble the sequence model that predicts steering and target speed.

    A segmentation network (from ``get_segmentation_model(freeze)``) is
    applied to every frame, its output is encoded by five time-distributed
    conv stages, merged with the ``hlc_input`` and ``info_input`` sequences
    and reduced by a CuDNNLSTM. ``sine_steering`` selects a 10-unit tanh
    steering head instead of a 1-unit relu head. ``asd`` is not used.

    Returns the (uncompiled) Keras model; its summary is printed.
    """
    hlc_input = Input(shape=(seq_length, 4), name="hlc_input")
    info_input = Input(shape=(seq_length, 3), name="info_input")

    segmentation_model = get_segmentation_model(freeze)
    [_, height, width, _] = segmentation_model.input.shape.dims
    forward_image_input = Input(shape=(seq_length, height.value, width.value,
                                       3),
                                name="forward_image_input")
    segmentation_output = TimeDistributed(segmentation_model)(
        forward_image_input)

    # Vanilla encoder: pad -> conv -> batch-norm -> relu -> max-pool, applied
    # per time step, once for each filter count listed below.
    kernel = 3
    filter_size = 64
    pad = 1
    pool_size = 2

    encoded = segmentation_output
    for n_filters in (filter_size, 128, 256, 256, 256):
        encoded = TimeDistributed(ZeroPadding2D((pad, pad)))(encoded)
        encoded = TimeDistributed(
            Conv2D(n_filters, (kernel, kernel), padding='valid'))(encoded)
        encoded = TimeDistributed(BatchNormalization())(encoded)
        encoded = TimeDistributed(Activation('relu'))(encoded)
        encoded = TimeDistributed(MaxPooling2D(
            (pool_size, pool_size)))(encoded)

    segmentation_output = TimeDistributed(Flatten())(encoded)

    merged = concatenate([segmentation_output, hlc_input, info_input])
    merged = TimeDistributed(Dense(100, activation="relu"))(merged)
    merged = concatenate([merged, hlc_input])

    merged = CuDNNLSTM(10, return_sequences=False)(merged)

    # Append the last time step of hlc_input to the LSTM output.
    hlc_latest = Lambda(lambda t: t[:, -1, :])(hlc_input)
    merged = concatenate([merged, hlc_latest])

    if sine_steering:
        steer_pred = Dense(10, activation="tanh", name="steer_pred")(merged)
    else:
        steer_pred = Dense(1, activation="relu", name="steer_pred")(merged)

    target_speed_pred = Dense(1,
                              name="target_speed_pred",
                              activation="sigmoid")(merged)

    model = Model(inputs=[forward_image_input, hlc_input, info_input],
                  outputs=[steer_pred, target_speed_pred])
    model.summary()
    return model
def Trainer(token, dataframe, Threshold, target, Corr_Thresh, timesteps):
    """Prepare sliding-window data and train the network selected by `token`.

    Args:
        token: 'LSTM', 'CNN', 'GAN-LSTM' or 'GAN-CNN'. Any other value only
            runs the data preparation.
        dataframe, Threshold, Corr_Thresh: Forwarded to ``DataProcessor``.
        target: Name of the target column; it is looked up in the columns of
            the frame returned by ``DataProcessor``.
        timesteps: Number of past rows in each training window.

    Returns:
        None. The trained networks are not returned to the caller.
    """
    data = DataProcessor(dataframe, Threshold, target, Corr_Thresh)
    target = list(data.columns).index(target)
    lookback = timesteps
    features = data.shape[1]

    training_data = data.iloc[:, :]
    sc = MinMaxScaler(feature_range=(0, 1))
    training_data_scaled = sc.fit_transform(training_data)

    # Each sample is the `lookback` rows preceding row i; its label is the
    # scaled target value of row i.
    row_indices = range(lookback, training_data.shape[0])
    X_train = np.array(
        [training_data_scaled[i - lookback:i, :] for i in row_indices])
    Y_train = np.array([training_data_scaled[i, target] for i in row_indices])

    def build_lstm_regressor():
        """Return an uncompiled CuDNNLSTM(200) -> Dense(1) network."""
        net = Sequential()
        net.add(CuDNNLSTM(units=200, input_shape=(lookback, features)))
        net.add(Dense(units=1, activation='linear'))
        return net

    def build_cnn_regressor():
        """Return an uncompiled Conv1D stack ending in Dense(1)."""
        net = Sequential()
        net.add(
            Conv1D(filters=128,
                   kernel_size=2,
                   activation='relu',
                   input_shape=(lookback, features)))
        net.add(Conv1D(filters=128, kernel_size=2, activation='relu'))
        net.add(MaxPooling1D(2))
        net.add(Conv1D(filters=128, kernel_size=1, activation='relu'))
        net.add(Conv1D(filters=128, kernel_size=1, activation='relu'))
        net.add(Flatten())
        net.add(Dense(50, activation='relu'))
        net.add(Dense(1))
        return net

    def build_discriminator():
        """Return the uncompiled dense discriminator taking one value."""
        net = Sequential()
        net.add(Dense((10), input_shape=(1, )))
        net.add(LeakyReLU(alpha=0.2))
        net.add(Dense(int((10) / 2)))
        net.add(LeakyReLU(alpha=0.2))
        net.add(Dense(1, activation='linear'))
        return net

    def train_gan(generator_net):
        """Run the generator/discriminator training loop for `generator_net`."""
        generator_net.compile(loss='mae', optimizer="adam")
        discriminator_net = build_discriminator()
        discriminator_net.compile(loss='mse', optimizer="adam")

        # The discriminator is frozen inside the stacked model so that
        # training the stack only updates the generator.
        discriminator_net.trainable = False
        stacked = Sequential()
        stacked.add(generator_net)
        stacked.add(discriminator_net)
        stacked.compile(loss='mae', optimizer='adam')

        epochs = 6000
        batch = 16
        half_batch = batch // 2

        # Seed NumPy's RNG. The previous `np.random.seed = 1` replaced the
        # seed function with an integer instead of calling it.
        np.random.seed(1)

        for count in range(epochs):
            # Integer upper bound so a full half batch always fits.
            random_index = np.random.randint(0, len(X_train) - half_batch)
            gen_data = generator_net.predict(
                X_train[random_index:random_index + half_batch])
            gen_data = gen_data.reshape((half_batch, ))

            # NOTE(review): the discriminator receives the same array as
            # input and as target (real values followed by generated ones);
            # confirm this is the intended objective.
            combined_batch = np.concatenate(
                (Y_train[random_index:random_index + half_batch], gen_data))
            d_loss = discriminator_net.train_on_batch(combined_batch,
                                                      combined_batch)

            g_loss = stacked.train_on_batch(
                X_train[random_index:random_index + batch],
                Y_train[random_index:random_index + batch])
            logger.info(
                'epoch: {}, [Discriminator: {}], [Generator: {}]'.format(
                    count, d_loss, g_loss))

    if token == 'LSTM':
        print("Token is", token,
              "and now commencing training on the dataset... \n")
        lstm_net = build_lstm_regressor()
        lstm_net.compile(optimizer='adadelta', loss="mean_absolute_error")
        lstm_net.fit(X_train, Y_train, epochs=500, batch_size=16, verbose=1)
    elif token == 'CNN':
        print("Token is", token, "and now commencing training...")
        cnn_net = build_cnn_regressor()
        cnn_net.compile(optimizer='adam', loss='mae')
        cnn_net.fit(X_train, Y_train, epochs=500, verbose=1, batch_size=16)
    elif token == 'GAN-LSTM':
        print("Token is", token, "and now commencing training...")
        train_gan(build_lstm_regressor())
    elif token == 'GAN-CNN':
        print("Token is", token, "and now commencing training...")
        train_gan(build_cnn_regressor())
from typing import List
def generator():
    """Return an uncompiled CuDNNLSTM(200) -> Dense(1, linear) network.

    The input shape ``(lookback, features)`` is read from the enclosing
    scope.
    """
    return Sequential([
        CuDNNLSTM(200, input_shape=(lookback, features)),
        Dense(1, activation='linear'),
    ])
y_test = y_data_new[len(train_indices[0]):] X_pred = X_pred_map y_pred_ref = y_pred_ref_map data_dim = X_train.shape[2] # number of input features timesteps = X_train.shape[1] num_classes = y_train.shape[2] # number of output features batch_size = 10 rms = RMSprop(lr=0.001, decay=0.0001) adam = Adam(lr=0.001, decay=0.0001) model = Sequential() model.add( CuDNNLSTM(100, return_sequences=True, stateful=False, input_shape=(None, data_dim))) model.add(Activation('relu')) # model.add(Dropout(0.2)) model.add(CuDNNLSTM(100, return_sequences=True, stateful=False)) model.add(Activation('relu')) # model.add(Dropout(0.2)) model.add(Dense(100)) # model.add(Activation('relu')) model.add(Dense(num_classes)) model.summary() model.compile( loss= 'mean_squared_error', # categorical_crossentropy, mean_squared_error, mean_absolute_error optimizer=
u_map = Reshape((7 * 7, 512))(u)

# Initial hidden and cell state of the second LSTM, both projected from `u`.
h_0 = Dense(hid_dim)(u)
cell_0 = Dense(hid_dim)(u)

# Token sequence input: the decoder reads all but the last token and is
# scored against all but the first; padded target positions are masked out.
y = Input(shape=(None, ), dtype='int32')
y_in = Lambda(lambda x: x[:, :-1])(y)
y_out = Lambda(lambda x: x[:, 1:])(y)
mask = Lambda(lambda x: K.cast(K.not_equal(x, w2i['<pad>']), 'float32'))(y_out)

embedding = Embedding(vocab_size, emb_dim)
lstm = Bidirectional(
    LSTM(hid_dim, return_sequences=True, dropout=0.25, recurrent_dropout=0.1))
lstm2 = CuDNNLSTM(hid_dim, return_sequences=True, return_state=True)

y_emb = embedding(y_in)
y_emb = Dropout(0.5)(y_emb)
h = lstm(y_emb)

# The dropout output used to be assigned and then ignored (lstm2 was fed the
# un-dropped `h`), which left this Dropout layer outside the graph. Feed it
# into lstm2 so the regularisation actually takes effect.
y_emb = Dropout(0.5)(h)
h, _, _ = lstm2(y_emb, initial_state=[h_0, cell_0])
h = Activation('tanh')(h)

### Attention ###
dense_att = Dense(hid_dim)
_u_map = dense_att(u_map)
print(coord) sys.exit(1) ''' model = Sequential() # model.add(BatchNormalization()) # model.add(Dense(outputs, input_shape=(time_window + 1, feature_count))) # model.add(LSTM(units=256, input_shape=(time_window + 1, feature_count), return_sequences=True)) model.add(BatchNormalization(input_shape=(time_window + 1, feature_count))) model.add(Dropout(0.2)) model.add( CuDNNLSTM(units=256, input_shape=(time_window + 1, feature_count), return_sequences=False)) model.add(Dropout(0.5)) model.add(Dense(outputs)) # model.add(CuDNNLSTM(units=outputs, return_sequences=False)) # model.add(CuDNNGRU(units=outputs, input_shape=(time_window + 1, feature_count), return_sequences=False)) # model.add(LSTM(units=outputs)) # model.add(Dense(outputs)) # model.add(Activation('softmax')) # opt = RMSprop(0.001) opt = SGD() model.compile(loss='mean_squared_error', optimizer=opt, metrics=['accuracy']) date = str(datetime.datetime.now().isoformat())
X_test = X[test_idx]
y_test = ratings[test_idx]
val_ratio = 0.1

# Single-argument print(...) calls behave the same under Python 2 and 3.
print('Training data size: {}'.format(X_train.shape))
print('Test data size: {}'.format(X_test.shape))
print('Validation ratio: {} % of training data'.format(val_ratio * 100))

vocab_size = 5000
embedding_size = 32

# define model
model = Sequential()
model.add(
    Embedding(vocab_size, embedding_size, input_length=max_review_length))
model.add(CuDNNLSTM(128))
model.add(Dense(1, activation=None))

# Pass the configured optimizer instance. Compiling with the string 'adam'
# silently discarded the learning rate and decay set here.
optim = optimizers.Adam(lr=0.001, decay=0.001)
model.compile(loss='mse', optimizer=optim, metrics=['mse'])

tensorboard = TensorBoard(log_dir='./logs', write_graph=True)
earlystopping = EarlyStopping(monitor='val_loss',
                              min_delta=0,
                              patience=2,
                              verbose=0,
                              mode='auto')
description='description', # 引数のヘルプの前に表示 add_help=True, # -h/–help オプションの追加 ) # 引数の追加 parser.add_argument('-l', '--lstm', help='select lstm model', default=True) parser.add_argument('-m', '--mlp', help='select mlp model', default=False) # 引数を解析する args = parser.parse_args() # Next, we build a model. print('Build model...') if args.lstm: model = Sequential() model.add(CuDNNLSTM(30, input_shape=(1, ) + env.observation_space.shape)) model.add(Dense(18)) model.add(LeakyReLU(alpha=0.3)) model.add(BatchNormalization(momentum=0.8)) model.add(Dense(nb_actions)) model.add(Activation('linear')) print('load model...') model.load_weights( '/home/farmhouse/bitmex/bitcoin_fx_bot/weights/dqn_lstm_ccxt_bitmex-v0_weights_2018_8_14_12_45.h5f' ) if args.mlp: model = Sequential() model.add(Flatten(input_shape=(1, ) + env.observation_space.shape)) model.add(Dense(49)) model.add(LeakyReLU(alpha=0.3)) model.add(BatchNormalization(momentum=0.8))
lf = 'binary_crossentropy'
ep = 1000
bs = 32
val_split = 0.2
es = EarlyStopping(monitor='val_loss',
                   min_delta=0,
                   patience=10,
                   verbose=0,
                   mode='auto',
                   baseline=None,
                   restore_best_weights=True)

# Construction layers:
classifier = Sequential()
classifier.add(InputLayer(input_shape=(lookback, 1)))
classifier.add(CuDNNLSTM(units=input_units, return_sequences=False))
classifier.add(Dropout(dp))
classifier.add(Dense(units=output_units, activation=act))
classifier.compile(optimizer=opt, loss=lf)

#------------ Loop over the study periods -------------
for i in range(0, len(return_window)):
    # Determine which stocks are eligible for the study period: flagged with
    # 1 in the binary matrix row for this period ...
    vec0 = list(np.where(binary_matrix[(749 + i * test), :] == 1)[0])
    vec = []
    for u in vec0:
        # ... and not entirely NaN in either the 0:750 or the 750:1000
        # slice of the return window.
        if not all(np.isnan(return_window[i, 0:750, u])) and not all(
                np.isnan(return_window[i, 750:1000, u])):
            vec.append(u)
# Hyper-parameters
units = 50
second_units = 30
batch_size = 8
nb_features = datas.shape[2]
epochs = 50
output_size = 16
reg = 1
output_file_name = 'bitcoin2015to2017_close_LSTM_1_tanh_leaky_areg_l1_' + str(
    reg)

# Split into training and validation sets: the first 80 % of the samples are
# used for training, the rest for validation.
training_size = int(0.8 * datas.shape[0])
training_datas = datas[:training_size, :]
training_labels = labels[:training_size, :, 0]
validation_datas = datas[training_size:, :]
validation_labels = labels[training_size:, :, 0]

# Build the model: L1-activity-regularised CuDNNLSTM -> tanh -> dropout ->
# dense -> leaky ReLU.
model = Sequential()
model.add(
    CuDNNLSTM(units=units,
              activity_regularizer=regularizers.l1(reg),
              input_shape=(step_size, nb_features),
              return_sequences=False))
model.add(Activation('tanh'))
model.add(Dropout(0.2))
model.add(Dense(output_size))
model.add(LeakyReLU())

model.compile(optimizer='adam', loss='mse')

# Per-epoch metrics are appended to "<output_file_name>.csv".
model.fit(training_datas,
          training_labels,
          epochs=epochs,
          batch_size=batch_size,
          validation_data=(validation_datas, validation_labels),
          callbacks=[CSVLogger(output_file_name + '.csv', append=True)])
def compile_elmo(self, print_summary=False):
    """Build the bidirectional language model and store it on the instance.

    The token encoder is chosen by ``self.parameters['token_encoding']``
    ('word' or 'char'). A forward and a backward stack of residual LSTM
    blocks are built on the encoded tokens and both are fed to one shared
    ``SampledSoftmax`` layer. The result is stored in ``self._model`` and
    ``self._elmo_model``.

    Args:
        print_summary: When true, print the Keras model summary.

    Raises:
        ValueError: If ``token_encoding`` is neither 'word' nor 'char'
            (this previously surfaced as an UnboundLocalError).
    """
    params = self.parameters
    encoding = params['token_encoding']

    if encoding == 'word':
        word_inputs = Input(shape=(None, ),
                            name='word_indices',
                            dtype='int32')
        embeddings = Embedding(params['vocab_size'],
                               params['hidden_units_size'],
                               trainable=True,
                               name='token_encoding')
        inputs = embeddings(word_inputs)
    elif encoding == 'char':
        word_inputs = Input(shape=(
            None,
            params['token_maxlen'],
        ),
                            dtype='int32',
                            name='char_indices')
        inputs = self.char_level_token_encoder()(word_inputs)
    else:
        raise ValueError(
            "Unsupported token_encoding: {!r}".format(encoding))

    # Shared by both encodings.
    drop_inputs = SpatialDropout1D(params['dropout_rate'])(inputs)
    lstm_inputs = TimestepDropout(params['word_dropout_rate'])(drop_inputs)
    next_ids = Input(shape=(None, 1), name='next_ids', dtype='float32')
    previous_ids = Input(shape=(None, 1),
                         name='previous_ids',
                         dtype='float32')

    # Reversed copies feed the backward language model.
    re_lstm_inputs = Lambda(function=ELMo_obj.reverse)(lstm_inputs)
    mask = Lambda(function=ELMo_obj.reverse)(drop_inputs)

    def build_lstm_stack(stack_inputs, mask_source, block_prefix):
        """Stack n_lstm_layers residual LSTM blocks on `stack_inputs`."""
        for i in range(params['n_lstm_layers']):
            if params['cuDNN']:
                lstm = CuDNNLSTM(
                    units=params['lstm_units_size'],
                    return_sequences=True,
                    kernel_constraint=MinMaxNorm(-1 * params['cell_clip'],
                                                 params['cell_clip']),
                    recurrent_constraint=MinMaxNorm(
                        -1 * params['cell_clip'],
                        params['cell_clip']))(stack_inputs)
            else:
                lstm = LSTM(
                    units=params['lstm_units_size'],
                    return_sequences=True,
                    activation='tanh',
                    recurrent_activation='sigmoid',
                    kernel_constraint=MinMaxNorm(-1 * params['cell_clip'],
                                                 params['cell_clip']),
                    recurrent_constraint=MinMaxNorm(
                        -1 * params['cell_clip'],
                        params['cell_clip']))(stack_inputs)
            lstm = Camouflage(mask_value=0)(inputs=[lstm, mask_source])
            # Project back to hidden_units_size so the residual add matches.
            proj = TimeDistributed(
                Dense(params['hidden_units_size'],
                      activation='linear',
                      kernel_constraint=MinMaxNorm(
                          -1 * params['proj_clip'],
                          params['proj_clip'])))(lstm)
            stack_inputs = add([proj, stack_inputs],
                               name='{}_block_{}'.format(block_prefix, i + 1))
            stack_inputs = SpatialDropout1D(
                params['dropout_rate'])(stack_inputs)
        return stack_inputs

    # Forward stack first, then the backward stack, as before.
    lstm_inputs = build_lstm_stack(lstm_inputs, drop_inputs, 'f')
    re_lstm_inputs = build_lstm_stack(re_lstm_inputs, mask, 'b')

    # Undo the reversal of the backward stack's output.
    re_lstm_inputs = Lambda(function=ELMo_obj.reverse,
                            name="reverse")(re_lstm_inputs)

    # `embeddings` only exists for word encoding; the condition guarantees it
    # is not evaluated otherwise.
    sampled_softmax = SampledSoftmax(
        num_classes=params['vocab_size'],
        num_sampled=int(params['num_sampled']),
        tied_to=embeddings
        if params['weight_tying'] and encoding == 'word' else None)
    outputs = sampled_softmax([lstm_inputs, next_ids])
    re_outputs = sampled_softmax([re_lstm_inputs, previous_ids])

    self._model = Model(inputs=[word_inputs, next_ids, previous_ids],
                        outputs=[outputs, re_outputs])
    # The model is intentionally left uncompiled here.
    if print_summary:
        self._model.summary()

    self._elmo_model = self._model
X = pad_sequences(X, maxlen=util.TWEET_LENGTH)

lstm_out = 200
word_index = tokenizer.word_index
embedding_matrix, nb_words = util.load_glove_model(word_index)

# Frozen pre-trained embedding followed by two bidirectional CuDNNLSTM layers
# and a softmax over the labels.
model = Sequential()
model.add(
    Embedding(nb_words,
              util.EMBEDDING_DIM,
              input_length=util.TWEET_LENGTH,
              dropout=0.2,
              weights=[embedding_matrix],
              trainable=False))
model.add(Bidirectional(CuDNNLSTM(lstm_out, return_sequences=True)))
model.add(Bidirectional(CuDNNLSTM(lstm_out, go_backwards=True)))
model.add(Dense(len(data_labels), activation='softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='nadam',
              metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line.
model.summary()

Y = data['Sentiment']
X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                    Y,
                                                    test_size=0.20,
                                                    random_state=42)
print(X_train.shape, Y_train.shape)
print(X_test.shape, Y_test.shape)

validation_size = 1500
#print(out.shape) return (out) def compute_output_shape(self, input_shape): assert isinstance(input_shape, list) shape_a, shape_b = input_shape return (shape_a[0], self.output_dim[0], self.output_dim[1])

# Load the saved embedding model from its JSON/H5 pair, pinned to the CPU.
print("loading embedded model...")
with tf.device("/cpu:0"):
    loaded_model = load_model_from_disk('emb_model.json', 'emb_model.h5')
print("embedded model loaded")

# Layer instances are only created here; none of them is applied to a tensor
# within this section.
# Two sequence-returning cuDNN LSTM layers with 100 units each. The non-cuDNN
# equivalent is LSTM(100, return_sequences=True).
lstm_layer1, lstm_layer2 = (
    CuDNNLSTM(100, return_sequences=True) for _ in range(2)
)
# Two matching layers, each configured with a (max_length, max_length) output.
matching_layer1, matching_layer2 = (
    MatchingLayer((max_length, max_length)) for _ in range(2)
)
# An AggregationLayer() instance (aggregate_layer) is currently disabled.
dropout = Dropout(0.2)
norm = BatchNormalization()

# Input/output tensors of the loaded embedding model.
inputs, outputs = loaded_model.input, loaded_model.output
# Recurrent trunk applied to `dae`: four bidirectional cuDNN LSTM layers joined
# by additive skip connections ("bidirectional LSTM" variant).
#
# Variants that were previously tried and are currently disabled:
#   * "manually partially connected residual LSTM": four stacked bidirectional
#     layers, with Add()([o1, o2]) feeding the third layer and
#     Add()([o3, o4]) written back to `dae`.
#   * "LSTM": two stacked unidirectional CuDNNLSTM layers.
#   * "attention model": TimeDistributed(Dense(filter_size*2, softmax)) over
#     o3, multiplied element-wise with o1.


def _bi_lstm(tensor):
    """Apply a new sequence-returning bidirectional CuDNNLSTM (filter_size units)."""
    return Bidirectional(CuDNNLSTM(filter_size, return_sequences=True))(tensor)


def _merge_and_regularise(tensors):
    """Sum the tensors, then apply Dropout(0.2) followed by BatchNormalization."""
    merged = Add()(tensors)
    merged = Dropout(0.2)(merged)
    return BatchNormalization()(merged)


o1 = _bi_lstm(dae)
# Second layer with a skip connection from the first.
o2 = _merge_and_regularise([o1, _bi_lstm(o1)])
# Third layer sums its own output with both earlier stages.
o3 = _merge_and_regularise([o1, o2, _bi_lstm(o2)])
# Final layer: plain residual sum, no dropout or normalisation.
o4 = Add()([o3, _bi_lstm(o3)])
os.path.join(self.dataset_path, "{}_sum".format(ID) + '.npy')) # Store class y[i] = self.labels[str(ID)] y = keras.utils.to_categorical(y, num_classes=self.n_classes) X = [x_text, x_sum] return X, y # LTSM model architecture # article input model inputs1 = Input(shape=(max_text_length, )) article1 = Embedding(vocalbulary_text, 256)(inputs1) article2 = CuDNNLSTM(1024)(article1) article3 = RepeatVector(config['window'])(article2) # summary input model inputs2 = Input(shape=(config['window'], )) summ1 = Embedding(vocalbulary_summary, 256)(inputs2) summ2 = CuDNNLSTM(1024)(summ1) summ3 = Dense(1024, activation="relu")(summ2) summ4 = Dropout(0.8)(summ3) summ5 = RepeatVector(config['window'])(summ4) # decoder model decoder1 = Concatenate()([article3, summ5]) decoder2 = CuDNNLSTM(1024)(decoder1) decoder3 = Dense(1024, name="dense_two")(decoder2) decoder4 = Dropout(0.8)(decoder3) outputs = Dense(vocalbulary_summary, activation='softmax')(decoder4)