else: embedding_weights[word_placeholder, :] = pos_embeddings_model.get_vec(word_placeholder) except ValueError: pass pos_weights = [embedding_weights] # ========================================================= print('\nBuild model ...') # WORD ------------------------------------- word_inputs = Input(shape=(X_used_row_len,)) word_embedding = Embedding(input_dim=X_word_max_value + 1, output_dim=word_embeddings_dim, input_length=X_used_row_len, weights=word_weights, dropout=0.2, trainable=True, mask_zero=False)(word_inputs) word_encoded = Convolution1D(50, 3, border_mode='valid')(word_embedding) word_encoded = MaxPooling1D(X_used_row_len - 2)(word_encoded) word_encoded = Flatten()(word_encoded) word_c2 = Convolution1D(50, 4, border_mode='valid')(word_embedding) word_c2 = MaxPooling1D(X_used_row_len - 3)(word_c2) word_c2 = Flatten()(word_c2) word_c3 = Convolution1D(50, 5, border_mode='valid')(word_embedding) word_c3 = MaxPooling1D(X_used_row_len - 4)(word_c3) word_c3 = Flatten()(word_c3) word_c4 = Convolution1D(50, 2, border_mode='valid')(word_embedding) word_c4 = MaxPooling1D(X_used_row_len - 1)(word_c4) word_c4 = Flatten()(word_c4)
if model_type == "CNN-static": z = model_input else: z = Embedding(len(vocabulary_inv), embedding_dim, input_length=sequence_length, name="embedding")(model_input) z = Dropout(dropout_prob[0])(z) # Convolutional block conv_blocks = [] for sz in filter_sizes: conv = Convolution1D(filters=num_filters, kernel_size=sz, padding="valid", activation="relu", strides=1)(z) conv = MaxPooling1D(pool_size=2)(conv) conv = Flatten()(conv) conv_blocks.append(conv) z = Concatenate()( conv_blocks) if len(conv_blocks) > 1 else conv_blocks[0] z = Dropout(dropout_prob[1])(z) z = Dense(hidden_dims, activation="relu")(z) model_output = Dense(len(label_dict["label2index"]), activation="softmax")(z) model = Model(model_input, model_output) model.compile(loss="categorical_crossentropy",
def __init__(self, dim, batch_norm, dropout, rec_dropout, task,
             target_repl=False, deep_supervision=False, num_classes=1,
             depth=1, input_dim=376, **kwargs):
    """Build the CNN + LSTM graph and initialise the underlying model.

    Three parallel 1-D convolutions (filter lengths 2, 3 and 4, 100
    filters each) run over the input, are max-pooled, concatenated and
    masked. The result goes through ``depth - 1`` stacked LSTM layers,
    one final LSTM and a dense output layer.

    Args:
        dim: units of the final LSTM; the stacked LSTMs use ``dim`` units,
            or ``dim // 2`` per direction when they are bidirectional.
        batch_norm: stored on the instance; not used while building.
        dropout: input dropout of every LSTM; when > 0 it is also the rate
            of the Dropout layer placed after the final LSTM.
        rec_dropout: recurrent dropout of every LSTM.
        task: 'decomp', 'ihm' or 'ph' select a sigmoid output; 'los'
            selects relu when ``num_classes == 1`` and softmax otherwise.
        target_repl: when true the final LSTM returns full sequences.
        deep_supervision: when true an extra input ``M`` is added, the
            stacked LSTMs are unidirectional and the final LSTM returns
            full sequences.
        num_classes: width of the dense output layer.
        depth: total number of LSTM layers (``depth - 1`` stacked + 1).
        input_dim: feature dimension; the input shape is (48, input_dim).
        **kwargs: ignored; their keys are printed for visibility.

    Raises:
        ValueError: if ``task`` is not one of the recognised task names.
    """
    print("==> not used params in network class:", kwargs.keys())

    self.output_dim = dim
    self.batch_norm = batch_norm
    self.dropout = dropout
    self.rec_dropout = rec_dropout
    self.depth = depth
    self.dropout_words = 0.3
    self.dropout_rnn_U = 0.3
    self.drop_conv = 0.5

    if task in ['decomp', 'ihm', 'ph']:
        final_activation = 'sigmoid'
    elif task in ['los']:
        final_activation = 'relu' if num_classes == 1 else 'softmax'
    else:
        # The original *returned* the exception object, which Python turns
        # into an unrelated "TypeError: __init__() should return None".
        raise ValueError("Wrong value for task")

    # Input layers and masking
    X = Input(shape=(48, input_dim), name='X')
    inputs = [X]

    # emb_text = Dropout(self.dropout_words)(X)
    nfilters = [2, 3, 4]
    nb_filters = 100
    pooling_reps = []
    for filter_len in nfilters:
        feat_maps = Convolution1D(nb_filter=nb_filters,
                                  filter_length=filter_len,
                                  border_mode="same",
                                  activation="relu",
                                  subsample_length=1)(X)
        pool_vecs = MaxPooling1D(pool_length=2)(feat_maps)
        # pool_vecs = Flatten()(pool_vecs)
        pooling_reps.append(pool_vecs)

    representation = merge(pooling_reps, mode='concat')
    mX = Masking()(representation)

    if deep_supervision:
        M = Input(shape=(None, ), name='M')
        inputs.append(M)

    # Configurations: deep supervision disables the bidirectional wrapper.
    is_bidirectional = not deep_supervision

    # Main part of the network
    for _ in range(depth - 1):
        # Halve the width per direction so the concatenated bidirectional
        # output keeps `dim` features.
        num_units = dim // 2 if is_bidirectional else dim
        lstm = LSTM(num_units,
                    activation='tanh',
                    return_sequences=True,
                    dropout_U=rec_dropout,
                    dropout_W=dropout)
        mX = Bidirectional(lstm)(mX) if is_bidirectional else lstm(mX)

    # Output module of the network
    return_sequences = (target_repl or deep_supervision)
    L = LSTM(dim,
             activation='tanh',
             return_sequences=return_sequences,
             dropout_W=dropout,
             dropout_U=rec_dropout)(mX)

    if dropout > 0:
        L = Dropout(dropout)(L)

    y = Dense(num_classes, activation=final_activation)(L)
    outputs = [y]

    super(Network, self).__init__(inputs, outputs)
#model.add(Dense(11, input_shape=(16000,)))
#model.add(Activation('relu'))
#
#model.add(Dense(128))
#model.add(Activation('relu'))
#model.add(Dense(128))
#model.add(Activation('relu'))
#
#model.add(Dense(128))
#model.add(Activation('relu'))
#
#model.add(Dense(11))
#model.add(Activation('softmax'))

# Branch 1: 1-D convolution over an input of shape (16000, 1).
a = Input(shape=(16000, 1))
b = Convolution1D(filters=128, kernel_size=3, activation='relu')(a)
c = Flatten()(b)

# Branch 2: 2-D convolution over an input of shape (32, 16, 1).
a1 = Input(shape=(32, 16, 1))
b1 = Convolution2D(129, kernel_size=(3, 3), activation='relu')(a1)
c1 = Flatten()(b1)

# Join both flattened branches and project them to 128 relu units.
d = keras.layers.concatenate([c, c1])
d = Dense(128, activation='relu')(d)

model = Model(inputs=[a, a1], outputs=d)
# NOTE(review): the output layer is a 128-unit relu while the loss is
# categorical cross-entropy, and the commented-out model above ended in an
# 11-way softmax. Confirm the intended output layer.
model.compile(loss='categorical_crossentropy',
              metrics=['accuracy'],
              optimizer='adam')
# summary() prints the table itself and returns None, so it must not be
# wrapped in a print (which was also Python-2-only statement syntax).
model.summary()
#model.fit(x_train, y_train, batch_size=32, nb_epoch=6, validation_data=(x_test, y_test))
# X_test_r[:, :, 1] = x_test[:, nb_features:2048] X_train_r = np.zeros((len(x_train), nb_features, 3)) # 3通道的输入 X_train_r[:, :, 0] = x_train[:, :nb_features] # 取X_train 的前1024列 X_train_r[:, :, 1] = x_train[:, nb_features:2048] X_train_r[:, :, 2] = x_train[:, 2048:] # reshape validation data X_test_r = np.zeros((len(x_test), nb_features, 3)) X_test_r[:, :, 0] = x_test[:, :nb_features] X_test_r[:, :, 1] = x_test[:, nb_features:2048] X_test_r[:, :, 2] = x_test[:, 2048:] nb_class = 6 model = Sequential() model.add( Convolution1D(nb_filter=8, filter_length=3, strides=1, input_shape=(nb_features, 3), padding="same")) model.add(Activation('relu')) model.add(Convolution1D(nb_filter=8, filter_length=3, padding="same")) model.add(Activation('relu')) model.add(MaxPooling1D(pool_size=2, strides=2)) model.add( Convolution1D(nb_filter=16, filter_length=3, strides=1, padding="same")) model.add(Activation('relu')) model.add(Convolution1D(nb_filter=16, filter_length=3, padding="same")) model.add(Activation('relu')) model.add(MaxPooling1D(pool_size=2, strides=2)) model.add(
# graph subnet with one input and one output, # convolutional layers concateneted in parallel """ CNN is built here. you can omit the code from "graph_in" to the "main sequential model" """ """ graph_in is actually the stacks of convolution layers and pooling layers """ graph_in = Input(shape=(sequence_length, embedding_dim)) convs = [] for fsz in filter_sizes: # highly recommand to put Batch Normalization here conv = Convolution1D(nb_filter=num_filters, filter_length=fsz, border_mode='valid', activation='relu', subsample_length=1)(graph_in) pool = MaxPooling1D(pool_length=2)(conv) flatten = Flatten()(pool) convs.append(flatten) if len(filter_sizes) > 1: out = Merge(mode='concat')(convs) else: out = convs[0] graph = Model(input=graph_in, output=out) # main sequential model """
# Vectorise the texts; `invalid` holds the row indices to discard.
X, invalid = texts_to_word_vec(word2vec, texts_flat)
y = np.array(labels_flat)

# Rows whose first label column exceeds len(hist_diff_processed) are
# discarded as well.
out_of_range_rows = np.where(y[:, 0] > len(hist_diff_processed))[0]
invalid = invalid + out_of_range_rows.tolist()

# Drop the invalid rows (e.g. empty text) from features and labels alike.
X, y = (np.delete(arr, invalid, axis=0) for arr in (X, y))

print("Shapes of X / y: {} / {}".format(X.shape, y.shape))

## Define model
# Five stacked 'same'-padded convolutions, flatten, then three dense layers
# (two tanh layers and a 7-unit linear output) with dropout in between.
print("Define model...")
model = Sequential([
    Convolution1D(128,
                  WINDOW_SIZE,
                  padding='same',
                  input_shape=(X.shape[1], X.shape[2])),
    Convolution1D(64, 4, padding='same'),
    Convolution1D(32, 4, padding='same'),
    Convolution1D(16, 4, padding='same'),
    Convolution1D(16, 4, padding='same'),
    Flatten(),
    Dropout(0.2),  # fraction of input units to drop
    Dense(180, activation='tanh'),
    Dropout(0.2),
    Dense(90, activation='tanh'),
    Dense(7, activation='linear'),
])

print("Model: ")
model.summary()
def ANN_model(self, X, y, seq_length, pos_tensor, neg_tensor,
              Test_pos_tensor, Test_neg_tensor, complement_tensor, name):
    """Train a one-filter 1-D CNN binary classifier and report predictions.

    The network is Conv1D(1 filter, length 19) -> Dropout(0.7) -> Flatten
    -> Dense(1, sigmoid), trained with SGD on binary cross-entropy for 10
    epochs with a 30% validation split.

    Args:
        X, y: training inputs and labels passed to ``model.fit``.
        seq_length: sequence length; the model input shape is
            (seq_length, 4).
        pos_tensor, neg_tensor: not used in the visible part of the method.
        Test_pos_tensor, Test_neg_tensor: held-out inputs whose predictions
            are printed and averaged.
        complement_tensor: inputs whose per-row predictions are written to
            ``<name>_output.txt``.
        name: prefix of the output file name.
    """
    #complement_tensor = sequence_extraction_real(complement_list)
    model = Sequential()  # linear stack of layers
    model.add(Convolution1D(1, 19,
                            border_mode='same',
                            input_shape=(seq_length, 4),
                            activation='relu'))
    #sanity check for dimensions
    #print('Shape of the output of first layer: {}'.format(model.predict_on_batch(pos_tensor[0:32,:,:]).shape))
    #model.add(MaxPooling1D(pool_length=4))
    model.add(Dropout(0.7))
    model.add(Flatten())
    model.add(Dense(1, activation='sigmoid'))  # output layer
    #model.add(LSTM(100))

    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd,
                  loss='binary_crossentropy',  # binary yes/no target
                  metrics=['accuracy'])
    hist = model.fit(X, y, validation_split=0.3, nb_epoch=10)

    predictions_pos = model.predict(Test_pos_tensor)
    predictions_neg = model.predict(Test_neg_tensor)
    prediction_test = model.predict(complement_tensor)
    print('pos is ')
    print(predictions_pos)
    print('neg is ')
    print(predictions_neg)
    print('test is ')
    print(prediction_test)

    positive_prediction = predictions_pos.tolist()
    negative_prediction = predictions_neg.tolist()
    test_prediction = prediction_test.tolist()

    # Each prediction row is a one-element list; sum the scalar scores.
    positive_total = sum(row[0] for row in positive_prediction)
    negative_total = sum(row[0] for row in negative_prediction)

    # One line per complement row: 1-based position and predicted score.
    output_file = name + '_output.txt'
    with open(output_file, 'w') as output:
        header = 'Position' + '\t' + 'Percent Accuracy' + '\n'
        output.write(header)
        for count, row in enumerate(test_prediction, start=1):
            output.write(str(count) + '\t' + str(row[0]) + '\n')

    print(len(positive_prediction), len(negative_prediction))
    # Report NaN rather than raising ZeroDivisionError on an empty test set.
    positive_mean = (positive_total / len(positive_prediction)
                     if positive_prediction else float('nan'))
    negative_mean = (negative_total / len(negative_prediction)
                     if negative_prediction else float('nan'))
    print('positive average is: ' + str(positive_mean))
    print('negative average is: ' + str(negative_mean))
    #prediction_comp = model.predict(complement_tensor)

    #have a look at the filter
    convlayer = model.layers[0]
    weights = convlayer.get_weights()[0].transpose().squeeze()
# Batch-normalise the muon, electron and global-variable inputs
# (Inputs[1], Inputs[2] and Inputs[3] respectively).
muons, elec, globalvars = [
    BatchNormalization(momentum=0.6,
                       name=tag + '_input_batchnorm')(Inputs[position])
    for position, tag in enumerate(('muons', 'elec', 'globalvars'), start=1)
]
#Inputs = [Input(shape=(25,)),Input(shape=(10,)),Input(shape=(10,)),Input(shape=(13,))]
#jets = (Inputs[0])
#muons = (Inputs[1])
#elec = (Inputs[2])
#globalvars = (Inputs[3])

# Jet branch: three kernel-size-1 convolutions (128, 64, 8 filters) with
# dropout between them; only the last convolution uses a stride of 2.
for stage, (n_filters, stride) in enumerate(((128, 1), (64, 1), (8, 2))):
    if stage > 0:
        jets = Dropout(dropoutRate)(jets)
    jets = Convolution1D(n_filters,
                         1,
                         strides=stride,
                         kernel_initializer='lecun_uniform',
                         activation='relu',
                         name='jets_conv%d' % stage)(jets)
def _load_pickle(path):
    """Unpickle and return the object stored at *path*, closing the file."""
    # NOTE: pickle can execute arbitrary code; only load trusted files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def _load_embeddings_index(path):
    """Read a whitespace-separated embedding text file into {word: vector}.

    Blank lines and lines whose numeric part cannot be parsed as float32
    are skipped.
    """
    embeddings_index = {}
    with open(path, 'r') as handle:
        for line in handle:
            values = line.split()
            if not values:
                continue
            try:
                coefs = np.asarray(values[1:], dtype='float32')
            except ValueError:
                continue
            embeddings_index[values[0]] = coefs
    return embeddings_index


def main():
    """Train (or load) the dual-encoder LSTM + entity-grid CNN classifier.

    If ``--model_fname`` already exists the saved model is loaded and
    returned. Otherwise the embeddings and pickled datasets are loaded, the
    model is built and trained one epoch at a time with a patience-based
    early stop, the test set is scored after every epoch, and the model is
    optionally saved.

    Returns:
        The trained or loaded Keras model.
    """
    parser = argparse.ArgumentParser()
    parser.register('type', 'bool', str2bool)
    # emb_dim is used as an array dimension, so it must parse as an int
    # (it was declared type=str, which broke any value given on the CLI).
    parser.add_argument('--emb_dim', type=int, default=300,
                        help='Embeddings dimension')
    parser.add_argument('--hidden_size', type=int, default=300,
                        help='Hidden size')
    parser.add_argument('--batch_size', type=int, default=256,
                        help='Batch size')
    parser.add_argument('--n_epochs', type=int, default=50,
                        help='Num epochs')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='Learning rate')
    parser.add_argument('--optimizer', type=str, default='adam',
                        help='Optimizer')
    parser.add_argument("--dropout_ratio", type=float, default=0.5,
                        help="ratio of cells to drop out")
    parser.add_argument('--n_recurrent_layers', type=int, default=1,
                        help='Num recurrent layers')
    parser.add_argument("--w_size", type=int, default=5,
                        help="window size length of neighborhood in words")
    parser.add_argument("--pool_length", type=int, default=6,
                        help="length for max pooling")
    parser.add_argument(
        "--nb_filter", type=int, default=150,
        help="nb of filter to be applied in convolution over words")
    parser.add_argument('--input_dir', type=str, default='./dataset/',
                        help='Input dir')
    parser.add_argument('--save_model', type='bool', default=True,
                        help='Whether to save the model')
    parser.add_argument('--model_fname', type=str,
                        default='model/dual_encoder_lstm_classifier.h5',
                        help='Model filename')
    parser.add_argument('--embedding_file', type=str,
                        default='embeddings/glove.840B.300d.txt',
                        help='Embedding filename')
    parser.add_argument('--seed', type=int, default=1337, help='Random seed')
    args = parser.parse_args()
    print('Model args: ', args)
    np.random.seed(args.seed)

    if os.path.exists(args.model_fname):
        print("Found pre-trained model...")
        return K_load_model(args.model_fname)

    print("No pre-trained model...")
    print("Start building model...")

    # first, build index mapping words in the embeddings set
    # to their embedding vector
    print('Indexing word vectors.')
    embeddings_index = _load_embeddings_index(args.embedding_file)

    print("Now loading UDC data...")
    train_c, train_r, train_l = _load_pickle(args.input_dir + 'train.pkl')
    test_c, test_r, test_l = _load_pickle(args.input_dir + 'test.pkl')
    dev_c, dev_r, dev_l = _load_pickle(args.input_dir + 'dev.pkl')

    print('Found %s training samples.' % len(train_c))
    print('Found %s dev samples.' % len(dev_c))
    print('Found %s test samples.' % len(test_c))

    MAX_SEQUENCE_LENGTH, MAX_NB_WORDS, word_index = _load_pickle(
        args.input_dir + 'params.pkl')

    print("MAX_SEQUENCE_LENGTH: {}".format(MAX_SEQUENCE_LENGTH))
    print("MAX_NB_WORDS: {}".format(MAX_NB_WORDS))

    vocabs, E = init_vocab(args.emb_dim)

    print("Now loading entity-grid data...")
    train_egrid, train_label = load_and_numberize_egrids_with_labels(
        filelist="./dataset/list.train",
        maxlen=MAX_SEQUENCE_LENGTH,
        w_size=args.w_size,
        vocabs=vocabs)
    dev_egrid, dev_label = load_and_numberize_egrids_with_labels(
        filelist="./dataset/list.dev",
        maxlen=MAX_SEQUENCE_LENGTH,
        w_size=args.w_size,
        vocabs=vocabs)
    test_egrid, test_label = load_and_numberize_egrids_with_labels(
        filelist="./dataset/list.test",
        maxlen=MAX_SEQUENCE_LENGTH,
        w_size=args.w_size,
        vocabs=vocabs)

    #print (train_label[:10])
    #print (list(train_l[:10]))
    #assert train_label == list(train_l)
    #assert dev_label == list(dev_l)
    #assert test_label == list(test_l)
    #randomly shuffle the training data
    #np.random.shuffle(train_egrid)

    print("Now loading embedding matrix...")
    num_words = min(MAX_NB_WORDS, len(word_index)) + 1
    embedding_matrix = np.zeros((num_words, args.emb_dim))
    for word, i in word_index.items():
        if i >= MAX_NB_WORDS:
            continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # words not found in embedding index will be all-zeros.
            embedding_matrix[i] = embedding_vector

    print("Now building the dual encoder lstm model...")
    encoder = Sequential()
    # input_dim must equal the number of rows of the initial weight matrix;
    # it was MAX_NB_WORDS, which only matches when num_words happens to be
    # the same value.
    encoder.add(
        Embedding(output_dim=args.emb_dim,
                  input_dim=num_words,
                  input_length=MAX_SEQUENCE_LENGTH,
                  weights=[embedding_matrix],
                  mask_zero=True,
                  trainable=True))
    encoder.add(LSTM(units=args.hidden_size))

    print("Now building the CNN egrid model...")
    sent_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
    x = Embedding(output_dim=args.emb_dim,
                  weights=[E],
                  input_dim=len(vocabs),
                  input_length=MAX_SEQUENCE_LENGTH,
                  trainable=True)(sent_input)
    x = Convolution1D(nb_filter=args.nb_filter,
                      filter_length=args.w_size,
                      border_mode='valid',
                      activation='relu',
                      subsample_length=1)(x)
    x = MaxPooling1D(pool_length=args.pool_length)(x)
    x = Dropout(args.dropout_ratio)(x)
    x = Flatten()(x)
    x = Dropout(args.dropout_ratio)(x)
    x = Dense(300)(x)
    cnn = Model(sent_input, x)

    context_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
    response_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
    egrid_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')

    # context and response share every weight of the same encoder
    context_branch = encoder(context_input)
    response_branch = encoder(response_input)
    context_branch_cnn = cnn(egrid_input)

    concatenated = merge([context_branch, response_branch], mode='mul')
    concatenated = merge([concatenated, context_branch_cnn], mode='concat')
    out = Dense((1), activation="sigmoid")(concatenated)

    model = Model([context_input, response_input, egrid_input], out)
    model.compile(loss='binary_crossentropy', optimizer=args.optimizer)
    print(model.summary())

    print("Now training the model...")
    histories = my_callbacks.Histories()

    bestAcc = 0.0
    patience = 0

    print("\tbatch_size={}, nb_epoch={}".format(args.batch_size,
                                                args.n_epochs))

    # Inclusive upper bound: range(1, n_epochs) trained one epoch too few.
    for ep in range(1, args.n_epochs + 1):
        #model.fit([train_c, train_r], train_l,
        #batch_size=args.batch_size, nb_epoch=1, callbacks=[histories],
        #validation_data=([dev_c, dev_r], dev_l), verbose=1)
        model.fit([train_c, train_r, train_egrid],
                  train_l,
                  batch_size=args.batch_size,
                  epochs=1,
                  callbacks=[histories],
                  validation_data=([dev_c, dev_r, dev_egrid], dev_l),
                  verbose=1)
        #model.save(model_name + "_ep." + str(ep) + ".h5")

        curAcc = histories.accs[0]
        if curAcc >= bestAcc:
            bestAcc = curAcc
            patience = 0
        else:
            patience = patience + 1

        # classify the test set after every epoch
        y_pred = model.predict([test_c, test_r, test_egrid])
        print("Perform on test set after Epoch: " + str(ep) + "...!")
        compute_recall_ks(y_pred[:, 0])

        # stop once more than 10 consecutive epochs brought no improvement
        if patience > 10:
            print("Early stopping at epoch: " + str(ep))
            break

    if args.save_model:
        print("Now saving the model... at {}".format(args.model_fname))
        model.save(args.model_fname)

    return model
def trainNetwork(self, activationFunction):
    """Train and evaluate a two-layer 1-D CNN with the given activation.

    The train and test inputs on ``self`` are given a trailing feature
    axis, the model is fitted for 200 epochs with TensorBoard logging,
    predictions are stored on ``self`` and the metric/plot helpers run.

    Args:
        activationFunction: activation passed to both convolutions and the
            hidden dense layer; also embedded in log and plot paths.

    Returns:
        Tuple ``(loss, accuracy)`` from ``model.evaluate`` on the test data.
    """
    def as_conv_input(data):
        # Add the feature axis only to 2-D data. The unconditional
        # expand_dims re-expanded the stored arrays on every call, so a
        # second call (e.g. with another activation) fed 4-D data to Conv1D.
        data = np.asarray(data)
        return np.expand_dims(data, axis=2) if data.ndim == 2 else data

    self.epochs = 200
    verbose, batch_size = 2, 32
    self.xTrainInput = as_conv_input(self.xTrainInput)
    self.xTestInput = as_conv_input(self.xTestInput)
    n_timesteps, n_features = self.xTrainInput.shape[
        1], self.xTrainInput.shape[2]

    model = Sequential()
    model.add(
        Convolution1D(filters=64,
                      kernel_size=3,
                      activation=activationFunction,
                      use_bias=True,
                      input_shape=(n_timesteps, n_features)))
    model.add(MaxPool1D(pool_size=2))
    model.add(
        Convolution1D(filters=64,
                      kernel_size=3,
                      activation=activationFunction))
    model.add(Dropout(0.5))
    model.add(MaxPool1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(50, activation=activationFunction))
    model.add(Dense(18, activation='softmax'))
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer='adam',
                  metrics=[
                      'accuracy', 'mean_squared_error', 'mae', 'mape',
                      'cosine'
                  ])

    # NOTE(review): the log directory says 'BatchSize33' while batch_size
    # is 32 and the plot path below says 'BatchSize32'. Probably a typo,
    # left untouched so existing log locations do not move.
    tensorboard = TensorBoard(
        log_dir=r'Results\logs' + str(self.sensor) + 'Epochs' +
        str(self.epochs) + str(activationFunction) + 'BatchSize33',
        histogram_freq=1,
        write_graph=True,
    )
    keras_callbacks = [tensorboard]

    self.history = model.fit(self.xTrainInput,
                             self.yTrainInput,
                             epochs=self.epochs,
                             batch_size=batch_size,
                             verbose=verbose,
                             validation_split=0.1,
                             callbacks=keras_callbacks)

    # evaluate model
    self.yPred = model.predict_classes(self.xTestInput,
                                       batch_size=batch_size,
                                       verbose=0)
    self.yScore = model.predict_proba(self.xTestInput,
                                      batch_size=batch_size,
                                      verbose=0)
    self.fit = model.predict(self.xTestInput,
                             batch_size=batch_size,
                             verbose=0)

    plot_model(model,
               show_shapes=True,
               expand_nested=True,
               to_file=r'Results\Epochs ' + str(self.epochs) +
               str(activationFunction) + 'BatchSize32\Model ' +
               str(self.sensor) + 'Epochs' + str(self.epochs) +
               str(activationFunction) + '.png')

    # Metrics, ROC and per-metric plots, delegated to sibling methods.
    self.calculateMetrics(activationFunction)
    self.calculateROC(activationFunction)
    self.plotAccuracy(activationFunction)
    self.plotLoss(activationFunction)
    self.plotMSE(activationFunction)
    self.plotMAE(activationFunction)
    self.plotMAPE(activationFunction)
    self.plotCosine(activationFunction)

    # evaluate() returns the loss followed by the five compiled metrics;
    # only loss and accuracy are handed back to the caller.
    loss, accuracy, _mse, _mae, _mape, _cosine = model.evaluate(
        self.xTestInput,
        self.yTestInput,
        batch_size=batch_size,
        verbose=0)
    return loss, accuracy
except Exception as e: break X.append(x_i) Y.append(y_i) X, Y = np.array(X), np.array(Y) X_train, X_test, Y_train, Y_test = create_Xt_Yt(X, Y) X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], EMB_SIZE)) X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], EMB_SIZE)) model = Sequential() model.add( Convolution1D(input_shape=(WINDOW, EMB_SIZE), nb_filter=8, filter_length=2, padding="same")) model.add(BatchNormalization()) model.add(LeakyReLU()) model.add(Dropout(0.5)) model.add(Convolution1D(nb_filter=16, filter_length=4, padding="same")) model.add(BatchNormalization()) model.add(LeakyReLU()) model.add(AveragePooling1D()) model.add(Dropout(0.5)) model.add(Convolution1D(nb_filter=32, filter_length=4, padding="same")) model.add(BatchNormalization()) model.add(LeakyReLU()) model.add(Dropout(0.5)) model.add(Flatten())
# DATA PREPARATION
# X and Y start as empty lists before format_data(df) runs and are then
# converted to arrays (presumably format_data fills them; verify there).
X = []
Y = []
format_data(df)
X = np.array(X)
Y = np.array(Y)

X_train, X_test, y_train, y_test = train_test(X, Y)

# Give both splits an explicit trailing feature axis of size EMB_SIZE.
X_train = np.reshape(X_train,
                     (X_train.shape[0], X_train.shape[1], EMB_SIZE))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], EMB_SIZE))

# MODEL DEFINITION
print('initializing model..')
# Two conv -> batch-norm -> LeakyReLU -> dropout stages, then flatten and
# a 64-unit dense layer.
model = Sequential([
    Convolution1D(input_shape=(WINDOW, EMB_SIZE),
                  nb_filter=16,
                  filter_length=4,
                  border_mode='same'),
    BatchNormalization(),
    LeakyReLU(),
    Dropout(0.5),
    Convolution1D(nb_filter=8, filter_length=4, border_mode='same'),
    BatchNormalization(),
    LeakyReLU(),
    Dropout(0.5),
    Flatten(),
    Dense(64),
])
# Choose the optimizer: anything other than "nadam" or "adam" falls back
# to RMSprop with gradient-norm clipping.
if OPTIMIZER_TYPE not in ("nadam", "adam"):
    optimizer = RMSprop(lr=LEARN_RATE, clipnorm=1.)
elif OPTIMIZER_TYPE == "adam":
    optimizer = Adam(lr=LEARN_RATE)
else:
    optimizer = Nadam(lr=LEARN_RATE)

#Build model
#Convolution layers
#As per https://arxiv.org/pdf/1412.6806v3.pdf
model = Sequential()

# FIRST 2 CONV LAYERS: identical settings, except that the first one also
# fixes the batch input shape. Each is followed by 10% dropout and gets
# its own l2 regularizer instance.
for conv_layer, drop_layer in (
        (Convolution1D(N_CONV_FILTERS, FILT_LEN,
                       batch_input_shape=X_train.shape,
                       activation='relu',
                       W_regularizer=l2(L2_WEIGHT),
                       #activity_regularizer=activity_l2(L2_WEIGHT),
                       border_mode='same',
                       init='he_normal'),
         Dropout(0.1)),
        (Convolution1D(N_CONV_FILTERS, FILT_LEN,
                       activation='relu',
                       W_regularizer=l2(L2_WEIGHT),
                       #activity_regularizer=activity_l2(L2_WEIGHT),
                       border_mode='same',
                       init='he_normal'),
         Dropout(0.1)),
):
    model.add(conv_layer)
    # model.add(PReLU())
    model.add(drop_layer)

# model.add(Convolution1D(N_CONV_FILTERS, FILT_LEN,
#                         init='he_normal',
y_test1 = np.array(C) y_train = to_categorical(y_train1) y_test = to_categorical(y_test1) # reshape input to be [samples, time steps, features] X_train = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1)) X_test = np.reshape(testT, (testT.shape[0], testT.shape[1], 1)) lstm_output_size = 70 cnn = Sequential() cnn.add( Convolution1D(64, 3, border_mode="same", activation="relu", input_shape=(42, 1))) cnn.add(Convolution1D(64, 3, border_mode="same", activation="relu")) cnn.add(MaxPooling1D(pool_length=(2))) cnn.add(Convolution1D(128, 3, border_mode="same", activation="relu")) cnn.add(Convolution1D(128, 3, border_mode="same", activation="relu")) cnn.add(MaxPooling1D(pool_length=(2))) cnn.add(LSTM(lstm_output_size)) cnn.add(Dropout(0.1)) cnn.add(Dense(10, activation="softmax")) # define optimizer and objective, compile cnn cnn.compile(loss="categorical_crossentropy", optimizer="adam",
ar = ar.append(pd.Series(data=close_data[n]), ignore_index=True, verify_integrity=True) input_data = input_data.append(ar, ignore_index=True, verify_integrity=True) X_train, X_test, Y_train, Y_test = create_Xt_Yt(input_data.values, predict_data.values) model = Sequential() model.add( Convolution1D(input_shape=(X_train.shape[0], X_train.shape[1]), nb_filter=64, filter_length=2, border_mode='valid', activation='relu', subsample_length=1)) model.add(MaxPooling1D(pool_length=2)) model.add( Convolution1D(input_shape=(X_train.shape[0], X_train.shape[1]), nb_filter=64, filter_length=2, border_mode='valid', activation='relu', subsample_length=1)) model.add(MaxPooling1D(pool_length=2)) model.add(Dropout(0.25)) model.add(Flatten())
name='words_input') # Our word embedding layer wordsEmbeddingLayer = Embedding(word_embeddings.shape[0], word_embeddings.shape[1], weights=[word_embeddings], trainable=False) words = wordsEmbeddingLayer(words_input) # Now we add a variable number of convolutions words_convolutions = [] for filter_length in filter_lengths: words_conv = Convolution1D(filters=nb_filter, kernel_size=filter_length, padding='same', activation='relu', strides=1)(words) words_conv = GlobalMaxPooling1D()(words_conv) words_convolutions.append(words_conv) output = concatenate(words_convolutions) # We add a vanilla hidden layer together with dropout layers: output = Dropout(0.5)(output) output = Dense(hidden_dims, activation='tanh', kernel_regularizer=keras.regularizers.l2(0.01))(output) output = Dropout(0.25)(output)
dropout = 0.25  # Fraction of units to drop
nb_filter = 32  # Number of filters to use in Convolution1D
filter_length = 3  # Length of filter for Convolution1D

# Initialize weights and biases for the Dense layers
weights = initializers.TruncatedNormal(mean=0.0, stddev=0.05, seed=2)
# Was `bias = bias_initializer='zeros'`: a chained assignment that also
# created a stray `bias_initializer` variable. Only `bias` is intended.
bias = 'zeros'

# Branch 1: frozen pre-trained embeddings followed by two
# conv -> batch-norm -> relu -> dropout stages, then flattened.
model1 = Sequential()
model1.add(Embedding(nb_words + 1,
                     embedding_dim,
                     weights=[word_embedding_matrix],
                     input_length=max_question_len,
                     trainable=False))

model1.add(Convolution1D(filters=nb_filter,
                         kernel_size=filter_length,
                         padding='same'))
model1.add(BatchNormalization())
model1.add(Activation('relu'))
model1.add(Dropout(dropout))

model1.add(Convolution1D(filters=nb_filter,
                         kernel_size=filter_length,
                         padding='same'))
model1.add(BatchNormalization())
model1.add(Activation('relu'))
model1.add(Dropout(dropout))

model1.add(Flatten())
from keras.models import Model
import keras.callbacks
from keras.callbacks import ModelCheckpoint


class LossHistory(keras.callbacks.Callback):
    """Keras callback that records the training loss after every batch."""

    def on_train_begin(self, logs=None):
        """Reset the per-batch loss history at the start of training."""
        self.losses = []

    def on_batch_end(self, batch, logs=None):
        """Append this batch's 'loss' entry (None when it is missing)."""
        # `logs` defaulted to a shared mutable dict; use None and fall
        # back to an empty mapping instead.
        self.losses.append((logs or {}).get('loss'))


starting_time = time.time()

# define the network architecture = Conv Net
input_seq = Input(shape = (seq_length, 1))

# Two stacked 'valid' convolutions: 16 filters of length 10, then 32
# filters of length 3.
conv1_layer = Convolution1D(nb_filter = 16, filter_length = 10,
                            border_mode='valid', init = 'normal',
                            activation = 'relu')
conv1 = conv1_layer(input_seq)
conv2 = Convolution1D(nb_filter = 32, filter_length = 3,
                      border_mode='valid', init = 'normal',
                      activation = 'relu')(conv1)
flat = Flatten()(conv2)

# dense1 = Dense(4080, activation = 'relu')(flat)
# dense2 = Dense(3072, activation = 'relu')(dense1)
# dense3 = Dense(2048, activation = 'relu')(dense2)
dense4 = Dense(512, activation = 'relu')(flat)
# Three linear outputs trained with mean squared error below.
predictions = Dense(3, activation = 'linear')(dense4)

# create the model
model = Model(input=input_seq, output=predictions)

# compile the model
model.compile(loss='mean_squared_error', optimizer='Adagrad')
compiling_time = time.time() - starting_time
def main():
    """Train the question-pair duplicate classifier and write a submission.

    Reads ``./data/train.csv`` and ``./data/test.csv``, tokenises and pads
    the questions, loads 300-d GloVe vectors from ``GLOVE_DIR``, builds a
    four-branch model (two convolutional branches and two time-distributed
    max-pooled branches), trains it with checkpointing and early stopping,
    plots the loss curves and writes ``keras_submission.csv``.
    """
    # load data
    train = pd.read_csv("./data/train.csv")
    test = pd.read_csv("./data/test.csv")
    train = train.fillna('empty')
    test = test.fillna('empty')
    y_train = train.is_duplicate
    test_labels = test['test_id'].astype(int).tolist()

    print('Processing text dataset...')
    train_question1 = []
    process_questions(train_question1, train.question1, 'train_question1',
                      train)
    train_question2 = []
    process_questions(train_question2, train.question2, 'train_question2',
                      train)
    test_question1 = []
    process_questions(test_question1, test.question1, 'test_question1', test)
    test_question2 = []
    process_questions(test_question2, test.question2, 'test_question2', test)

    # Find the length (in whitespace-separated words) of every train question
    lengths = ([len(question.split()) for question in train_question1] +
               [len(question.split()) for question in train_question2])
    lengths = pd.DataFrame(lengths, columns=['counts'])
    # `describe` was referenced without being called, so nothing happened;
    # call it and show the summary.
    print(lengths.counts.describe())
    print(np.percentile(lengths.counts, 99.5))

    print("fitting a tokenizer...")
    num_words = 200000
    all_questions = (train_question1 + train_question2 + test_question1 +
                     test_question2)
    tokenizer = Tokenizer(num_words=num_words)
    tokenizer.fit_on_texts(all_questions)

    print("converting to sequences...")
    train_question1_word_sequences = tokenizer.texts_to_sequences(
        train_question1)
    train_question2_word_sequences = tokenizer.texts_to_sequences(
        train_question2)
    test_question1_word_sequences = tokenizer.texts_to_sequences(
        test_question1)
    test_question2_word_sequences = tokenizer.texts_to_sequences(
        test_question2)

    word_index = tokenizer.word_index
    print("Words in index: %d" % len(word_index))

    print("padding sequences...")
    max_question_len = 36

    def pad(sequences):
        # Pad/truncate at the end so every question has max_question_len ids.
        return pad_sequences(sequences,
                             maxlen=max_question_len,
                             padding='post',
                             truncating='post')

    train_q1 = pad(train_question1_word_sequences)
    train_q2 = pad(train_question2_word_sequences)
    test_q1 = pad(test_question1_word_sequences)
    test_q2 = pad(test_question2_word_sequences)

    # load embeddings
    print('Indexing word vectors...')
    embeddings_index = {}
    # The context manager closes the file even if a line fails to parse.
    with codecs.open(os.path.join(GLOVE_DIR, 'glove.6B.300d.txt'),
                     encoding='utf-8') as f:
        for line in f:
            values = line.split(' ')
            word = values[0]
            coefs = np.asarray(values[1:], dtype='float32')
            embeddings_index[word] = coefs
    print('Found %s word vectors.' % len(embeddings_index))

    embedding_dim = 300
    nb_words = len(word_index)
    word_embedding_matrix = np.zeros((nb_words + 1, embedding_dim))
    for word, i in word_index.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # words not found in embedding index will be all-zeros.
            word_embedding_matrix[i] = embedding_vector

    print('Null word embeddings: %d' %
          np.sum(np.sum(word_embedding_matrix, axis=1) == 0))

    units = 128  # Number of nodes in the Dense layers
    dropout = 0.25  # Fraction of units to drop
    nb_filter = 32  # Number of filters to use in Convolution1D
    filter_length = 3  # Length of filter for Convolution1D
    # Initialize weights and biases for the Dense layers
    weights = initializers.TruncatedNormal(mean=0.0, stddev=0.05, seed=2)
    bias = 'zeros'

    def frozen_embedding():
        # Non-trainable embedding initialised from the GloVe matrix.
        return Embedding(nb_words + 1,
                         embedding_dim,
                         weights=[word_embedding_matrix],
                         input_length=max_question_len,
                         trainable=False)

    def conv_branch():
        # Embedding -> 2 x (conv, batch-norm, relu, dropout) -> flatten.
        branch = Sequential()
        branch.add(frozen_embedding())
        for _ in range(2):
            branch.add(
                Convolution1D(filters=nb_filter,
                              kernel_size=filter_length,
                              padding='same'))
            branch.add(BatchNormalization())
            branch.add(Activation('relu'))
            branch.add(Dropout(dropout))
        branch.add(Flatten())
        return branch

    def pooled_branch():
        # Embedding -> per-timestep dense -> batch-norm, relu, dropout ->
        # max over the time axis.
        branch = Sequential()
        branch.add(frozen_embedding())
        branch.add(TimeDistributed(Dense(embedding_dim)))
        branch.add(BatchNormalization())
        branch.add(Activation('relu'))
        branch.add(Dropout(dropout))
        branch.add(
            Lambda(lambda x: K.max(x, axis=1),
                   output_shape=(embedding_dim, )))
        return branch

    def add_dense_block(net, n_units):
        # Dense -> batch-norm -> relu -> dropout.
        net.add(
            Dense(n_units, kernel_initializer=weights,
                  bias_initializer=bias))
        net.add(BatchNormalization())
        net.add(Activation('relu'))
        net.add(Dropout(dropout))

    def merged(left, right, widths):
        # Concatenate two sub-models, then stack one dense block per width.
        net = Sequential()
        net.add(Merge([left, right], mode='concat'))
        for width in widths:
            add_dense_block(net, width)
        return net

    # Layers are created in the same order as the hand-written version so
    # automatically generated layer names are unchanged.
    model1 = conv_branch()
    model2 = conv_branch()
    model3 = pooled_branch()
    model4 = pooled_branch()

    modela = merged(model1, model2, (units * 2, units))
    modelb = merged(model3, model4, (units * 2, units))

    model = merged(modela, modelb, (units * 2, units, units))
    model.add(Dense(1, kernel_initializer=weights, bias_initializer=bias))
    model.add(BatchNormalization())
    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    # save the best weights for predicting the test question pairs
    save_best_weights = 'question_pairs_weights.h5'

    t0 = time.time()
    callbacks = [
        ModelCheckpoint(save_best_weights,
                        monitor='val_loss',
                        save_best_only=True),
        EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto')
    ]
    # Both question inputs are fed twice: once to the convolutional pair
    # (model1/model2) and once to the pooled pair (model3/model4).
    history = model.fit([train_q1, train_q2, train_q1, train_q2],
                        y_train,
                        batch_size=256,
                        epochs=10,
                        validation_split=0.15,
                        verbose=True,
                        shuffle=True,
                        callbacks=callbacks)
    t1 = time.time()
    print("Minutes elapsed: %f" % ((t1 - t0) / 60.))

    summary_stats = pd.DataFrame({
        'epoch': [i + 1 for i in history.epoch],
        'train_acc': history.history['acc'],
        'valid_acc': history.history['val_acc'],
        'train_loss': history.history['loss'],
        'valid_loss': history.history['val_loss']
    })

    plt.plot(summary_stats.train_loss)
    plt.plot(summary_stats.valid_loss)
    plt.show()

    # Predict with the checkpoint that had the lowest validation loss.
    model.load_weights(save_best_weights)
    predictions = model.predict([test_q1, test_q2, test_q1, test_q2],
                                verbose=True)

    keras_submission = pd.DataFrame({
        "test_id": test_labels,
        "is_duplicate": predictions.ravel()
    })
    keras_submission.to_csv("keras_submission.csv", index=False)
# Layers appended to the existing `model`, in order:
#   * embedding lookup from vocab indices into the pretrained vectors
#     (trainable=False; set to True to update word embeddings while training)
#   * Convolution1D learning nb_filter word-group filters of size
#     filter_length
#   * max-over-time pooling
#   * a vanilla hidden layer followed by dropout
#   * a single-unit output squashed with a sigmoid
for _stack_layer in (
        Embedding(word_embeddings.shape[0],
                  word_embeddings.shape[1],
                  input_length=max_sentence_len,
                  dropout=0.2,
                  weights=[word_embeddings],
                  trainable=False),
        Convolution1D(nb_filter=nb_filter,
                      filter_length=filter_length,
                      border_mode='valid',
                      activation='relu',
                      subsample_length=1),
        GlobalMaxPooling1D(),
        Dense(hidden_dims, activation='relu'),
        Dropout(0.2),
        Dense(1, activation='sigmoid'),
):
    model.add(_stack_layer)
# Excerpt: loads the train/test .npy arrays for index int(t)-1, one-hot encodes the labels into 2 classes, and starts a Sequential Conv1D/MaxPooling1D stack (the excerpt is cut off inside the last model.add call).
#(n,256,32) x_train = np.load("train_datas_init/x_train_"+str((int(t)-1))+".npy") #(n, vector length, number of channels) y_train = fun(np.load("train_datas_init/y_train_"+str((int(t)-1))+".npy")) #one-hot encoded vector (n,) x_test = np.load("train_datas_init/X_test_"+str((int(t)-1))+".npy") y_test = fun(np.load("train_datas_init/y_test_"+str((int(t)-1))+".npy")) y_train = np_utils.to_categorical(y_train, num_classes=2) y_test = np_utils.to_categorical(y_test, num_classes=2) #################modeling####################### # Build the sequential model model = Sequential() #256*32 model.add(Convolution1D( #256*64 filters=64, kernel_size=2, padding='same', strides=1, input_shape=(256,32))) model.add(MaxPooling1D( #128*64 pool_size=2, strides=2, padding='same')) model.add(Convolution1D( #128*128 filters=128, kernel_size=2, padding='same', strides=1)) model.add(MaxPooling1D( #64*128
def build_model(x2_shape, x2_train):
    """Assemble and compile the three-input regression model.

    Two text branches (embedding -> conv -> LSTM, differing only in conv
    kernel size) and one stateful LSTM branch over ``x2_train``-shaped input
    are concatenated and fed through Dense/Dropout layers to a single
    ``'output'`` unit. Hyper-parameters (``nb_words``, ``dropout``,
    ``deeper``, ...) are read from module scope.

    Args:
        x2_shape: not used by this function.
        x2_train: array whose ``shape[1]`` and ``shape[2]`` size the third
            branch's ``batch_input_shape``.

    Returns:
        The compiled ``Model`` (mean-squared-error loss, Adam optimizer).
    """

    def text_branch(kernel_size):
        # Embedding -> Dropout -> 1 or 2 x (Conv1D + Dropout) -> LSTM
        branch = Sequential()
        branch.add(
            Embedding(nb_words,
                      embedding_dim,
                      weights=[word_embedding_matrix],
                      input_length=max_daily_length))
        branch.add(Dropout(dropout))
        # Equality test kept as written so non-bool flag values behave as before.
        conv_repeats = 2 if deeper == True else 1
        for _ in range(conv_repeats):
            branch.add(
                Convolution1D(filters=nb_filter,
                              kernel_size=kernel_size,
                              padding='same',
                              activation='relu'))
            branch.add(Dropout(dropout))
        branch.add(
            LSTM(rnn_output_size,
                 activation=None,
                 kernel_initializer=weights,
                 dropout=dropout))
        return branch

    model1 = text_branch(filter_length1)
    print("PRINTING MODEL1.OUTPUT AFTER LSTM: ")
    print(model1.output)

    model2 = text_branch(filter_length2)
    print("PRINTING MODEL2.OUTPUT")
    print(type(model2.output))
    print(model2.output)

    # Third branch: stateful single-unit LSTM with a fixed batch size of 256.
    model3 = Sequential()
    model3.add(
        LSTM(1,
             batch_input_shape=(256, x2_train.shape[1], x2_train.shape[2]),
             stateful=True))

    # The original instantiates (and immediately discards) an empty Sequential
    # here; kept so the sequence of Keras objects created is unchanged.
    Sequential()

    # NOTE(review): the original author's comment reads "CUT OUT MODEL3.OUTPUT
    # AND THIS MODEL WILL COMPILE" -- the axis-0 concat with the transposed
    # model3 output is preserved as-is and still needs a proper fix.
    merged = Concatenate(axis=0)([
        model1.output, model2.output,
        tf.keras.backend.transpose(model3.output)
    ])

    head_widths = [hidden_dims]
    if deeper == True:
        head_widths.append(hidden_dims // 2)
    for width in head_widths:
        merged = Dense(width, kernel_initializer=weights)(merged)
        merged = Dropout(dropout)(merged)
    merged = Dense(1, kernel_initializer=weights, name='output')(merged)

    for label, branch in (('model1: ', model1), ('model2: ', model2),
                          ('model3: ', model3)):
        print(label + str(branch))
        print(branch.input)

    merged_model = Model(
        inputs=[model1.input, model2.input, model3.input], outputs=merged)
    merged_model.compile(loss='mean_squared_error',
                         optimizer=Adam(lr=learning_rate, clipvalue=1.0))
    print(merged_model)
    return merged_model
def build_model(self, pretrained_embedding=None, fix_embedding=None, temperature=None):
    """Build and compile the multi-filter-width text CNN.

    Sets ``self.word_input``, ``self.embedding_lookup``,
    ``self.feature_output``, ``self.feature_extractor``,
    ``self.end2end_model`` and ``self.end2end_opt``. When the pretrained
    embedding is used frozen, ``self.embedding_weight`` is set too. When
    ``temperature`` is given, ``self.soft_output``,
    ``self.end2end_soft_model`` and ``self.end2end_soft_opt`` are also set.

    Args:
        pretrained_embedding: optional initial embedding matrix; used only
            when ``self.use_pretrained_embedding`` is truthy.
        fix_embedding: deprecated; a warning is printed if it is passed and
            ``self.fix_embedding`` is used instead.
        temperature: optional divisor applied to the logits to build an
            additional "soft" output model.

    Exits the process (``sys.exit``) when a fixed embedding is requested
    without a pretrained matrix, or when ``self.label_type`` is neither
    ``'multi-class'`` nor ``'multi-label'``.
    """
    if fix_embedding is not None:
        print('Warning: fix_embedding for build_model is deprecated')
    fix_embedding = self.fix_embedding

    def squash(tensor, **layer_kwargs):
        # softmax for multi-class, sigmoid for multi-label, otherwise abort
        if self.label_type == 'multi-class':
            return Activation('softmax', **layer_kwargs)(tensor)
        if self.label_type == 'multi-label':
            return Activation('sigmoid', **layer_kwargs)(tensor)
        print('undefined label type {0}'.format(self.label_type))
        sys.exit()

    # vocab indices -> embedding_dims-dimensional vectors
    word_input = Input(shape=(self.maxlen, ), dtype='int32', name='word_input')
    self.word_input = word_input

    embedding_kwargs = dict(output_dim=self.embedding_dims,
                            input_dim=self.max_features,
                            input_length=self.maxlen)
    have_pretrained = self.use_pretrained_embedding and pretrained_embedding is not None
    freeze_pretrained = have_pretrained and fix_embedding
    if have_pretrained:
        embedding_kwargs['weights'] = [pretrained_embedding]
        if freeze_pretrained:
            embedding_kwargs['trainable'] = False
    elif fix_embedding:
        # a frozen embedding only makes sense with pretrained weights
        print('ERROR:Using random embedding as fix!')
        sys.exit(-1)
    self.embedding_lookup = Embedding(**embedding_kwargs)
    embedding_layer = self.embedding_lookup(word_input)
    if freeze_pretrained:
        self.embedding_weight = pretrained_embedding

    # one Convolution1D per filter length, all reading the same embedding
    reshaped_embedding_layer = Reshape((self.maxlen, self.embedding_dims))(embedding_layer)
    conv_layer_list = [
        Convolution1D(nb_filter=self.nb_filter,
                      filter_length=width,
                      border_mode='valid',
                      activation='relu',
                      W_constraint=maxnorm(self.l2_constraint),
                      subsample_length=1)
        for width in self.filter_length_list
    ]
    conv_layer_output_list = [conv(reshaped_embedding_layer) for conv in conv_layer_list]

    # pool each conv output with a window of maxlen + 1 - filter_length
    pool_layer_output_list = []
    for width, conv_output in zip(self.filter_length_list, conv_layer_output_list):
        pool_cls = AveragePooling1D if self.pool == 'avg' else MaxPooling1D
        pooled = pool_cls(pool_length=self.maxlen + 1 - width)(conv_output)
        pool_layer_output_list.append(pooled)

    # flatten so the pooled features can feed a vanilla dense layer
    flat_layer_output_list = [Flatten()(pooled) for pooled in pool_layer_output_list]

    merged_layer_output = Merge(mode='concat', concat_axis=1,
                                name='feature_output')(flat_layer_output_list)
    self.feature_output = merged_layer_output

    # feature extractor: everything up to the concatenated features
    self.feature_extractor = Model(input=word_input, output=merged_layer_output)

    if self.hidden_dim is not None:
        merged_layer_output = Dropout(self.dropout_rate)(merged_layer_output)
        merged_layer_output = Dense(self.hidden_dim, activation='tanh')(merged_layer_output)

    # logistic-regression part on top of the features
    x = Dropout(self.dropout_rate)(merged_layer_output)
    logits = Dense(self.label_dims)(x)
    end2end_output = squash(logits)

    # end-to-end model
    self.end2end_model = Model(input=word_input, output=end2end_output)

    if temperature is not None:
        # same logits divided by the temperature, then the same activation
        hot_logits = Lambda(lambda x: x / temperature)(logits)
        end2end_soft_output = squash(hot_logits, name='soft_output')
        self.soft_output = end2end_soft_output
        self.end2end_soft_model = Model(input=word_input, output=end2end_soft_output)

    # label_type is one of the two supported values here (squash exits otherwise)
    if self.label_type == 'multi-class':
        loss_type = 'categorical_crossentropy'
    else:
        loss_type = 'binary_crossentropy'

    self.end2end_opt = Adam(lr=self.lr)
    self.end2end_model.compile(loss=loss_type,
                               optimizer=self.end2end_opt,
                               metrics=['accuracy', 'categorical_accuracy'])
    if temperature is not None:
        self.end2end_soft_opt = Adam(lr=self.lr)
        self.end2end_soft_model.compile(loss=loss_type,
                                        optimizer=self.end2end_soft_opt,
                                        metrics=['accuracy', 'categorical_accuracy'])
def dense_cnn_model():
    """Build and compile the single-input dense CNN classifier.

    One shared embedding feeds three parallel Convolution1D branches
    (kernel sizes 7, 3 and 5, 256 filters each). Their global-max-pooled
    outputs are concatenated and passed through two groups of four
    ``add_layer`` calls, then BatchNorm/Dense/PReLU/Dropout and an 8-way
    softmax.

    Returns:
        The compiled ``Model`` (Adadelta optimizer, categorical
        cross-entropy loss, ``macro_f1`` metric).
    """
    my_embedding = Embedding(input_dim=MAX_NB_WORDS + 1,
                             output_dim=EMBEDDING_DIM,
                             input_length=None)

    #---------keyword 1 -------------------------
    in1 = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
    emb1 = my_embedding(in1)

    # Branches are created in the original order (7, 3, 5) so the merged
    # feature layout is unchanged. A kernel_size=2 branch that was never
    # merged into the model is no longer constructed.
    pooled_branches = []
    for kernel_size in (7, 3, 5):
        conv = Convolution1D(filters=256,
                             kernel_size=kernel_size,
                             kernel_initializer='he_uniform',
                             padding='valid',
                             activation='relu')(emb1)
        pooled_branches.append(GlobalMaxPooling1D()(conv))
    x1 = Merge(mode='concat', concat_axis=-1)(pooled_branches)

    # block1
    for _ in range(4):
        x1 = add_layer(x1, 128)
    x1 = BatchNormalization()(x1)
    x1 = PReLU()(x1)
    x1 = Dense(128)(x1)

    # block2
    for _ in range(4):
        x1 = add_layer(x1, 128)

    x = BatchNormalization()(x1)
    x = Dense(256)(x)
    x = PReLU()(x)
    x = Dropout(0.35)(x)
    y = Dense(8, activation='softmax')(x)

    model = Model(inputs=[in1], outputs=y)
    # This optimizer is Adadelta (the local used to be misleadingly named
    # `rmsprop`).
    optimizer = keras.optimizers.Adadelta(lr=1.0, rho=0.9, epsilon=1e-06)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=[macro_f1])
    # summary() prints the table itself; wrapping it in `print` was
    # Python-2-only syntax and emitted an extra "None" line.
    model.summary()
    return model
# Excerpt: fills the one-hot encoder/decoder arrays character by character (decoder targets are written at t - 1), then starts stacking causal Convolution1D layers with dilation rates 1, 2, 4 for the encoder; the excerpt is cut off inside the first decoder Convolution1D call.
for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)): for t, char in enumerate(input_text): encoder_input_data[i, t, input_token_index[char]] = 1. for t, char in enumerate(target_text): # decoder_target_data is ahead of decoder_input_data by one timestep decoder_input_data[i, t, target_token_index[char]] = 1. if t > 0: # decoder_target_data will be ahead by one timestep # and will not include the start character. decoder_target_data[i, t - 1, target_token_index[char]] = 1. # Define an input sequence and process it. encoder_inputs = Input(shape=(None, num_encoder_tokens)) # Encoder x_encoder = Convolution1D(256, kernel_size=3, activation='relu', padding='causal')(encoder_inputs) x_encoder = Convolution1D(256, kernel_size=3, activation='relu', padding='causal', dilation_rate=2)(x_encoder) x_encoder = Convolution1D(256, kernel_size=3, activation='relu', padding='causal', dilation_rate=4)(x_encoder) decoder_inputs = Input(shape=(None, num_decoder_tokens)) # Decoder x_decoder = Convolution1D(256,
def train_CNN(
        datasets,
        vocab_processors,
        max_doc_lengths,
        dataset_size,
        w2vmodel=None,
):
    """Build, train and optionally save the multi-filter-size text CNN.

    Args:
        datasets: object exposing ``abs_text_train_dataset`` and
            ``abs_text_test_dataset``, each providing
            ``make_one_shot_iterator()``.
        vocab_processors: mapping whose ``'text'`` entry has a ``vocab``
            attribute; its length sizes the embedding.
        max_doc_lengths: object exposing ``abs_text_max_length``, used as the
            input sequence length.
        dataset_size: total number of examples, used to derive the number of
            train/validation batches per epoch.
        w2vmodel: optional initial embedding weights. When ``None`` the
            embedding keeps Keras' default initialisation.

    Returns:
        None. The model is trained in place and, if ``globals.SAVE_MODEL`` is
        set, written to an ``.h5`` file.
    """
    session_conf = tf.ConfigProto(
        # determines if op can be placed on CPU when GPU not avail
        allow_soft_placement=globals.ALLOW_SOFT_PLACEMENT,
        # whether device placements should be logged, we don't have any for CPU
        log_device_placement=globals.LOG_DEVICE_PLACEMENT,
        #operation_timeout_in_ms=60000
    )
    session_conf.gpu_options.allow_growth = True
    sess = tf.Session(config=session_conf)
    # sess = tf_debug.LocalCLIDebugWrapperSession(sess)
    backend.set_session(sess)
    init_op = tf.global_variables_initializer()
    sess.run(init_op)

    with sess.as_default():

        def make_iterator(dataset, batch_num):
            """Create iterator from datasets to yield a batch at a time.

            This is used because we want to use fit_generator which doesn't
            take in a tf.Dataset but a generator. However, this might not be
            the case anymore, if we can upgrade.
            see https://github.com/keras-team/keras/issues/10110
            """
            while True:
                # A fresh one-shot iterator restarts the dataset for each
                # pass of `batch_num` batches.
                iterator = dataset.make_one_shot_iterator()
                next_val = iterator.get_next()
                for i in range(batch_num):
                    try:
                        *inputs, labels = sess.run(next_val)
                        yield inputs, labels
                    except tf.errors.OutOfRangeError:
                        if globals.DEBUG:
                            print("OutOfRangeError Exception Thrown")
                        break
                    except Exception as e:
                        # Deliberate best effort: abandon this pass and
                        # restart the iterator.
                        if globals.DEBUG:
                            print(e)
                            print("Unknown Exception Thrown")
                        break

        # We calculated the number of elements per batch that we wanted. This
        # ensures that we only get one full set of data per epoch
        train_batch_num = int(
            (dataset_size *
             (globals.TRAIN_SET_PERCENTAGE)) // globals.BATCH_SIZE) + 1
        val_batch_num = int(
            (dataset_size *
             (1 - globals.TRAIN_SET_PERCENTAGE)) // globals.BATCH_SIZE)

        itr_train = make_iterator(datasets.abs_text_train_dataset,
                                  train_batch_num)
        itr_validate = make_iterator(datasets.abs_text_test_dataset,
                                     val_batch_num)

        main_input = Input(shape=(max_doc_lengths.abs_text_max_length, ),
                           dtype="int32",
                           name="main_input")  #, tensor=input_x)
        embedding_layer = Embedding(
            input_dim=len(vocab_processors['text'].vocab),
            output_dim=globals.EMBEDDING_DIM,
            # The default w2vmodel=None used to be passed as weights=[None],
            # which cannot be loaded into the layer; fall back to the
            # layer's default initialisation instead.
            weights=None if w2vmodel is None else [w2vmodel],
            input_length=max_doc_lengths.abs_text_max_length,
            trainable=globals.EMBEDDING_TRAINABLE,
            name="embedding")(main_input)

        # NOTE(review): Keras' Dropout argument is the fraction to drop, while
        # the constant is named KEEP_PROB -- confirm which meaning the
        # configured values have.
        dropout1 = Dropout(globals.MAIN_DROPOUT_KEEP_PROB[0],
                           name="dropout1")(embedding_layer)

        # Convolutional block
        conv_blocks = []
        for idx, sz in enumerate(globals.FILTER_SIZES):
            conv_name = "conv1D-%s%s" % (idx, sz)
            conv = Convolution1D(filters=globals.NUM_FILTERS,
                                 kernel_size=sz,
                                 padding="valid",
                                 activation="relu",
                                 strides=1,
                                 name=conv_name)(dropout1)
            # Found to be worse than sliding window maxpooling
            # conv = GlobalMaxPooling1D()(conv)
            conv = MaxPooling1D(pool_size=2)(conv)
            conv = Flatten()(conv)
            conv_blocks.append(conv)

        conv_blocks_concat = Concatenate()(
            conv_blocks) if len(conv_blocks) > 1 else conv_blocks[0]

        dropout2 = Dropout(
            globals.MAIN_DROPOUT_KEEP_PROB[1])(conv_blocks_concat)
        dense = Dense(globals.HIDDEN_DIMS, activation="relu")(dropout2)
        dense = Dense(globals.HIDDEN_DIMS, activation="relu")(dense)
        dense = Dense(globals.HIDDEN_DIMS, activation="relu")(dense)
        model_output = Dense(1, activation="sigmoid")(dense)

        # stochastic gradient descent algo, currently unused
        # opt = SGD(lr=0.01)
        opt = Adam(lr=globals.LEARNING_RATE)

        model = Model(inputs=main_input, outputs=model_output)

        recall = Recall()
        precision = Precision()
        F1score = F1Score()
        model.compile(optimizer=opt,
                      loss='binary_crossentropy',
                      metrics=[
                          'accuracy',
                          F1score,
                          recall,
                          precision,
                      ])

        callbacks = []
        callbacks.append(ReduceLROnPlateau())
        # Tensorboard in this version of Keras, broken. Need to update to
        # latest version
        # callbacks.append(TensorBoard())
        timestr = time.strftime("%Y%m%d%H%M%S")
        callbacks.append(
            ModelCheckpoint(timestr +
                            "CNNweights.{epoch:02d}-{val_loss:.2f}.hdf5",
                            period=5))
        verbosity = 2
        if globals.DEBUG:
            # Debug runs replace the callbacks above with a CSV log only.
            callbacks = []
            callbacks.append(CSVLogger('training.log'))
            # callbacks.append(ProgbarLogger(count_mode='steps'))
            verbosity = 1

        print(model.summary())

        model.fit_generator(generator=itr_train,
                            validation_data=itr_validate,
                            validation_steps=val_batch_num,
                            steps_per_epoch=train_batch_num,
                            epochs=globals.NUM_EPOCHS,
                            verbose=verbosity,
                            workers=0,
                            callbacks=callbacks)

        # Saves model with trained weights on all epochs
        if globals.SAVE_MODEL:
            # Matches the last path component (everything after the final "/").
            pattern = re.compile(r"[^\/]*$")
            outxml_path = pattern.search(
                globals.XML_FILE).group(0).split(".")[0]
            outw2v_path = pattern.search(
                globals.PRETRAINED_W2V_PATH).group(0).split(".")[0]
            model.save("CNN_" + outxml_path + "_" + outw2v_path +
                       "_saved_model" + timestr + ".h5")
# Hyper-parameters: vocabulary size, padded review length, embedding width.
top_words = 10000
max_review_length = 1600
embedding_vecor_length = 300

# Load IMDB restricted to the `top_words` most frequent words and pad every
# review to `max_review_length`.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)
X_train, X_test = (
    sequence.pad_sequences(split, maxlen=max_review_length)
    for split in (X_train, X_test)
)

# Embedding -> three stacked Conv1D layers (64, 32, 16 filters) -> Flatten ->
# Dropout -> Dense(180, sigmoid) -> Dropout -> single sigmoid output.
model = Sequential([
    Embedding(top_words, embedding_vecor_length,
              input_length=max_review_length),
    Convolution1D(64, 3, padding='same'),
    Convolution1D(32, 3, padding='same'),
    Convolution1D(16, 3, padding='same'),
    Flatten(),
    Dropout(0.2),
    Dense(180, activation='sigmoid'),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
])

# Training events are logged for TensorBoard under ./logs.
tensorBoardCallback = TensorBoard(log_dir='./logs', write_graph=True)
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.fit(X_train,
          y_train,
          epochs=3,
          callbacks=[tensorBoardCallback],
          batch_size=64)
# Labels: convert to arrays, then to one-hot matrices.
y_train1, y_test1 = np.array(Y), np.array(C)
y_train, y_test = to_categorical(y_train1), to_categorical(y_test1)

# reshape input to be [samples, time steps, features]
train_rows, train_steps = trainX.shape[0], trainX.shape[1]
test_rows, test_steps = testT.shape[0], testT.shape[1]
X_train = np.reshape(trainX, (train_rows, train_steps, 1))
X_test = np.reshape(testT, (test_rows, test_steps, 1))

# Conv1D(64, width 3) over a (41, 1) input -> max pooling -> Flatten ->
# Dense(128) with dropout -> 5-way softmax.
cnn = Sequential([
    Convolution1D(64,
                  3,
                  border_mode="same",
                  activation="relu",
                  input_shape=(41, 1)),
    MaxPooling1D(pool_length=(2)),
    Flatten(),
    Dense(128, activation="relu"),
    Dropout(0.5),
    Dense(5, activation="softmax"),
])

# define optimizer and objective, compile cnn
cnn.compile(loss="categorical_crossentropy",
            optimizer="adam",
            metrics=['accuracy'])

# train
def __trainClickPredModel(self):
    """Define, serialise, compile and train the click-prediction model.

    The network is a width-1 Convolution1D over a ``(1, self.input_dim)``
    input, average pooling, global average pooling, a 128-unit hidden layer
    and a softmax over ``self.output_dim`` classes. The architecture is
    written to ``self.model_config_filepath``, the compiled model is stored
    on ``self.click_pred_model`` and training is started through
    ``self.trainClickPredModelRunTraining()``.
    """
    # https://keras.io/layers/convolutional/
    print("== define model")
    model = Sequential()

    layer_stack = [
        # 'same' pads the input so the feature map keeps the input's size
        # ('valid' would mean no padding).
        # Author's notes on initialisers (AUC | AUC with avg pool at end):
        #   lecun_uniform  0.865961 | 0.861570 (good split)
        #   glorot_uniform 0.868817 | 0.863290
        #   he_uniform     0.868218 | 0.873585
        Convolution1D(nb_filter=256,
                      filter_length=1,
                      border_mode='same',
                      activation='relu',
                      input_shape=(1, self.input_dim),
                      init='lecun_uniform'),
        # Average pooling replaced an earlier MaxPooling1D experiment.
        AveragePooling1D(pool_length=2, stride=None, border_mode='same'),
        # Global average pooling (GlobalMaxPooling1D was the alternative tried).
        GlobalAveragePooling1D(),
        # Vanilla hidden layer; dropout is applied before the ReLU.
        Dense(128, init='glorot_uniform'),
        Dropout(0.1),  # 0.1 seems good, but is it overfitting?
        Activation('relu'),
        # Softmax over the output classes.
        Dense(self.output_dim, activation='softmax', init='glorot_uniform'),
    ]
    for layer in layer_stack:
        model.add(layer)

    print(model.summary())

    # write model to file
    # NOTE(review): to_json() output is passed through json.dump again --
    # confirm the reader expects that encoding.
    with open(self.model_config_filepath,'w') as config_file:
        json.dump(model.to_json(), config_file)

    ### Compile model
    print("== Compile model")
    adam = Adam(lr=self.learning_rate)
    model.compile(optimizer=adam,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    self.click_pred_model = model

    # actually run training
    self.trainClickPredModelRunTraining()