import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Embedding

token = Tokenizer()
token.fit_on_texts([text])
print(token.word_index)
print("\n")

x = token.texts_to_sequences([text])
print(x)

word_size = len(token.word_index) + 1
x_onehot = to_categorical(x, num_classes=word_size)
print(x_onehot)

# Pad the integer sequences (not the one-hot vectors) to length 4 before embedding
padded_x = pad_sequences(x, 4)

model = Sequential()
model.add(Embedding(16, 4))
word2idx, idx2word = pickle.load(f)

VOCAB_SIZE = len(idx2word)
EMB_SIZE = 128
LSTM_HIDDEN = 128
MAX_LEN = 15                          # word sequence length
MODEL_PATH = './chatbot_trained.h5'

# Load the SentencePiece model created during data preprocessing.
SPM_MODEL = "./chatbot_model.model"
sp = spm.SentencePieceProcessor()
sp.Load(SPM_MODEL)

# Word embedding layer, shared by the encoder and the decoder.
K.clear_session()
wordEmbedding = Embedding(input_dim=VOCAB_SIZE, output_dim=EMB_SIZE)

# Encoder
# -------
encoderX = Input(batch_shape=(None, MAX_LEN))
encEMB = wordEmbedding(encoderX)
encLSTM1 = LSTM(LSTM_HIDDEN, return_sequences=True, return_state=True)
encLSTM2 = LSTM(LSTM_HIDDEN, return_state=True)
ey1, eh1, ec1 = encLSTM1(encEMB)      # LSTM layer 1
_, eh2, ec2 = encLSTM2(ey1)           # LSTM layer 2

# Decoder
# -------
# The decoder receives one word at a time as input.
decoderX = Input(batch_shape=(None, 1))
df = pd.DataFrame({'input': input, 'output': output})
print(df)

# Remove missing values (drop rows containing NaN)
df.dropna(axis=0, inplace=True)

# Build the dataset
X_train = np.array(df['input']).reshape(-1, 1)
y_train = np.array(df['output']).reshape(-1, 1)

# If word2idx had started from 0 this +1 would not be needed; the Embedding layer
# indexes from 0, so add 1 to the vocabulary size.
vocab_size = len(word2idx) + 1
X_train.shape

x_input = Input(batch_shape=(None, X_train.shape[1]))
hidden = Embedding(input_dim=vocab_size, output_dim=32)(x_input)
hidden = Flatten()(hidden)
y_output = Dense(vocab_size, activation='softmax')(hidden)

model = Model(x_input, y_output)
model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam(learning_rate=0.01))

# Model used to read out the word --> word2vec mapping (prediction-only model)
model_w = Model(x_input, hidden)
model.summary()

hist = model.fit(X_train, y_train, epochs=30,
from tensorflow.keras import preprocessing
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Embedding

'''
Sentiment prediction: the result falls into one of two classes, positive or negative.
'''
max_features = 10000
maxlen = 20

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
# embedding_layer = layers.Embedding(1000, 64)
x_train = preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = preprocessing.sequence.pad_sequences(x_test, maxlen=maxlen)

model = Sequential()
model.add(Embedding(10000, 8, input_length=maxlen))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])
model.summary()

history = model.fit(x_train, y_train, epochs=10, batch_size=32, validation_split=0.2)
print('history:', history)
def Seq2Seq(c):
    # Variables
    latent_dim = 1024

    # Inputs
    encoder_inputs = tf.keras.Input(shape=(1,), dtype=tf.string, name="e_input")

    # Passage processing
    vectorize = c.get("vectorizer")
    embed = Embedding(c.get("vocab_size") + 1, c.get("embedding_dim"), mask_zero=True)
    encoder_vectorize = vectorize(encoder_inputs)
    encoder_embed = embed(encoder_vectorize)

    # Encoder - keep the states
    encoder = LSTM(latent_dim, name="encoder_lstm", return_state=True)
    _, state_h, state_c = encoder(encoder_embed)
    encoder_states = [state_h, state_c]

    # Set up the decoder with the encoder states as its initial state
    decoder_inputs = tf.keras.Input(shape=(1,), dtype=tf.string, name="d_input")

    # Vectorize and embed
    decoder_vectorize = vectorize(decoder_inputs)
    decoder_embed = embed(decoder_vectorize)
    truncate = TruncateLayer(c.get("max_answer_len"))
    decoder_truncate = truncate(decoder_embed)

    # We set up our decoder to return full output sequences, and to return internal
    # states as well. We don't use the returned states in the training model, but we
    # will use them in inference.
    decoder_lstm = LSTM(latent_dim, name="lstm", return_sequences=True, return_state=True)
    decoder_outputs, _, _ = decoder_lstm(decoder_truncate, initial_state=encoder_states)
    decoder_dense = Dense(c.get("vocab_size"), activation="softmax")
    decoder_outputs = decoder_dense(decoder_outputs)

    # Initialize and return the model
    model = tf.keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)
    model.summary()

    # INFERENCE
    # Encoder submodel needed for inference
    encoder_inf_model = tf.keras.Model(encoder_inputs, encoder_states)

    # DECODER SUBMODEL
    # Decoder submodel inputs
    decoder_state_input_h = tf.keras.Input(shape=(latent_dim,))
    decoder_state_input_c = tf.keras.Input(shape=(latent_dim,))
    decoder_state_inputs = [decoder_state_input_h, decoder_state_input_c]

    # Decoder submodel definition and steps to output
    decoder_outputs, state_h, state_c = decoder_lstm(
        truncate(embed(vectorize(decoder_inputs))), initial_state=decoder_state_inputs)
    decoder_states = [state_h, state_c]
    decoder_outputs = decoder_dense(decoder_outputs)
    decoder_inf_model = tf.keras.Model([decoder_inputs] + decoder_state_inputs,
                                       [decoder_outputs] + decoder_states)

    # Return all models
    return model, encoder_inf_model, decoder_inf_model
training_labels = lab_encoder.transform(training_labels)

vocab_size = 1000
embedding_dim = 16
max_len = 20
oov_token = "<OOV>"

tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_token)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(training_sentences)
padded_sequences = pad_sequences(sequences, truncating='post', maxlen=max_len)

model = Sequential()
model.add(Embedding(vocab_size, embedding_dim, input_length=max_len))
model.add(GlobalAveragePooling1D())
model.add(Dense(16, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

epochs = 500
history = model.fit(padded_sequences, np.array(training_labels), epochs=epochs)

# Save the trained model
model.save("chatbot_model")
def GCN(
    loss='MSE',
    num_objects=80,
    num_relation=3,
    embed_dim=64,
    Din=128,
    H=512,
    Dout=128,
    batch_size=1,
    mask_size=16,
    num_rooms=35,
    lr=1e-4,
):
    num_edges = int(num_rooms * (num_rooms - 1) / 2)

    input_o = Input(shape=num_rooms, dtype=tf.int32, batch_size=batch_size)
    input_p = Input(shape=num_edges, dtype=tf.float32, batch_size=batch_size)
    input_t = Input(shape=(num_edges, 2), dtype=tf.int32, batch_size=batch_size)
    box_gt = Input(shape=(num_rooms, 4), dtype=tf.float32, batch_size=batch_size)
    mask_gt = Input(shape=(num_rooms, mask_size, mask_size), dtype=tf.int32, batch_size=batch_size)

    # Embedding to dense vectors
    embedding_o = Embedding(input_dim=num_objects, output_dim=embed_dim,
                            input_length=num_rooms, mask_zero=True)(input_o)
    embedding_p = Embedding(input_dim=num_relation, output_dim=embed_dim,
                            input_length=num_edges, mask_zero=True)(input_p)

    # Graph convolutions
    new_s_obj, new_p_obj = GraphTripleConvNet(
        input_dim=Din, hidden_dim=H, batch_size=batch_size)(embedding_o, embedding_p, input_t)

    # Box and mask nets to get the scene layout
    output_box = box_net(gconv_dim=Dout)(new_s_obj)
    output_mask = Mask_regression(num_chan=Dout, mask_size=mask_size)(new_s_obj)
    output_rel = rel_aux_net(gconv_out=Dout, gconv_hidden_dim=H, out_dim=num_relation,
                             batch_size=batch_size)(embedding_o, output_box, input_t)

    model = Model([input_o, input_p, input_t, box_gt, mask_gt],
                  [output_box, output_mask, output_rel])
    model.add_loss(total_loss(box_gt, mask_gt, input_p, output_box, output_mask, output_rel, loss))
    model.compile(optimizer=optimizers.Adam(learning_rate=lr))
    return model
max_len = max([len(i) for i in text])
print(f"Max length of a review: {max_len}")

# Build padded sequences using the max length
padded_text = pad_sequences(text, maxlen=max_len, padding=padding, truncating=truncating)

# Preprocess the labels so that they can be processed by the model
label = pad_sequences(np.expand_dims(np.array(label), axis=-1), maxlen=1)

# Build a model - the Embedding layer should be the first layer, followed by RNNs
model = Sequential()
model.add(Embedding(VOCAB_SIZE, EMBED_SIZE, input_length=max_len))
model.add(GRU(16))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile the model and print its summary
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())

# Train the model with an EarlyStopping callback
early_stopping = EarlyStopping('val_accuracy', patience=4)
history = model.fit(padded_text, label,
y_val = np.delete(y_val, ind, axis=0)
x_val = np.delete(x_val, ind, axis=0)

# Model building
from tensorflow.keras import backend as K

K.clear_session()

latent_dim = 300
embedding_dim = 100

# Encoder
encoder_inputs = Input(shape=(max_text_len,))

# Embedding layer
enc_emb = Embedding(x_voc, embedding_dim, trainable=True)(encoder_inputs)

# Encoder LSTM 1
encoder_lstm1 = LSTM(latent_dim, return_sequences=True, return_state=True,
                     dropout=0.4, recurrent_dropout=0.4)
encoder_output1, state_h1, state_c1 = encoder_lstm1(enc_emb)

# Encoder LSTM 2
encoder_lstm2 = LSTM(latent_dim, return_sequences=True, return_state=True,
                     dropout=0.4, recurrent_dropout=0.4)
encoder_output2, state_h2, state_c2 = encoder_lstm2(encoder_output1)

# Encoder LSTM 3
def make_embedding_layer(self, inputs=None):
    '''In bert4keras: inputs is a list of tensors; for now only token and segment
    embeddings are considered, not position.'''
    '''This version: only builds the layers without providing inputs, but the inputs
    structure is exactly the same as in bert4keras.'''
    self.layers = self.layers or {}

    # Called from build(): create all the embedding layers
    if inputs is None:
        # Embedding-Token layer: a plain Embedding layer, shared when producing the
        # MLM prediction output (so it does not need to be redefined there)
        self.layers['Embedding-Token'] = ShareEmbedding(
            input_dim=self.vocab_size,
            output_dim=self.hidden_size,
            embeddings_initializer=self.initializer,
            mask_zero=True,
            name='Embedding-Token')
        # Embedding-Segment layer: a plain Embedding layer
        self.layers['Embedding-Segment'] = Embedding(
            input_dim=2,
            output_dim=self.hidden_size,
            embeddings_initializer=self.initializer,
            mask_zero=True,
            name='Embedding-Segment')
        # Token + Segment
        self.layers['Embedding-Token-Segment'] = Add(name='Embedding-Token-Segment')
        # Embedding-Position layer, with trainable parameters
        self.layers['Embedding-Position'] = PositionEmbedding(
            input_dim=self.max_position_embeddings,
            output_dim=self.hidden_size,
            name='Embedding-Position')
        # LayerNormalization layer applied after all embeddings have been combined
        self.layers['Embedding-Norm'] = LayerNormalization(name='Embedding-Norm')
        # Followed by a Dropout
        self.layers['Embedding-Dropout'] = Dropout(
            rate=self.hidden_dropout_prob, name='Embedding-Dropout')
    # Called from call()
    else:
        token_input, segment_input = inputs[:2]
        # Generate the token embeddings
        token_x = self.layers['Embedding-Token'](token_input)
        # Generate the segment embeddings
        segment_x = self.layers['Embedding-Segment'](segment_input)
        # Add the token and segment embeddings
        x = self.layers['Embedding-Token-Segment']([token_x, segment_x])
        # Generate the position embeddings and add them to the token + segment sum;
        # everything up to this point matches bert4keras
        x = self.layers['Embedding-Position'](x)
        # Pass through the LayerNormalization layer
        x = self.layers['Embedding-Norm'](x)
        # Pass through the Dropout layer
        x_embeddings = self.layers['Embedding-Dropout'](x)
        return x_embeddings
X = tokenizer.texts_to_sequences(df['text'].values)
X = pad_sequences(X, maxlen=MAX_SEQUENCE_LENGTH)
print('Shape of data tensor:', X.shape)

Y = pd.get_dummies(df['label']).values
print('Shape of label tensor:', Y.shape)

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.10, random_state=42)
print(X_train.shape, Y_train.shape)
print(X_test.shape, Y_test.shape)

model = Sequential()
model.add(Embedding(MAX_NB_WORDS, EMBEDDING_DIM, input_length=X.shape[1]))
model.add(SpatialDropout1D(0.2))
model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(6, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())

epochs = 1  # 10
batch_size = 64
history = model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size,
def define_models(self, embedding_matrix_input, embedding_matrix_target,
                  max_length_text, max_length_summary):
    """Training Phase"""
    # Encoder
    encoder_inputs = Input(shape=(max_length_text,))
    enc_emb = Embedding(self.x_vocab_size, self.embedding_dim,
                        weights=[embedding_matrix_input],
                        input_length=max_length_text,
                        trainable=False)(encoder_inputs)
    encoder = Bidirectional(
        LSTM(self.latent_dim, return_sequences=True, return_state=True,
             dropout=0.3, recurrent_dropout=0.3))
    encoder_outputs, forward_h, forward_c, backward_h, backward_c = encoder(enc_emb)
    state_h = Concatenate()([forward_h, backward_h])
    state_c = Concatenate()([forward_c, backward_c])

    # Decoder
    decoder_inputs = Input(shape=(None,))
    dec_emb_layer = Embedding(self.y_vocab_size, self.embedding_dim,
                              weights=[embedding_matrix_target],
                              input_length=max_length_summary,
                              trainable=False)
    dec_emb = dec_emb_layer(decoder_inputs)
    decoder_lstm = LSTM(2 * self.latent_dim, return_sequences=True, return_state=True,
                        dropout=0.3, recurrent_dropout=0.3)
    decoder_outputs, _, _ = decoder_lstm(dec_emb, initial_state=[state_h, state_c])

    # Attention
    attn_layer = AttentionLayer(name='attention_layer')
    attn_out, attn_states = attn_layer([encoder_outputs, decoder_outputs])

    # Concatenate the context vectors with the decoder outputs
    decoder_concat = Concatenate()([decoder_outputs, attn_out])

    # Dense
    decoder_dense = TimeDistributed(Dense(self.y_vocab_size, activation='softmax'))
    decoder_outputs = decoder_dense(decoder_concat)

    # Model
    trainer_model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_outputs)

    """Inference Phase"""
    # Encoder
    encoder_model = Model(inputs=encoder_inputs,
                          outputs=[encoder_outputs, state_h, state_c])

    # Decoder
    decoder_state_input_h = Input(shape=(2 * self.latent_dim,))
    decoder_state_input_c = Input(shape=(2 * self.latent_dim,))
    decoder_hidden_state_input = Input(shape=(max_length_text, 2 * self.latent_dim))

    dec_emb2 = dec_emb_layer(decoder_inputs)
    decoder_outputs2, state_h2, state_c2 = decoder_lstm(
        dec_emb2, initial_state=[decoder_state_input_h, decoder_state_input_c])

    # Attention
    attn_out_inf, attn_states_inf = attn_layer([decoder_hidden_state_input, decoder_outputs2])
    decoder_inf_concat = Concatenate(axis=-1, name='concat')([decoder_outputs2, attn_out_inf])

    # Dense
    decoder_outputs2 = decoder_dense(decoder_inf_concat)
    decoder_model = Model(
        [decoder_inputs] + [decoder_hidden_state_input, decoder_state_input_h, decoder_state_input_c],
        [decoder_outputs2] + [state_h2, state_c2])

    return trainer_model, encoder_model, decoder_model
corpus.append(review)
corpus[:5]

onehot_repr = [one_hot(words, voc_size) for words in corpus]
onehot_repr[:5]

# Embedding representation
sent_len = 20
embedded_docs = pad_sequences(onehot_repr, padding='pre', maxlen=sent_len)
embedded_docs[:5]

# Creating the model
embedded_vector_features = 50
model = Sequential()
model.add(Embedding(voc_size, embedded_vector_features, input_length=sent_len))
model.add(Dropout(0.3))
model.add(LSTM(100))
model.add(Dropout(0.3))
model.add(Dense(1, activation='sigmoid'))
model.compile('adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

X = np.asarray(embedded_docs)
y = np.asarray(y)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

# Model training
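# A hedged completion of the training step implied by the "Model training" comment
# above; the epoch and batch-size values here are assumptions, not from the original.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=64)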
def create_model(log, output_folder, epochs, early_stop):
    from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
    from tensorflow.keras.layers import Input, Embedding, Dropout, Concatenate, LSTM, Dense, BatchNormalization
    from tensorflow.keras.models import Model, load_model
    from tensorflow.keras.optimizers import Nadam

    vec = vectorization(log)
    vocab_act_size = len(log.values["event"]) + 1
    vocab_role_size = len(log.values["role"]) + 1

    # Create embeddings + concatenate
    act_input = Input(shape=(vec['prefixes']['x_ac_inp'].shape[1],), name="act_input")
    role_input = Input(shape=(vec['prefixes']['x_rl_inp'].shape[1],), name="role_input")

    act_embedding = Embedding(vocab_act_size, 100,
                              input_length=vec['prefixes']['x_ac_inp'].shape[1])(act_input)
    act_dropout = Dropout(0.2)(act_embedding)
    act_e_lstm_1 = LSTM(32, return_sequences=True)(act_dropout)
    act_e_lstm_2 = LSTM(100, return_sequences=True)(act_e_lstm_1)

    role_embedding = Embedding(vocab_role_size, 100,
                               input_length=vec['prefixes']['x_rl_inp'].shape[1])(role_input)
    role_dropout = Dropout(0.2)(role_embedding)
    role_e_lstm_1 = LSTM(32, return_sequences=True)(role_dropout)
    role_e_lstm_2 = LSTM(100, return_sequences=True)(role_e_lstm_1)

    concat1 = Concatenate(axis=1)([act_e_lstm_2, role_e_lstm_2])
    normal = BatchNormalization()(concat1)

    act_modulator = Modulator(attr_idx=0, num_attrs=1, time=log.k)(normal)
    role_modulator = Modulator(attr_idx=1, num_attrs=1, time=log.k)(normal)

    # Use LSTMs to decode events
    act_d_lstm_1 = LSTM(100, return_sequences=True)(act_modulator)
    act_d_lstm_2 = LSTM(32, return_sequences=False)(act_d_lstm_1)

    role_d_lstm_1 = LSTM(100, return_sequences=True)(role_modulator)
    role_d_lstm_2 = LSTM(32, return_sequences=False)(role_d_lstm_1)

    act_output = Dense(vocab_act_size, name="act_output", activation='softmax')(act_d_lstm_2)
    role_output = Dense(vocab_role_size, name="role_output", activation="softmax")(role_d_lstm_2)

    model = Model(inputs=[act_input, role_input], outputs=[act_output, role_output])

    # Note: tf.keras' Nadam uses `learning_rate` and does not accept `schedule_decay`
    opt = Nadam(learning_rate=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, clipvalue=3)
    model.compile(loss={'act_output': 'categorical_crossentropy',
                        'role_output': 'categorical_crossentropy'},
                  optimizer=opt)
    model.summary()

    output_file_path = os.path.join(output_folder, 'model_{epoch:03d}-{val_loss:.2f}.h5')

    # Saving
    model_checkpoint = ModelCheckpoint(output_file_path, monitor='val_loss', verbose=1,
                                       save_best_only=True, save_weights_only=False, mode='auto')
    early_stopping = EarlyStopping(monitor='val_loss', patience=early_stop)

    model.fit({'act_input': vec['prefixes']['x_ac_inp'],
               'role_input': vec['prefixes']['x_rl_inp']},
              {'act_output': vec['next_evt']['y_ac_inp'],
               'role_output': vec['next_evt']['y_rl_inp']},
              validation_split=0.2,
              verbose=2,
              batch_size=5,
              callbacks=[early_stopping, model_checkpoint],
              epochs=epochs)

    return model
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Embedding, LSTM

seed = 10
np.random.seed(seed)  # set the random seed

# Load the IMDb dataset
top_words = 1000
(X_train, Y_train), (X_test, Y_test) = imdb.load_data(num_words=top_words)

# Data preprocessing
max_words = 100
X_train = sequence.pad_sequences(X_train, maxlen=max_words)
X_test = sequence.pad_sequences(X_test, maxlen=max_words)

# Define the model
model = Sequential()
model.add(Embedding(top_words, 32, input_length=max_words))
model.add(Dropout(0.25))
model.add(LSTM(32))
model.add(Dropout(0.25))
model.add(Dense(1, activation="sigmoid"))
model.summary()  # show the model summary

# Compile the model
model.compile(loss="binary_crossentropy", optimizer="rmsprop", metrics=["accuracy"])

# Train the model
history = model.fit(X_train, Y_train, validation_split=0.2, epochs=5, batch_size=128,
def __init__(self, **kwargs):
    super(Model, self).__init__(**kwargs)
    self.mode = 'train'
    self.input_ = {}

    def _emb(vocab_name, emb_name=None):
        return util.create_emb(vocab_name, emb_name)

    self.uemb = _emb('uid')
    self.demb = _emb('did')
    self.cat_emb = _emb('cat')
    self.scat_emb = _emb('sub_cat')
    self.entity_emb = _emb('entity')
    self.entity_type_emb = _emb('entity_type')
    if not FLAGS.bert_dir or not FLAGS.bert_only:
        self.word_emb = _emb('word')

    self.hour_emb = Embedding(24, FLAGS.emb_size, name='hour_emb')
    self.weekday_emb = Embedding(7, FLAGS.emb_size, name='weekday_emb')
    self.fresh_hour_emb = Embedding(300, FLAGS.emb_size, name='fresh_hour_emb')  # 7 * 24
    self.fresh_day_emb = Embedding(50, FLAGS.emb_size, name='fresh_day_emb')
    self.position_emb = Embedding(300, FLAGS.emb_size, name='position_emb')

    # self.title_lookup = mt.layers.LookupArray(FLAGS.title_lookup, name='title_lookup')
    self.doc_lookup = mt.layers.LookupArray(FLAGS.doc_lookup, name='doc_lookup')

    if _is_ok('enti'):
        self.entities_encoder = Encoders([self.entity_emb, self.entity_type_emb], None, FLAGS.pooling, name='entities_encoder')
        self.his_entities_encoder = SeqsEncoder(self.entities_encoder, None, FLAGS.seqs_pooling, name='his_entities_encoder')

    if not FLAGS.bert_dir or not FLAGS.bert_only:
        if _is_ok('^cur_title&') or _is_ok('abstract') or _is_ok('body'):
            if FLAGS.share_words_encoder:
                words_encoder = Encoder(self.word_emb, FLAGS.seqs_encoder, FLAGS.pooling, name='words_encoder')
            else:
                words_encoder = None
            if _is_ok('^cur_title&'):
                self.title_encoder = words_encoder or Encoder(self.word_emb, FLAGS.seqs_encoder, FLAGS.pooling, name='title_encoder')
                self.titles_encoder = SeqsEncoder(self.title_encoder, FLAGS.seqs_encoder, FLAGS.seqs_pooling, name='titles_encoder')
                self.titles_encoder2 = SeqsEncoder(self.title_encoder, FLAGS.seqs_encoder, FLAGS.seqs_pooling2, name='titles_encoder2')
            if _is_ok('^abstract&'):
                self.abstract_encoder = words_encoder or Encoder(self.word_emb, FLAGS.seqs_encoder, name='abstract_encoder')
                self.abstracts_encoder = SeqsEncoder(self.abstract_encoder, FLAGS.seqs_encoder, FLAGS.seqs_pooling, name='abstracts_encoder')
            if _is_ok('^body&'):
                self.body_encoder = words_encoder or Encoder(self.word_emb, None, FLAGS.pooling, name='body_encoder')
                self.bodies_encoder = SeqsEncoder(self.body_encoder, FLAGS.seqs_encoder, FLAGS.seqs_pooling, name='bodies_encoder')

    if FLAGS.bert_dir:
        # The TPU does static checks, so bert_encoder would fail with
        # "Weights for model bert_encoder have not yet been created. Weights are created
        # when the Model is first called on inputs or `build()` is called with an `input_shape`".
        # max_input_length = None if not gezi.get('tpu') else FLAGS.max_bert_input_length
        max_input_length = None
        bert_encoder = mt.models.Bert(FLAGS.bert_dir, FLAGS.emb_size,
                                      max_input_length=max_input_length,
                                      return_sequences=FLAGS.bert_pooling_seqs,
                                      name='bert_encoder')
        self.bert_title_encoder = bert_encoder
        self.bert_abstract_encoder = bert_encoder
        self.bert_body_encoder = bert_encoder
        if FLAGS.bert_pooling_seqs:
            if FLAGS.share_words_encoder:
                bert_words_encoder = Encoder(None, bert_encoder, FLAGS.pooling, name='words_encoder')
            else:
                bert_words_encoder = None
            if _is_ok('bert_title'):
                self.bert_title_encoder = bert_words_encoder or Encoder(None, bert_encoder, FLAGS.pooling, name='bert_title_encoder')
            if _is_ok('bert_abstract'):
                self.bert_abstract_encoder = bert_words_encoder or Encoder(None, bert_encoder, FLAGS.pooling, name='bert_abstract_encoder')
            if _is_ok('bert_body'):
                self.bert_body_encoder = bert_words_encoder or Encoder(None, bert_encoder, FLAGS.pooling, name='bert_body_encoder')
            if _is_ok('bert_title'):
                self.bert_titles_encoder = SeqsEncoder(self.bert_title_encoder, FLAGS.seqs_encoder, FLAGS.seqs_pooling, name='bert_titles_encoder')
            if _is_ok('bert_abstract'):
                self.bert_abstracts_encoder = SeqsEncoder(self.bert_abstract_encoder, FLAGS.seqs_encoder, FLAGS.seqs_pooling, name='bert_abstracts_encoder')
            if _is_ok('bert_body'):
                self.bert_bodies_encoder = SeqsEncoder(self.bert_body_encoder, FLAGS.seqs_encoder, FLAGS.seqs_pooling, name='bert_bodies_encoder')

    self.sum_pooling = mt.layers.SumPooling()
    self.mean_pooling = mt.layers.MeanPooling()
    self.pooling = mt.layers.Pooling(FLAGS.pooling)
    self.feat_pooling = mt.layers.Pooling(FLAGS.feat_pooling, name='feat_pooling')
    self.his_simple_pooling = mt.layers.Pooling(FLAGS.his_simple_pooling)

    # self.his_entity_pooling = mt.layers.Pooling('att', name='his_entity_pooling')
    self.his_entity_pooling = util.get_att_pooling('din', name='his_entity_pooling')
    self.his_cat_pooling = mt.layers.Pooling('att', name='his_cat_pooling')
    self.his_scat_din_pooling = util.get_att_pooling('din', name='his_scat_din_pooling')

    self.dense = Dense(1) if not FLAGS.use_multi_dropout else mt.layers.MultiDropout(1, drop_rate=0.3)
    self.batch_norm = BatchNormalization()
    self.dropout = keras.layers.Dropout(FLAGS.dropout)

    # --arch-mlp-bot="13-512-256-64-16" --arch-mlp-top="512-256-1"
    activation = FLAGS.activation
    mlp_dims = [FLAGS.emb_size * 2, FLAGS.emb_size] if not FLAGS.big_mlp else [FLAGS.emb_size * 4, FLAGS.emb_size * 2, FLAGS.emb_size]
    self.dense_mlp = mt.layers.MLP(mlp_dims, activation=activation, drop_rate=FLAGS.mlp_dropout, name='dense_mlp')
    mlp_dims = [512, 256, 64] if not FLAGS.big_mlp else [1024, 512, 256]
    self.mlp = mt.layers.MLP(mlp_dims, activation=activation, drop_rate=FLAGS.mlp_dropout, batch_norm=FLAGS.mlp_bn, name='mlp')

    self.his_encoder = util.get_encoder(FLAGS.his_encoder)
    self.his_dense = keras.layers.Dense(FLAGS.hidden_size)
    self.his_pooling = util.get_att_pooling(FLAGS.his_pooling)
    self.his_pooling2 = util.get_att_pooling(FLAGS.his_pooling2)
    self.cur_dense = keras.layers.Dense(FLAGS.hidden_size)

    if FLAGS.his_strategy.startswith('bst'):
        self.transformer = mt.layers.transformer.Encoder(
            num_layers=1, d_model=FLAGS.hidden_size, num_heads=FLAGS.num_heads,
            dff=FLAGS.hidden_size, maximum_position_encoding=None,
            activation=FLAGS.transformer_activation, rate=FLAGS.transformer_dropout)

    self.fusion = mt.layers.SemanticFusion(drop_rate=0.1)

    if FLAGS.feat_pooling == 'cin':
        from deepctr.layers.interaction import CIN
        self.cin = CIN((128, 128,), 'relu', True, 0, 1024)
        self.feat_pooling = self.cin

    if FLAGS.aux_loss_rate or FLAGS.lm_target:
        vsize = gezi.get('vocab_sizes')['vid'][0]
        # hidden_size = FLAGS.hidden_size if FLAGS.his_encoder in ['lstm', 'gru'] else int(FLAGS.hidden_size / 2)
        hidden_size = int(FLAGS.hidden_size / 2)
        self.sampled_weight = self.add_weight(
            name='sampled_weight',
            shape=(vsize, hidden_size),
            # initializer=keras.initializers.RandomUniform(minval=-10, maxval=10, seed=None),
            dtype=tf.float32,
            trainable=True)
        self.sampled_bias = self.add_weight(
            name='sampled_bias',
            shape=(vsize,),
            # initializer=keras.initializers.RandomUniform(minval=-10, maxval=10, seed=None),
            dtype=tf.float32,
            trainable=True)
        self.softmax_loss_function = mt.seq2seq.gen_sampled_softmax_loss_function(
            5, vsize, weights=self.sampled_weight, biases=self.sampled_bias,
            log_uniform_sample=True, is_predict=False, sample_seed=1234)
def keras_word_embedding_updown(training_data, testing_data, training_class, testing_class,
                                embedding_dimension=None, model_ex='simple', updown=True,
                                save_path='Models'):
    # Create a tokenizer to generate training and testing tokens for later use
    tokens = Tokenizer()
    total_text = training_data + testing_data
    tokens.fit_on_texts(total_text)

    # Get the max length of any of the strings so that they can be padded with zeros
    max_token_length = max([len(strings.split()) for strings in total_text])

    # Number of words in the vocabulary of the corpus
    vocab_size = len(tokens.word_index) + 1

    # Convert the training and testing strings to tokens
    training_data_tokens = tokens.texts_to_sequences(training_data)
    testing_data_tokens = tokens.texts_to_sequences(testing_data)

    # Pad the training and testing data with zeros so all sequences have the same length;
    # pads with zeros at the end of the data
    training_data_tokens_pad = pad_sequences(training_data_tokens, maxlen=max_token_length, padding='post')
    testing_data_tokens_pad = pad_sequences(testing_data_tokens, maxlen=max_token_length, padding='post')

    if embedding_dimension is None:
        embedding_dimension = 100

    # If this is a binary classification task, which up/down is
    if updown:
        training_class, testing_class = updown_to_1_0(training_class, testing_class)

    model = Sequential()
    model.add(Embedding(vocab_size, embedding_dimension, input_length=max_token_length))

    # The GRU uses no dropout because of a bug in TensorFlow 2.0 that prevents a GRU
    # with dropout from being saved
    if model_ex == 'simple':
        # Create a word embedding model
        model.add(GRU(units=embedding_dimension, dropout=0, recurrent_dropout=0))
        model.add(Dense(1, activation='sigmoid'))
        # Learning function for that model
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        # If this reports 100% accuracy, something broke
    elif model_ex == 'relu':
        # Create a word embedding model with an extra dense ReLU layer
        model.add(GRU(units=embedding_dimension))
        model.add(Dense(units=50, activation='relu'))
        model.add(Dense(1, activation='sigmoid'))
        # Learning function for that model
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    callbacks = ModelCheckpoint(save_path, save_best_only=True, verbose=1)
    model.fit(training_data_tokens_pad, training_class,
              batch_size=64, epochs=15, verbose=2,
              validation_data=(testing_data_tokens_pad, testing_class),
              callbacks=[callbacks])
    return model
for i in range(1, len(token_list)):
    n_gram_sequence = token_list[:i+1]
    input_sequences.append(n_gram_sequence)

# Pad sequences
max_sequence_len = max([len(x) for x in input_sequences])
input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))

# Create predictors and label
predictors, label = input_sequences[:, :-1], input_sequences[:, -1]
label = ku.to_categorical(label, num_classes=total_words)

model = Sequential()
model.add(Embedding(total_words, 100, input_length=max_sequence_len - 1))
model.add(Bidirectional(LSTM(150, return_sequences=True)))
model.add(Dropout(0.2))
model.add(LSTM(100))
# Dense units must be an integer, so use floor division
model.add(Dense(total_words // 2, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(total_words, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())

history = model.fit(predictors, label, epochs=100, verbose=1)

import matplotlib.pyplot as plt

acc = history.history['acc']
loss = history.history['loss']
# Pad and truncate the data
x_train = pad_sequences(x_train, maxlen=max_review_length, padding=pad_type, truncating=trunc_type, value=0)
x_valid = pad_sequences(x_valid, maxlen=max_review_length, padding=pad_type, truncating=trunc_type, value=0)

# Model specification
model = Sequential()
model.add(Embedding(n_unique_words, n_dim, input_length=max_review_length))
model.add(SpatialDropout1D(drop_embed))
model.add(Conv1D(n_conv, k_conv, activation='relu'))
model.add(GlobalMaxPooling1D())
model.add(Dense(n_dense, activation='relu'))
model.add(Dropout(dropout))
# The binary output layer needs a sigmoid (not relu) to match binary_crossentropy
model.add(Dense(1, activation='sigmoid'))

# Model summary
model.summary()

# Model compilation
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
y_tr = y_tokenizer.texts_to_sequences(y_tr)
y_val = y_tokenizer.texts_to_sequences(y_val)

# Pad with zeros up to the maximum length
y_tr = pad_sequences(y_tr, maxlen=max_len_summary, padding='post')
y_val = pad_sequences(y_val, maxlen=max_len_summary, padding='post')

y_voc_size = len(y_tokenizer.word_index) + 1

from keras import backend as K
K.clear_session()

latent_dim = 500

# Encoder
encoder_inputs = Input(shape=(max_len_text,))
enc_emb = Embedding(x_voc_size, latent_dim, trainable=True)(encoder_inputs)

# LSTM 1
encoder_lstm1 = LSTM(latent_dim, return_sequences=True, return_state=True)
encoder_output1, state_h1, state_c1 = encoder_lstm1(enc_emb)

# LSTM 2
encoder_lstm2 = LSTM(latent_dim, return_sequences=True, return_state=True)
encoder_output2, state_h2, state_c2 = encoder_lstm2(encoder_output1)

# LSTM 3
encoder_lstm3 = LSTM(latent_dim, return_state=True, return_sequences=True)
encoder_outputs, state_h, state_c = encoder_lstm3(encoder_output2)

# Set up the decoder.
decoder_inputs = Input(shape=(None,))
def __init__(self, user_num: int, item_num: int, hidden_dim: int) -> None:
    super(MF, self).__init__()
    self.user_vector = Embedding(input_dim=user_num, output_dim=hidden_dim)
    self.item_vector = Embedding(input_dim=item_num, output_dim=hidden_dim)
    self.user_bias = Embedding(input_dim=user_num, output_dim=1)
    self.item_bias = Embedding(input_dim=item_num, output_dim=1)
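# A minimal call() sketch for this matrix-factorization model (an assumption, not part
# of the original snippet): score = user_vector . item_vector + user_bias + item_bias.
# It assumes `inputs` is an integer tensor of shape (batch, 2) holding (user_id, item_id)
# pairs and that tensorflow is imported as tf.
def call(self, inputs):
    user_id, item_id = inputs[:, 0], inputs[:, 1]
    u = self.user_vector(user_id)                       # (batch, hidden_dim)
    v = self.item_vector(item_id)                       # (batch, hidden_dim)
    dot = tf.reduce_sum(u * v, axis=1, keepdims=True)   # (batch, 1)
    bias = self.user_bias(user_id) + self.item_bias(item_id)
    return tf.squeeze(dot + bias, axis=1)               # (batch,)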
import sys
import os
import pickle

from objects_for_training import *

# Embedding matrix with pretrained GloVe representations
with open('./embedding_matrix.p', 'rb') as file:
    embedding_matrix = pickle.load(file)

pretrained_embedding_layer = Embedding(input_dim=input_dim,
                                       output_dim=embeddings_dim,
                                       weights=[embedding_matrix],
                                       input_length=dict_to_export['max_len'],
                                       embeddings_initializer=None,
                                       trainable=True)

model = Sequential()
model.add(pretrained_embedding_layer)
model.add(Conv1D(filters=256, kernel_size=3, padding='same', activation='tanh'))
model.add(MaxPooling1D(3))
model.add(Conv1D(filters=256, kernel_size=3, padding='same', activation='tanh'))
model.add(MaxPooling1D(3))
model.add(Conv1D(filters=128, kernel_size=2, padding='same', activation='relu'))
model.add(MaxPooling1D(2))
model.add(Flatten())
def get_gender_model(DATA):
    feed_forward_size = 2048
    max_seq_len = 100
    model_dim = 128 * 6

    input_creative_id = Input(shape=(max_seq_len,), name='creative_id')
    x1 = Embedding(input_dim=NUM_creative_id + 1,
                   output_dim=128,
                   weights=[DATA['creative_id_emb']],
                   trainable=args.not_train_embedding,
                   # trainable=False,
                   input_length=100,
                   mask_zero=True)(input_creative_id)
    # encodings = PositionEncoding(model_dim)(x1)
    # encodings = Add()([embeddings, encodings])

    input_ad_id = Input(shape=(max_seq_len,), name='ad_id')
    x2 = Embedding(input_dim=NUM_ad_id + 1,
                   output_dim=128,
                   weights=[DATA['ad_id_emb']],
                   trainable=args.not_train_embedding,
                   # trainable=False,
                   input_length=100,
                   mask_zero=True)(input_ad_id)

    input_product_id = Input(shape=(max_seq_len,), name='product_id')
    x3 = Embedding(input_dim=NUM_product_id + 1,
                   output_dim=128,
                   weights=[DATA['product_id_emb']],
                   trainable=args.not_train_embedding,
                   # trainable=False,
                   input_length=100,
                   mask_zero=True)(input_product_id)

    input_advertiser_id = Input(shape=(max_seq_len,), name='advertiser_id')
    x4 = Embedding(input_dim=NUM_advertiser_id + 1,
                   output_dim=128,
                   weights=[DATA['advertiser_id_emb']],
                   trainable=args.not_train_embedding,
                   # trainable=False,
                   input_length=100,
                   mask_zero=True)(input_advertiser_id)

    input_industry = Input(shape=(max_seq_len,), name='industry')
    x5 = Embedding(input_dim=NUM_industry + 1,
                   output_dim=128,
                   weights=[DATA['industry_emb']],
                   trainable=args.not_train_embedding,
                   # trainable=False,
                   input_length=100,
                   mask_zero=True)(input_industry)

    input_product_category = Input(shape=(max_seq_len,), name='product_category')
    x6 = Embedding(input_dim=NUM_product_category + 1,
                   output_dim=128,
                   weights=[DATA['product_category_emb']],
                   trainable=args.not_train_embedding,
                   # trainable=False,
                   input_length=100,
                   mask_zero=True)(input_product_category)

    # (bs, 100, 128*6)
    encodings = layers.Concatenate(axis=2)([x1, x2, x3, x4, x5, x6])
    # (bs, 100)
    masks = tf.equal(input_creative_id, 0)

    # (bs, 100, 128*6)
    attention_out = MultiHeadAttention(8, 96)([encodings, encodings, encodings, masks])

    # Add & Norm
    attention_out += encodings
    attention_out = LayerNormalization()(attention_out)

    # Feed-Forward
    ff = PositionWiseFeedForward(model_dim, feed_forward_size)
    ff_out = ff(attention_out)

    # Add & Norm
    # ff_out is (bs, 100, 128) while attention_out is (bs, 100, 256)
    ff_out += attention_out
    encodings = LayerNormalization()(ff_out)
    encodings = GlobalMaxPooling1D()(encodings)
    encodings = Dropout(0.2)(encodings)

    output_gender = Dense(2, activation='softmax', name='gender')(encodings)
    # output_age = Dense(10, activation='softmax', name='age')(encodings)

    model = Model(
        inputs=[input_creative_id, input_ad_id, input_product_id,
                input_advertiser_id, input_industry, input_product_category],
        outputs=[output_gender])

    model.compile(
        optimizer=optimizers.Adam(2.5e-4),
        loss={
            'gender': losses.CategoricalCrossentropy(from_logits=False),
            # 'age': losses.CategoricalCrossentropy(from_logits=False)
        },
        # loss_weights=[0.4, 0.6],
        metrics=['accuracy'])
    return model
def __init__(self, K, conv_dim):
    super(Encoder, self).__init__()
    self.embedding = Embedding(symbol_length, embedding_dim)
    self.pre_net = pre_net()
    self.cbhg = CBHG(K, conv_dim)
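# A minimal call() sketch for this Tacotron-style encoder (an assumption, not part of
# the original snippet); it presumes pre_net and CBHG simply take the previous layer's
# output: character ids -> embedding -> pre-net bottleneck -> CBHG representations.
def call(self, inputs):
    x = self.embedding(inputs)   # (batch, time, embedding_dim)
    x = self.pre_net(x)          # pre-net bottleneck
    return self.cbhg(x)          # encoder representations for the attention decoder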
    stripped_html = tf.strings.regex_replace(lowercase, '<br />', ' ')
    return tf.strings.regex_replace(stripped_html, '[%s]' % re.escape(string.punctuation), '')

vectorize_layer = TextVectorization(standardize=custom_standardization,
                                    max_tokens=vocab_size,
                                    output_mode='int',
                                    output_sequence_length=sequence_lenght)

text_ds = train_ds.map(lambda x, y: x)
vectorize_layer.adapt(text_ds)

model = Sequential([
    vectorize_layer,
    Embedding(vocab_size, Embedding_dim, name="embedding"),
    GlobalAveragePooling1D(),
    Dense(16, activation='relu'),
    Dense(1)
])

tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir="logs")

model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(train_ds, validation_data=val_ds, epochs=15, callbacks=[tensorboard_callback])
encoder_input = pad_sequences(encoder_input, maxlen=MAX_ENCODER_SIZE, padding='post', value=0)
decoder_input = pad_sequences(decoder_input, maxlen=MAX_DECODER_SIZE, padding="post", value=0)
decoder_output = pad_sequences(decoder_output, maxlen=MAX_DECODER_SIZE, padding="post", value=0)

print("encoder input shape", encoder_input.shape,
      "decoder input shape", decoder_input.shape)

# Encoder part
# Pass all input through the embedding layer to get embeddings, then pass the
# embeddings to a Bi-LSTM to get the full sequence of hidden states (h1...hTx)
LATENT_DIM_EN = 50   # M1
LATENT_DIM_DE = 60   # M2

encoder_inp = Input(shape=(MAX_ENCODER_SIZE,))                        # (_, Tx)
encoder_embedding = Embedding(ENCODER_VOCAB_SIZE, ENCODER_EMBEDDING_DIM,
                              weights=[embedding_matrix], trainable=False)
embeddings_en = encoder_embedding(encoder_inp)                        # (_, Tx, ENCODER_EMBEDDING_DIM)
encoder_bilstm = Bidirectional(
    LSTM(LATENT_DIM_EN, return_sequences=True, dropout=0.1, recurrent_dropout=0.1))
hidden_states = encoder_bilstm(embeddings_en)                         # (_, Tx, 2*M1)

# Attention part
# Repeat s(t-1) using a RepeatVector layer,
# concatenate s(t-1) with each hidden state h_t,
# pass the result through a neural network with a single output neuron,
# apply softmax over the time axis (otherwise all the alphas would be one),
# multiply each alpha with its hidden state to get weighted hidden states,
# and sum all weighted hidden states - this is the context vector.
# The last two steps can be achieved with a dot product over axis=1.
def softmax_over_time(x):
    # Softmax on the time axis instead of axis=-1
    e = K.exp(x - K.max(x, axis=1, keepdims=True))
# print(tweet[0])
print(encoded_docs[0])
# print(padded_sequence[0])

# Build the model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.layers import SpatialDropout1D
from tensorflow.keras.layers import Embedding
import tensorflow as tf
import datetime

embedding_vector_length = 32
model = Sequential()
model.add(Embedding(vocab_size, embedding_vector_length, input_length=200))
model.add(SpatialDropout1D(0.25))
model.add(LSTM(50, dropout=0.5, recurrent_dropout=0.5))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())

log_dir = "./logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

history = model.fit(padded_sequence, sentiment_label[0],
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense
from tensorflow.keras.utils import plot_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.losses import sparse_categorical_crossentropy
import pickle
import numpy as np
import os
from dataset import ptb
import math

# Hyperparameter settings
batch_size = 20
wordvec_size = 100
hidden_size = 100  # number of elements in the RNN hidden-state vector
time_size = 35     # number of time steps the RNN is unrolled for
lr = 20.0
max_epoch = 4
max_grad = 0.25

input = Input(batch_shape=(batch_size, None))
output = Embedding(vocab_size, wordvec_size)(input)
output = LSTM(hidden_size, return_sequences=True, stateful=True)(output)
output = Dense(vocab_size)(output)
model = Model(input, output)
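# The snippet imports SGD and sparse_categorical_crossentropy but never compiles the
# model; a minimal sketch of the likely next step (an assumption, not from the original).
# The Dense layer above has no softmax, so the loss must be computed from logits.
model.compile(optimizer=SGD(learning_rate=lr),
              loss=lambda y_true, y_pred: sparse_categorical_crossentropy(y_true, y_pred, from_logits=True))
model.summary()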
"""#Compile LSTM""" from tensorflow.keras.models import Sequential from tensorflow.keras.layers import LSTM, Embedding, TimeDistributed, Dense, RepeatVector,\ Activation, Flatten, Reshape, concatenate, Dropout, BatchNormalization from tensorflow.keras.optimizers import Adam, RMSprop from tensorflow.keras import Input, layers from tensorflow.keras import optimizers from tensorflow.keras.layers import add inputs1 = Input(shape=(OUTPUT_DIM,)) fe1 = Dropout(0.5)(inputs1) fe2 = Dense(256, activation='relu')(fe1) inputs2 = Input(shape=(max_length,)) se1 = Embedding(vocab_size, Glove, mask_zero=True)(inputs2) se2 = Dropout(0.5)(se1) se3 = LSTM(256)(se2) decoder1 = add([fe2,se3]) decoder2 = Dense(256, activation='relu')(decoder1) outputs = Dense(vocab_size, activation='softmax')(decoder2) caption_model = Model(inputs=[inputs1,inputs2], outputs=outputs) caption_model.layers[2].set_weights([embedding_matrix]) caption_model.layers[2].trainable = False caption_model.compile(loss='categorical_crossentropy', optimizer='adam') """#Create Callback""" import tensorflow as tf
input_dim = min(tokenizer.num_words, len(tokenizer.word_index) + 1)
num_classes = len(data.label.unique())

embedding_dim = 750
input_length = 150
lstm_units = 130
lstm_dropout = 0.1
recurrent_dropout = 0.1
spatial_dropout = 0.2
filters = 64
kernel_size = 3

print(num_classes)

input_layer = Input(shape=(input_length,))
output_layer = Embedding(input_dim=input_dim,
                         output_dim=embedding_dim,
                         input_shape=(input_length,))(input_layer)
output_layer = SpatialDropout1D(spatial_dropout)(output_layer)
output_layer = Bidirectional(
    LSTM(lstm_units, return_sequences=True,
         dropout=lstm_dropout, recurrent_dropout=recurrent_dropout))(output_layer)
output_layer = Conv1D(filters, kernel_size=kernel_size, padding='valid',
                      kernel_initializer='glorot_uniform')(output_layer)
avg_pool = GlobalAveragePooling1D()(output_layer)