embed_index_input = Input(shape=(step_length,))
embedding = Embedding(emb_vocab + 2, emb_length, weights=[word_embedding],
                      mask_zero=True, input_length=step_length)(embed_index_input)
hash_index_input = Input(shape=(step_length,))
encoder_embedding = Embedding(hash_vocab + 2, hash_length, weights=[hash_embedding],
                              mask_zero=True, input_length=step_length)(hash_index_input)
pos_input = Input(shape=(step_length, pos_length))
# Keras 2: concatenate() replaces the old merge(..., mode='concat')
senna_hash_pos_merge = concatenate([embedding, encoder_embedding, pos_input])
input_mask = Masking(mask_value=0)(senna_hash_pos_merge)
dp_1 = Dropout(0.5)(input_mask)
hidden_1 = Bidirectional(LSTM(128, return_sequences=True))(dp_1)
hidden_2 = Bidirectional(LSTM(64, return_sequences=True))(hidden_1)
dp_2 = Dropout(0.5)(hidden_2)
output = TimeDistributed(Dense(output_length, activation='softmax'))(dp_2)

model = Model(inputs=[embed_index_input, hash_index_input, pos_input], outputs=output)
sgd = SGD(lr=0.05, momentum=0.9, decay=1e-6, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
print(model.summary())

number_of_train_batches = int(math.ceil(float(train_samples) / batch_size))
number_of_dev_batches = int(math.ceil(float(dev_samples) / batch_size))
def build_doc_scorer(self, r_query_idf, permute_idxs): p = self.p ng_fsizes = self.NGRAM_NFILTER maxpool_poses = self._cascade_poses() filter_sizes = list() added_fs = set() for ng in sorted(ng_fsizes): # n-gram in input for n_x, n_y in ng_fsizes[ng]: dim_name = self._get_dim_name(n_x, n_y) if dim_name not in added_fs: filter_sizes.append((n_x, n_y)) added_fs.add(dim_name) re_input, cov_sim_layers, pool_sdim_layer, pool_sdim_layer_context, pool_filter_layer, ex_filter_layer, re_lq_ds = \ self._cov_dsim_layers(p['simdim'], p['maxqlen'], filter_sizes, p['nfilter'], top_k=p['kmaxpool'], poses=maxpool_poses, selecter=p['distill']) query_idf = Reshape( (p['maxqlen'], 1))(Activation('softmax', name='softmax_q_idf')(Flatten()(r_query_idf))) if p['combine'] < 0: raise RuntimeError( "combine should be 0 (LSTM) or the number of feedforward dimensions" ) elif p['combine'] == 0: doc_score_layer = LSTM(1, dropout=0.0, recurrent_regularizer=None, recurrent_dropout=0.0, unit_forget_bias=True, \ name="lstm_merge_score_idf", recurrent_activation="hard_sigmoid", bias_regularizer=None, \ activation="tanh", recurrent_initializer="orthogonal", kernel_regularizer=None, kernel_initializer="glorot_uniform") else: if not p['td']: dout = Dense(1, name='dense_output') d1 = Dense(p['combine'], activation='relu', name='dense_1') d2 = Dense(p['combine'], activation='relu', name='dense_2') doc_score_layer = lambda x: dout(d1(d2(x))) else: extra_features_layer = Dense(1, name='q_scores_dense_output', use_bias=True) dout = Dense(1, name='dense_output') d1 = Dense(p['combine'], activation='relu', name='dense_1') d2 = TimeDistributed( Dense(p['combine'], activation='relu', name='dense_2'), input_shape=(p['maxqlen'], p['kmaxpool'] * p['winlen'] + 1)) doc_score_layer = lambda x: dout(d1(d2((x)))) def _permute_scores(inputs): scores, idxs = inputs return tf.gather_nd(scores, backend.cast(idxs, 'int32')) self.vis_out = None self.visout_count = 0 def _scorer(doc_inputs, dataid): self.visout_count += 1 self.vis_out = {} doc_qts_scores = [query_idf] for ng in sorted(ng_fsizes): if p['distill'] == 'firstk': input_ng = max(ng_fsizes) else: input_ng = ng for n_x, n_y in ng_fsizes[ng]: dim_name = self._get_dim_name(n_x, n_y) if n_x == 1 and n_y == 1: doc_cov = doc_inputs[input_ng] re_doc_cov = doc_cov else: doc_cov = cov_sim_layers[dim_name](re_input( doc_inputs[input_ng])) re_doc_cov = re_lq_ds[dim_name]( pool_filter_layer[dim_name](Permute( (1, 3, 2))(doc_cov))) self.vis_out['conv%s' % ng] = doc_cov if p['context']: ng_signal = pool_sdim_layer_context[dim_name]( [re_doc_cov, doc_inputs['context']]) else: ng_signal = pool_sdim_layer[dim_name](re_doc_cov) doc_qts_scores.append(ng_signal) if len(doc_qts_scores) == 1: doc_qts_score = doc_qts_scores[0] else: doc_qts_score = Concatenate(axis=2)(doc_qts_scores) if permute_idxs is not None: doc_qts_score = Lambda(_permute_scores)( [doc_qts_score, permute_idxs]) if not p['td']: # Original PACRR architecture doc_qts_score = Flatten()(doc_qts_score) if p['use_bm25']: doc_qts_score = Concatenate(axis=1)( [doc_qts_score, doc_inputs['bm25_score']]) if p['use_overlap_features']: doc_qts_score = Concatenate(axis=1)( [doc_qts_score, doc_inputs['doc_overlap_features']]) doc_score = doc_score_layer(doc_qts_score) else: # PACRR-DRMM architecture doc_score = Flatten()(doc_score_layer(doc_qts_score)) if p['use_bm25']: doc_score = Concatenate(axis=1)( [doc_score, doc_inputs['bm25_score']]) if p['use_overlap_features']: doc_score = Concatenate(axis=1)( [doc_score, doc_inputs['doc_overlap_features']]) 
doc_score = extra_features_layer(doc_score) return doc_score return _scorer
model.add(
    LSTM(units=512, return_sequences=True, recurrent_dropout=0.2, dropout=0.2,
         input_shape=(max_len, embedding_dim)))
model.add(
    LSTM(units=512, return_sequences=True, recurrent_dropout=0.2, dropout=0.2,
         input_shape=(max_len, embedding_dim)))
model.add(TimeDistributed(Dense(n_tags, activation='softmax')))
model.compile(optimizer="Nadam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
model.summary()

history = model.fit_generator(train_gen, epochs=4, verbose=1, validation_data=validation_gen)
pred = model.predict_generator(test_gen, verbose=1)

from seqeval.metrics import precision_score, recall_score, f1_score, classification_report

idx2tag = {i: w for w, i in dict_tag.items()}


def pred2label(pred):
def run(args): if path.exists("./fit/logs/tumn.log"): os.remove("./fit/logs/tumn.log") file_formatter = Formatter( '[%(levelname)s|%(filename)s:%(lineno)s] %(asctime)s > %(message)s') file_handler = FileHandler('./fit/logs/tumn.log') file_handler.setFormatter(file_formatter) # stream_handler = ChalkHandler() stream_handler = StreamHandler() logger = logging.getLogger("Tumn") logger.setLevel(logging.DEBUG) logger.addHandler(file_handler) logger.addHandler(stream_handler) dataset_name = args['dataset_name'] epoch = args['epoch'] seq_chunk = args['seq_chunk'] batch_size = args['batch_size'] word2vec_size = args['word2vec_size'] verbosity = args['verbosity'] tensorboard = args['tensorboard'] # Creating tumn model logger.info("[Fit] Generating model...") model = None if args['load']: model = load_model(args['load']) logger.info("[Fit] Loaded model from %s." % args['load']) else: model = Sequential([ Bidirectional(LSTM(5, activation='relu', dropout=0.2, return_sequences=True), input_shape=(None, word2vec_size)), TimeDistributed(Dense(20, activation='relu')), TimeDistributed(Dense(1, activation='sigmoid')), Reshape((-1, )) ]) model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['categorical_accuracy']) model.summary() # Reading parsed comments logger.info("[Fit] Reading parsed dataset from %s ..." % dataset_name) train_set = process_data(args, logger, dataset_name, "train") train_zipped = [] test_set = [[], []] test_zipped = [] test_from_train = True if path.exists("./fit/dataset/%s/%s" % (dataset_name, 'test')): test_set = process_data(args, logger, dataset_name, "test") test_zipped = [] test_from_train = False else: logger.info( "[Fit] No validation set found. It will split train set into train set and validation set." ) logger.info("[Fit] Done reading %d train set & %d test sets!" % (len(train_set[0]), len(test_set[0]))) # Creating Word embedding model if not path.exists("./fit/models/word2vec.txt"): logger.info("[Fit] Creating word2vec model...") w_model = Word2Vec(train_set[0], min_count=1, size=word2vec_size, iter=10, sg=0) w_model.save("./fit/models/word2vec.txt") else: logger.info("[Fit] Reading from saved word2vec model...") w_model = Word2Vec.load("./fit/models/word2vec.txt") train_set[0] = bind_word(train_set[0], w_model) train_zipped = list(zip(*train_set)) # Zipping Models if test_from_train: train_zipped, test_zipped = split_train_set(train_zipped) else: test_set[0] = bind_word(test_set[0], w_model) test_zipped = list(zip(*test_set)) train_zipped = sorted(train_zipped, key=lambda zip: len(zip[0])) test_zipped = sorted(test_zipped, key=lambda zip: len(zip[0])) # Preprocess input, outputs logger.info("[Fit] Preprocessing train dataset...") train_generator = TumnSequence(train_zipped, seq_chunk, batch_size) logger.info("[Fit] Preprocessing test dataset...") test_generator = TumnSequence(test_zipped, seq_chunk, batch_size) logger.info("[Fit] Done generating %d train set & %d test sets!" % (len(train_zipped), len(test_zipped))) # Fit the model logger.info("[Fit] Fitting the model...") model_path = \ "./fit/models/%s (Date %s" % (dataset_name, datetime.datetime.now().strftime("%m-%d %Hh %Mm ")) + \ ", Epoch {epoch:02d}, Acc {val_categorical_accuracy:.3f}, Loss {val_loss:.3f}).hdf5" callbacks = [ModelCheckpoint(filepath=model_path)] if tensorboard: callbacks.append(TensorBoard(log_dir=tensorboard)) model.fit_generator(generator=train_generator, validation_data=test_generator, epochs=epoch, verbose=verbosity, callbacks=callbacks, shuffle=True)
def cinq_charEmb_end_model(args): latent_dim = args.latent_dim '''Inputs P and Q''' P_Input = Input(shape=(args.c_max_len, ), name='P') Q_Input = Input(shape=(args.q_max_len, ), name='Q') char_embedding = Embedding(args.char_size, args.char_emb_dim) '''Inputs context vector''' context_vector = Input(shape=(args.c_max_len, args.context_vector_level + args.punctuation_level)) answer_start = Input(shape=(args.c_max_len, 1)) '''char embedding interact''' P = char_embedding(P_Input) Q = char_embedding(Q_Input) encoder = Bidirectional(GRU(units=args.char_emb_dim, return_sequences=True)) passage_encoding = P passage_encoding = encoder(passage_encoding) passage_encoding = TimeDistributed( Dense(args.char_emb_dim, use_bias=False, trainable=True, weights=np.concatenate( (np.eye(args.char_emb_dim), np.eye(args.char_emb_dim)), axis=1)))(passage_encoding) question_encoding = Q question_encoding = encoder(question_encoding) question_encoding = TimeDistributed( Dense(args.char_emb_dim, use_bias=False, trainable=True, weights=np.concatenate( (np.eye(args.char_emb_dim), np.eye(args.char_emb_dim)), axis=1)))(question_encoding) '''Attention over question''' # compute the importance of each step question_attention_vector = TimeDistributed(Dense(1))(question_encoding) question_attention_vector = Lambda(lambda q: keras.activations.softmax( q, axis=1))(question_attention_vector) # apply the attention question_attention_vector = Lambda(lambda q: q[0] * q[1])( [question_encoding, question_attention_vector]) question_attention_vector = Lambda(lambda q: K.sum(q, axis=1))( question_attention_vector) question_attention_vector = RepeatVector( args.c_max_len)(question_attention_vector) # Answer start prediction answer_end_charEmb = Lambda( lambda arg: concatenate([arg[0], arg[1], arg[2], arg[3], arg[4]]))([ passage_encoding, question_attention_vector, answer_start, multiply([passage_encoding, question_attention_vector]), multiply([passage_encoding, answer_start]) ]) answer_end_charEmb = TimeDistributed( Dense(args.char_emb_dim, activation='relu'))(answer_end_charEmb) answer_end_charEmb = TimeDistributed(Dense(latent_dim))(answer_end_charEmb) # cinq method concat_start_context = Concatenate(2)([answer_start, context_vector]) gru_start_context = Bidirectional( GRU(args.hidden_size, return_sequences=True))(concat_start_context) answer_end_cinq = TimeDistributed(Dense( latent_dim, activation='relu'))(gru_start_context) # merge two method answer_end = Multiply()([answer_end_charEmb, answer_end_cinq]) answer_end = Lambda(lambda a: K.sum(a, axis=2))(answer_end) # answer_start = Flatten()(answer_start) output_end = Activation(K.softmax)(answer_end) # define model in/out and compile outputs = [output_end] inputs = [context_vector, P_Input, Q_Input, answer_start] model = Model(inputs, outputs) model.compile(optimizer=args.optimizer, loss=args.loss, metrics=['acc']) return model
def train_lstm(lstm_units, amount_filters, filters): classifier = Sequential() classifier.add(TimeDistributed(Conv2D(amount_filters, filters[0], input_shape=(50, 50, 1), activation='relu'), input_shape=(max_seq_len, 50, 50, 1))) classifier.add(TimeDistributed(MaxPooling2D(pool_size=(2, 2)))) if len(filters) > 1: classifier.add(TimeDistributed(Conv2D(amount_filters, filters[1], activation='relu'))) classifier.add(TimeDistributed(MaxPooling2D(pool_size=(2, 2)))) classifier.add(TimeDistributed(Flatten())) classifier.add(LSTM(units=lstm_units[0], activation='tanh', return_sequences=True if len(lstm_units) > 1 else False, input_shape=(max_seq_len, 25 * 25))) if len(lstm_units) > 1: classifier.add(LSTM(units=lstm_units[1], activation='tanh', return_sequences=True if len(lstm_units) > 2 else False)) if len(lstm_units) > 2: classifier.add(LSTM(units=lstm_units[2], activation='tanh')) classifier.add(Dense(units=10, activation='softmax')) classifier.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', metrics.categorical_accuracy]) classifier.summary() max_weights = MaxWeights() # Fit the classifier classifier.fit_generator(seqIt , callbacks=[max_weights] , steps_per_epoch=190 , epochs=15 , validation_data=testSeqIt , validation_steps=testSeqIt.samples/64) classifier.set_weights(max_weights.weights) classifier.save('model_lstm{}_cnn{}.h5'.format(lstm_units, filters)) classifier.save_weights('model_weights_lstm{}_cnn{}.h5'.format(lstm_units, filters)) conf_mat = np.zeros((10, 10)) for idx in range(900): spl = testSeqIt._get_batches_of_transformed_samples([idx]) pred = classifier.predict(spl[0]) cls = np.argmax(spl[1]) classifier.reset_states() k = np.argmax(pred) conf_mat[cls, k] += 1 out_str = 'lstm units {}\n' + \ 'best val categorical accuracy {}\n' + \ 'confusion mat for best epoch \n{}\n' + \ 'all accuracy per epoch \n{}\n\n' print(out_str.format(lstm_units, max_weights.max_acc, conf_mat, max_weights.acc_hist)) print(out_str.format(lstm_units, max_weights.max_acc, conf_mat, max_weights.acc_hist), file=open('lstm64.txt', 'w'))
def create_model(self): tdat_input = Input(shape=(self.tdatlen, )) com_input = Input(shape=(self.comlen, )) smlnode_input = Input(shape=(self.smllen, )) smledge_input = Input(shape=(self.smllen, self.smllen)) tdel = Embedding(output_dim=self.embdims, input_dim=self.tdatvocabsize, mask_zero=False) tde = tdel(tdat_input) #se = Embedding(output_dim=self.smldims, input_dim=self.smlvocabsize, mask_zero=False)(smlnode_input) se = tdel(smlnode_input) tenc = CuDNNGRU(self.recdims, return_state=True, return_sequences=True) tencout, tstate_h = tenc(tde) de = Embedding(output_dim=self.embdims, input_dim=self.comvocabsize, mask_zero=False)(com_input) dec = CuDNNGRU(self.recdims, return_sequences=True) decout = dec(de, initial_state=tstate_h) tattn = dot([decout, tencout], axes=[2, 2]) tattn = Activation('softmax')(tattn) tcontext = dot([tattn, tencout], axes=[2, 1]) wrknodes = se for k in range(self.config['asthops']): astwork = OurCustomGraphLayer()([wrknodes, smledge_input]) astwork = concatenate([ astwork, wrknodes ]) # combine the new node vectors with the previous iteration astwork = Dense(self.embdims)( astwork ) # use a dense layer to squash back to proper dimension wrknodes = astwork #astwork = CuDNNGRU(self.recdims, return_sequences=True)(astwork, initial_state=tstate_h) # attend decoder words to nodes in ast aattn = dot([decout, astwork], axes=[2, 2]) aattn = Activation('softmax')(aattn) acontext = dot([aattn, astwork], axes=[2, 1]) context = concatenate([tcontext, decout, acontext]) out = TimeDistributed(Dense(self.tdddims, activation="relu"))(context) out = Flatten()(out) out1 = Dense(self.comvocabsize, activation="softmax")(out) model = Model( inputs=[tdat_input, com_input, smlnode_input, smledge_input], outputs=out1) if self.config['multigpu']: model = keras.utils.multi_gpu_model(model, gpus=2) model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.001, clipnorm=20.), metrics=['accuracy']) return self.config, model
# define LSTM
x1 = Input(shape=(t_len, x_len), dtype='float32')
x2 = Input(shape=(t_len, ), dtype='int32')
x = Embedding(input_dim=len(show_to_index), output_dim=SHOW_DIMENSION,
              embeddings_initializer='uniform', input_length=t_len)(x2)
y = concatenate([x, x1], axis=2)
y = LSTM(10, input_shape=(t_len, SHOW_DIMENSION + x_len), return_sequences=True)(y)
y = Dropout(0.6)(y)
# y = LSTM(10, return_sequences=True)(y)
# y = Dropout(0.6)(y)
y = TimeDistributed(Dense(1, activation='sigmoid'))(y)
y = Reshape((t_len, ))(y)

model = Model(inputs=[x1, x2], outputs=y)
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae', 'acc'])

# fit the model on this sequence (500 epochs)
model.fit([x1_train, x2_train], y_train, epochs=500, batch_size=100, verbose=2)
model.evaluate([x1_train, x2_train], y_train, batch_size=50, verbose=0)
model.evaluate([x1_test, x2_test], y_test, batch_size=50, verbose=0)

# pull out the first embedding dimension for each show
a = model.get_layer(index=1).get_weights()[0][:, 0]
b = ['na'] + shows
c = dict(zip(b, a))
def _createModel(self, outDim): model = Sequential() if self._ngrams != 0 and (self._featuresType == 'lstm' or self._featuresType == 'bilstm'): model.add( Reshape(target_shape=(self._numFields * self._numPhrases * int(self._phraseLen / self._ngrams), self._ngrams, self._vecLen))) if self._masking: model.add(Masking(mask_value=0.)) for layer in range(self._featuresLayers): if self._noLstm: model.add(TimeDistributed(Dense(self._featuresDim))) else: if self._featuresType == 'bilstm': model.add( TimeDistributed( Bidirectional( LSTM(self._featuresDim, dropout=self._dropout, recurrent_dropout=self._dropout, return_sequences=(self._pooling != 'none'))))) else: model.add( TimeDistributed( LSTM(self._featuresDim, dropout=self._dropout, recurrent_dropout=self._dropout, return_sequences=(self._pooling != 'none')))) for _ in range(self._afterFeaturesLayers): if self._featuresType == 'bilstm': model.add( TimeDistributed( Dense(self._featuresDim * 2, activation='relu'))) else: model.add( TimeDistributed( Dense(self._featuresDim, activation='relu'))) if self._masking: model.add(NonMasking()) if self._pooling == 'avg': model.add(TimeDistributed(GlobalAveragePooling1D())) elif self._pooling == 'max': model.add(TimeDistributed(GlobalMaxPooling1D())) if not self._noLstm and not self._noLstmP and self._featuresType == 'bilstm': model.add( Reshape(target_shape=(self._numFields * self._numPhrases, int(self._phraseLen / self._ngrams), self._featuresDim * 2))) else: model.add( Reshape(target_shape=(self._numFields * self._numPhrases, int(self._phraseLen / self._ngrams), self._featuresDim))) else: model.add( Reshape(target_shape=(self._numFields * self._numPhrases, self._phraseLen, self._vecLen))) if self._masking: model.add(Masking(mask_value=0.)) for layer in range(self._featuresLayers): if self._noLstm or (self._ngrams != 0 and self._noLstmP): model.add(TimeDistributed(Dense(self._featuresDim))) else: if self._featuresType == 'bilstm': model.add( TimeDistributed( Bidirectional( LSTM(self._featuresDim, dropout=self._dropout, recurrent_dropout=self._dropout, return_sequences=(self._pooling != 'none'))))) elif self._featuresType == 'lstm': model.add( TimeDistributed( LSTM(self._featuresDim, dropout=self._dropout, recurrent_dropout=self._dropout, return_sequences=(self._pooling != 'none')))) elif self._featuresType == 'cnn': model.add( TimeDistributed( Conv1D(self._featuresDim, kernel_size=self._cnnWindow, padding='same', activation='relu'))) for _ in range(self._afterFeaturesLayers): if self._featuresType == 'bilstm': model.add( TimeDistributed( Dense(self._featuresDim * 2, activation='relu'))) else: model.add( TimeDistributed( Dense(self._featuresDim, activation='relu'))) #if not (self._featuresType == 'cnn' and layer < self._featuresLayers -1): if self._masking: model.add(NonMasking()) if self._pooling == 'avg': model.add(TimeDistributed(GlobalAveragePooling1D())) elif self._pooling == 'max': model.add(TimeDistributed(GlobalMaxPooling1D())) if not self._noLstm and not self._noLstmP and self._featuresType == 'bilstm': model.add( Reshape(target_shape=(self._numFields, self._numPhrases, self._featuresDim * 2))) else: model.add( Reshape(target_shape=(self._numFields, self._numPhrases, self._featuresDim))) if self._masking: model.add(Masking(mask_value=0.)) for layer in range(self._featuresLayers): if self._noLstm or self._noLstmP: model.add(TimeDistributed(Dense(self._featuresDim))) else: if self._featuresType == 'bilstm': model.add( TimeDistributed( Bidirectional( LSTM(self._featuresDim, dropout=self._dropout, 
recurrent_dropout=self._dropout, return_sequences=(self._pooling != 'none'))))) elif self._featuresType == 'lstm': model.add( TimeDistributed( LSTM(self._featuresDim, dropout=self._dropout, recurrent_dropout=self._dropout, return_sequences=(self._pooling != 'none')))) elif self._featuresType == 'cnn': model.add( TimeDistributed( Conv1D(self._featuresDim, kernel_size=self._cnnWindow, padding='same', activation='relu'))) for _ in range(self._afterFeaturesLayers): if self._featuresType == 'bilstm': model.add( TimeDistributed( Dense(self._featuresDim * 2, activation='relu'))) else: model.add( TimeDistributed( Dense(self._featuresDim, activation='relu'))) #if not (self._featuresType == 'cnn' and layer < self._featuresLayers -1): if self._masking: model.add(NonMasking()) if self._pooling == 'avg': model.add(TimeDistributed(GlobalAveragePooling1D())) elif self._pooling == 'max': model.add(TimeDistributed(GlobalMaxPooling1D())) if self._masking: model.add(Masking(mask_value=0.)) for layer in range(self._featuresLayers): if self._noLstm or self._noLstmP: model.add(Dense(self._featuresDim)) else: if self._featuresType == 'bilstm': model.add( Bidirectional( LSTM(self._featuresDim, dropout=self._dropout, recurrent_dropout=self._dropout, return_sequences=(self._pooling != 'none')))) elif self._featuresType == 'lstm': model.add( LSTM(self._featuresDim, dropout=self._dropout, recurrent_dropout=self._dropout, return_sequences=(self._pooling != 'none'))) elif self._featuresType == 'cnn': model.add( Conv1D(self._featuresDim, kernel_size=self._cnnWindow, padding='same', activation='relu')) for _ in range(self._afterFeaturesLayers): if self._featuresType == 'bilstm': model.add(Dense(self._featuresDim * 2, activation='relu')) else: model.add(Dense(self._featuresDim, activation='relu')) #if not (self._featuresType == 'cnn' and layer < self._featuresLayers -1): if self._masking: model.add(NonMasking()) if self._pooling == 'avg': model.add(GlobalAveragePooling1D()) elif self._pooling == 'max': model.add(GlobalMaxPooling1D()) model.add(Dropout(self._dropout)) for _ in range(self._hiddenLayers): if self._hiddenDim is None: model.add(Dense(outDim, activation='relu')) else: model.add(Dense(self._hiddenDim, activation='relu')) model.add(Dense(outDim, activation='softmax')) #opt = RMSprop(lr=self._learningRate, decay=self._learningRateDecay) opt = Adam(lr=self._learningRate, decay=self._learningRateDecay) model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy']) return model
time_loss_weights = 1. / (nt - 1) * np.ones( (nt, 1)) # equally weight all timesteps except the first time_loss_weights[0] = 0 prednet = PredNet(stack_sizes, R_stack_sizes, A_filt_sizes, Ahat_filt_sizes, R_filt_sizes, output_mode='error', return_sequences=True) inputs = Input(shape=(nt, ) + input_shape) errors = prednet(inputs) # errors will be (batch_size, nt, nb_layers) errors_by_time = TimeDistributed( Dense(1, trainable=False), weights=[layer_loss_weights, np.zeros(1)], trainable=False)(errors) # calculate weighted error by layer errors_by_time = Flatten()(errors_by_time) # will be (batch_size, nt) final_errors = Dense(1, weights=[time_loss_weights, np.zeros(1)], trainable=False)(errors_by_time) # weight errors by time model = Model(inputs=inputs, outputs=final_errors) model.compile(loss='mean_absolute_error', optimizer='adam') train_generator = SequenceGenerator(train_file, train_sources, nt, batch_size=batch_size, shuffle=True) val_generator = SequenceGenerator(val_file,
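# Sketch of the frozen-Dense trick used above, with toy numbers: a Dense(1) that is not
# trainable and has hand-set weights is simply a fixed weighted sum, here weighting a
# hypothetical set of 4 per-layer errors at each of nt timesteps.
import numpy as np
from keras.models import Model
from keras.layers import Input, Dense, TimeDistributed, Flatten

nt, nb_layers = 10, 4
layer_loss_weights = np.array([[1.], [0.], [0.], [0.]])      # only the lowest layer contributes
errors = Input(shape=(nt, nb_layers))
weighted = TimeDistributed(Dense(1, trainable=False),
                           weights=[layer_loss_weights, np.zeros(1)],
                           trainable=False)(errors)          # (batch, nt, 1)
weighted = Flatten()(weighted)                               # (batch, nt)
demo = Model(errors, weighted)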
# read in embedding and translate
if args.agg_we is not None or args.align_op_we is not None:
    print(" fetching word embedding")
    embedding_matrix = get_embedding_matrix(args.embedding, VOCAB, EMBED_HIDDEN_SIZE, tokenizer)
    embed = Embedding(VOCAB, EMBED_HIDDEN_SIZE, weights=[embedding_matrix],
                      input_length=MAX_LEN, trainable=False)
    prem = embed(premise)
    hypo = embed(hypothesis)
    if args.timedist:
        translate = TimeDistributed(Dense(SENT_HIDDEN_SIZE, activation=ACTIVATION))
        prem = translate(prem)
        hypo = translate(hypo)

# read in antonym embedding and translate
if args.agg_ae is not None or args.align_op_ae is not None:
    print(" fetching antonym word embedding")
    antonym_embedding_matrix = get_embedding_matrix(args.ant_embedding, VOCAB,
                                                    ANT_SENT_HIDDEN_SIZE, tokenizer)
    antonym_embed = Embedding(VOCAB, ANT_SENT_HIDDEN_SIZE, weights=[antonym_embedding_matrix],
                              input_length=MAX_LEN, trainable=False)
def _conv_block_td(inputs, filters, input_shape, kernel=(3, 3), strides=(1, 1), trainable=True):
    channel_axis = 3  # if backend.image_data_format() == 'channels_first' else -1
    x = TimeDistributed(layers.Conv2D(filters, kernel, padding='same', use_bias=False,
                                      strides=strides, input_shape=input_shape),
                        name='conv1_td')(inputs)
    x = TimeDistributed(layers.BatchNormalization(axis=channel_axis),
                        name='conv1_bn_td')(x)
    return layers.ReLU(6., name='conv1_relu_td')(x)
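# A minimal usage sketch of a block like _conv_block_td, with made-up shapes: the same
# Conv2D / BatchNorm weights are applied to every frame via TimeDistributed, while the
# clipped ReLU is elementwise and needs no wrapper.
import numpy as np
from tensorflow.keras import Input, Model, layers

frames = Input(shape=(4, 32, 32, 3))                    # (time, H, W, channels), hypothetical sizes
x = layers.TimeDistributed(
    layers.Conv2D(8, (3, 3), padding='same', use_bias=False))(frames)
x = layers.TimeDistributed(layers.BatchNormalization(axis=-1))(x)
x = layers.ReLU(6.)(x)
demo = Model(frames, x)
print(demo.predict(np.zeros((2, 4, 32, 32, 3))).shape)  # (2, 4, 32, 32, 8)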
# Building the model with the functional API - work in progress; stopped because it was
# unclear how to produce the final output.
from numpy import array
from keras.models import Model
from keras.layers import Input
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import RepeatVector
from keras.layers import TimeDistributed

input1 = Input(shape=(N_TIME_STEPS, N_FEATURES))
encoded1 = LSTM(50, activation='tanh')(input1)
encoded1 = RepeatVector(N_TIME_STEPS)(encoded1)
encoded2 = LSTM(50, activation='tanh', return_sequences=True)(encoded1)
decoded1 = TimeDistributed(Dense(3))(encoded2)

autoencoder = Model(inputs=input1, outputs=decoded1)
encoder = Model(input1, encoded2)

encoded_input = Input(shape=(200, 50))
autoencoder.layers[-1].input_shape
decoder_layer = autoencoder.layers[-1]
decoder = Model(encoded_input, decoder_layer(encoded_input))

autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.fit(X_train, X_train, validation_data=(X_test, X_test), epochs=10,
def generate_DeepConvLSTM_model(x_shape, class_number, filters, lstm_dims, learning_rate=0.01, regularization_rate=0.01, metrics=['accuracy']): """ Generate a model with convolution and LSTM layers. See Ordonez et al., 2016, http://dx.doi.org/10.3390/s16010115 Parameters ---------- x_shape : tuple Shape of the input dataset: (num_samples, num_timesteps, num_channels) class_number : int Number of classes for classification task filters : list of ints number of filters for each convolutional layer lstm_dims : list of ints number of hidden nodes for each LSTM layer learning_rate : float learning rate regularization_rate : float regularization rate metrics : list Metrics to calculate on the validation set. See https://keras.io/metrics/ for possible values. Returns ------- model : Keras model The compiled Keras model """ dim_length = x_shape[1] # number of samples in a time series dim_channels = x_shape[2] # number of channels output_dim = class_number # number of classes weightinit = 'lecun_uniform' # weight initialization model = Sequential() # initialize model model.add(BatchNormalization(input_shape=(dim_length, dim_channels))) # reshape a 2 dimensional array per file/person/object into a # 3 dimensional array model.add(Reshape(target_shape=(dim_length, dim_channels, 1))) for filt in filters: # filt: number of filters used in a layer # filters: vector of filt values model.add( Convolution2D(filt, kernel_size=(3, 1), padding='same', kernel_regularizer=l2(regularization_rate), kernel_initializer=weightinit)) model.add(BatchNormalization()) model.add(Activation('relu')) # reshape 3 dimensional array back into a 2 dimensional array, # but now with more dept as we have the the filters for each channel model.add(Reshape(target_shape=(dim_length, filters[-1] * dim_channels))) for lstm_dim in lstm_dims: model.add( LSTM(units=lstm_dim, return_sequences=True, activation='tanh')) model.add(Dropout(0.5)) # dropout before the dense layer # set up final dense layer such that every timestamp is given one # classification model.add( TimeDistributed( Dense(units=output_dim, kernel_regularizer=l2(regularization_rate)))) model.add(Activation("softmax")) # Final classification layer - per timestep model.add(Lambda(lambda x: x[:, -1, :], output_shape=[output_dim])) model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=learning_rate), metrics=metrics) return model
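# A hypothetical call of the function above (the dataset shape, filter counts and LSTM sizes
# are made-up example values): the final Lambda keeps only the last timestep, so each input
# window gets a single one-hot label.
x_shape = (None, 128, 9)      # (num_samples, num_timesteps, num_channels)
model = generate_DeepConvLSTM_model(x_shape,
                                    class_number=6,
                                    filters=[32, 64],
                                    lstm_dims=[128, 64])
model.summary()
# model.fit(X, y, ...) then expects X of shape (N, 128, 9) and y of shape (N, 6)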
def model_gru(input_shape): """ Function creating the model's graph in Keras. Argument: input_shape -- shape of the model's input data (using Keras conventions) Returns: model -- Keras model instance """ strd = (2, 2, 1) # strides for maxpooling sz = (3, 3, 3) # size of filter in stackblock X_input = Input(shape=input_shape) H, W, T, _ = input_shape X = stackBlock(X_input, 4, sz, 1) X = stackBlock(X, 4, sz, 2) X = MaxPooling3D((2, 2, 1), strides=strd)(X) X = stackBlock(X, 4, sz, 3) X = stackBlock(X, 4, sz, 4) X = MaxPooling3D((2, 2, 1), strides=strd)(X) X = stackBlock(X, 4, sz, 5) X = stackBlock(X, 4, sz, 6) X = MaxPooling3D((2, 2, 1), strides=strd)(X) sp = int_shape(X) print(sp) # (m),H,W,T,C: must transform into (batch_size, timesteps, input_dim) X = Permute((3, 1, 2, 4))(X) X = Reshape((T, -1))(X) sp = int_shape(X) print(sp) # Step 3: First GRU Layer X = GRU(32, return_sequences=True)( X) # GRU (use 32 units and return the sequences) X = Dropout(0.2)(X) X = BatchNormalization()(X) # Step 4: Second GRU Layer X = GRU(32, return_sequences=True)( X) # GRU (use 32 units and return the sequences) X = Dropout(0.2)(X) X = BatchNormalization()(X) X = Dropout(0.2)(X) # Step 5: Time-distributed dense layer X = TimeDistributed(Dense(1, activation="sigmoid"))( X) # time distributed (sigmoid) sp = int_shape(X) print(sp) X = Reshape((T, 1, 1, 1))(X) X = Permute((2, 3, 1, 4))(X) model = Model(inputs=X_input, outputs=X) return model
MASK_VALUE = data['MASK_VALUE'] X_train, Y_train = data['X_train'], data['Y_train'] X_val, Y_val = data['X_val'], data['Y_val'] # X_test, Y_test = data['X_test'], data['Y_test'] ####################################################################################################### # Create model model = Sequential() model.add(Masking(mask_value=MASK_VALUE, input_shape=(WINDOW_SIZE, N_FEATURES))) for n_units in GRU_ARCH: model.add(GRU(n_units, return_sequences=True, dropout=0.1)) #, recurrent_dropout=0.2)) model.add(TimeDistributed(Dense(1, activation='sigmoid'))) model.compile(loss='binary_crossentropy', optimizer='adam', sample_weight_mode='temporal') print(model.summary()) ####################################################################################################### # Train batch_size = 128 n_epochs = 100 # metric_to_monitor = 'val_majority_bacc' metric_to_monitor = 'val_last_bacc' # Compute class weights and sample weights; for train and validation sets train_1_cnt = np.count_nonzero(Y_train[:, -1, 0])
def get_char_embedding_model(): ## Imports from keras.models import Model, Input from keras.layers import LSTM, Embedding, Dense, TimeDistributed from keras.layers import Bidirectional, concatenate, SpatialDropout1D ## Apparently the trick here is to wrap the parts that should be applied to characters in a TimeDistributed so that characters in a layer apply the same layers to every character sequence ## Returns a Tensor word_in = Input(shape=(constants.MAX_SENT_LEN, )) ortho_word_in = Input(shape=(constants.MAX_SENT_LEN, )) ## To find word embedding emb_word = Embedding(input_dim=n_words + 2, output_dim=20, input_length=constants.MAX_SENT_LEN, mask_zero=True)(word_in) ortho_emb_word = Embedding(input_dim=n_ortho_words + 2, output_dim=20, input_length=constants.MAX_SENT_LEN, mask_zero=True)(ortho_word_in) ## To find character embedding for characters of that word char_in = Input(shape=( constants.MAX_SENT_LEN, constants.MAX_WORD_LEN, )) emb_char = TimeDistributed( Embedding(input_dim=n_chars + 2, output_dim=10, input_length=constants.MAX_WORD_LEN, mask_zero=True))(char_in) ortho_char_in = Input(shape=( constants.MAX_SENT_LEN, constants.MAX_WORD_LEN, )) ortho_emb_char = TimeDistributed( Embedding(input_dim=n_ortho_chars + 2, output_dim=10, input_length=constants.MAX_WORD_LEN, mask_zero=True))(ortho_char_in) ## Character CNN to get the word encoding by characters # char_encoding = TimeDistributed(Conv1D()) char_encoding = TimeDistributed( LSTM(units=20, return_sequences=False, recurrent_dropout=0.5))(emb_char) ortho_char_encoding = TimeDistributed( LSTM(units=20, return_sequences=False, recurrent_dropout=0.5))(ortho_emb_char) print(char_encoding.shape, ' | ', ortho_char_encoding.shape) ## main LSTM x = concatenate( [char_encoding, emb_word, ortho_char_encoding, ortho_emb_word]) x = SpatialDropout1D(0.3)(x) main_lstm = Bidirectional( LSTM(units=50, return_sequences=True, recurrent_dropout=0.6))(x) out = TimeDistributed(Dense(n_tags + 1, activation="softmax"))(main_lstm) model = Model([char_in, word_in, ortho_char_in, ortho_word_in], out) return model
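# A toy shape check (small made-up sizes, not the constants module used above) of the
# character "trick" described in the comments: TimeDistributed applies one shared
# Embedding/LSTM to every word's character sequence, giving one vector per word.
import numpy as np
from keras.models import Model
from keras.layers import Input, Embedding, LSTM, TimeDistributed

sent_len, word_len, n_chars = 6, 8, 30
char_in = Input(shape=(sent_len, word_len))
emb_char = TimeDistributed(Embedding(input_dim=n_chars + 2, output_dim=10))(char_in)  # (sent_len, word_len, 10)
char_encoding = TimeDistributed(LSTM(units=20, return_sequences=False))(emb_char)     # (sent_len, 20)
demo = Model(char_in, char_encoding)
print(demo.predict(np.zeros((1, sent_len, word_len))).shape)   # (1, 6, 20)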
attention_4_2 = dot([x5_out, x2_out], axes=[2, 2]) attention_4_2 = Activation('softmax')(attention_4_2) context_4_2 = dot([attention_4_2, x2_out], axes=[2, 1]) x5_2_out_combined_context = concatenate([context_4_2, x5_out]) attention_4_3 = dot([x5_out, x1_out], axes=[2, 2]) attention_4_3 = Activation('softmax')(attention_4_3) context_4_3 = dot([attention_4_3, x1_out], axes=[2, 1]) x5_3_out_combined_context = concatenate([context_4_3, x5_out]) out = Add()([x2_out_combined_context, \ x3_out_combined_context, x3_1_out_combined_context,\ x4_out_combined_context, x4_1_out_combined_context, x4_2_out_combined_context, \ x5_out_combined_context, x5_1_out_combined_context, x5_2_out_combined_context, x5_3_out_combined_context]) fc1_out = TimeDistributed(Dense(150, activation="relu"))( out) # equation (5) of the paper output = TimeDistributed(Dense(n_tags, activation="softmax"))( fc1_out) # equation (6) of the paper model = Model([input, profile_input], output) model.summary() ################################################################################ # Setting up the model with categorical x-entropy loss and the custom accuracy function as accuracy rmsprop = keras.optimizers.RMSprop(lr=0.003, rho=0.9, epsilon=None, decay=0.0) # add decay=0.5 after 15 epochs model.compile(optimizer=rmsprop, loss="categorical_crossentropy", metrics=["accuracy", accuracy])
cnn.add(BatchNormalization(momentum=0.9)) cnn.add(LeakyReLU(alpha=0.2)) cnn.add(Dropout(0.2)) cnn.add(Conv2D(128, (3, 3), strides=(2, 2), padding='same')) cnn.add(BatchNormalization(momentum=0.9)) cnn.add(LeakyReLU(alpha=0.2)) cnn.add(Dropout(0.2)) cnn.add(Flatten()) cnn_lstm = Sequential() cnn_lstm.add( TimeDistributed(cnn, input_shape=(num_timesteps, GENERATE_SQUARE, GENERATE_SQUARE, IMAGE_CHANNELS))) cnn_lstm.add(LSTM((50), return_sequences=True)) cnn_lstm.add(Dropout(0.2)) cnn_lstm.add( Dense(1, activation="sigmoid", kernel_regularizer=regularizers.l2(0.01))) cnn_lstm.compile(loss='binary_crossentropy', optimizer=Adam(lr=0.0002, beta_1=0.5), metrics=['accuracy']) cnn_lstm.summary() ###################################################################################################### print("Start training...") epoch = 0
Embedding(input_dim=vocab_size, output_dim=emb_dim, input_length=maxlen, weights=[embedding_weights])) autoencoder.add(Dropout(0.5)) autoencoder.add(Lambda(lambda x: K.sum(x, axis=1), output_shape=(300, ))) autoencoder.add(Dropout(0.5)) autoencoder.add(Dense(512)) autoencoder.add(BatchNormalization()) autoencoder.add(PReLU()) autoencoder.add(Dropout(0.5)) autoencoder.add(RepeatVector(maxlen)) autoencoder.add(TimeDistributed(Dense(300))) autoencoder.add(BatchNormalization()) autoencoder.add(PReLU()) autoencoder.add(Dropout(0.5)) autoencoder.add(TimeDistributed(Dense(vocab_size, activation='softmax'))) autoencoder.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) print_summary(autoencoder.layers) generator = minibatches(X_train, word2idx) samples_per_epoch = X_train.shape[0] autoencoder.fit_generator(generator,
# why is latent_dim 6? I think it should be 3
# latent_dim is the dimension of the GRU, not of the input
decoder_GRU = GRU(LATENT_DIM, return_state=True, return_sequences=True)

# use _ for the returned state
# https://keras.io/layers/recurrent/
# initial_state should be a list of tensors
decoder_output, _ = decoder_GRU(decoder_input, initial_state=encoder_states)

# https://keras.io/getting-started/functional-api-guide/#all-models-are-callable-just-like-layers
# https://keras.io/layers/wrappers/
# https://blog.csdn.net/u012193416/article/details/79477220
# https://machinelearningmastery.com/timedistributed-layer-for-long-short-term-memory-networks-in-python/
# ? I don't fully understand the syntax here
# ? it might be better to re-code this following lecture 4
decoder_dense = TimeDistributed(Dense(1))
decoder_output = decoder_dense(decoder_output)
# also GRU
# ? no Sequential / model.add here
# ? why do it this way, and not with model.add as in lecture 4?
# https://github.com/say543/RNNForTimeSeriesForecastTutorial/blob/master/4_multi_step_encoder_decoder_simple.ipynb
# model class API
# https://keras.io/models/model/#model-class-api
# https://keras.io/getting-started/functional-api-guide/
# here: multiple inputs, single output (the output comes from the Dense layer)
# note that by calling a model you aren't just reusing the architecture, you are also reusing its weights
# [] forms the input list
model = Model([encoder_input, decoder_input], decoder_output)
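# A self-contained toy version (hypothetical names and sizes) of the wiring the comments
# above ask about: the decoder needs a second input plus the encoder state via initial_state,
# which Sequential's model.add style cannot express, hence the functional Model class API.
from keras.models import Model
from keras.layers import Input, GRU, Dense, TimeDistributed

LATENT_DIM = 6
encoder_input = Input(shape=(None, 1))
_, encoder_states = GRU(LATENT_DIM, return_state=True)(encoder_input)

decoder_input = Input(shape=(None, 1))
decoder_output, _ = GRU(LATENT_DIM, return_state=True, return_sequences=True)(
    decoder_input, initial_state=encoder_states)
decoder_output = TimeDistributed(Dense(1))(decoder_output)   # one prediction per decoder step

toy_model = Model([encoder_input, decoder_input], decoder_output)
toy_model.compile(optimizer='adam', loss='mse')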
def _get_entailment_input_combiner(self):
    base_combiner = super(MultipleTrueFalseMemoryNetwork, self)._get_entailment_input_combiner()
    return TimeDistributed(base_combiner, name="timedist_%s" % base_combiner.name)
tensorboard = TensorBoard( log_dir=os.path.join(args.input_job_dir, 'logs'), histogram_freq=0, write_graph=True, embeddings_freq=0) callbacks = [tensorboard] # model model_input = Input(shape=(140,)) model = Embedding(input_dim=args.input_words, output_dim=140, input_length=140)(model_input) model = Dropout(args.input_dropout)(model) model = Bidirectional( LSTM(units=100, return_sequences=True, recurrent_dropout=0.1))(model) out = TimeDistributed(Dense(args.input_tags, activation="softmax"))( model) # softmax output layer model = Model(model_input, out) model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"]) model.summary() history = model.fit(X_train, np.array(y_train), batch_size=32, epochs=1, validation_split=0.1, verbose=1, callbacks=callbacks) loss, accuracy = model.evaluate(X_test, np.array(y_test)) # save model print('saved model to ', args.output_model_path) model.save(MODEL_FILE) with file_io.FileIO(MODEL_FILE, mode='rb') as input_f: with file_io.FileIO(args.output_model_path + '/' + MODEL_FILE, mode='wb+') as output_f:
def _get_knowledge_combiner(self, layer_num: int):
    base_combiner = super(MultipleTrueFalseMemoryNetwork, self)._get_knowledge_combiner(layer_num)
    return TimeDistributed(base_combiner, name="timedist_%s" % base_combiner.name)
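# A generic sketch of the wrapping pattern used in _get_knowledge_combiner (and in the
# matching _get_entailment_input_combiner override earlier): TimeDistributed reuses an
# existing layer's weights across one extra leading axis, here a hypothetical axis of
# answer options.
from keras.models import Model
from keras.layers import Input, Dense, TimeDistributed

num_options, encoding_dim = 4, 32
base_combiner = Dense(16, activation='relu', name='combiner')
per_option_combiner = TimeDistributed(base_combiner, name="timedist_%s" % base_combiner.name)

options = Input(shape=(num_options, encoding_dim))
combined = per_option_combiner(options)    # (batch, num_options, 16), same weights per option
demo = Model(options, combined)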
timesteps = 12
jsoninput = '../data/EventDump_10Ktracks.json'
json_data = open(jsoninput, 'r').read()
parsed_json_data = json.loads(json_data)

BD = BatchData(parsed_json_data)
valdata, rand_int = BD.sample_batch(timesteps, nval)
valindata = valdata[:, 0:timesteps - 1]
valtarget = valdata[:, 1:timesteps]

inputs = Input(shape=(timesteps - 1, ndim))
outputs = inputs
# Keras 2 argument names: units / kernel_initializer replace output_dim / init
outputs = LSTM(nhidden, return_sequences=True, kernel_initializer='glorot_uniform')(outputs)
outputs = TimeDistributed(Dense(nhidden, activation='relu', kernel_initializer='glorot_uniform'))(outputs)
outputs = TimeDistributed(Dense(ndim, activation='linear', kernel_initializer='glorot_uniform'))(outputs)

model = Model(inputs=inputs, outputs=outputs)
model.summary()
model.compile(loss='mse', optimizer='Nadam', metrics=['accuracy'])

for ibatch in range(niter):
    print(ibatch)
    data, rand_int = BD.sample_batch(timesteps, iter_size)
    indata = data[:, 0:timesteps - 1]
    target = data[:, 1:timesteps]
def model(self, input_shape, output_dim: int): """ Build a recurrent network for speech """ # Main acoustic input input_data = Input(name='the_input', shape=input_shape) # print("input shape", input_shape) x = input_data if self.cnn_config: if self.cnn_config.kernel_2d is not None: reshape_up = Reshape((*input_shape, 1)) x = reshape_up(x) # x = K.expand_dims(input_data, -1) z = None if self.cnn_config: dil = 1 if self.cnn_config.dilation < -1 and self.cnn_config.cnn_layers > 1: dil = (-self.cnn_config.dilation)**( self.cnn_config.cnn_layers - 1) for layer_i in range(0, self.cnn_config.cnn_layers): in_layer_activation = self.cnn_config.cnn_activation if not self.cnn_config.cnn_activation_before_bn_do or self.cnn_config.cnn_dense: in_layer_activation = None if self.cnn_config.kernel_2d is not None: conv = Conv2D(self.cnn_config.filters, self.cnn_config.kernel_2d, strides=self.cnn_config.conv_stride_2d, padding="same", activation=in_layer_activation) else: conv = Conv1D(self.cnn_config.filters, self.cnn_config.kernel_size, strides=self.cnn_config.conv_stride, padding=self.cnn_config.conv_border_mode, dilation_rate=dil, activation=in_layer_activation, name='conv1d' + str(layer_i + 1)) if self.cnn_config.cnn_dense: if layer_i == 0: z = x else: if self.cnn_config.kernel_2d is not None: # print(layer_i, x.shape, z.shape) if layer_i % (self.cnn_config.cnn_layers // 5) == 0 and layer_i > 1: z = x else: z = concatenate([z, x], axis=-1) else: z = concatenate([z, x], axis=-1) x = conv(z) if (layer_i < self.cnn_config.cnn_layers - 1 or self.rnn_layers > 0 ) and self.cnn_config.kernel_2d is None: if self.cnn_config.cnn_bn: x = BatchNormalization()(x) if not self.cnn_config.cnn_activation_before_bn_do: x = Activation(self.cnn_config.cnn_activation, name=self.activation + "C" + str(layer_i))(x) if self.cnn_config.cnn_dropout_rate > 0.01: x = Dropout( rate=self.cnn_config.cnn_dropout_rate)(x) else: # print("Before x = conv(x)", x.shape) x = conv(x) # print("After x = conv(x)", x.shape) if (layer_i < self.cnn_config.cnn_layers - 1 or self.rnn_layers > 0 ) and self.cnn_config.kernel_2d is None: if self.cnn_config.cnn_dropout_rate is None: self.cnn_config.cnn_dropout_rate = self.dropout_rate if self.cnn_config.cnn_do_bn_order: if self.cnn_config.cnn_dropout_rate > 0.01: x = Dropout( rate=self.cnn_config.cnn_dropout_rate)(x) if not self.cnn_config.cnn_activation_before_bn_do: x = Activation(self.cnn_config.cnn_activation, name=self.activation + "C" + str(layer_i))(x) if self.cnn_config.cnn_bn: x = BatchNormalization()(x) else: if self.cnn_config.cnn_bn: x = BatchNormalization()(x) if not self.cnn_config.cnn_activation_before_bn_do: x = Activation(self.cnn_config.cnn_activation, name=self.activation + "C" + str(layer_i))(x) if self.cnn_config.cnn_dropout_rate > 0.01: x = Dropout( rate=self.cnn_config.cnn_dropout_rate)(x) if self.cnn_config.dilation < -1: dil = dil // (-self.cnn_config.dilation) if self.cnn_config.dilation > 1: dil *= self.cnn_config.dilation if self.cnn_config.kernel_2d is not None: # if self.cnn_config.kernel_2d is not None and (layer_i+1) % (self.cnn_config.cnn_layers // 5) == 0: if self.cnn_config.cnn_bn: x = BatchNormalization()(x) # if not self.cnn_config.cnn_activation_before_bn_do: x = Activation(self.cnn_config.cnn_activation, name=self.activation + "C" + str(layer_i))(x) if not self.cnn_config.cnn_dense: pool = MaxPooling2D(pool_size=(1, 2)) x = pool(x) elif (layer_i + 1) % (self.cnn_config.cnn_layers // 5) == 0: # elif layer_i == self.cnn_config.cnn_layers - 1: pool = 
MaxPooling2D(pool_size=(1, 2)) x = pool(x) if self.cnn_config.cnn_dropout_rate > 0.01: x = Dropout(rate=self.cnn_config.cnn_dropout_rate)(x) if self.cnn_config and self.cnn_config.kernel_2d is not None: # print("Before reshape", x.shape, type(x)) reshape = Reshape((input_shape[0], -1)) x = reshape(x) # x = K.reshape(x, (x.shape[0], x.shape[1], -1)) # print("After reshape", x.shape) z = None for layer_i in range(0, self.rnn_layers): # noinspection PyCallingNonCallable rnn = self.rnn_type.value(self.rnn_units, return_sequences=True, name='rnn' + str(layer_i + 1)) if self.bd_merge and layer_i == 0: rnn = Bidirectional(rnn, merge_mode=self.bd_merge.name) if self.rnn_dense and layer_i > 0: z = concatenate([z, x], axis=-1) else: z = x x = rnn(z) if layer_i < self.rnn_layers - 1: if self.rnn_bn: x = BatchNormalization(name="R_BN_" + str(layer_i))(x) x = Activation(self.rnn_activation, name=self.activation + "R" + str(layer_i))(x) if self.rnn_dropout_rate > 0.01: x = Dropout(rate=self.rnn_dropout_rate, name="R_DO_" + str(layer_i))(x) if self.time_distributed_dense: if self.activation_before_bn_do: x = Activation(self.activation, name=self.activation)(x) if self.do_bn_order: if self.dropout_rate > 0.01: x = Dropout(rate=self.dropout_rate, name="TDD_DO")(x) if not self.activation_before_bn_do: x = Activation(self.activation, name=self.activation)(x) if self.bn: x = BatchNormalization(name="TDD_BN")(x) else: if self.bn: x = BatchNormalization(name="TDD_BN")(x) if not self.activation_before_bn_do: x = Activation(self.activation, name=self.activation)(x) if self.dropout_rate > 0.01: x = Dropout(rate=self.dropout_rate, name="TDD_DO")(x) x = TimeDistributed(Dense(output_dim))(x) # Add softmax activation layer x = Activation('softmax', name='softmax')(x) # Specify the model # print("After activation") model = Model(inputs=input_data, outputs=x) # print("After model") model.name = self.model_name() if self.cnn_config: # Cannot pass self.field to lambda function as self gets included in the function and the function # becomes not serializable since self is not serializable and then the model does not serialize kernel_size = self.cnn_config.kernel_2d[ 0] if self.cnn_config.kernel_2d is not None else self.cnn_config.kernel_size conv_border_mode = "same" if self.cnn_config.kernel_2d is not None else self.cnn_config.conv_border_mode conv_stride = self.cnn_config.conv_stride_2d[ 0] if self.cnn_config.conv_stride_2d is not None else self.cnn_config.conv_stride dilation = abs(self.cnn_config.dilation) cnn_layers = self.cnn_config.cnn_layers model.output_length = lambda input_length: cnn_output_length( input_length, kernel_size, conv_border_mode, conv_stride, dilation, cnn_layers) else: model.output_length = lambda input_length: input_length model.summary() return model
[0.2] [0.4] [0.6] [0.8]]] is returned, you have to "flatten" that vector into shape (1, 5): [[0. 0.2 0.4 0.6 0.8]], and then use Dense(5) to output the 5 continuous values. The (1, 5) shape drops the property "5 timesteps with one feature each, i.e. (1, 5, 1)" and only yields "one sequence containing 5 values".
With the TimeDistributed wrapper no such "flattening" is needed: TimeDistributed(Dense(1)) directly produces output of shape (1, 5, 1).
TimeDistributed applies the Dense layer independently to each of the 5 timesteps - that is where the name "time distributed" comes from: the same layer is called once per timestep, and all of those calls share the same weights.
"""
# prepare sequence
length = 5
seq = array([i / float(length) for i in range(length)])
# input: 1 sample, 5 timesteps, 1 feature
X = seq.reshape(1, 5, 1)
# output: 1 sample, 5 timesteps, 1 feature
y = seq.reshape(1, 5, 1)

# define LSTM configuration
# create LSTM
model = Sequential()
# input is 5 timesteps with 1 feature; return_sequences=True returns the whole sequence
model.add(LSTM(5, input_shape=(5, 1), return_sequences=True))
model.add(TimeDistributed(Dense(1)))
model.compile(loss='mean_squared_error', optimizer='adam')
print(model.summary())

# train LSTM
model.fit(X, y, epochs=1000, batch_size=1, verbose=2)

# evaluate
result = model.predict(X, batch_size=1, verbose=0)
for value in result[0, :, 0]:
    print('%.1f' % value)
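# For contrast, a sketch (same toy data) of the "flatten" alternative described in the note
# above: without TimeDistributed, the LSTM's last output feeds a Dense(length) layer, so the
# model predicts one (1, 5) vector per sample instead of one value per timestep.
from numpy import array
from keras.models import Sequential
from keras.layers import LSTM, Dense

length = 5
seq = array([i / float(length) for i in range(length)])
X = seq.reshape(1, length, 1)
y = seq.reshape(1, length)                    # targets are (1, 5) here, not (1, 5, 1)

model = Sequential()
model.add(LSTM(5, input_shape=(length, 1)))   # return_sequences=False: one vector per sample
model.add(Dense(length))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X, y, epochs=500, batch_size=1, verbose=0)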
def create_cdl_model_masked(model_name: str, num_filters: int = 16, time_steps: int = 5, dropout: float = 0.1, batchnorm: bool = True) -> Model: """ Function to define the Time Distributed UNET Model """ """Requires stack of Sequential SAR data (with vh vv channels stacked), where each image is a different timestep""" inputs = Input(shape=(time_steps, dems, dems, 2)) c1 = conv2d_block_time_dist(inputs, num_filters * 1, kernel_size=3, batchnorm=batchnorm) p1 = TimeDistributed(MaxPooling2D((2, 2)))(c1) p1 = TimeDistributed(Dropout(dropout))(p1) c2 = conv2d_block_time_dist(p1, num_filters * 2, kernel_size=3, batchnorm=batchnorm) p2 = TimeDistributed(MaxPooling2D((2, 2)))(c2) p2 = TimeDistributed(Dropout(dropout))(p2) c3 = conv2d_block_time_dist(p2, num_filters * 4, kernel_size=3, batchnorm=batchnorm) p3 = TimeDistributed(MaxPooling2D((2, 2)))(c3) p3 = TimeDistributed(Dropout(dropout))(p3) c4 = conv2d_block_time_dist(p3, num_filters * 8, kernel_size=3, batchnorm=batchnorm) p4 = TimeDistributed(MaxPooling2D((2, 2)))(c4) p4 = Dropout(dropout)(p4) c5 = conv2d_block_time_dist(p4, num_filters * 8, kernel_size=3, batchnorm=batchnorm) p5 = TimeDistributed(MaxPooling2D((2, 2)))(c5) p5 = TimeDistributed(Dropout(dropout))(p5) c6 = conv2d_block_time_dist(p5, num_filters * 8, kernel_size=3, batchnorm=batchnorm) p6 = TimeDistributed(MaxPooling2D((2, 2)))(c6) p6 = TimeDistributed(Dropout(dropout))(p6) c7 = conv2d_block_time_dist(p6, num_filters=num_filters * 16, kernel_size=3, batchnorm=batchnorm) # Expanding to 64 x 64 x 1 u8 = TimeDistributed( Conv2DTranspose(num_filters * 4, (3, 3), strides=(2, 2), padding='same'))(c7) u8 = concatenate([u8, c6]) u8 = TimeDistributed(Dropout(dropout))(u8) c8 = conv2d_block_time_dist(u8, num_filters * 4, kernel_size=3, batchnorm=batchnorm) u9 = TimeDistributed( Conv2DTranspose(num_filters * 2, (3, 3), strides=(2, 2), padding='same'))(c8) u9 = concatenate([u9, c5]) u9 = TimeDistributed(Dropout(dropout))(u9) c9 = conv2d_block_time_dist(u9, num_filters * 2, kernel_size=3, batchnorm=batchnorm) u10 = TimeDistributed( Conv2DTranspose(num_filters * 1, (3, 3), strides=(2, 2), padding='same'))(c9) u10 = concatenate([u10, c4]) u10 = TimeDistributed(Dropout(dropout))(u10) c10 = conv2d_block_time_dist(u10, num_filters * 1, kernel_size=3, batchnorm=batchnorm) u11 = TimeDistributed( Conv2DTranspose(num_filters * 1, (3, 3), strides=(2, 2), padding='same'))(c10) u11 = concatenate([u11, c3]) u11 = TimeDistributed(Dropout(dropout))(u11) c11 = conv2d_block_time_dist(u11, num_filters * 1, kernel_size=3, batchnorm=batchnorm) u12 = TimeDistributed( Conv2DTranspose(num_filters * 1, (3, 3), strides=(2, 2), padding='same'))(c11) u12 = concatenate([u12, c2]) u12 = TimeDistributed(Dropout(dropout))(u12) c12 = conv2d_block_time_dist(u12, num_filters * 1, kernel_size=3, batchnorm=batchnorm) u13 = TimeDistributed( Conv2DTranspose(num_filters * 1, (3, 3), strides=(2, 2), padding='same'))(c12) u13 = concatenate([u13, c1]) u13 = TimeDistributed(Dropout(dropout))(u13) c13 = conv2d_block_time_dist(u13, num_filters * 1, kernel_size=3, batchnorm=batchnorm) outputs = TimeDistributed( Conv2D(1, (1, 1), activation='sigmoid', name='last_layer'))(c13) model = Model(inputs=inputs, outputs=[outputs]) model.__asf_model_name = model_name model.compile(loss='mean_squared_error', optimizer=Adam(), metrics=['accuracy']) return model
def final_model_1( input_dim, # CNN parameters #filters=200, kernel_size=11, conv_stride=2, conv_border_mode='same', filters=350, kernel_size=11, conv_stride=1, conv_border_mode='same', cnn_layers=3, cnn_dropout=0.2, cnn_activation='relu', # RNN parameters reccur_units=29, recur_layers=2, recur_implementation=2, recurrent_dropout=0.2, reccur_merge_mode='concat', # Fully Connected layer parameters fc_units=[50], fc_dropout=0.2, fc_activation='relu'): """ Build a deep network for speech """ # Main acoustic input input_data = Input(name='the_input', shape=(None, input_dim)) nn = input_data # Add convolutional layers for i in range(cnn_layers): layer_name = 'cnn_' + str(i) nn = Conv1D(filters, kernel_size, strides=conv_stride, padding=conv_border_mode, activation=None, name=layer_name)(nn) nn = Activation(cnn_activation, name='act_' + layer_name)(nn) nn = Dropout(cnn_dropout, name='drop_' + layer_name)(nn) nn = BatchNormalization(name='bn_' + layer_name)(nn) # TODO: Add bidirectional recurrent layers #for i in range(recur_layers): layer_name = 'bidir_rnn' #+str(i) nn = Bidirectional(GRU(reccur_units, return_sequences=True, implementation=recur_implementation, name=layer_name, dropout=0.2, recurrent_dropout=recurrent_dropout), merge_mode=reccur_merge_mode)(nn) nn = BatchNormalization(name='bn_' + layer_name)(nn) # TODO: Add a Fully Connected layers fc_layers = len(fc_units) for i in range(fc_layers): layer_name = 'fc_' + str(i) nn = TimeDistributed(Dense(units=fc_units[i], name=layer_name))(nn) nn = Dropout(fc_dropout, name='drop_' + layer_name)(nn) nn = Activation(fc_activation, name='act_' + layer_name)(nn) nn = TimeDistributed(Dense(units=29, name='fc_out'))(nn) # TODO: Add softmax activation layer y_pred = Activation('softmax', name='softmax')(nn) # TODO: Specify the model model = Model(inputs=input_data, outputs=y_pred) # TODO: Specify model.output_length: select custom or Udacity version model.output_length = lambda x: cnn_output_length( x, kernel_size, conv_border_mode, conv_stride) print(model.summary(line_length=110)) return model
def __rgb(self): rgbinput = Input((10,150,100,3),name='rgbinput') x = TimeDistributed(Conv2D(kernel_size=(3,3),filters=64,padding='same'))(rgbinput) x = TimeDistributed(LeakyReLU(0.01))(x) x = TimeDistributed(BatchNormalization())(x) x = TimeDistributed(Conv2D(kernel_size=(3,3),filters=64,padding='same'))(x) x = TimeDistributed(LeakyReLU(0.01))(x) x = TimeDistributed(BatchNormalization())(x) x= TimeDistributed(MaxPooling2D((2,2),strides=(2,2),data_format='channels_last'))(x) x = TimeDistributed(Conv2D(kernel_size=(3,3),filters=128,padding='same'))(x) x = TimeDistributed(LeakyReLU(0.01))(x) x = TimeDistributed(BatchNormalization())(x) x= TimeDistributed(MaxPooling2D((2,2),strides=(2,2),data_format='channels_last'))(x) x = TimeDistributed(Conv2D(kernel_size=(3,3),filters=256,padding='same'))(x) x = TimeDistributed(LeakyReLU(0.01))(x) x = TimeDistributed(BatchNormalization())(x) x = TimeDistributed(Conv2D(kernel_size=(3,3),filters=512,padding='same'))(x) x = TimeDistributed(LeakyReLU(0.01))(x) x = TimeDistributed(BatchNormalization())(x) x = TimeDistributed(Conv2D(kernel_size=(3,3),filters=512,padding='same'))(x) x = TimeDistributed(LeakyReLU(0.01))(x) x = TimeDistributed(BatchNormalization())(x) x= TimeDistributed(MaxPooling2D((2,2),strides=(2,2),data_format='channels_last'))(x) x = TimeDistributed(Conv2D(kernel_size=(3,3),filters=1024,padding='same'))(x) x = TimeDistributed(LeakyReLU(0.01))(x) x = TimeDistributed(BatchNormalization())(x) x = TimeDistributed(Conv2D(kernel_size=(3,3),filters=1024,padding='same'))(x) x = TimeDistributed(LeakyReLU(0.01))(x) x = TimeDistributed(BatchNormalization())(x) x = TimeDistributed(GlobalAveragePooling2D())(x) x = TimeDistributed(Dropout(0.4))(x) x = TimeDistributed(Dense(1024))(x) x = TimeDistributed(Dropout(0.4))(x) x = LSTM(1024)(x) pred = Dense(27,activation ='softmax')(x) rgbmodel = Model(inputs=rgbinput,outputs=pred,name='rgb_model') rgbmodel.compile(Adam(0.0001),loss='categorical_crossentropy',metrics=['categorical_accuracy']) #rgbmodel.summary() return rgbmodel