def train(run_name, start_epoch, stop_epoch, img_w):
    """Build the CNN + GRU text-recognition network and train it with CTC loss.

    Args:
        run_name: Run sub-directory under ``OUTPUT_DIR``; used to locate saved
            weights and passed to ``VizCallback``.
        start_epoch: Epoch to resume from. If positive, the weights written
            for epoch ``start_epoch - 1`` are loaded before fitting.
        stop_epoch: Passed to ``fit_generator`` as ``epochs``.
        img_w: Input image width in pixels (the height is fixed at 64).
    """
    # Input parameters
    img_h = 64
    words_per_epoch = 16000
    val_split = 0.2
    val_words = int(words_per_epoch * val_split)
    train_words = words_per_epoch - val_words

    # Network parameters
    conv_filters = 16
    kernel_size = (3, 3)
    pool_size = 2
    time_dense_size = 32
    rnn_size = 512
    minibatch_size = 32
    # Two pooling stages of size `pool_size` are applied along each axis.
    downsample = pool_size ** 2

    if K.image_data_format() == 'channels_first':
        input_shape = (1, img_w, img_h)
    else:
        input_shape = (img_w, img_h, 1)

    archive_path = get_file(
        'wordlists.tgz',
        origin='http://www.mythic-ai.com/datasets/wordlists.tgz',
        untar=True)
    fdir = os.path.dirname(archive_path)

    # The generator's `val_split` argument receives the number of training
    # words (a count), not the 0.2 fraction defined above.
    img_gen = TextImageGenerator(
        monogram_file=os.path.join(fdir, 'wordlist_mono_clean.txt'),
        bigram_file=os.path.join(fdir, 'wordlist_bi_clean.txt'),
        minibatch_size=minibatch_size,
        img_w=img_w,
        img_h=img_h,
        downsample_factor=downsample,
        val_split=train_words)

    act = 'relu'
    input_data = Input(name='the_input', shape=input_shape, dtype='float32')

    # Two identical conv + max-pool stages (conv1/max1, conv2/max2).
    inner = input_data
    for stage in (1, 2):
        inner = Conv2D(conv_filters, kernel_size,
                       padding='same',
                       activation=act,
                       kernel_initializer='he_normal',
                       name='conv%d' % stage)(inner)
        inner = MaxPooling2D(pool_size=(pool_size, pool_size),
                             name='max%d' % stage)(inner)

    conv_to_rnn_dims = (img_w // downsample,
                        (img_h // downsample) * conv_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # Cuts down the input size going into the RNN.
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    # Two layers of bidirectional GRUs: the first pair is summed, the second
    # pair is concatenated.
    # NOTE(review): Keras documents `go_backwards=True` as returning the
    # reversed sequence, so the backward outputs may be time-reversed relative
    # to the forward ones when merged - confirm this is intended.
    gru_1 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru1')(inner)
    gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru2_b')(gru1_merged)
    gru2_merged = concatenate([gru_2, gru_2b])

    # Transforms the RNN output to character activations.
    inner = Dense(img_gen.get_output_size(),
                  kernel_initializer='he_normal',
                  name='dense2')(gru2_merged)
    y_pred = Activation('softmax', name='softmax')(inner)
    Model(inputs=input_data, outputs=y_pred).summary()

    labels = Input(name='the_labels',
                   shape=[img_gen.absolute_max_string_len],
                   dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    # The CTC loss needs extra inputs, so it is computed inside a Lambda layer
    # instead of a regular Keras loss function.
    ctc_inputs = [y_pred, labels, input_length, label_length]
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,),
                      name='ctc')(ctc_inputs)

    # clipnorm seems to speed up convergence
    sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)

    model = Model(inputs=[input_data, labels, input_length, label_length],
                  outputs=loss_out)

    # The loss is already the output of the 'ctc' layer, so the compiled loss
    # simply passes the prediction through.
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)

    if start_epoch > 0:
        weight_name = 'weights%02d.h5' % (start_epoch - 1)
        model.load_weights(os.path.join(OUTPUT_DIR, run_name, weight_name))

    # Captures the softmax output so it can be decoded during visualization.
    test_func = K.function([input_data], [y_pred])
    viz_cb = VizCallback(run_name, test_func, img_gen.next_val())

    model.fit_generator(generator=img_gen.next_train(),
                        steps_per_epoch=train_words // minibatch_size,
                        epochs=stop_epoch,
                        validation_data=img_gen.next_val(),
                        validation_steps=val_words // minibatch_size,
                        callbacks=[viz_cb, img_gen],
                        initial_epoch=start_epoch)
# Defines `clipped_relu` (ReLU capped at 20) and then builds a functional-API
# model: three time-distributed Dense(128) layers, a backward and a forward
# GRU(128) summed with Add(), another Dense(128), a 29-way softmax named
# 'aux_output', and a Lambda layer 'main_output' that calls `ctc_loss_lambda`.
# The model takes [inputs, labels, input_length, label_length] and outputs
# [loss_out, h6].
# NOTE(review): this physical line holds many statements and ends in a dangling
# `if b:`; the original line breaks appear to have been lost - restore them
# before running.
# NOTE(review): Keras documents `go_backwards=True` as returning the reversed
# sequence, so `lb` may be time-reversed relative to `lf` when added - confirm.
def clipped_relu(x): return keras.activations.relu(x, max_value=20) NB_FREQUENCIES = 161 inputs = Input(shape=(None, NB_FREQUENCIES), name='main_input') labels = Input(shape=(None, ), name='labels') input_length = Input(shape=(1, ), name='input_length') label_length = Input(shape=(1, ), name='label_length') h1 = TimeDistributed(Dense(128, activation=clipped_relu))(inputs) h2 = TimeDistributed(Dense(128, activation=clipped_relu))(h1) h3 = TimeDistributed(Dense(128, activation=clipped_relu))(h2) lb = GRU(128, go_backwards=True, return_sequences=True)(h3) lf = GRU(128, return_sequences=True)(h3) h4 = Add()([lb, lf]) # add the two layers h5 = TimeDistributed(Dense(128, activation=clipped_relu))(h4) h6 = TimeDistributed(Dense(29, activation='softmax'), name='aux_output')(h5) loss_out = Lambda(ctc_loss_lambda, output_shape=(1, ), name='main_output')([h6, labels, input_length, label_length]) model = keras.models.Model(inputs=[inputs, labels, input_length, label_length], outputs=[loss_out, h6]) model.summary() if b:
# Tail of a metric function (its `def` line and `denominator` are outside this
# view): counts positions where the rounded, clipped prediction disagrees with
# y_true and divides by `denominator`. It is followed by a script that builds
# an Embedding -> GRU(128) -> GRU(128) -> sigmoid Dense classifier, fits it and
# saves it under models/.
# NOTE(review): DROPOUT (0.2) and LR (0.005) only appear in the saved file name;
# the GRU layers use dropout=0.25 and the optimizer is the string 'adam', so the
# file name may not reflect the values actually used - confirm.
# NOTE(review): the original line breaks appear to have been lost on this line.
nominator = K.sum((y_true * (1 - K.round(K.clip(y_pred, 0, 1))) + (1 - y_true) * K.round(K.clip(y_pred, 0, 1)))) return (nominator / denominator) VALIDATION_SPLIT = 0.2 # ration for split of training data and test data NUM_EPOCHS = 2 # number of epochs the network is trained DROPOUT = 0.2 #REGULARIZATION = 0.1 BATCH_SIZE = 64 LR = 0.005 model = Sequential() model.add( Embedding(MAX_NUM_WORDS, EMBEDDING_DIM, input_length=MAX_SEQUENCE_LENGTH)) model.add(GRU(128, dropout=0.25, return_sequences=True)) model.add(GRU(128, dropout=0.25)) model.add(Dense(labels.shape[1], activation='sigmoid')) model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[hamming, f1, precision, recall]) history = model.fit(train_data, train_labels, class_weight=class_weight, validation_split=VALIDATION_SPLIT, epochs=NUM_EPOCHS, batch_size=BATCH_SIZE) model.save("models/model_EP_%s_DO_%s_BAT_%s_LR_%s.h5" % (str(NUM_EPOCHS), str(DROPOUT), str(BATCH_SIZE), str(LR)))
# Picks the top-k accuracy metric for i in 1..4 (top-2 .. top-5), aliases the
# group1 train/test arrays, then builds two structurally identical models
# (`global_model`, `group_model`): Input(8, 18) -> GRU(32) -> Dropout(0.35)
# -> Dense(256, relu) -> Dropout(0.35) -> Dense(18, softmax).
# NOTE(review): `i` comes from code outside this view (presumably an enclosing
# loop), and the original line breaks appear to have been lost - confirm the
# intended nesting before editing.
if(i == 1): top_k_acc = top_2_categorical_accuracy if(i == 2): top_k_acc = top_3_categorical_accuracy if(i == 3): top_k_acc = top_4_categorical_accuracy if(i == 4): top_k_acc = top_5_categorical_accuracy # variable setup group_train_text = group1_train_text group_train_label = group1_train_label group_test_text = group1_test_text group_test_label = group1_test_label global_first_inputs = Input(shape = (8,18,)) global_inputs = GRU(units = 32,return_sequences=False)(global_first_inputs) global_inputs = Dropout(0.35)(global_inputs) global_inputs = Dense(units=256,activation='relu')(global_inputs) global_inputs = Dropout(0.35)(global_inputs) global_outputs = Dense(units = 18,activation='softmax')(global_inputs) global_model = Model(inputs = global_first_inputs , outputs = global_outputs ) group_first_inputs = Input(shape = (8,18,)) group_inputs = GRU(units = 32,return_sequences=False)(group_first_inputs) group_inputs = Dropout(0.35)(group_inputs) group_inputs = Dense(units=256,activation='relu')(group_inputs) group_inputs = Dropout(0.35)(group_inputs) group_outputs = Dense(units = 18,activation='softmax')(group_inputs) group_model = Model(inputs = group_first_inputs , outputs = group_outputs) global_model.summary()
def build(self):
    """Assemble the 3D-conv + bidirectional-GRU + self-attention network.

    Every intermediate tensor is stored as an attribute on ``self``. The
    final model, kept in ``self.model``, takes the frame input, the labels,
    the input length and the label length, and outputs whatever the ``CTC``
    helper returns for them.
    """
    channels_first = K.image_data_format() == 'channels_first'
    input_shape = (
        (self.img_c, self.frames_n, self.img_w, self.img_h)
        if channels_first
        else (self.frames_n, self.img_w, self.img_h, self.img_c)
    )

    # Settings shared by the three spatial pooling layers and the conv layers.
    pool_kwargs = dict(pool_size=(1, 2, 2), strides=(1, 2, 2))
    he_normal = 'he_normal'

    self.input_data = Input(name='the_input', shape=input_shape,
                            dtype='float32')

    # Block 1: 32 filters, strided 3x5x5 convolution.
    self.zero1 = ZeroPadding3D(padding=(1, 2, 2),
                               name='zero1')(self.input_data)
    self.conv1 = Conv3D(32, (3, 5, 5), strides=(1, 2, 2),
                        kernel_initializer=he_normal,
                        name='conv1')(self.zero1)
    self.batc1 = BatchNormalization(name='batc1')(self.conv1)
    self.actv1 = Activation('relu', name='actv1')(self.batc1)
    self.drop1 = SpatialDropout3D(0.5)(self.actv1)
    self.maxp1 = MaxPooling3D(name='max1', **pool_kwargs)(self.drop1)

    # Block 2: 64 filters, 3x5x5 convolution.
    self.zero2 = ZeroPadding3D(padding=(1, 2, 2), name='zero2')(self.maxp1)
    self.conv2 = Conv3D(64, (3, 5, 5), strides=(1, 1, 1),
                        kernel_initializer=he_normal,
                        name='conv2')(self.zero2)
    self.batc2 = BatchNormalization(name='batc2')(self.conv2)
    self.actv2 = Activation('relu', name='actv2')(self.batc2)
    self.drop2 = SpatialDropout3D(0.5)(self.actv2)
    self.maxp2 = MaxPooling3D(name='max2', **pool_kwargs)(self.drop2)

    # Block 3: 96 filters, 3x3x3 convolution.
    self.zero3 = ZeroPadding3D(padding=(1, 1, 1), name='zero3')(self.maxp2)
    self.conv3 = Conv3D(96, (3, 3, 3), strides=(1, 1, 1),
                        kernel_initializer=he_normal,
                        name='conv3')(self.zero3)
    self.batc3 = BatchNormalization(name='batc3')(self.conv3)
    self.actv3 = Activation('relu', name='actv3')(self.batc3)
    self.drop3 = SpatialDropout3D(0.5)(self.actv3)
    self.maxp3 = MaxPooling3D(name='max3', **pool_kwargs)(self.drop3)

    # Flatten each step of the leading (non-batch) axis for the recurrent part.
    self.resh1 = TimeDistributed(Flatten())(self.maxp3)

    first_gru = GRU(256, return_sequences=True,
                    kernel_initializer='Orthogonal', name='gru1')
    self.gru_1 = Bidirectional(first_gru, merge_mode='concat')(self.resh1)
    second_gru = GRU(256, return_sequences=True,
                     kernel_initializer='Orthogonal', name='gru2')
    self.gru_2 = Bidirectional(second_gru, merge_mode='concat')(self.gru_1)

    attention = SeqSelfAttention(
        attention_type=SeqSelfAttention.ATTENTION_TYPE_MUL,
        kernel_regularizer=keras.regularizers.l2(1e-4),
        bias_regularizer=keras.regularizers.l1(1e-4),
        attention_regularizer_weight=1e-4,
        name='Attention')
    self.attn = attention(self.gru_2)

    # Transforms the RNN output to character activations.
    self.dense1 = Dense(self.output_size, kernel_initializer=he_normal,
                        name='dense1')(self.attn)
    self.y_pred = Activation('softmax', name='softmax')(self.dense1)

    self.labels = Input(name='the_labels',
                        shape=[self.absolute_max_string_len],
                        dtype='float32')
    self.input_length = Input(name='input_length', shape=[1], dtype='int64')
    self.label_length = Input(name='label_length', shape=[1], dtype='int64')

    ctc_args = [self.y_pred, self.labels,
                self.input_length, self.label_length]
    self.loss_out = CTC('ctc', ctc_args)

    model_inputs = [self.input_data, self.labels,
                    self.input_length, self.label_length]
    self.model = Model(inputs=model_inputs, outputs=self.loss_out)
def evaluate_fold(fold_ix, use_pretrained_embedding, bi_directional,
                  num_rnns, merge_mode, hidden_size):
    """Train a GRU sequence tagger on one fold with early stopping.

    Args:
        fold_ix: Key into ``fold2training_data``, ``fold2dev_data`` and
            ``fold2test_data``.
        use_pretrained_embedding: If true, initialise the embedding layer from
            ``get_embedding_matrix``; otherwise start from a fresh embedding.
        bi_directional: Wrap each GRU in ``Bidirectional`` when true.
        num_rnns: Number of stacked recurrent layers.
        merge_mode: ``merge_mode`` for ``Bidirectional`` (ignored otherwise).
        hidden_size: Units per GRU.

    Returns:
        ``(train_predictions_by_tag, test_predictions_by_tag,
        train_ys_by_tag, test_ys_by_tag)`` computed with the best weights
        found on the dev set.
    """
    if use_pretrained_embedding:
        embedding_matrix = get_embedding_matrix(unique_words, generator,
                                                max_features,
                                                init='uniform',
                                                unit_length=False)
        embedding_layer = Embedding(max_features, EMBEDDING_DIM,
                                    weights=[embedding_matrix],
                                    input_length=maxlen,
                                    trainable=True,
                                    mask_zero=True)
    else:
        embedding_layer = Embedding(max_features, embedding_size,
                                    input_length=maxlen,
                                    trainable=True,
                                    mask_zero=True)

    def new_rnn_layer():
        # A fresh layer per call so that stacked layers do not share weights.
        gru = GRU(hidden_size, return_sequences=True, consume_less="cpu")
        if bi_directional:
            return Bidirectional(gru, merge_mode=merge_mode)
        return gru

    model = Sequential()
    model.add(embedding_layer)
    for _ in range(num_rnns):
        model.add(new_rnn_layer())

    model.add(TimeDistributedDense(out_size))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  sample_weight_mode="temporal")

    X_train, y_train, train_ys_by_tag, seq_len_train = \
        fold2training_data[fold_ix]
    X_dev, _, dev_ys_by_tag, seq_len_dev = fold2dev_data[fold_ix]
    X_test, _, test_ys_by_tag, seq_len_test = fold2test_data[fold_ix]

    # Early-stopping state. Seeding the history with -1 means the first epoch
    # always counts as an improvement, so `best_weights` is set by then.
    f1_scores = [-1]
    num_since_best_score = 0
    patience = 3
    best_weights = None
    epochs_per_fit = 1

    for epoch in range(30):
        print("{ts}: Epoch={epoch}".format(ts=get_ts(), epoch=epoch))
        model.fit(X_train, y_train,
                  batch_size=batch_size,
                  nb_epoch=epochs_per_fit,
                  validation_split=0.0,
                  verbose=0)
        micro_metrics, _ = score_predictions(model, X_dev, dev_ys_by_tag,
                                             seq_len_dev)
        print(micro_metrics)

        f1_score = micro_metrics.f1_score
        improved = not (f1_score <= max(f1_scores))
        if improved:
            num_since_best_score = 0
            best_weights = model.get_weights()
        else:
            num_since_best_score += 1
        f1_scores.append(f1_score)

        if num_since_best_score >= patience:
            break

    # Restore the weights of the best dev-set epoch before predicting.
    model.set_weights(best_weights)
    train_predictions_by_tag = get_predictions(model, X_train,
                                               train_ys_by_tag, seq_len_train)
    test_predictions_by_tag = get_predictions(model, X_test,
                                              test_ys_by_tag, seq_len_test)
    return (train_predictions_by_tag, test_predictions_by_tag,
            train_ys_by_tag, test_ys_by_tag)
# Builds a multi-output functional model from the inputs Gs_in, Gi_in, l_in,
# o_in and Gs_in_ (created outside this view): dense heads R_S and I_G, two
# Embedding -> GRU branches merged with Add(), and the dense outputs A_out and
# Q_out; then starts configuring an SGD optimizer.
# NOTE(review): the line contains an unresolved merge-conflict marker
# (`<<<<<<< HEAD`); the other side of the conflict is not visible here and must
# be resolved before this file can run.
# NOTE(review): the original line breaks appear to have been lost on this line.
C_1 = Dense(32, activation='relu')(C); C_2 = Dense(32, activation='relu')(T); C_ = Add()([C_1,C_2]); R_S = Dense(DIM_Gs, activation='relu')(C_); # the output GFT; #------ I(V,l,G_s[t],G_s[t-1]) ---- C_i = Dense(DIM_hm, activation='relu')(C_); S_i = Dense(DIM_hm, activation='relu')(Gs_in); S_i_ = Dense(DIM_hm, activation='relu')(Gs_in_); G_i = Add()([C_i,S_i,S_i_]); G_i = Dense(32, activation='relu')(G_i); I_G = Dense(DIM_Gi, activation='relu')(G_i); #------ GRU for Pi(G_s,G_i,C*) ------- G_S = Embedding(output_dim=ACT_OUT_DIM, input_dim=DIM_Gs, input_length=ACT_STEPS,name = 'emb1')(Gs_in); G_I = Embedding(output_dim=ACT_OUT_DIM, input_dim=DIM_Gi, input_length=ACT_STEPS,name = 'emb2')(Gi_in); # L = Embedding(output_dim=ACT_OUT_DIM, input_dim=DIM_COM, input_length=ACT_STEPS,name = 'emb3')(l_in); G_S = GRU(units=DIM_ha)(G_S); G_I = GRU(units=DIM_ha)(G_I); G_S = Dense(ACT_OUT_DIM, activation='relu')(G_S); G_I = Dense(ACT_OUT_DIM, activation='relu')(G_I); h_a = Add()([G_S,G_I]); L = Dense(ACT_OUT_DIM, activation='relu')(l_in); h_a = Dense(ACT_OUT_DIM, activation='relu')(h_a); h_a = Add()([h_a,L]); A_out = Dense(ACT_OUT_DIM, activation='relu')(h_a); Q_out = Dense(ACT_OUT_DIM, activation='relu')(h_a); model = Model(inputs=[Gs_in,Gi_in,l_in,o_in,Gs_in_], outputs=[A_out,R_S,I_G,Q_out]); <<<<<<< HEAD sgd = optimizers.SGD(lr=0.00001, decay=0.0, momentum=0.4, nesterov=True);
# Continuation of a dict literal `fd` (its opening is outside this view) used to
# format a timestamped model directory name, which is created if missing. Then
# builds a stack of `num_layers` GRU layers (all but the last return sequences)
# followed by a softmax Dense over `num_classes`, and compiles it.
# The `optimizer` name is rebound from a string ('RMS' / 'Adam') to an optimizer
# object; any other string is passed to compile() unchanged.
# NOTE(review): the variables are named lstm_* but the layers are GRUs.
# NOTE(review): loss is 'binary_crossentropy' on a softmax output while
# 'categorical_crossentropy' is commented out - confirm this is intended.
# NOTE(review): the original line breaks appear to have been lost on this line.
'reset_states': reset_states, 'num_layers': num_layers, 'classes': class_string } t = str(int(round(time.time()))) model_name = t + '-num_layers_%(num_layers)s_maxlen_%(input_length)s_lstmsize_%(lstm_size)s_trainsize_%(trainsize)s_testsize_%(testsize)s_classes_%(classes)s' % fd model_path = model_path + model_name + '/' if not os.path.exists(model_path): os.makedirs(model_path) # Define an input sequence and process it. inputs = Input(shape=(None, input_dim)) lstm_outputs = inputs for layer_no in range(num_layers - 1): lstm_outputs = GRU(lstm_size, return_state=False, return_sequences=True)(lstm_outputs) #last layer, that does not return sequences lstm_outputs = GRU(lstm_size, return_state=False, return_sequences=False)(lstm_outputs) dense = Dense(num_classes, activation='softmax') outputs = dense(lstm_outputs) model = Model(inputs, outputs) if optimizer == 'RMS': optimizer = RMSprop(lr=learning_rate) if optimizer == 'Adam': optimizer = Adam(lr=learning_rate) #loss = 'categorical_crossentropy' loss = 'binary_crossentropy' model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy']) print(model.summary())
def NN_model(args, training=True):
    """Build a CNN backbone + two-layer bidirectional GRU network for CTC.

    Args:
        args: Object whose ``model`` attribute names the backbone. Supported
            values: ``'densenet121'``, ``'resnet18'``, ``'resnet18_2222'``,
            ``'resnet18_2222_64'``, ``'resnet34'``, ``'resnet50'``,
            ``'resnet101'``.
        training: Selects which model is returned (see Returns).

    Returns:
        When ``training`` is true, a tuple ``(model, conv_shape[1])`` where
        ``model`` outputs the CTC loss and ``conv_shape[1]`` is the length of
        the first non-batch axis after the permute (the sequence axis fed to
        the GRUs). Otherwise just the prediction model that maps the backbone
        input to the softmax output.

    Raises:
        TypeError: If ``args.model`` is not one of the supported names.
    """
    # Maps the supported ResNet names to their ResnetBuilder factory methods.
    resnet_builders = {
        'resnet18': 'build_resnet_18',
        'resnet18_2222': 'build_resnet_18_2222',
        'resnet18_2222_64': 'build_resnet_18_2222_start_from64',
        'resnet34': 'build_resnet_34',
        'resnet50': 'build_resnet_50',
        'resnet101': 'build_resnet_101',
    }
    image_shape = (N_COL, N_ROW, 3)

    if args.model == 'densenet121':
        from keras.applications.densenet import DenseNet121
        input_tensor = Input(shape=image_shape)
        base_model = DenseNet121(input_shape=image_shape,
                                 include_top=False,
                                 weights='imagenet',
                                 input_tensor=input_tensor,
                                 pooling=None)
    elif args.model in resnet_builders:
        import resnet
        build_backbone = getattr(resnet.ResnetBuilder,
                                 resnet_builders[args.model])
        # num_outputs is a placeholder: the top is not included.
        base_model = build_backbone(input_shape=image_shape,
                                    num_outputs=1,
                                    include_top=False)
    else:
        raise TypeError('model should be in the list of the supported model!')

    print('Input col: ', N_COL)
    print('Input row: ', N_ROW)

    x = base_model.output

    # CNN to RNN: swap the two spatial axes, [b, h, w, c] -> [b, w, h, c].
    x = Lambda(lambda t: K.permute_dimensions(t, (0, 2, 1, 3)))(x)
    conv_shape = x.get_shape()  # e.g. resnet 18 -> (?, 16, 32, 256)
    print('conv_shape', conv_shape)

    # Fold the last two axes together so each step is one feature vector.
    x = Reshape(target_shape=(int(conv_shape[1]),
                              int(conv_shape[2] * conv_shape[3])),
                name='reshape')(x)
    x = Dense(para.dense_size, activation='relu',
              kernel_initializer='he_normal', name='dense1')(x)

    # Two layers of forward/backward GRUs. `kernel_initializer` replaces the
    # legacy `init` keyword so the GRUs match the Dense layers in this function.
    gru_1 = GRU(para.rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru1')(x)
    gru_1b = GRU(para.rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru1_b')(x)
    gru1_merged = add([gru_1, gru_1b])
    gru1_merged = BatchNormalization()(gru1_merged)

    gru_2 = GRU(para.rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(para.rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru2_b')(gru1_merged)
    gru2_merged = concatenate([gru_2, gru_2b])
    gru2_merged = BatchNormalization()(gru2_merged)

    inner = Dense(para.num_classes, kernel_initializer='he_normal',
                  name='dense2')(gru2_merged)
    y_pred = Activation('softmax', name='softmax')(inner)

    labels = Input(name='the_labels', shape=[para.max_text_len],
                   dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    # The CTC loss needs extra inputs, so it is computed inside a Lambda layer
    # instead of a regular Keras loss function.
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(
        [y_pred, labels, input_length, label_length])

    if training:
        train_model = Model(
            inputs=[base_model.input, labels, input_length, label_length],
            outputs=loss_out)
        return train_model, conv_shape[1]
    return Model(inputs=[base_model.input], outputs=y_pred)
def build(input_shape=(32, None, 1), rnn_unit=256, num_classes=5990,
          max_string_len=10):
    """Build a CRNN (conv stack + two bidirectional GRUs) and its CTC wrapper.

    Args:
        input_shape: Shape of one input image, without the batch axis.
        rnn_unit: Units in each GRU direction.
        num_classes: Size of the softmax output.
        max_string_len: Length of the padded label vector.

    Returns:
        ``(base_model, model)``: ``base_model`` maps the image to per-step
        class probabilities; ``model`` additionally takes the label, sequence
        length and label length and outputs the result of the ``'ctc'``
        Lambda layer.
    """
    # Named `image_input` rather than `input` to avoid shadowing the builtin.
    image_input = Input(shape=input_shape, name='the_input')

    m = Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same',
               name='conv1')(image_input)
    m = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool1')(m)
    m = Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same',
               name='conv2')(m)
    m = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool2')(m)
    m = Conv2D(256, kernel_size=(3, 3), activation='relu', padding='same',
               name='conv3')(m)
    m = Conv2D(256, kernel_size=(3, 3), activation='relu', padding='same',
               name='conv4')(m)

    # From here on pooling only shrinks the first spatial axis (pool (2, 1)).
    m = MaxPooling2D(pool_size=(2, 1), strides=(2, 1), padding='valid',
                     name='pool3')(m)
    m = Conv2D(512, kernel_size=(3, 3), activation='relu', padding='same',
               name='conv5')(m)
    m = BatchNormalization(axis=3)(m)
    m = Conv2D(512, kernel_size=(3, 3), activation='relu', padding='same',
               name='conv6')(m)
    m = BatchNormalization(axis=3)(m)
    m = MaxPooling2D(pool_size=(2, 1), strides=(2, 1), padding='valid',
                     name='pool4')(m)
    m = Conv2D(512, kernel_size=(2, 2), activation='relu', padding='valid',
               name='conv7')(m)

    # Swap the two spatial axes, then flatten each step into one vector.
    m = Permute((2, 1, 3), name='permute')(m)
    m = TimeDistributed(Flatten(), name='timedistrib')(m)

    # Layer names say "blstm" but the cells are GRUs; the names are kept
    # because they are part of the saved-weights layout.
    m = Bidirectional(GRU(rnn_unit, return_sequences=True, implementation=2),
                      name='blstm1')(m)
    m = Bidirectional(GRU(rnn_unit, return_sequences=True, implementation=2),
                      name='blstm2')(m)
    y_pred = Dense(num_classes, name='blstm2_out', activation='softmax')(m)

    base_model = Model(inputs=image_input, outputs=y_pred)

    label = Input(name='label', shape=[max_string_len], dtype='int64')
    seq_length = Input(name='seq_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    # NOTE(review): the argument order (label first) must match how
    # `ctc_lambda_func` unpacks its input - it is defined elsewhere; confirm.
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(
        [label, y_pred, seq_length, label_length])

    # Use `inputs=`/`outputs=` as for `base_model` above; the singular
    # `input=`/`output=` spellings are the legacy Keras-1 keywords.
    model = Model(inputs=[image_input, label, seq_length, label_length],
                  outputs=[loss_out])
    model.summary()
    return base_model, model
def training(self):
    """Train a stacked-GRU classifier on the dataset named by ``sys.argv[1]``.

    Reads ``<PATH>/data/<name>/<name>.csv``, fits the network, writes a JSON
    summary to ``<PATH>/data/<name>/<name>.out`` and saves the model to
    ``<PATH>/data/<name>/train_<timestamp>/model.h5``.

    Evaluation and the validation split both use the training data itself,
    so the reported score and accuracy are training-set figures.
    """
    # Init network configuration.
    self.setColumns()

    dataset = sys.argv[1]
    data_dir = self.PATH + '/data/' + dataset

    # Preprocess the data.
    X_train, Y_train = self.pre_processing_for_data(
        str(data_dir + '/' + dataset + '.csv'))

    model = Sequential()

    # Input layer.
    model.add(GRU(output_dim=int(self.hidden_unit[0]),
                  return_sequences=True,
                  input_shape=(self.timesteps, self.data_dim)))

    # Hidden layers: only the last one collapses the sequence.
    # NOTE(review): the original line breaks were lost; Dropout is assumed to
    # follow every hidden GRU (it indexes self.dropout with the loop variable).
    last_layer = self.hidden_layer - 1
    for i in range(0, self.hidden_layer):
        if i == last_layer:
            model.add(GRU(int(self.hidden_unit[i])))
        else:
            model.add(GRU(int(self.hidden_unit[i]), return_sequences=True))
        model.add(Dropout(float(self.dropout[i])))

    # Output layer.
    model.add(Dense(len(Y_train[0]), activation=self.in_activation))

    # Set cost function, optimizer and metrics.
    model.compile(loss=self.loss_function,
                  optimizer=rmsprop(lr=self.learning_rate),
                  metrics=['accuracy'])

    model.fit(X_train, Y_train,
              batch_size=self.batch_size,
              nb_epoch=self.epoch,
              validation_data=(X_train, Y_train))

    score = model.evaluate(X_train, Y_train, verbose=0)
    model_name = 'train_' + time.strftime("%Y%m_%d_%H_%M", time.localtime())

    summary = {
        'ntb': {
            'model_name': model_name,
            'samples': self.samples,
            'score': score[0],
            'accuracy': score[1],
            # recall/precision were disabled upstream: they did not work with
            # the multi-dimensional targets.
        },
    }
    # The context manager closes the file even if serialization fails.
    with open(data_dir + '/' + dataset + '.out', 'w') as out:
        json.dump(summary, out, separators=(',', ':'))

    # Save the model.
    model_dir = data_dir + '/' + model_name
    if not path.exists(model_dir):
        mkdir(model_dir)
    model.save(model_dir + '/' + 'model' + '.h5')
# Tail of a call whose beginning is outside this view (it receives a 20%
# `test_size` and shuffle=False), followed by a script that trains a single
# GRU(20) + Dense(1) regressor with mean-squared-error loss and Adam, then
# starts preparing a prediction loop from the first training sample.
# NOTE(review): the original line breaks appear to have been lost on this line.
test_size=int(len(X) * 0.2), shuffle=False) # training n_in = 1 n_hidden = 20 n_out = 1 epochs = 10 batch_size = 10 model = Sequential() #model.add(SimpleRNN(n_hidden, input_shape=(inputlen, n_in), kernel_initializer='random_normal')) model.add( GRU(n_hidden, input_shape=(inputlen, n_in), kernel_initializer='random_normal')) model.add(Dense(n_out, kernel_initializer='random_normal')) model.add(Activation('linear')) model.compile(loss='mean_squared_error', optimizer=Adam(lr=0.01, beta_1=0.9, beta_2=0.999)) model.fit(x, y, batch_size=batch_size, epochs=epochs, validation_data=(val_x, val_y)) # prediction in_ = x[:1] # first sample of x, a (1,20,1) array predicted = [None for _ in range(inputlen)]
def train(load=None):
    """Build the CTC text-recognition network, then train it or load it.

    Args:
        load: Optional path of a saved model. When given, the model is read
            with ``load_model(load, compile=False)`` and no fitting happens;
            when falsy, a fresh model is built and trained for one epoch.

    Returns:
        The compiled Keras model.
    """
    # NOTE(review): the original line breaks were lost. The generator prints
    # and the save step are assumed to belong to the `if not load:` branches -
    # confirm against the upstream file.

    # Network parameters
    conv_filters = 16
    kernel_size = (3, 3)
    pool_size = 2
    time_dense_size = 32
    rnn_size = 512

    channels_first = K.image_data_format() == 'channels_first'
    input_shape = (1, IMG_W, IMG_H) if channels_first else (IMG_W, IMG_H, 1)

    batch_size = 32
    downsample_factor = pool_size ** 2
    output_size = len(LETTERS) + 1

    if not load:
        tiger_train = TextImageGenerator(join(DATA_PATH, 'train'),
                                         batch_size, downsample_factor)
        tiger_val = TextImageGenerator(join(DATA_PATH, 'val'),
                                       batch_size, downsample_factor)
        print(tiger_train.n)
        print(tiger_val.n)

    act = 'relu'
    input_data = Input(name='the_input', shape=input_shape, dtype='float32')

    # Two identical conv + max-pool stages (conv1/max1, conv2/max2).
    inner = input_data
    for stage in (1, 2):
        inner = Conv2D(conv_filters, kernel_size,
                       padding='same',
                       activation=act,
                       kernel_initializer='he_normal',
                       name='conv%d' % stage)(inner)
        inner = MaxPooling2D(pool_size=(pool_size, pool_size),
                             name='max%d' % stage)(inner)

    conv_to_rnn_dims = (IMG_W // downsample_factor,
                        (IMG_H // downsample_factor) * conv_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # Cuts down the input size going into the RNN.
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    # Two layers of bidirectional GRUs: the first pair is summed, the second
    # pair is concatenated.
    gru_1 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru1')(inner)
    gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru2_b')(gru1_merged)
    gru2_merged = concatenate([gru_2, gru_2b])

    # Transforms the RNN output to character activations.
    inner = Dense(output_size, kernel_initializer='he_normal',
                  name='dense2')(gru2_merged)
    y_pred = Activation('softmax', name='softmax')(inner)
    Model(inputs=input_data, outputs=y_pred).summary()

    labels = Input(name='the_labels', shape=[MAX_OUT_LEN], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    # The CTC loss needs extra inputs, so it is computed inside a Lambda layer
    # instead of a regular Keras loss function.
    ctc_inputs = [y_pred, labels, input_length, label_length]
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,),
                      name='ctc')(ctc_inputs)

    # clipnorm seems to speed up convergence
    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)

    if load:
        model = load_model(load, compile=False)
        print('Model loaded from file.')
    else:
        model = Model(inputs=[input_data, labels, input_length, label_length],
                      outputs=loss_out)

    # The loss is already the output of the 'ctc' layer, so the compiled loss
    # simply passes the prediction through.
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred},
                  optimizer=sgd,
                  metrics=['accuracy'])

    if not load:
        # Captures the softmax output so it can be decoded for visualization.
        test_func = K.function([input_data], [y_pred])

        # TensorBoard instance pointing at a per-run logs directory.
        tensorboard = TensorBoard(log_dir='logs/{}'.format(time()),
                                  batch_size=batch_size,
                                  update_freq=128)

        history = model.fit_generator(
            generator=tiger_train.next_batch(),
            steps_per_epoch=tiger_train.n,
            epochs=1,
            validation_data=tiger_val.next_batch(),
            validation_steps=tiger_val.n,
            callbacks=[tensorboard])

        # Save model and architecture to a single, timestamped file.
        stamp = (str(datetime.datetime.utcnow())
                 .replace(' ', '_')
                 .replace(':', '-')
                 .replace('.', '-'))
        modelName = join('./models', "model-" + stamp + ".h5")
        model.save(modelName)
        print("Saved model to disk:%s\n" % modelName)

    return model
# Adds nodes to `graph` using the node-based `add_node` API: for n-gram size
# `n`, a time-distributed max-pool over the `conv{n}gram` node, Dropout(0.15)
# and a flatten; then a forward and a backward GRU(72) that each read the
# concatenation of every `flatten{n}gram` node, and a Dropout(0.7) over both
# GRU outputs.
# NOTE(review): `n` is not defined in this view; the first three add_node calls
# presumably sit inside a `for n in NGRAMS:` loop whose header and indentation
# were lost - confirm before editing.
graph.add_node(TimeDistributedMaxPooling2D(pool_size=(SENTENCE_LENGTH - n + 1, 1)), name='maxpool{}gram'.format(n), input='conv{}gram'.format(n)) graph.add_node(Dropout(0.15), name='dropout{}gram'.format(n), input='maxpool{}gram'.format(n)) graph.add_node(TimeDistributedFlatten(), name='flatten{}gram'.format(n), input='dropout{}gram'.format(n)) log('Adding bi-directional GRU') graph.add_node(GRU(72), name='gru_forwards', inputs=['flatten{}gram'.format(n) for n in NGRAMS], concat_axis=-1) graph.add_node(GRU(72, go_backwards=True), name='gru_backwards', inputs=['flatten{}gram'.format(n) for n in NGRAMS], concat_axis=-1) # graph.add_node(GRU(16), name='gru', input='flatten4gram') ADDITIONAL_FC = True graph.add_node(Dropout(0.7), name='gru_dropout', inputs=['gru_forwards', 'gru_backwards'])
def test_attention_mm1(batch_size, word_embed_size, sent_embed_size,
                       doc_embed_size, vocab_size, max_words, max_sents,
                       num_classes, should_fit_model):
    """Smoke-test ``custom_attn.AttentionMM`` on a two-document model.

    Builds a left and a right tower (word embedding -> sentence BiGRU ->
    document BiGRU), joins them with ``AttentionMM("concat")`` and adds a
    small softmax classifier. The model summary is always printed.

    Args:
        batch_size: Batch size for fitting; twice as many samples are drawn.
        word_embed_size: Word embedding dimension.
        sent_embed_size: Units per direction of the sentence GRU.
        doc_embed_size: Units per direction of the document GRU.
        vocab_size: Number of rows in the embedding matrix.
        max_words: Tokens per sentence.
        max_sents: Sentences per document.
        num_classes: Number of output classes.
        should_fit_model: If true, compile and fit for one epoch on random
            data.
    """
    # Both towers start from the same random embedding matrix.
    E = np.random.random((vocab_size, word_embed_size))

    def sentence_encoder():
        # Token ids -> masked embedding -> BiGRU sentence vector.
        sent_in = Input(shape=(max_words,), dtype="int32")
        sent_emb = Embedding(input_dim=vocab_size,
                             output_dim=word_embed_size,
                             mask_zero=True,
                             weights=[E])(sent_in)
        sent_enc = Bidirectional(
            GRU(sent_embed_size, return_sequences=False))(sent_emb)
        return Model(inputs=sent_in, outputs=sent_enc)

    def document_encoder(sentence_model):
        # Apply the sentence encoder to every sentence, then a BiGRU over them.
        doc_in = Input(shape=(max_sents, max_words), dtype="int32")
        doc_emb = TimeDistributed(sentence_model)(doc_in)
        doc_enc = Bidirectional(
            GRU(doc_embed_size, return_sequences=True))(doc_emb)
        return doc_in, doc_enc

    sent_model_left = sentence_encoder()
    sent_model_right = sentence_encoder()
    doc_in_left, doc_enc_left = document_encoder(sent_model_left)
    doc_in_right, doc_enc_right = document_encoder(sent_model_right)

    # attention
    doc_att = custom_attn.AttentionMM("concat")([doc_enc_left, doc_enc_right])

    # prediction
    fc1_dropout = Dropout(0.2)(doc_att)
    fc1 = Dense(50, activation="relu")(fc1_dropout)
    fc2_dropout = Dropout(0.2)(fc1)
    doc_pred = Dense(num_classes, activation="softmax")(fc2_dropout)

    model = Model(inputs=[doc_in_left, doc_in_right], outputs=doc_pred)
    model.summary()

    if should_fit_model:
        n_samples = batch_size * 2
        token_shape = (n_samples, max_sents, max_words)
        # Draw integer token ids. Floats in [0, 1) would all truncate to
        # index 0, which `mask_zero=True` treats as padding.
        Xleft = np.random.randint(0, vocab_size, size=token_shape,
                                  dtype="int32")
        Xright = np.random.randint(0, vocab_size, size=token_shape,
                                   dtype="int32")
        y = np.random.randint(0, num_classes, n_samples)
        Y = np_utils.to_categorical(y, num_classes=num_classes)
        model.compile(optimizer="adam", loss="categorical_crossentropy")
        model.fit([Xleft, Xright], Y, batch_size=batch_size, epochs=1)
    return
def create_model(Vocabulary_size, X_max_len, n_phonetic_features, n1, n2, n3,
                 n4, n5, n6, HIDDEN_DIM, LAYER_NUM):
    """Build the multi-output tagging model with an attention seq2seq head.

    The current word and its four right / four left neighbours share one
    embedding, pass through dropout and Gaussian noise, and are convolved
    with kernel sizes 4 and 5.  Max- and average-pooled feature maps are
    added, all branches are concatenated, encoded by a bidirectional GRU and
    joined with the phonetic feature vector before two dense layers feed six
    softmax heads.  A separate GRU encoder/decoder with dot-product attention
    over the current word produces the sequence output.

    Reads the module-level names ``EMBEDDING_DIM``, ``no_filters`` and
    ``rnn_output_size``.

    Args:
        Vocabulary_size: Embedding vocabulary size and width of the sequence
            softmax.
        X_max_len: Length of every word-index input.
        n_phonetic_features: Width of the phonetic feature input.
        n1, n2, n3, n4, n5, n6: Sizes of the six softmax heads
            ``output1`` .. ``output6``.
        HIDDEN_DIM: Units of the two dense layers before the heads.
        LAYER_NUM: Unused; kept so existing callers keep working.

    Returns:
        An uncompiled ``Model`` with inputs ``[current_word, decoder_input,
        right_word1..4, left_word1..4, phonetic_input]`` and outputs
        ``[sequence_output, out1, ..., out6]``.
    """
    def word_input(index):
        """Create one word-index input named ``input<index>``."""
        return Input(shape=(X_max_len,), dtype='float32',
                     name='input' + str(index))

    current_word = word_input(1)    # for encoder (shared)
    decoder_input = word_input(3)   # for decoder -- attention
    right_words = [word_input(k) for k in range(4, 8)]    # input4..input7
    left_words = [word_input(k) for k in range(8, 12)]    # input8..input11
    phonetic_input = Input(shape=(n_phonetic_features,), dtype='float32',
                           name='input12')

    emb_layer1 = Embedding(Vocabulary_size, EMBEDDING_DIM,
                           input_length=X_max_len, mask_zero=False,
                           name='Embedding')

    list_of_inputs = [current_word] + right_words + left_words
    list_of_embeddings = [emb_layer1(word) for word in list_of_inputs]
    list_of_embeddings = [
        Dropout(0.50, name='drop1_' + str(i))(emb)
        for i, emb in enumerate(list_of_embeddings)
    ]
    list_of_embeddings = [
        GaussianNoise(0.05, name='noise1_' + str(i))(emb)
        for i, emb in enumerate(list_of_embeddings)
    ]

    def pooled_conv_branches(kernel_size):
        """Conv1D + (max pool + average pool) for every embedded word."""
        tag = str(kernel_size)
        convs = [
            Conv1D(filters=no_filters, kernel_size=kernel_size,
                   padding='valid', activation='relu', strides=1,
                   name='conv' + tag + '_' + str(i))(emb)
            for i, emb in enumerate(list_of_embeddings)
        ]
        max_pools = [MaxPooling1D(name='max' + tag + '_' + str(i))(conv)
                     for i, conv in enumerate(convs)]
        avg_pools = [AveragePooling1D(name='avg' + tag + '_' + str(i))(conv)
                     for i, conv in enumerate(convs)]
        return [
            add([max_pool, avg_pool], name='merge_conv' + tag + '_' + str(i))
            for i, (max_pool, avg_pool) in enumerate(zip(max_pools, avg_pools))
        ]

    mergedPools = pooled_conv_branches(4) + pooled_conv_branches(5)
    concat = concatenate(mergedPools, name='main_merge')

    x = Dropout(0.15, name='drop_single1')(concat)
    # Feed the dropped-out tensor to the GRU; previously the GRU consumed
    # `concat` directly, which left 'drop_single1' disconnected.
    x = Bidirectional(GRU(rnn_output_size), name='bidirec1')(x)

    concat2 = concatenate([x, phonetic_input], name='phonetic_merging')

    x = Dense(HIDDEN_DIM, activation='relu', kernel_initializer='he_normal',
              kernel_constraint=maxnorm(3), bias_constraint=maxnorm(3),
              name='dense1')(concat2)
    x = Dropout(0.15, name='drop_single2')(x)
    x = Dense(HIDDEN_DIM, kernel_initializer='he_normal', activation='tanh',
              kernel_constraint=maxnorm(3), bias_constraint=maxnorm(3),
              name='dense2')(x)
    x = Dropout(0.15, name='drop_single3')(x)

    # Six classification heads: output1 .. output6.
    tag_outputs = [
        Dense(size, kernel_initializer='he_normal', activation='softmax',
              name='output' + str(k))(x)
        for k, size in enumerate((n1, n2, n3, n4, n5, n6), start=1)
    ]

    # Luong et al. 2015 attention model
    emb_layer = Embedding(Vocabulary_size, EMBEDDING_DIM,
                          input_length=X_max_len, mask_zero=True,
                          name='Embedding_for_seq2seq')
    current_word_embedding = emb_layer(current_word)

    # The returned final state is not used; the decoder is seeded with the
    # last time step of the encoder output sequence instead.
    encoder, _final_state = GRU(rnn_output_size, return_sequences=True,
                                unroll=True, return_state=True,
                                name='encoder')(current_word_embedding)
    encoder_last = encoder[:, -1, :]

    decoder = emb_layer(decoder_input)
    decoder = GRU(rnn_output_size, return_sequences=True, unroll=True,
                  name='decoder')(decoder, initial_state=[encoder_last])

    # Dot-product scores between decoder and encoder steps, normalised by
    # softmax, then used to build a weighted sum of encoder outputs.
    attention = dot([decoder, encoder], axes=[2, 2], name='dot')
    attention = Activation('softmax', name='attention')(attention)
    context = dot([attention, encoder], axes=[2, 1], name='dot2')
    decoder_combined_context = concatenate([context, decoder],
                                           name='concatenate')

    outputs = TimeDistributed(Dense(64, activation='tanh'),
                              name='td1')(decoder_combined_context)
    outputs = TimeDistributed(Dense(Vocabulary_size, activation='softmax'),
                              name='td2')(outputs)

    all_inputs = ([current_word, decoder_input] + right_words + left_words +
                  [phonetic_input])
    all_outputs = [outputs] + tag_outputs

    # The model is returned uncompiled; the caller chooses the optimizer.
    return Model(inputs=all_inputs, outputs=all_outputs)
kernel_initializer='he_normal', name='conv3')(inner) # inner = BatchNormalization()(inner) # inner = Dropout(0.2)(inner) conv_to_rnn_dims = (img_w // (pool_size**2), (img_h // (pool_size**2)) * conv_filters) inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner) # cuts down input size going into RNN: inner = Dense(time_dense_size, activation=act, name='dense1')(inner) # Two layers of bidirectional GRUs # GRU seems to work as well, if not better than LSTM: gru_1 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru1')(inner) gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru1_b')(inner) gru1_merged = add([gru_1, gru_1b]) gru_2 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru2')(gru1_merged) gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal',
# One-hot encode the test labels as a dense (n_samples, n_classes) array.
# NOTE(review): this re-fits `ohe` on the test labels; if the encoder was
# already fitted on the training labels, `transform` may be what is wanted --
# confirm against the code above this fragment.
one_hots_test = ohe.fit_transform(np.reshape(one_hots_test, (-1, 1))).A

# Shuffle training samples and labels with one shared permutation.
p = np.random.permutation(len(train_x))
train_x = np.array(train_x)[p]
one_hots_train = one_hots_train[p]

# Flatten everything after the second axis: (samples, steps, features).
train_x = train_x.reshape(train_x.shape[0], train_x.shape[1], -1)
test_x = np.array(test_x)
test_x = test_x.reshape(test_x.shape[0], test_x.shape[1], -1)
input_shape = train_x.shape[1:3]

print('Build RNN Model:')
model = Sequential([
    GRU(32, input_shape=input_shape, activation='relu',
        return_sequences=False),
    Dense(64, init='normal', activation='relu'),
    Dropout(0.3),
    Dense(5, activation='softmax'),
])

# training
his = LossHistory()
# NOTE(review): mean squared error is paired with a softmax output here;
# verify this is intentional.
model.compile(loss='mean_squared_error',
              optimizer=Adam(1e-4),
              metrics=['accuracy'])
X_train = train_x
Y_train = one_hots_train
metaData = { "maxLength": maxLength, "vocab_size": vocab_size, "output_dimen": output_dimen, "sentiment_tag": sentiment_tag } __pickleStuff("./data/meta_sentiment_chinese.p", metaData) # build model and train embedding_dim = 256 model = Sequential() model.add(Embedding(vocab_size, embedding_dim, input_length=maxLength)) # Each input would have a size of (maxLength x 256) and each of these 256 sized vectors are fed into the GRU layer one at a time. # All the intermediate outputs are collected and then passed on to the second GRU layer. model.add(GRU(256, dropout=0.9, return_sequences=True)) # Using the intermediate outputs, we pass them to another GRU layer and collect the final output only this time model.add(GRU(256, dropout=0.9)) # The output is then sent to a fully connected layer that would give us our final output_dim classes model.add(Dense(output_dimen, activation='softmax')) # We use the adam optimizer instead of standard SGD since it converges much faster tbCallBack = TensorBoard(log_dir='./Graph/sentiment_chinese', histogram_freq=0, write_graph=True, write_images=True) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) model.summary() model.fit(totalX, totalY,
def negative_samples(input_length, input_dim, output_length, output_dim,
                     hidden_dim, ns_amount, learning_rate, drop_rate):
    """Build and compile the negative-sampling similarity model.

    A query sequence is encoded by one averaged bidirectional GRU; the
    weighted reference sequence and each of the ``ns_amount`` weighted
    negative sequences are encoded by a second, shared bidirectional GRU.
    Normalised dot products between the query encoding and each of the
    other encodings are collected, and ``loss_c`` applied to that list is
    used as the training loss.  The model's only output is the similarity
    with the reference sequence.

    Args:
        input_length, input_dim: Shape of one query sequence.
        output_length, output_dim: Shape of one reference/negative sequence.
        hidden_dim: GRU units of both bidirectional encoders.
        ns_amount: Number of negative samples per example (may be 0).
        learning_rate: Learning rate handed to ``adam``.
        drop_rate: Dropout rate applied to every encoder output.

    Returns:
        The compiled ``Model`` taking ``[q_encoder_input, r_decoder_input,
        w_decoder_input, weight_data_r, weight_data_w]``.
    """
    seq_shape = (output_length, output_dim)

    q_encoder_input = Input(shape=(input_length, input_dim))
    r_decoder_input = Input(shape=seq_shape)
    weight_data_r = Input(shape=(1, ))
    weight_data_w = Input(shape=(1, ns_amount))

    # One weight tensor per negative sample (split along the last axis).
    weight_data_w_list = []
    if ns_amount != 0:
        weight_data_w_list = Lambda(lambda x: tf.split(
            x, num_or_size_splits=ns_amount, axis=2))(weight_data_w)

    fixed_r_decoder_input = adding_weight(
        output_length, output_dim)([r_decoder_input, weight_data_r])

    # One sequence tensor per negative sample (split along the last axis).
    w_decoder_input = Input(shape=(output_length, output_dim, ns_amount))
    w_decoder_input_list = []
    if ns_amount != 0:
        w_decoder_input_list = Lambda(lambda x: tf.split(
            x, num_or_size_splits=ns_amount, axis=3))(w_decoder_input)

    if ns_amount == 1:
        # With a single negative sample the results are wrapped so they can
        # be handled like the multi-sample lists.
        w_decoder_input_list = [w_decoder_input_list]
        weight_data_w_list = [weight_data_w_list]

    # Drop the split axis, apply the per-sample weight and mask zero steps.
    fixed_w_decoder_input = []
    for idx in range(ns_amount):
        negative = Reshape(seq_shape)(w_decoder_input_list[idx])
        weight = Reshape((1, ))(weight_data_w_list[idx])
        weighted = adding_weight(output_length,
                                 output_dim)([negative, weight])
        fixed_w_decoder_input.append(
            Masking(mask_value=0., input_shape=seq_shape)(weighted))

    q_encoder_input_masked = Masking(
        mask_value=0., input_shape=(input_length, input_dim))(q_encoder_input)
    fixed_r_decoder_input_masked = Masking(
        mask_value=0., input_shape=seq_shape)(fixed_r_decoder_input)

    encoder = Bidirectional(GRU(hidden_dim), merge_mode="ave",
                            name="bidirectional1")
    q_encoder_output = Dropout(rate=drop_rate, name="dropout1")(
        encoder(q_encoder_input_masked))

    # The same decoder instance encodes the reference and every negative.
    decoder = Bidirectional(GRU(hidden_dim), merge_mode="ave",
                            name="bidirectional2")
    r_decoder_output = Dropout(rate=drop_rate, name="dropout2")(
        decoder(fixed_r_decoder_input_masked))

    w_decoder_output_list = [
        Dropout(rate=drop_rate)(decoder(negative))
        for negative in fixed_w_decoder_input
    ]

    # Reference similarity first, then one similarity per negative sample.
    similarities = [
        Dot(axes=1, normalize=True)([q_encoder_output, r_decoder_output])
    ] + [
        Dot(axes=1, normalize=True)([q_encoder_output, w_output])
        for w_output in w_decoder_output_list
    ]

    loss_data = Lambda(lambda x: loss_c(x))(similarities)

    model = Model([
        q_encoder_input, r_decoder_input, w_decoder_input, weight_data_r,
        weight_data_w
    ], similarities[0])

    # The loss ignores y_true / y_pred and returns the precomputed tensor.
    model.compile(optimizer=adam(lr=learning_rate),
                  loss=lambda y_true, y_pred: loss_data)
    return model
print(vocabSize, embeddingSize)

# One document = numSentencesPerDoc sentences of numWordsPerSentence word ids.
x_in = Input(shape=(numSentencesPerDoc, numWordsPerSentence), name='Input')

# Frozen embedding initialised from the pre-computed weight matrix.
embLayer = Embedding(input_dim=embWeights.shape[0],
                     output_dim=embWeights.shape[1],
                     weights=[embWeights],
                     mask_zero=False,
                     trainable=False,
                     embeddings_regularizer=regularizers.l2(0.0000001),
                     input_length=numWordsPerSentence,
                     name='Embedding')

sent_vecs = []
extraDimLayer = Lambda(lambda x: K.expand_dims(x), name='extraDimForConvo')
squeezeSecondLayer = Lambda(lambda x: K.squeeze(x, 1), name='squeezeLayer')

# Word-level bidirectional GRU; these layer objects are created once, before
# the per-sentence loop.
biRnn_Layer = Bidirectional(GRU(WORD_GRU_NUM,
                                return_sequences=True,
                                bias_regularizer=regularizers.l2(eta),
                                kernel_regularizer=regularizers.l2(eta),
                                recurrent_regularizer=regularizers.l2(eta),
                                dropout=dr,
                                recurrent_dropout=dropWordRnnOut,
                                unroll=True),
                            merge_mode='concat')
CONTEXT_DIM = 2*WORD_GRU_NUM  # forward + backward outputs are concatenated
att_layer1 = Dense(CONTEXT_DIM, use_bias=True, activation='tanh')
att_layer2 = Dense(1, use_bias=False)

for i in range(numSentencesPerDoc):
    # Bind the loop index as a default argument: a plain closure over `i`
    # is late-binding, so any later re-evaluation or serialisation of the
    # Lambda would slice with the final value of `i` for every sentence.
    x_pop = Lambda(lambda x, i=i: x[:, i],
                   output_shape=(numWordsPerSentence, ),
                   name='convert_shape_'+'sentence'+str(i))(x_in)
    emb = embLayer(x_pop)
    emb = Dropout(dropWordEmb)(emb)
def build_model(self):
    """Build the GRU encoder-decoder with multiplicative global attention.

    Reads ``self.input_length``, ``self.output_length``, ``self.num_vocab``,
    ``self.embedding_dim`` and ``self.num_units``.  Prints the model summary
    and writes the architecture plot to
    ``seq2seq_attention_model_plot.png``.

    Paper: https://arxiv.org/abs/1508.04025

    Returns:
        An uncompiled ``Model`` mapping ``[enc_input, dec_input]`` to a
        softmax over ``self.num_vocab`` tokens at every decoder step.
    """
    # ---- encoder (fig1) ----
    enc_in = Input(shape=(self.input_length, ), dtype='int32',
                   name='enc_input')
    enc_embedding = Embedding(input_dim=self.num_vocab,
                              output_dim=self.embedding_dim,
                              input_length=self.input_length,
                              trainable=True,
                              name='enc_embedding')
    enc_embedded = enc_embedding(enc_in)
    encoded, state = GRU(units=self.num_units,
                         return_sequences=True,
                         return_state=True,
                         name='enc_GRU')(enc_embedded)

    # ---- decoder (fig2) ----
    dec_in = Input(shape=(self.output_length, ), dtype='int32',
                   name='dec_input')
    dec_embedding = Embedding(input_dim=self.num_vocab,
                              output_dim=self.embedding_dim,
                              input_length=self.output_length,
                              trainable=True,
                              name='dec_embedding')
    # Intended to share weights with the encoder embedding layer.
    # NOTE(review): `dec_embedding` has not been built at this point, and
    # building it on the call below presumably creates a fresh `embeddings`
    # weight that replaces this assignment -- verify that the two layers
    # really end up sharing one matrix.
    dec_embedding.embeddings = enc_embedding.embeddings
    dec_embedded = dec_embedding(dec_in)
    decoded = GRU(units=self.num_units,
                  return_sequences=True,
                  name='dec_GRU')(dec_embedded, initial_state=state)

    # Luong's global attention: repeat every decoder state once per encoder
    # position so it can be multiplied with the encoder annotations.
    repeat_dec = TimeDistributed(RepeatVector(self.input_length),
                                 name='repeat_dec')
    rep_decoded = repeat_dec(decoded)

    # ---- annotations (fig3) ----
    annotation_layer = TimeDistributed(Dense(units=self.num_units),
                                       name='annotation_layer')
    annotation = annotation_layer(encoded)
    repeat_enc = TimeDistributed(RepeatVector(self.output_length),
                                 name='repeat_enc')
    rep_annotation = repeat_enc(annotation)
    # Swap the first two non-batch axes so the layout matches rep_decoded.
    rep_annotation = Permute(
        (2, 1, 3),
        input_shape=(self.input_length, self.output_length, self.num_units),
        name='permute_rep_annotation')(rep_annotation)

    # ---- attention weights (fig4) ----
    attention_mul = Multiply(name='attention_mul')
    elem_score = attention_mul([rep_decoded, rep_annotation])
    # Summing the element-wise product over the unit axis gives a dot
    # product score; softmax over axis 2 normalises it.
    score = Lambda(lambda x: K.sum(x, axis=3, keepdims=True),
                   name='score')(elem_score)
    attention_weight = Lambda(lambda x: softmax(x, axis=2),
                              name='attention_weight')(score)
    context_mul = Multiply(name='context_mul')

    # ---- context vector (fig5) ----
    rep_encoded = repeat_enc(encoded)
    rep_encoded = Permute(
        (2, 1, 3),
        input_shape=(self.input_length, self.output_length, self.num_units),
        name='permute_rep_encoded')(rep_encoded)
    elem_context = context_mul([rep_encoded, attention_weight])
    # Attention-weighted sum of the encoder outputs.
    context = Lambda(lambda x: K.sum(x, axis=2), name='context')(elem_context)
    dec_and_att = Lambda(lambda x: K.concatenate([x[0], x[1]], axis=-1),
                         name='dec_att_concat')([decoded, context])

    # ---- fully connected output (fig6) ----
    fc1 = TimeDistributed(Dense(units=self.num_units * 2),
                          name='fc1')(dec_and_att)
    fc1_activated = Activation('tanh')(fc1)
    fc2 = TimeDistributed(Dense(units=self.num_vocab),
                          name='fc2')(fc1_activated)
    preds = Activation('softmax', name='softmax')(fc2)

    model = Model([enc_in, dec_in], preds)
    model.summary()
    plot_model(model,
               to_file='seq2seq_attention_model_plot.png',
               show_shapes=True,
               show_layer_names=True)
    return model
def get_Model(training):
    """Build the CNN + bidirectional-GRU text recogniser.

    Reads the module-level ``img_w``, ``img_h``, ``num_classes`` and
    ``max_text_len``.  Shape comments assume the 128x64 input the original
    code was written for.

    Args:
        training: When true, return the training model whose output is the
            CTC loss computed by ``ctc_lambda_func``; otherwise return the
            inference model that outputs the per-step softmax.

    Returns:
        ``Model([inputs, labels, input_length, label_length], loss_out)``
        for training, ``Model([inputs], y_pred)`` otherwise.
    """
    input_shape = (img_w, img_h, 1)  # (128, 64, 1)

    def conv_bn_relu(tensor, filters, kernel, name,
                     kernel_initializer='he_normal'):
        """'same'-padded convolution, batch normalisation, ReLU."""
        tensor = Conv2D(filters, kernel, padding='same', name=name,
                        kernel_initializer=kernel_initializer)(tensor)
        tensor = BatchNormalization()(tensor)
        return Activation('relu')(tensor)

    # Make Network
    inputs = Input(name='the_input', shape=input_shape,
                   dtype='float32')  # (None, 128, 64, 1)

    # Convolution layer (VGG)
    inner = conv_bn_relu(inputs, 64, (3, 3), 'conv1')  # (None, 128, 64, 64)
    inner = MaxPooling2D(pool_size=(2, 2),
                         name='max1')(inner)  # (None, 64, 32, 64)

    inner = conv_bn_relu(inner, 128, (3, 3), 'conv2')  # (None, 64, 32, 128)
    inner = MaxPooling2D(pool_size=(2, 2),
                         name='max2')(inner)  # (None, 32, 16, 128)

    inner = conv_bn_relu(inner, 256, (3, 3), 'conv3')  # (None, 32, 16, 256)
    inner = conv_bn_relu(inner, 256, (3, 3), 'conv4')  # (None, 32, 16, 256)
    inner = MaxPooling2D(pool_size=(1, 2),
                         name='max3')(inner)  # (None, 32, 8, 256)

    inner = conv_bn_relu(inner, 512, (3, 3), 'conv5')  # (None, 32, 8, 512)
    # conv6 never specified he_normal; keep the Keras default initialiser.
    inner = conv_bn_relu(inner, 512, (3, 3), 'conv6',
                         kernel_initializer='glorot_uniform')
    inner = MaxPooling2D(pool_size=(1, 2),
                         name='max4')(inner)  # (None, 32, 4, 512)

    inner = conv_bn_relu(inner, 512, (2, 2), 'con7')  # (None, 32, 4, 512)

    # CNN to RNN.  Width is halved twice (max1, max2) and height four times
    # (max1..max4); the last convolution has 512 channels.  Deriving the
    # target shape keeps the model valid for sizes other than 128x64.
    rnn_steps = img_w // 4                # 32 for img_w = 128
    rnn_features = (img_h // 16) * 512    # 2048 for img_h = 64
    inner = Reshape(target_shape=(rnn_steps, rnn_features),
                    name='reshape')(inner)  # (None, 32, 2048)
    inner = Dense(64, activation='relu', kernel_initializer='he_normal',
                  name='dense1')(inner)  # (None, 32, 64)

    # RNN layer
    gru_1 = GRU(256, return_sequences=True, kernel_initializer='he_normal',
                name='gru1')(inner)  # (None, 32, 256)
    gru_1b = GRU(256, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])  # (None, 32, 256)
    gru1_merged = BatchNormalization()(gru1_merged)

    gru_2 = GRU(256, return_sequences=True, kernel_initializer='he_normal',
                name='gru2')(gru1_merged)
    gru_2b = GRU(256, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru2_b')(gru1_merged)
    gru2_merged = concatenate([gru_2, gru_2b])  # (None, 32, 512)
    gru2_merged = BatchNormalization()(gru2_merged)

    # transforms RNN output to character activations:
    inner = Dense(num_classes, kernel_initializer='he_normal',
                  name='dense2')(gru2_merged)  # (None, 32, num_classes)
    y_pred = Activation('softmax', name='softmax')(inner)

    if not training:
        return Model(inputs=[inputs], outputs=y_pred)

    labels = Input(name='the_labels', shape=[max_text_len],
                   dtype='float32')  # (None, max_text_len)
    input_length = Input(name='input_length', shape=[1],
                         dtype='int64')  # (None, 1)
    label_length = Input(name='label_length', shape=[1],
                         dtype='int64')  # (None, 1)

    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1, ), name='ctc')(
        [y_pred, labels, input_length, label_length])  # (None, 1)

    return Model(inputs=[inputs, labels, input_length, label_length],
                 outputs=loss_out)
def trainGestureRNN(numLayers, numNodesPerLayer, useGRU, batchSize, numEpochs,
                    learningRate, l1Reg, l2Reg, dropoutI, dropoutH, sequences,
                    classes, trainRange, valRange, testRange, numClasses,
                    numObservations, numSequences, numFeatures, modelFile,
                    callbacks=None, outDirectory='', trainMode='continue'):
    """Train (or resume) a recurrent frame classifier and score the test split.

    A new model is built when ``trainMode`` is ``'overwrite'`` or when the
    saved ``.json`` / ``.h5`` pair is missing; otherwise the saved model is
    loaded.  Unless ``trainMode`` is ``'skip'`` the model is fitted with a
    best-only weight checkpoint, the checkpointed weights are reloaded, and
    the architecture and weights are saved again after evaluation.

    Args:
        numLayers: Number of stacked recurrent layers.
        numNodesPerLayer: Units per recurrent layer.
        useGRU: Use ``GRU`` layers when true, ``LSTM`` otherwise.
        batchSize, numEpochs, learningRate: Fit settings (RMSprop optimizer).
        l1Reg: Only validated (must be non-negative); not otherwise used.
        l2Reg: L2 weight regularisation factor.
        dropoutI: Dropout after the masking layer (skipped when falsy).
        dropoutH: Dropout after every recurrent layer (skipped when falsy).
        sequences: Indexed as (observation, sequence, feature); transposed
            to sequence-major order before use.
        classes: Integer labels indexed as (observation, sequence); negative
            entries mark time steps that get zero sample weight.
        trainRange, valRange, testRange: Sequence indices of the splits.
        numClasses, numObservations, numSequences, numFeatures: Data sizes.
        modelFile: Base name; files are written as ``Keras<modelFile>``.
        callbacks: Optional extra Keras callbacks (the list is not modified).
        outDirectory: Directory for model files; empty or ``None`` means the
            current directory.
        trainMode: ``'continue'``, ``'overwrite'`` or ``'skip'``
            (case-insensitive).

    Returns:
        ``(completedEpochs, testLoss, accuracy, balancedAccuracy,
        weightedAccuracy)``.  If training is interrupted and the user
        declines evaluation: ``(0, nan, nan, nan, nan)``.

    Raises:
        ValueError: On an unknown ``trainMode`` or a negative
            regularisation/dropout parameter.
    """
    trainModes = ('continue', 'overwrite', 'skip')
    # Normalise once so the comparisons below agree with the validation.
    trainMode = trainMode.lower()
    if trainMode not in trainModes:
        raise ValueError(
            "Parameter 'trainMode' must be one of 'continue', 'overwrite', or 'skip'"
        )

    if dropoutI < 0 or dropoutH < 0 or l2Reg < 0 or l1Reg < 0:
        raise ValueError('Regularization parameters must be non-negative.')

    outDirectory = outDirectory or ''

    # Class labels become one-hot arrays; the sample weights tell the cost
    # function which time steps to ignore (negative label -> weight 0).
    labels = np.asarray(classes)[:numObservations, :numSequences]
    valid = labels >= 0
    binaryClasses = np.zeros((numObservations, numSequences, numClasses))
    sampleWeights = np.zeros((numObservations, numSequences))
    sampleWeights[valid] = 1
    obsIdx, seqIdx = np.nonzero(valid)
    binaryClasses[obsIdx, seqIdx, labels[obsIdx, seqIdx]] = 1

    # Switch to sequence-major layout.
    sequences = sequences.transpose((1, 0, 2))
    binaryClasses = binaryClasses.transpose((1, 0, 2))
    sampleWeights = sampleWeights.T

    def split(indices):
        """Return [inputs, targets, sample weights] for the given sequences."""
        return [
            sequences[indices, :, :], binaryClasses[indices, :, :],
            sampleWeights[indices, :]
        ]

    trainData = split(trainRange)
    valData = split(valRange)
    testData = split(testRange)

    # os.path.join instead of a hard-coded '\\' keeps paths valid everywhere.
    modelFile = os.path.join(outDirectory, 'Keras' + modelFile)
    weightsFile = modelFile + '_Weights'
    jsonPath = modelFile + '.json'
    weightsPath = weightsFile + '.h5'
    completedEpochs = 0

    hasSavedModel = os.path.isfile(jsonPath) and os.path.isfile(weightsPath)
    if trainMode == 'overwrite' or not hasSavedModel:
        model = Sequential()
        # masking layer to indicate dummy timesteps
        model.add(Masking(0, input_shape=(numObservations, numFeatures)))
        if dropoutI:
            model.add(Dropout(dropoutI))
        recurrentLayer = GRU if useGRU else LSTM
        for _ in range(numLayers):
            model.add(
                recurrentLayer(output_dim=numNodesPerLayer,
                               return_sequences=True,
                               W_regularizer=l2(l2Reg)))
            if dropoutH:
                model.add(Dropout(dropoutH))
        model.add(
            TimeDistributed(
                Dense(output_dim=numClasses,
                      activation='softmax',
                      W_regularizer=l2(l2Reg))))
    else:
        with open(jsonPath, 'r') as jsonFile:
            model = model_from_json(jsonFile.read())
        model.load_weights(weightsPath)

    # compile model and training objective function
    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(lr=learningRate),
                  sample_weight_mode='temporal',
                  metrics=['accuracy'])

    # Copy so the caller's list is not extended on every call.
    checkpoint = ModelCheckpoint(weightsPath, save_best_only=True)
    callbacks = [checkpoint] if callbacks is None else list(callbacks) + [checkpoint]

    try:
        if trainMode != 'skip':
            history = model.fit(x=trainData[0],
                                y=trainData[1],
                                sample_weight=trainData[2],
                                validation_data=valData,
                                batch_size=batchSize,
                                nb_epoch=numEpochs,
                                callbacks=callbacks,
                                verbose=2)
            completedEpochs = len(history.history['loss'])
    except KeyboardInterrupt:
        if (not queryUser('Training interrupted. Compute test statistics?')):
            # Same arity as the normal return value.
            nan = float('nan')
            return 0, nan, nan, nan, nan

    # retrieve the best weights based upon validation set loss
    if os.path.isfile(weightsPath):
        model.load_weights(weightsPath)

    scores = model.test_on_batch(x=testData[0],
                                 y=testData[1],
                                 sample_weight=testData[2])
    predictedClasses = model.predict_classes(x=testData[0])
    testLabels = classes[:, testRange].T
    # Replace the Keras accuracy with the project's own metrics.
    scores[1] = accuracy(testLabels, predictedClasses)
    scores.append(balancedAccuracy(testLabels, predictedClasses))
    scores.append(
        weightedAccuracy(testLabels, predictedClasses, forgetFactor=0))

    print(
        "Test loss of %.5f\nFrame-wise accuracy of %.5f\nSequence-wise accuracy of %.5f\nFinal frame accuracy of %0.5f"
        % (scores[0], scores[1], scores[2], scores[3]))

    if trainMode != 'skip':
        # to_json() returns text, so the file is opened in text mode.
        with open(jsonPath, 'w') as jsonFile:
            jsonFile.write(model.to_json())
        model.save_weights(weightsPath, overwrite=True)
        print('Model and weights saved to %s and %s.' %
              (jsonPath, weightsPath))

    return completedEpochs, scores[0], scores[1], scores[2], scores[3]
print("train_X shape", X_train.shape) print("valid_X shape", X_valid.shape) # print("target shape", y_train.shape) # print("training size:", len(train_inputs['X']), 'validation', len(valid_inputs['X']), 'test size:', len(test_inputs['X']) ) # print("sum sizes", len(train_inputs['X']) + len(valid_inputs['X']) + len(test_inputs['X'])) ## build CNN from keras.models import Model, Sequential from keras.layers import Conv1D, Dense, Flatten from keras.callbacks import EarlyStopping, ModelCheckpoint LATENT_DIM = 5 BATCH_SIZE = 32 model = Sequential() model.add(GRU(LATENT_DIM, input_shape=(time_step_lag, 1))) model.add(Dense(1)) model.compile(optimizer='adam', loss='mse') model.summary() earlystop = EarlyStopping(monitor='val_loss', patience=5) # Test the model X_test = test_inputs['X'] y1_test = test_inputs['target_load'] y1_test = y_scaler.inverse_transform(y1_test) if not os.path.exists(output_dir + '/original_' + predict_component + '_lag' + str(time_step_lag) + '.csv'): np.savetxt(output_dir + '/original_' + predict_component + '_lag' + str(time_step_lag) + '.csv', y1_test,
shape = list(input_shape) assert len(shape)== 3 outshape = [None, shape[2]] return tuple(outshape) def mean_along_time(x): return K.means(x,axis=1) def sum_one(x): return x.sum(axis=-1,keepdims=True) def sum_one_output_shape(input_shape): shape = list(input_shape) assert len(shape)==2 outshape = [None, 1] return tuple(outshape) shared_GRU = GRU(output_dim = dim_gru, return_sequences = False, input_shape = (maxlen,dim_glove), init = 'glorot_uniform', inner_init = 'orthogonal', inner_activation = 'sigmoid') shared_backGRU = GRU(output_dim = dim_gru,go_backwards=True, return_sequences = False, input_shape = (maxlen,dim_glove), init = 'glorot_uniform', inner_init = 'orthogonal', inner_activation = 'sigmoid') pass_input = Input(shape=(maxlen_pass,dim_glove), dtype='float32', name='pass_input') pass_gru = GRU(output_dim = dim_gru, dropout_W=args.dropout, return_sequences = True, input_shape = (maxlen_pass,dim_glove), init = 'glorot_uniform', inner_init = 'orthogonal', inner_activation = 'sigmoid')(pass_input) # maxlen_pass, dim_gru pass_backgru = GRU(output_dim = dim_gru, dropout_W=args.dropout ,go_backwards=True, return_sequences = True, input_shape = (maxlen_pass,dim_glove), init = 'glorot_uniform', inner_init = 'orthogonal', inner_activation = 'sigmoid')(pass_input) # maxlen_pass, dim_gru pass_con = merge([pass_gru,pass_backgru],mode='concat') # maxlen_pass, 2*dim_gru ques_input = Input(shape=(maxlen,dim_glove), dtype='float32', name='ques_input') gru_out = shared_GRU(ques_input) backgru_out = shared_backGRU(ques_input) ques_con = merge([gru_out,backgru_out],mode='concat') # , 2*dim_gru repeat_ques = RepeatVector(maxlen_pass)(ques_con) # maxlen_pass, 2*dim_gru mul_ques_pass = merge([pass_con,repeat_ques],mode='mul') # maxlen_pass, 2*dim_gru permute_qp_mul = Permute((2,1))(mul_ques_pass) # 2*dim_gru, maxlen_pass #cos_ques_pass = merge([ques_con,pass_con],mode='cos',dot_axes=[1,2]) # ,maxlen_pass
i += 1 # 字句長度不足補空白 X = sequence.pad_sequences(X, maxlen=MAX_SENTENCE_LENGTH) # 資料劃分訓練組及測試組 Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=42) # 模型構建 EMBEDDING_SIZE = 128 HIDDEN_LAYER_SIZE = 64 BATCH_SIZE = 32 NUM_EPOCHS = 10 model = Sequential() # 加『嵌入』層 model.add(Embedding(vocab_size, EMBEDDING_SIZE,input_length=MAX_SENTENCE_LENGTH)) # 加『GRU』層 model.add(GRU(HIDDEN_LAYER_SIZE, dropout=0.2, recurrent_dropout=0.2)) model.add(Dense(1)) model.add(Activation("sigmoid")) # binary_crossentropy:二分法 model.compile(loss="binary_crossentropy", optimizer="adam",metrics=["accuracy"]) # 模型訓練 model.fit(Xtrain, ytrain, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS,validation_data=(Xtest, ytest)) # 預測 score, acc = model.evaluate(Xtest, ytest, batch_size=BATCH_SIZE) print("\nTest score: %.3f, accuracy: %.3f" % (score, acc)) print('{} {} {}'.format('預測','真實','句子')) for i in range(5): idx = np.random.randint(len(Xtest)) xtest = Xtest[idx].reshape(1,MAX_SENTENCE_LENGTH)
def train(model_file):
    """Train a network that maps speech features to gesture output.

    Loads ``X_train.npy`` (and the matching targets) from ``DATA_DIR``,
    holds out 10% of the samples for validation, fits the model, saves it to
    ``model_file`` and writes the loss curves to a PNG next to it.

    Reads the module-level ``DATA_DIR``, ``ENCODED``, ``N_OUTPUT``,
    ``N_HIDDEN``, ``N_CONTEXT``, ``N_INPUT``, ``BATCH_SIZE`` and ``EPOCHS``.

    Args:
        model_file: Path the trained model is saved to.

    Returns:
        None.
    """
    import os

    # Get the data
    X = np.load(DATA_DIR + '/X_train.npy')

    if ENCODED:
        # If we learn speech-representation mapping we use encoded motion
        # as output
        Y = np.load(DATA_DIR + '/' + str(N_OUTPUT) + '/Y_train_encoded.npy')

        # Correct the sizes
        train_size = min(X.shape[0], Y.shape[0])
        X = X[:train_size]
        Y = Y[:train_size]
    else:
        Y = np.load(DATA_DIR + '/Y_train.npy')

    N_train = int(len(X) * 0.9)
    N_validation = len(X) - N_train

    # Split on training and validation
    X_train, X_validation, Y_train, Y_validation = train_test_split(
        X, Y, test_size=N_validation)

    # Define Keras model
    model = Sequential()

    def add_norm_relu_dropout():
        """Append BatchNormalization, ReLU and 10% dropout to the model."""
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(Dropout(0.1))

    # Three per-time-step dense blocks; only the first declares the input.
    model.add(
        TimeDistributed(Dense(N_HIDDEN), input_shape=(N_CONTEXT, N_INPUT)))
    add_norm_relu_dropout()
    for _ in range(2):
        model.add(TimeDistributed(Dense(N_HIDDEN)))
        add_norm_relu_dropout()

    # The GRU keeps only its final output.
    model.add(GRU(N_HIDDEN, return_sequences=False))
    add_norm_relu_dropout()

    model.add(Dense(N_OUTPUT))
    model.add(Activation('linear'))

    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print().
    model.summary()

    optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999)
    model.compile(loss='mean_squared_error', optimizer=optimizer)

    hist = model.fit(X_train, Y_train,
                     batch_size=BATCH_SIZE,
                     epochs=EPOCHS,
                     validation_data=(X_validation, Y_validation))

    model.save(model_file)

    # Save convergence results into an image
    pyplot.plot(hist.history['loss'], linewidth=3, label='train')
    pyplot.plot(hist.history['val_loss'], linewidth=3, label='valid')
    pyplot.grid()
    pyplot.legend()
    pyplot.xlabel('epoch')
    pyplot.ylabel('loss')
    # Swap only the extension.  str.replace('hdf5', 'png') would rewrite any
    # 'hdf5' in the directory part and, for names without 'hdf5', point the
    # figure at the model file itself.
    pyplot.savefig(os.path.splitext(model_file)[0] + '.png')
# Write the run configuration, one "label: value" line each.
for label, value in (('Batch size: ', batch_size),
                     ('Num outtuples: ', num_outtuples),
                     ('Max num of letters: ', features_per_sample),
                     ('Num epochs: ', num_epochs)):
    f.write(label + str(value) + '\n')

# Create the model of RNN
input_shape = (num_outtuples, features_per_sample)

# A Masking layer (currently disabled) would make the network ignore padded
# values:
#model.add(Masking(input_shape = input_shape, mask_value = 0.0))
model = Sequential([
    Embedding(max_ord_value, 500, input_length=features_per_sample),
    # GRU is the main RNN layer: the first returns the full sequence, the
    # second only its final output.
    GRU(256,
        return_sequences=True,
        input_shape=(num_outtuples, features_per_sample)),
    GRU(512,
        return_sequences=False,
        input_shape=(num_outtuples, features_per_sample)),
    # Fully connected layer with 1 neuron output
    Dense(1),
    # Final output value between 0 and 1 as probability
    Activation('sigmoid'),
])

model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# Write
def test_gru_benchmark(self):
    """Compare the ``nnet.GRU`` layer with the Keras and Lasagne GRUs.

    The three layers are given the same weights and the same random input;
    the summed absolute differences between their outputs are printed, and
    the ``nnet.GRU`` (labelled "Odin" in the output) and Lasagne results are
    asserted to be almost equal. Afterwards the average time of 12 forward
    passes is printed for each implementation.

    The test prints a notice and returns early when lasagne or keras cannot
    be imported.
    """
    try:
        import lasagne
        from keras.layers.recurrent import GRU
    except ImportError:
        # Only a missing optional dependency should skip the benchmark; a
        # bare except would also hide KeyboardInterrupt/SystemExit.
        print('\n This test require lasagne and keras.')
        return

    # Fixed seed so the random input is reproducible between runs
    np.random.seed(12082518)
    X = np.random.rand(32, 12, 13)

    # --- nnet ("Odin") GRU ---
    g1 = nnet.GRU((None, 12, 13), hidden_info=8,
                  resetgate=nnet.Gate(),
                  updategate=nnet.Gate(),
                  hidden_update=nnet.Gate(nonlinearity=T.tanh),
                  batch_norm=False, dropoutW=None, dropoutU=None)
    f1 = T.function(g1.input_var, outputs=g1(True))
    x1 = f1(X)[0]

    # --- Keras GRU, initialised with the weights of g1 ---
    g2 = GRU(output_dim=8, input_shape=(12, 13),
             activation=T.tanh, inner_activation=T.sigmoid,
             dropout_W=None, dropout_U=None,
             return_sequences=True)
    g2.set_weights(g1.get_params_value(True, True))
    f2 = T.function([g2.get_input(True)], outputs=g2.get_output(True))
    x2 = f2(X)

    # --- Lasagne GRU, initialised with the weights of g1 ---
    l_in = lasagne.layers.InputLayer(shape=(None, 12, 13))
    l = lasagne.layers.GRULayer(l_in, num_units=8)
    # The Lasagne layer takes one more parameter array than g1 provides;
    # presumably the initial hidden state - TODO confirm.
    lasagne.layers.set_all_param_values(
        l, g1.get_params_value(True, True) + [T.np_constant((1, 8))])
    f3 = T.function(
        [l_in.input_var],
        outputs=lasagne.layers.get_output(l, deterministic=False))
    x3 = f3(X)

    print('Odin - Keras: ', np.sum(np.abs(x1 - x2)))
    print('Odin - Lasagne: ', np.sum(np.abs(x1 - x3)))
    print('Keras - Lasagne:', np.sum(np.abs(x2 - x3)))
    self.assertAlmostEqual(np.sum(np.abs(x1 - x3)), 0.)

    # print(g1.get_params(True, True))
    # p1 = g1.get_params_value(True, True)
    # print(g2.get_params()[0])
    # p2 = [T.get_value(i) for i in g2.get_params()[0]]
    # print([np.sum(np.abs(i - j)) for i, j in zip(p1, p2)])
    print()

    # Time each compiled function: pause for a second, then average the
    # wall-clock time over n_runs forward passes.
    n_runs = 12
    for label, forward in (('Odin', f1), ('Lasagne', f3), ('Keras', f2)):
        time.sleep(1)
        start = time.time()
        for _ in range(n_runs):  # range works on Python 2 and 3 alike
            forward(X)
        print(label + ' GRU speed:', (time.time() - start) / n_runs)
''' model building '''
# Convolutional front end: three stages, each made of two 3x3 convolutions
# with 32 filters, batch normalisation over the last axis and 2x2 max pooling.
input_tensor = Input(shape=(width, height, 3))
x = input_tensor
for i in range(3):
    x = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(x)
    x = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(x)
    x = BatchNormalization(axis=-1)(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

# Keep axis 1 of the convolution output and flatten axes 2 and 3 together so
# that the recurrent layers receive a sequence of feature vectors.
conv_shape = x.get_shape()
x = Reshape(
    target_shape=(int(conv_shape[1]), int(conv_shape[2]*conv_shape[3]))
)(x)
x = Dense(units=32, activation='relu')(x)

# First pair of GRUs (forward and backward over the sequence), summed.
gru_1 = GRU(
    opts.rnn_size,
    name='gru1',
    kernel_initializer='he_normal',
    return_sequences=True,
)(x)
gru_1b = GRU(
    opts.rnn_size,
    name='gru1_b',
    kernel_initializer='he_normal',
    return_sequences=True,
    go_backwards=True,
)(x)
gru1_merged = add([gru_1, gru_1b])

# Second pair of GRUs, concatenated this time.
gru_2 = GRU(
    opts.rnn_size,
    name='gru2',
    kernel_initializer='he_normal',
    return_sequences=True,
)(gru1_merged)
gru_2b = GRU(
    opts.rnn_size,
    name='gru2_b',
    kernel_initializer='he_normal',
    return_sequences=True,
    go_backwards=True,
)(gru1_merged)
x = concatenate([gru_2, gru_2b])

# Dropout followed by a softmax over n_class + 1 outputs at every step.
x = Dropout(0.25)(x)
x = Dense(
    units=n_class+1,
    kernel_initializer='he_normal',
    activation='softmax',
)(x)
base_model = Model(inputs=input_tensor, outputs=x)

# Additional inputs consumed by the CTC loss, which is computed inside a
# Lambda layer by ctc_lambda_func.
labels = Input(shape=[n_len], dtype='float32', name='the_labels')
input_length = Input(shape=[1], dtype='int64', name='input_length')
label_length = Input(shape=[1], dtype='int64', name='label_length')
loss_out = Lambda(
    ctc_lambda_func,
    output_shape=(1,),
    name='ctc',
)([x, labels, input_length, label_length])