class ModelA(ModelBase):
    """Conv1D + bidirectional GRU regressor over word-index sequences.

    The network is: embedding -> Conv1D(32, 3) -> MaxPooling1D(2) ->
    Bidirectional(GRU(100)) -> Dense(16) -> Dropout -> Dense(1, tanh).
    """

    def __init__(self, wv, maxlen=50, max_num_deptag=50):
        """Build and compile the Keras model.

        :param wv: word-vector wrapper; the embedding layer is obtained from
            ``self.wv.model.wv.get_embedding_layer()`` after the base class
            initialiser has run.
        :param maxlen: length every input sequence is padded/truncated to.
        :param max_num_deptag: forwarded to ``ModelBase``; not used by this
            architecture.
        """
        super().__init__(wv, maxlen, max_num_deptag)
        embedding_layer = self.wv.model.wv.get_embedding_layer()

        sequence_input = Input(shape=(self.maxlen,), dtype='int32')
        # -1 is the padding value used by _pad_word_ids below.
        mask = Masking(mask_value=-1)(sequence_input)
        embedded_sequences = embedding_layer(mask)

        x = Conv1D(filters=32, kernel_size=3, activation='relu')(embedded_sequences)
        x = MaxPooling1D(pool_size=2)(x)
        x = Bidirectional(GRU(100, dropout=0.15))(x)
        x = Dense(16)(x)
        x = Dropout(0.2)(x)
        preds = Dense(1, activation='tanh')(x)

        self.model = Model(sequence_input, preds, name='ConvBiGRUModelA')
        self.compile_model()

    def compile_model(self):
        """Compile ``self.model`` with MSE loss, SGD and an MAE metric."""
        self.model.compile(loss='mean_squared_error',
                           optimizer='sgd',
                           metrics=['mae'])

    def _pad_word_ids(self, x):
        """Take element 0 of every token and pad each row to ``self.maxlen``.

        The rows are handed to ``pad_sequences`` as plain lists: wrapping
        rows of different lengths in ``np.array`` first raises ``ValueError``
        on recent NumPy releases, and ``pad_sequences`` accepts lists anyway.
        """
        word_ids = [[t[0] for t in row] for row in x]
        return sequence.pad_sequences(word_ids, maxlen=self.maxlen, value=-1)

    def fit(self, x, y, batch_size, epochs, verbose, callbacks):
        """Train the model.

        :param x: rows of tokens; only ``token[0]`` of each token is used
            (presumably the word index -- confirm against the caller).
        :param y: regression targets, converted with ``np.array``.
        :param batch_size: passed through to ``Model.fit``.
        :param epochs: passed through to ``Model.fit``.
        :param verbose: passed through to ``Model.fit``.
        :param callbacks: passed through to ``Model.fit``.
        """
        self.model.fit(self._pad_word_ids(x),
                       np.array(y),
                       batch_size=batch_size,
                       epochs=epochs,
                       verbose=verbose,
                       callbacks=callbacks)

    def predict(self, x):
        """Return ``self.model.predict`` on ``x`` preprocessed as in ``fit``."""
        return self.model.predict(self._pad_word_ids(x))
def fit_model(X_train, y_train):
    """Fit the network defined by the module-level tensors and save it.

    The model is assembled from the globals ``admiss_data`` (input) and
    ``main_output`` (output). Training settings (``n_epochs``,
    ``n_batch_size``, ``class_weight``) and validation data (``X_val``,
    ``y_val``) are also read from module scope.

    NOTE(review): no ``compile`` call is visible in this function (the
    optimizer/compile lines are commented out) -- confirm how the model is
    expected to be compiled before ``fit``.

    :param X_train: training inputs, wrapped in a single-element list.
    :param y_train: training targets.
    :return: the fitted model, also written to ``base_nn.h5``.
    """
    net = Model(inputs=[admiss_data], outputs=main_output)

    # Callback object from the project's my_callbacks module.
    history_cb = my_callbacks.Histories()

    fit_options = dict(
        epochs=n_epochs,
        batch_size=n_batch_size,
        validation_data=[[X_val], y_val],
        class_weight=class_weight,
        callbacks=[history_cb],
    )
    net.fit([X_train], y_train, **fit_options)

    net.save('base_nn.h5')
    return net
def train(self, classdict, nb_topics, *args, **kwargs):
    """ Train the autoencoder.

    Builds a one-hidden-layer autoencoder over bag-of-words vectors, fits it
    on every short text in `classdict`, and stores the autoencoder, encoder
    and decoder models on the instance.

    :param classdict: training data, mapping class label to a list of short texts
    :param nb_topics: number of topics, i.e., the number of encoding dimensions
    :param args: arguments to be passed to keras model fitting
    :param kwargs: arguments to be passed to keras model fitting
    :return: None
    :type classdict: dict
    :type nb_topics: int
    """
    self.nb_topics = nb_topics
    self.generate_corpus(classdict)
    vecsize = len(self.dictionary)

    # define all the layers of the autoencoder
    input_vec = Input(shape=(vecsize, ))
    encoded = Dense(self.nb_topics, activation='relu')(input_vec)
    decoded = Dense(vecsize, activation='sigmoid')(encoded)

    # Inputs/outputs are passed positionally: the `input=`/`output=`
    # keywords are the Keras 1 spelling and are rejected by newer releases.
    autoencoder = Model(input_vec, decoded)
    encoder = Model(input_vec, encoded)

    # define the decoder by reusing the trained output layer
    encoded_input = Input(shape=(self.nb_topics, ))
    decoder_layer = autoencoder.layers[-1]
    decoder = Model(encoded_input, decoder_layer(encoded_input))

    # compile the autoencoder
    autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')

    # process training data: one normalized bag-of-words vector per short
    # text, in class-iteration order. A flat comprehension replaces
    # reduce(add, [map(...)]), which breaks on Python 3 where map() returns
    # an iterator that cannot be added.
    embedvecs = np.array([
        self.retrieve_bow_vector(shorttext, normalize=True)
        for classtype in classdict
        for shorttext in classdict[classtype]
    ])

    # fit the model (input is also the reconstruction target)
    autoencoder.fit(embedvecs, embedvecs, *args, **kwargs)

    # store the autoencoder models
    self.autoencoder = autoencoder
    self.encoder = encoder
    self.decoder = decoder

    # flag setting
    self.trained = True

    # classes topic vector precomputation
    self.classtopicvecs = {
        label: self.precalculate_liststr_topicvec(classdict[label])
        for label in classdict
    }
def test_works():
    """Fit a small pointer-style network on a random sorting task.

    Inputs are random integer sequences of shape (10000, 30, 1); the target
    for each sequence is the same sequence reordered by ``np.argsort`` of its
    values.
    """
    seq_input = Input(shape=(30, 1), name="input")
    encoder_out = GRU(128, return_sequences=True)(seq_input)
    last_state = Slice("[-1,:]")(encoder_out)
    repeated = RepeatVector(30)(last_state)
    merged = Merge(mode='concat', concat_axis=2)([repeated, seq_input])
    decoder_out = GRU(128, return_sequences=True)(merged)
    pointers = Ptr_Layer(30)([seq_input, encoder_out, decoder_out])
    model = Model(input=seq_input, output=pointers, name='test')

    inp = np.random.randint(size=(10000, 30, 1), low=0, high=100)
    # Per-sample permutation that sorts the values ascending.
    order = np.argsort(inp[:, :, 0])
    target = np.array([
        np.take(sample, perm, axis=-2) for sample, perm in zip(inp, order)
    ])

    model.compile(optimizer=optimizers.Adam(), loss='mse')
    model.fit(inp, target, nb_epoch=500, batch_size=100)
def train():
    """Fine-tune a VGGFace ResNet50 classifier head on ``face_data.npz``.

    Loads arrays ``x`` and ``y`` from ``face_data.npz``, replaces the last
    layer of the pretrained network with a 6-way softmax, trains only that
    new layer, saves the model, and prints the accuracy measured on the same
    data that was used for training.
    """
    num_classes = 6

    # NpzFile keeps the archive open; the context manager closes it once the
    # arrays have been read.
    with np.load('face_data.npz') as data:
        x, y = data['x'], data['y']

    # categorical (one-hot) conversion of the labels
    y = keras.utils.to_categorical(y, num_classes)

    # transfer learning: start from the pretrained network
    resnet = VGGFace(model='resnet50', input_shape=(224, 224, 3))

    # take the output of the second-to-last layer and put a new softmax on it
    layer_name = resnet.layers[-2].name
    out = resnet.get_layer(layer_name).output
    out = Dense(num_classes, activation='softmax')(out)
    resnet_4 = Model(resnet.input, out)

    # freeze everything except the newly added layer
    for layer in resnet_4.layers[:-1]:
        layer.trainable = False

    resnet_4.compile(loss='categorical_crossentropy',
                     optimizer='adam',
                     metrics=['accuracy'])

    # summary() prints by itself and returns None, so it is not wrapped in
    # print() (which would emit a stray "None").
    resnet_4.summary()

    resnet_4.fit(x, y, batch_size=10, epochs=10, shuffle=True)

    # save the trained model so that it can be used afterwards
    resnet_4.save("C:\\Users\\hseth\\Desktop\\face recogination\\model_save_face.h5")

    # Evaluation runs on the training data itself, so the score is reported
    # as training accuracy rather than test accuracy.
    scores = resnet_4.evaluate(x, y, verbose=1)
    print('Training accuracy:', scores[1])
class ModelB(ModelBase):
    """Per-token dense layer followed by a flattened linear regressor.

    The network is: embedding -> Dense(50, tanh) -> Dropout -> Flatten ->
    Dense(1).
    """

    def __init__(self, wv, maxlen=50, max_num_deptag=50):
        """Build and compile the Keras model.

        :param wv: word-vector wrapper; the embedding layer is obtained from
            ``self.wv.model.wv.get_embedding_layer()`` after the base class
            initialiser has run.
        :param maxlen: length every input sequence is padded/truncated to.
        :param max_num_deptag: forwarded to ``ModelBase``; not used by this
            architecture.
        """
        super().__init__(wv, maxlen, max_num_deptag)
        embedding_layer = self.wv.model.wv.get_embedding_layer()

        sequence_input = Input(shape=(self.maxlen,), dtype='int32')
        # -1 is the padding value used by _pad_word_ids below.
        mask = Masking(mask_value=-1)(sequence_input)
        embedded_sequences = embedding_layer(mask)

        x = Dense(50, activation='tanh')(embedded_sequences)
        x = Dropout(0.2)(x)
        x = Flatten()(x)
        preds = Dense(1)(x)

        self.model = Model(sequence_input, preds, name='MLPModelB')
        self.compile_model()

    def compile_model(self):
        """Compile ``self.model`` with MSE loss, SGD and an MAE metric."""
        self.model.compile(loss='mean_squared_error',
                           optimizer='sgd',
                           metrics=['mae'])

    def _pad_word_ids(self, x):
        """Take element 0 of every token and pad each row to ``self.maxlen``.

        The rows are handed to ``pad_sequences`` as plain lists: wrapping
        rows of different lengths in ``np.array`` first raises ``ValueError``
        on recent NumPy releases, and ``pad_sequences`` accepts lists anyway.
        """
        word_ids = [[t[0] for t in row] for row in x]
        return sequence.pad_sequences(word_ids, maxlen=self.maxlen, value=-1)

    def fit(self, x, y, batch_size, epochs, verbose, callbacks):
        """Train the model.

        :param x: rows of tokens; only ``token[0]`` of each token is used
            (presumably the word index -- confirm against the caller).
        :param y: regression targets, converted with ``np.array``.
        :param batch_size: passed through to ``Model.fit``.
        :param epochs: passed through to ``Model.fit``.
        :param verbose: passed through to ``Model.fit``.
        :param callbacks: passed through to ``Model.fit``.
        """
        self.model.fit(self._pad_word_ids(x),
                       np.array(y),
                       batch_size=batch_size,
                       epochs=epochs,
                       verbose=verbose,
                       callbacks=callbacks)

    def predict(self, x):
        """Return ``self.model.predict`` on ``x`` preprocessed as in ``fit``."""
        return self.model.predict(self._pad_word_ids(x))
def my_layer():
    """Exercise the ``WeightedAdd`` layer on its own.

    Wraps the layer in a one-layer model, prints its output for an all-ones
    batch, trains it to reproduce that batch for 1000 epochs, and prints the
    output again.
    """
    layer_input = Input(shape=(3, 3, 2))
    layer_output = WeightedAdd()(layer_input)
    model = Model(inputs=layer_input, outputs=layer_output)

    ones_batch = np.ones((1, 3, 3, 2))

    # Output before training.
    print(model.predict_on_batch(ones_batch))

    model.compile(optimizer='Adam', loss=mean_squared_error)
    model.fit(ones_batch, ones_batch, epochs=1000)

    # Output after training on the identity target.
    print(model.predict_on_batch(ones_batch))
class ModelE(ModelBase):
    """Two-input convolutional regressor over word and dependency-tag ids.

    Word ids go through the pretrained embedding layer, dependency-tag ids
    through a trainable 10-dimensional embedding. The two are concatenated and
    fed to Conv1D(128, 3) -> Conv1D(32, 17) -> GlobalMaxPooling1D ->
    Dense(16) -> Dense(1, tanh), with dropout in between.
    """

    def __init__(self, wv, maxlen=50, max_num_deptag=50):
        """Build and compile the Keras model.

        :param wv: word-vector wrapper; the embedding layer is obtained from
            ``self.wv.model.wv.get_embedding_layer()`` after the base class
            initialiser has run.
        :param maxlen: length every input sequence is padded/truncated to.
        :param max_num_deptag: ``input_dim`` of the dependency-tag embedding.
        """
        super().__init__(wv, maxlen, max_num_deptag)
        wv_layer = self.wv.model.wv.get_embedding_layer()

        wv_input = Input(shape=(self.maxlen,), dtype='int32')
        wv_mask = Masking(mask_value=-1)(wv_input)
        wv_sequences = wv_layer(wv_mask)

        deptag_input = Input(shape=(self.maxlen,), dtype='int32')
        # 0 is the padding id for dependency tags (see _encode_inputs).
        deptag_mask = Masking(mask_value=0)(deptag_input)
        # input_length follows self.maxlen instead of a hard-coded 50, so the
        # layer agrees with the Input above for any maxlen.
        # NOTE(review): tag ids are shifted by +1 before being fed in, so the
        # largest id is (max raw tag id + 1) -- confirm it stays below
        # max_num_deptag.
        deptag_sequences = Embedding(input_dim=max_num_deptag,
                                     output_dim=10,
                                     input_length=self.maxlen)(deptag_mask)

        x = concatenate([wv_sequences, deptag_sequences])
        x = Conv1D(filters=128, kernel_size=3, activation='relu')(x)
        x = Dropout(0.2)(x)
        x = Conv1D(filters=32, kernel_size=17, activation='relu')(x)
        x = Dropout(0.2)(x)
        x = GlobalMaxPooling1D()(x)
        x = Dense(16)(x)
        x = Dropout(0.2)(x)
        preds = Dense(1, activation='tanh')(x)

        self.model = Model(inputs=[wv_input, deptag_input],
                           outputs=preds,
                           name='OnlyConvolutions')
        self.compile_model()

    def compile_model(self):
        """Compile ``self.model`` with MSE loss, SGD and an MAE metric."""
        self.model.compile(loss='mean_squared_error',
                           optimizer='sgd',
                           metrics=['mae'])

    def _encode_inputs(self, x):
        """Turn rows of ``(word_id, deptag_id, ...)`` tokens into model inputs.

        Word ids are padded with -1. Dependency-tag ids are shifted by +1 and
        padded with 0, which is the value the tag ``Masking`` layer masks.
        Both ``fit`` and ``predict`` use this one encoding; previously ``fit``
        fed unshifted tags padded with -1, so training and inference saw
        different inputs.

        Rows are passed to ``pad_sequences`` as plain lists because wrapping
        rows of different lengths in ``np.array`` raises on recent NumPy.

        :return: ``[padded_word_ids, padded_deptag_ids]``
        """
        word_ids = [[t[0] for t in row] for row in x]
        deptag_ids = [[t[1] + 1 for t in row] for row in x]
        return [
            sequence.pad_sequences(word_ids, maxlen=self.maxlen, value=-1),
            sequence.pad_sequences(deptag_ids, maxlen=self.maxlen, value=0),
        ]

    def fit(self, x, y, batch_size, epochs, verbose, callbacks):
        """Train the model.

        :param x: rows of tokens; ``token[0]`` and ``token[1]`` are used.
        :param y: regression targets, converted with ``np.array``.
        :param batch_size: passed through to ``Model.fit``.
        :param epochs: passed through to ``Model.fit``.
        :param verbose: passed through to ``Model.fit``.
        :param callbacks: passed through to ``Model.fit``.
        """
        self.model.fit(self._encode_inputs(x),
                       np.array(y),
                       batch_size=batch_size,
                       epochs=epochs,
                       verbose=verbose,
                       callbacks=callbacks)

    def predict(self, x):
        """Return ``self.model.predict`` on ``x`` encoded as in ``fit``."""
        return self.model.predict(self._encode_inputs(x))
class CNNSigmoid(ModelBase):
    """Two-layer 1-D convolutional regressor with a sigmoid output.

    The network is: embedding -> Conv1D(128, 3) -> Conv1D(32, 17) ->
    GlobalMaxPooling1D -> Dense(16) -> Dense(1, sigmoid), with dropout in
    between.
    """

    def __init__(self, wv, maxlen=50, max_num_deptag=50):
        """Build and compile the Keras model.

        :param wv: word-vector wrapper; the embedding layer is obtained from
            ``self.wv.model.wv.get_embedding_layer()`` after the base class
            initialiser has run.
        :param maxlen: length every input sequence is padded/truncated to.
        :param max_num_deptag: forwarded to ``ModelBase``; not used by this
            architecture.
        """
        super().__init__(wv, maxlen, max_num_deptag)
        embedding_layer = self.wv.model.wv.get_embedding_layer()

        sequence_input = Input(shape=(self.maxlen, ), dtype='int32')
        # -1 is the padding value used by _pad_inputs below.
        mask = Masking(mask_value=-1)(sequence_input)
        embedded_sequences = embedding_layer(mask)

        x = Conv1D(filters=128, kernel_size=3, activation='relu')(embedded_sequences)
        x = Dropout(0.2)(x)
        x = Conv1D(filters=32, kernel_size=17, activation='relu')(x)
        x = Dropout(0.2)(x)
        x = GlobalMaxPooling1D()(x)
        x = Dense(16)(x)
        x = Dropout(0.2)(x)
        preds = Dense(1, activation='sigmoid')(x)

        self.model = Model(sequence_input, preds, name='CNNSigmoid')
        self.compile_model()

    def compile_model(self):
        """Compile ``self.model`` with MSE loss, SGD and an MAE metric."""
        self.model.compile(loss='mean_squared_error',
                           optimizer='sgd',
                           metrics=['mae'])

    def _pad_inputs(self, x):
        """Pad each sequence in ``x`` to ``self.maxlen`` with -1.

        ``x`` is handed to ``pad_sequences`` unchanged: converting sequences
        of different lengths with ``np.array`` first raises ``ValueError`` on
        recent NumPy releases, and ``pad_sequences`` accepts lists anyway.
        """
        return sequence.pad_sequences(x, maxlen=self.maxlen, value=-1)

    def fit(self, x, y, batch_size, epochs, verbose, callbacks):
        """Train the model.

        :param x: sequences of integer ids (used as-is, unlike the sibling
            models that pick ``token[0]``).
        :param y: regression targets, converted with ``np.array``.
        :param batch_size: passed through to ``Model.fit``.
        :param epochs: passed through to ``Model.fit``.
        :param verbose: passed through to ``Model.fit``.
        :param callbacks: passed through to ``Model.fit``.
        """
        self.model.fit(self._pad_inputs(x),
                       np.array(y),
                       batch_size=batch_size,
                       epochs=epochs,
                       verbose=verbose,
                       callbacks=callbacks)

    def predict(self, x):
        """Return ``self.model.predict`` on ``x`` padded as in ``fit``."""
        return self.model.predict(self._pad_inputs(x))
def embedding_binary_classification():
    """Train a tiny embedding-based sentiment classifier on ten phrases.

    Each phrase is hashed to integer ids with ``one_hot``, padded to four
    ids, embedded into 8 dimensions, flattened and classified with one
    sigmoid unit. The function prints the encoded data, the model summary,
    the loss/accuracy on the training phrases (twice, the second time under
    a "test" label although the data is the same) and the prediction for
    the phrase 'Weak'.
    """
    # Local import keeps this block self-contained; labels must be an array
    # because newer Keras versions reject a plain Python list as `y`.
    import numpy as np

    docs = [
        'Well done!', 'Good work', 'Great effort', 'nice work', 'Excellent!',
        'Weak', 'Poor effort!', 'not good', 'poor work',
        'Could have done better.'
    ]
    # define class labels
    labels = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])

    vocab_size = 50
    # Original note: one_hot encodes into [1, n], excluding 0
    # (TODO confirm the upper bound against the Keras documentation).
    encoded_docs = [one_hot(d, vocab_size) for d in docs]
    print(encoded_docs)

    max_length = 4
    padded_docs = pad_sequences(encoded_docs,
                                maxlen=max_length,
                                padding='post')
    print(padded_docs)

    # `inputs` rather than `input`, which would shadow the builtin.
    inputs = Input(shape=(max_length, ))
    # This layer holds vocab_size * 8 = 50 * 8 parameters.
    x = Embedding(vocab_size, 8, input_length=max_length)(inputs)
    x = Flatten()(x)
    x = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=inputs, outputs=x)
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['acc'])
    model.summary()
    model.fit(padded_docs, labels, epochs=100, verbose=0)

    loss, accuracy = model.evaluate(padded_docs, labels, verbose=0)
    print('loss: {0},accuracy:{1}'.format(loss, accuracy))

    # NOTE(review): this evaluates the training data again; there is no
    # separate test set in this function.
    loss_test, accuracy_test = model.evaluate(padded_docs, labels, verbose=0)
    print('loss_test: {0},accuracy_test:{1}'.format(loss_test, accuracy_test))

    test = one_hot('Weak', vocab_size)
    padded_test = pad_sequences([test], maxlen=max_length, padding='post')
    print(model.predict(padded_test))
def train(model: Model,
          x_train,
          y_train,
          x_test,
          y_test,
          batch_size=128,
          epochs=20):
    """Compile, fit, export and evaluate ``model``.

    A run name ``conv_<unix timestamp>`` is derived from the current time and
    used for the checkpoint directory under ``./checkpoints``, the
    TensorBoard log directory under ``/tmp/tensorflow`` and the exported
    model. The test set doubles as validation data during fitting.

    :param model: Keras model to train.
    :param x_train: training inputs.
    :param y_train: training targets.
    :param x_test: inputs used for validation and the final evaluation.
    :param y_test: targets used for validation and the final evaluation.
    :param batch_size: batch size passed to ``model.fit``.
    :param epochs: number of epochs passed to ``model.fit``.
    """
    stamp = int(datetime.now().timestamp())
    run_name = "conv_" + str(stamp)
    checkpoint_dir = "./checkpoints/" + run_name
    os.makedirs(checkpoint_dir, exist_ok=True)

    model.compile(loss=K.categorical_crossentropy,
                  optimizer=Adam(),
                  metrics=[metrics.categorical_accuracy])

    tensorboard_cb = TensorBoard(log_dir='/tmp/tensorflow/' + run_name)
    # The placeholders in the file name are filled in by ModelCheckpoint at
    # save time, so the template must stay a plain (non-f) string.
    weights_template = os.path.join(
        checkpoint_dir,
        "weights-improvement-{epoch:02d}-{val_categorical_accuracy:.2f}.hdf5")
    checkpoint_cb = ModelCheckpoint(weights_template,
                                    monitor='val_categorical_accuracy',
                                    verbose=1,
                                    save_best_only=True,
                                    mode='auto')

    model.fit(x_train,
              y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              callbacks=[tensorboard_cb, checkpoint_cb])

    export_model(tf.train.Saver(), ['conv2d_1_input'], 'dense_2/Softmax',
                 run_name)

    test_loss, test_accuracy = model.evaluate(x_test, y_test)[:2]
    print('Test loss:', test_loss)
    print('Test accuracy:', test_accuracy)
def train_auto_encoder(self,
                       train_x,
                       test_x,
                       input_dim,
                       out_model_file='encoder_cnn.h5',
                       monitor='val_loss',
                       patience=4):
    '''Train a dense 512-256-128-256-512 autoencoder and save it.

    The network reconstructs its input (MSE loss, Adam) for up to 200 epochs
    with batch size 512, validating on ``test_x``.

    NOTE(review): ``monitor`` only drives early stopping; the checkpoint
    always watches 'val_loss'. The final ``save`` also writes to the same
    file as the best-only checkpoint -- confirm both are intended.

    :param train_x: training samples (also used as targets).
    :param test_x: validation samples (also used as targets).
    :param input_dim: number of features per sample.
    :param out_model_file: path the model is written to.
    :param monitor: quantity watched by ``EarlyStopping``.
    :param patience: ``EarlyStopping`` patience.
    :return: the output of the 'Encoder3' layer call (not a ``Model``).
    '''
    stopper = EarlyStopping(monitor=monitor, patience=patience)
    best_saver = ModelCheckpoint(out_model_file,
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='min')

    input_data = Input(shape=(input_dim,), name='Input')

    # Encoder stack: 512 -> 256 -> 128.
    encoder = input_data
    for units, layer_name in ((512, 'Encoder1'), (256, 'Encoder2'),
                              (128, 'Encoder3')):
        encoder = Dense(units, activation='relu', name=layer_name)(encoder)

    # Decoder stack mirrors the encoder and ends in a linear output layer.
    decoder = encoder
    for units, layer_name in ((256, 'Decoder1'), (512, 'Decoder2')):
        decoder = Dense(units, activation='relu', name=layer_name)(decoder)
    decoder = Dense(input_dim, activation='linear', name='Output')(decoder)

    autoencoder = Model(input_data, decoder)
    autoencoder.compile(optimizer='adam', loss='mse')
    autoencoder.fit(train_x,
                    train_x,
                    epochs=200,
                    batch_size=512,
                    callbacks=[best_saver, stopper],
                    shuffle=True,
                    validation_data=(test_x, test_x),
                    verbose=1)
    autoencoder.save(out_model_file)
    return encoder
def sda_training(features, labels):
    """Greedily pre-train a stack of denoising autoencoders.

    The data is split 80/20 and, for each width in (1600, 1024, 768), a
    one-hidden-layer autoencoder with Gaussian input noise is trained to
    reconstruct the current representation. The train/test data are then
    replaced by the encoder's output, and the trained encoding layer is
    collected.

    NOTE(review): within the visible code the one-hot targets and the
    collected layers are computed but never used or returned.

    :param features: sample matrix; ``shape[1]`` is the input width.
    :param labels: labels, mapped to integer categories through
        ``label_to_category``.
    """
    layer_widths = [1600, 1024, 768]
    stacked_encoder = []

    int_labels = label_to_category(labels=labels, type='training')
    X_train, X_test, y_train, y_test = train_test_split(features,
                                                        labels,
                                                        test_size=0.2,
                                                        random_state=10)
    y_train = to_categorical([int_labels[lbl] for lbl in y_train])
    y_test = to_categorical([int_labels[lbl] for lbl in y_test])

    for width in layer_widths:
        # The input width shrinks each round because X_train is re-encoded.
        input_dim = X_train.shape[1]
        layer_input = Input(shape=(input_dim,))
        noisy = noise.GaussianNoise(0.3)(layer_input)
        encode = Dense(width, activation='sigmoid')(noisy)
        decode = Dense(input_dim, activation='sigmoid')(encode)

        autoencoder = Model(layer_input, decode)
        autoencoder.compile(optimizer='adam', loss='mape')
        autoencoder.fit(X_train,
                        X_train,
                        epochs=10,
                        batch_size=32,
                        shuffle=True,
                        validation_data=(X_test, X_test))

        encoder = Model(layer_input, encode)
        X_train = encoder.predict(X_train)
        X_test = encoder.predict(X_test)
        stacked_encoder.append(encoder.layers[-1])
def test_lkrelu(self):
    """Train and evaluate a one-conv-layer CIFAR-10 classifier using LKReLU.

    The model is Conv2D(self.width, 3x3) -> LKReLU -> Flatten ->
    Dense(10, softmax), trained with SGD for ``self.epochs`` epochs while
    logging to TensorBoard. Test loss and accuracy are printed at the end.
    """
    batch_size = 32
    num_classes = 10

    (x_train, y_train), (x_test, y_test) = load_cifar10()

    inputs = Input(shape=x_train.shape[1:])
    x = Conv2D(self.width, (3, 3))(inputs)
    x = LKReLU()(x)
    x = Flatten()(x)
    x = Dense(num_classes, activation='softmax', name='fc1000')(x)
    model = Model(inputs=inputs, outputs=x)

    # summary() prints by itself and returns None; wrapping it in print()
    # would add a stray "None" line.
    model.summary()

    # The class name is used: the lowercase `sgd` alias does not exist in
    # newer Keras releases.
    opt = keras.optimizers.SGD()
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=self.metrics)

    log_dir = 'summaries/width-lkrelu-{}-cifar10-{}-{}'.format(
        self.width, self.seed, datetime.datetime.now())

    model.fit(x_train,
              y_train,
              batch_size=batch_size,
              epochs=self.epochs,
              validation_data=(x_test, y_test),
              shuffle=False,
              callbacks=[TensorBoard(log_dir=log_dir)])

    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
def vgg16_model(img_width, img_height, nb_epoch, nb_classes):
    """Fine-tune an ImageNet-pretrained VGG16 on CIFAR-10.

    A small classification head (Flatten -> BatchNorm -> Dense(256) ->
    Dropout -> softmax) is placed on the ``block5_pool`` output of VGG16 and
    the whole model is trained with SGD.

    :param img_width: input image width given to VGG16.
    :param img_height: input image height given to VGG16.
    :param nb_epoch: number of training epochs.
    :param nb_classes: number of classes; sizes both the one-hot labels and
        the softmax layer (the layer was previously fixed at 10).
    :return: the trained Keras model.
    """
    base_model = VGG16(weights='imagenet',
                       include_top=False,
                       input_shape=(img_width, img_height, 3))

    # load dataset
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()
    y_train = np_utils.to_categorical(y_train, nb_classes)
    y_test = np_utils.to_categorical(y_test, nb_classes)

    # Output of the final pooling layer of the fifth VGG16 block.
    last = base_model.get_layer('block5_pool').output

    # Add classification layers on top of it
    x = Flatten()(last)
    x = BatchNormalization()(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    output = Dense(nb_classes, activation='softmax')(x)

    model = Model(base_model.input, output)

    # Multi-class softmax with one-hot labels calls for categorical
    # cross-entropy; binary cross-entropy here also makes the 'accuracy'
    # metric report a misleading per-element accuracy.
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizers.SGD(lr=1e-3, momentum=0.9),
                  metrics=['accuracy'])
    model.fit(X_train,
              y_train,
              validation_data=(X_test, y_test),
              epochs=nb_epoch,
              batch_size=100,
              verbose=1)
    return model
class CTCModel:
    """Bundle of Keras models for training, decoding and scoring with CTC.

    ``compile`` builds four models that share the network given by
    ``inputs``/``outputs``: ``model_init`` (the bare network),
    ``model_train`` (outputs the CTC loss), ``model_pred`` (outputs the
    decoded sequences) and ``model_eval`` (outputs the label error rate).
    """

    def __init__(self,
                 inputs,
                 outputs,
                 greedy=True,
                 beam_width=100,
                 top_paths=1,
                 padding=-1,
                 charset=None):
        """Store the network tensors and the CTC decoding parameters.

        To be overridden or rewritten by subclasses: this is where
        ``self.inputs`` and ``self.outputs`` must be set to the lists of
        input and output layers of the network.

        :param inputs: list of input tensors of the network.
        :param outputs: list of output tensors of the network.
        :param greedy: use greedy decoding instead of beam search.
        :param beam_width: beam width for beam-search decoding.
        :param top_paths: number of paths requested from the decoder.
        :param padding: stored as ``self.padding``; not read elsewhere in
            this class.
        :param charset: stored and saved alongside the model parameters.
        """
        # All four models exist as attributes from the start so that code
        # touching them before compile() sees None instead of failing with
        # AttributeError.
        self.model_init = None
        self.model_train = None
        self.model_pred = None
        self.model_eval = None
        self.inputs = inputs
        self.outputs = outputs
        self.greedy = greedy
        self.beam_width = beam_width
        self.top_paths = top_paths
        self.padding = padding
        self.charset = charset

    def _decoding_arguments(self):
        """Return a fresh dict of decoder settings for a Lambda layer."""
        return {
            'greedy': self.greedy,
            'beam_width': self.beam_width,
            'top_paths': self.top_paths
        }

    def compile(self, optimizer):
        """Build and compile the train/pred/eval models.

        To be called once the network has been created. Adds the CTC loss,
        decoding and analysis layers on top of ``self.outputs``.

        :param optimizer: the optimizer to use during training.
        """
        # Extra inputs required by the CTC computations.
        labels = Input(name='labels', shape=[None])
        input_length = Input(name='input_length', shape=[1])
        label_length = Input(name='label_length', shape=[1])
        ctc_inputs = [labels, input_length, label_length]

        # Lambda layer for computing the loss function
        loss_out = Lambda(self.ctc_loss_lambda_func,
                          output_shape=(1, ),
                          name='CTCloss')(self.outputs + ctc_inputs)

        # Lambda layer for the decoding function
        out_decoded_dense = Lambda(
            self.ctc_complete_decoding_lambda_func,
            output_shape=(None, None),
            name='CTCdecode',
            arguments=self._decoding_arguments(),
            dtype="float32")(self.outputs + [input_length])

        # Lambda layer for computing the label error rate
        out_analysis = Lambda(
            self.ctc_complete_analysis_lambda_func,
            output_shape=(None, ),
            name='CTCanalysis',
            arguments=self._decoding_arguments(),
            dtype="float32")(self.outputs + ctc_inputs)

        # create Keras models
        self.model_init = Model(inputs=self.inputs, outputs=self.outputs)
        self.model_train = Model(inputs=self.inputs + ctc_inputs,
                                 outputs=loss_out)
        self.model_pred = Model(inputs=self.inputs + [input_length],
                                outputs=out_decoded_dense)
        self.model_eval = Model(inputs=self.inputs + ctc_inputs,
                                outputs=out_analysis)

        # Each model's "loss" simply passes through the value computed by
        # its Lambda layer.
        self.model_train.compile(loss={'CTCloss': lambda yt, yp: yp},
                                 optimizer=optimizer)
        self.model_pred.compile(loss={'CTCdecode': lambda yt, yp: yp},
                                optimizer=optimizer)
        self.model_eval.compile(loss={'CTCanalysis': lambda yt, yp: yp},
                                optimizer=optimizer)

    def get_model_train(self):
        """
        :return: Model used internally for training
        """
        return self.model_train

    def get_model_pred(self):
        """
        :return: Model used internally for prediction
        """
        return self.model_pred

    def get_model_eval(self):
        """
        :return: Model used to evaluate a data set
        """
        return self.model_eval

    def fit(self,
            x=None,
            y=None,
            batch_size=None,
            epochs=1,
            verbose=1,
            callbacks=None,
            validation_split=0.0,
            validation_data=None,
            shuffle=True,
            class_weight=None,
            sample_weight=None,
            initial_epoch=0,
            steps_per_epoch=None,
            validation_steps=None):
        """Train ``model_train`` and copy its weights to the other models.

        The input data must have the form
        [input_sequences, label_sequences, inputs_lengths, labels_length].

        :param: Same parameters as keras.engine.Model.fit()
        :return: The History object of the training run
        """
        out = self.model_train.fit(x=x,
                                   y=y,
                                   batch_size=batch_size,
                                   epochs=epochs,
                                   verbose=verbose,
                                   callbacks=callbacks,
                                   validation_split=validation_split,
                                   validation_data=validation_data,
                                   shuffle=shuffle,
                                   class_weight=class_weight,
                                   sample_weight=sample_weight,
                                   initial_epoch=initial_epoch,
                                   steps_per_epoch=steps_per_epoch,
                                   validation_steps=validation_steps)

        # Fetch the trained weights once and share them.
        trained_weights = self.model_train.get_weights()
        self.model_pred.set_weights(trained_weights)
        self.model_eval.set_weights(trained_weights)
        return out

    def predict(self, x, batch_size=None, verbose=0):
        """Run CTC decoding through ``model_pred``.

        :param x: inputs expected by ``model_pred`` (the network inputs
            followed by the input lengths).
        :param batch_size: passed through to ``Model.predict``.
        :param verbose: passed through to ``Model.predict``.
        :return: the output of ``model_pred.predict``.
        """
        return self.model_pred.predict(x,
                                       batch_size=batch_size,
                                       verbose=verbose)

    def predict2(self, x, batch_size=None, verbose=0, steps=None):
        """Predict with ``model_pred`` and strip the padding from each result.

        Same role as ``Model.predict`` in Keras, but uses this class's
        ``_predict_loop`` to deal with variable-length predictions.

        # Arguments
            x: The input data, as a Numpy array (or list of Numpy arrays).
            batch_size: Integer. If unspecified, it will default to 32.
            verbose: Verbosity mode, 0 or 1.
            steps: Total number of steps (batches of samples) before
                declaring the prediction round finished. Ignored with the
                default value of `None`.

        # Returns
            List of predictions, one list per sample, with the values equal
            to -1 removed.

        # Raises
            ValueError: In case of mismatch between the provided input data
                and the model's expectations, or in case a stateful model
                receives a number of samples that is not a multiple of the
                batch size.
        """
        # Backwards compatibility.
        if batch_size is None and steps is None:
            batch_size = 32
        if x is None and steps is None:
            raise ValueError('If predicting from data tensors, '
                             'you should specify the `steps` '
                             'argument.')

        # Validate user data.
        x = _standardize_input_data(x,
                                    self.model_pred._feed_input_names,
                                    self.model_pred._feed_input_shapes,
                                    check_batch_axis=False)
        if self.model_pred.stateful:
            if x[0].shape[0] > batch_size and x[0].shape[0] % batch_size != 0:
                raise ValueError('In a stateful network, '
                                 'you should only pass inputs with '
                                 'a number of samples that can be '
                                 'divided by the batch size. Found: ' +
                                 str(x[0].shape[0]) + ' samples. '
                                 'Batch size: ' + str(batch_size) + '.')

        # Prepare inputs, delegate logic to `_predict_loop`. The trailing 0.
        # is the learning-phase flag (test mode).
        if self.model_pred.uses_learning_phase and not isinstance(
                K.learning_phase(), int):
            ins = x + [0.]
        else:
            ins = x
        self.model_pred._make_predict_function()
        f = self.model_pred.predict_function
        out_pred = self._predict_loop(f,
                                      ins,
                                      batch_size=batch_size,
                                      verbose=verbose,
                                      steps=steps)

        # -1 is the padding value written by `_predict_loop` by default.
        return [[val for val in elmt if val != -1] for elmt in out_pred]

    @staticmethod
    def ctc_loss_lambda_func(args):
        """
        Function for computing the ctc loss (can be put in a Lambda layer)
        :param args: y_pred, labels, input_length, label_length
        :return: CTC loss
        """
        y_pred, labels, input_length, label_length = args
        return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

    @staticmethod
    def ctc_complete_decoding_lambda_func(args, **arguments):
        """
        Complete CTC decoding using Keras (function K.ctc_decode)
        :param args: y_pred, input_length
        :param arguments: greedy, beam_width, top_paths
        :return: K.ctc_decode with dtype='float32'
        """
        y_pred, input_length = args
        my_params = arguments

        assert (K.backend() == 'tensorflow')

        decoded = K.ctc_decode(y_pred,
                               tf.squeeze(input_length),
                               greedy=my_params['greedy'],
                               beam_width=my_params['beam_width'],
                               top_paths=my_params['top_paths'])
        # [0][0]: first decoded path of the decoded-sequences element.
        return K.cast(decoded[0][0], dtype='float32')

    @staticmethod
    def ctc_complete_analysis_lambda_func(args, **arguments):
        """
        Complete CTC analysis using Keras and tensorflow
        WARNING : tf is required
        :param args: y_pred, labels, input_length, label_len
        :param arguments: greedy, beam_width, top_paths
        :return: ler = label error rate
        """
        y_pred, labels, input_length, label_len = args
        my_params = arguments

        assert (K.backend() == 'tensorflow')

        # Time-major log-probabilities; the epsilon avoids log(0).
        batch = tf.log(tf.transpose(y_pred, perm=[1, 0, 2]) + 1e-8)
        input_length = tf.to_int32(tf.squeeze(input_length))

        greedy = my_params['greedy']
        beam_width = my_params['beam_width']
        top_paths = my_params['top_paths']

        if greedy:
            (decoded, log_prob) = ctc.ctc_greedy_decoder(
                inputs=batch, sequence_length=input_length)
        else:
            (decoded, log_prob) = ctc.ctc_beam_search_decoder(
                inputs=batch,
                sequence_length=input_length,
                beam_width=beam_width,
                top_paths=top_paths)

        cast_decoded = tf.cast(decoded[0], tf.float32)

        sparse_y = K.ctc_label_dense_to_sparse(
            labels, tf.cast(tf.squeeze(label_len), tf.int32))
        ed_tensor = tf_edit_distance(cast_decoded, sparse_y, norm=True)
        ler_per_seq = Kreshape_To1D(ed_tensor)

        return K.cast(ler_per_seq, dtype='float32')

    def _predict_loop(self,
                      f,
                      ins,
                      max_len=20,
                      max_value=-1,
                      batch_size=32,
                      verbose=0,
                      steps=None):
        """Loop over some data in batches and collect padded predictions.

        # Arguments
            f: Keras function returning a list of tensors.
            ins: list of tensors to be fed to `f`.
            max_len: width every sample-based prediction is padded or
                truncated to.
            max_value: padding value used for sample-based predictions.
            batch_size: integer batch size.
            verbose: verbosity mode (unused here).
            steps: Total number of steps (batches of samples)
                before declaring `_predict_loop` finished.
                Ignored with the default value of `None`.

        # Returns
            Array of predictions (if the model has a single output)
            or list of arrays of predictions
            (if the model has multiple outputs).
        """
        num_samples = self.model_pred._check_num_samples(
            ins, batch_size, steps, 'steps')

        if steps is not None:
            # Step-based predictions.
            # Since we do not know how many samples we will see, we cannot
            # pre-allocate the returned Numpy arrays. Instead, we store one
            # array per batch seen and concatenate them upon returning.
            unconcatenated_outs = []
            for step in range(steps):
                batch_outs = f(ins)
                if not isinstance(batch_outs, list):
                    batch_outs = [batch_outs]
                if step == 0:
                    for _ in batch_outs:
                        unconcatenated_outs.append([])
                for i, batch_out in enumerate(batch_outs):
                    unconcatenated_outs[i].append(batch_out)
            if len(unconcatenated_outs) == 1:
                return np.concatenate(unconcatenated_outs[0], axis=0)
            return [
                np.concatenate(per_output, axis=0)
                for per_output in unconcatenated_outs
            ]

        # Sample-based predictions.
        outs = []
        batches = _make_batches(num_samples, batch_size)
        index_array = np.arange(num_samples)
        for batch_index, (batch_start, batch_end) in enumerate(batches):
            batch_ids = index_array[batch_start:batch_end]
            if ins and isinstance(ins[-1], float):
                # Do not slice the training phase flag.
                ins_batch = _slice_arrays(ins[:-1], batch_ids) + [ins[-1]]
            else:
                ins_batch = _slice_arrays(ins, batch_ids)
            batch_outs = f(ins_batch)
            if not isinstance(batch_outs, list):
                batch_outs = [batch_outs]
            if batch_index == 0:
                # Pre-allocate the results arrays with a fixed width of
                # max_len (this differs from the stock Keras loop, which
                # uses the batch output shape).
                for batch_out in batch_outs:
                    shape = (num_samples, max_len)
                    outs.append(np.zeros(shape, dtype=batch_out.dtype))
            for i, batch_out in enumerate(batch_outs):
                # Variable-length outputs are padded/truncated to max_len.
                outs[i][batch_start:batch_end] = sequence.pad_sequences(
                    batch_out,
                    value=float(max_value),
                    maxlen=max_len,
                    dtype=batch_out.dtype,
                    padding="post")

        if len(outs) == 1:
            return outs[0]
        return outs

    @staticmethod
    def _write_text(path, text):
        """Write ``text`` to ``path``, closing the file even on error."""
        with open(path, "w") as handle:
            handle.write(text)

    @staticmethod
    def _read_text(path):
        """Return the contents of ``path``, closing the file even on error."""
        with open(path, 'r') as handle:
            return handle.read()

    def save_model(self, path_dir, charset=None):
        """Save the model architectures and CTC parameters in ``path_dir``.

        Writes model_train, model_pred, model_eval and model_init as JSON
        and the CTC parameters (greedy, beam_width, top_paths, charset) as a
        pickle. Weights are not written by this method.

        :param path_dir: existing directory to write into.
        :param charset: unused; ``self.charset`` is what gets saved.
        """
        self._write_text(path_dir + "/model_train.json",
                         self.model_train.to_json())
        self._write_text(path_dir + "/model_pred.json",
                         self.model_pred.to_json())
        self._write_text(path_dir + "/model_eval.json",
                         self.model_eval.to_json())
        self._write_text(path_dir + "/model_init.json",
                         self.model_init.to_json())

        param = {
            'greedy': self.greedy,
            'beam_width': self.beam_width,
            'top_paths': self.top_paths,
            'charset': self.charset
        }
        with open(path_dir + "/model_param.pkl", 'wb') as output:
            pickle.Pickler(output).dump(param)

    def load_model(self, path_dir, optimizer, initial_epoch=None):
        """Load the models and CTC parameters saved by ``save_model``.

        Reads the four JSON architectures and the parameter pickle,
        recompiles the models with ``optimizer`` and, when ``initial_epoch``
        is given, loads ``weights.<epoch>.hdf5``.

        :param path_dir: directory previously passed to ``save_model``.
        :param optimizer: optimizer handed to ``compile``.
        :param initial_epoch: epoch whose weights file should be loaded.
        """
        self.model_train = model_from_json(
            self._read_text(path_dir + '/model_train.json'))
        self.model_pred = model_from_json(
            self._read_text(path_dir + '/model_pred.json'),
            custom_objects={"tf": tf})
        self.model_eval = model_from_json(
            self._read_text(path_dir + '/model_eval.json'),
            custom_objects={
                "tf": tf,
                "ctc": ctc,
                "tf_edit_distance": tf_edit_distance,
                "Kreshape_To1D": Kreshape_To1D
            })
        self.model_init = model_from_json(
            self._read_text(path_dir + '/model_init.json'),
            custom_objects={"tf": tf})

        self.inputs = self.model_init.inputs
        self.outputs = self.model_init.outputs

        # NOTE(review): unpickling executes arbitrary code; only load
        # parameter files from a trusted location.
        with open(path_dir + "/model_param.pkl", 'rb') as param_file:
            param = pickle.Unpickler(param_file).load()

        # Missing keys keep the value the instance already had.
        self.greedy = param.get('greedy', self.greedy)
        self.beam_width = param.get('beam_width', self.beam_width)
        self.top_paths = param.get('top_paths', self.top_paths)
        self.charset = param.get('charset', self.charset)

        self.compile(optimizer)

        if initial_epoch:
            weight_name = 'weights.' + "%02d" % (initial_epoch) + '.hdf5'
            # Look inside path_dir first; fall back to the historical plain
            # concatenation (no separator) so existing layouts keep working.
            file_weight = os.path.join(path_dir, weight_name)
            if not os.path.exists(file_weight):
                file_weight = path_dir + weight_name
            print(file_weight)
            if os.path.exists(file_weight):
                self.model_train.load_weights(file_weight)
                loaded_weights = self.model_train.get_weights()
                self.model_pred.set_weights(loaded_weights)
                self.model_eval.set_weights(loaded_weights)
            else:
                print("Weights for epoch ", initial_epoch,
                      " can not be loaded.")
        else:
            print("Training will be start at the beginning.")
class AttentionSumReader(object):
    """Keras model that scores answer candidates by summing attention.

    The question and the document are encoded with stacked bidirectional
    RNNs; the dot product between the question vector and every document
    position is soft-maxed, and the attention mass falling on each
    candidate word id is summed to give that candidate's score.
    """

    def __init__(self,
                 word_dict,
                 embedding_matrix,
                 d_len,
                 q_len,
                 embedding_dim,
                 hidden_size,
                 num_layers,
                 weight_path,
                 use_lstm=False):
        """Build the model.

        Dimension legend used in variable names:
            b ... batch_size
            t ... d_len
            f ... hidden_size*2
            i ... candidate_len

        :param word_dict: vocabulary mapping, stored on the instance
        :param embedding_matrix: initial embedding weights; its length is
            used as the vocabulary size
        :param d_len: fixed document length
        :param q_len: fixed question length
        :param embedding_dim: embedding output dimension
        :param hidden_size: units per RNN direction
        :param num_layers: number of stacked bidirectional layers
        :param weight_path: prefix used for saving/loading weight files
        :param use_lstm: use LSTM cells when True, GRU cells otherwise
        """
        self.weight_path = weight_path
        self.word_dict = word_dict
        self.vocab_size = len(embedding_matrix)
        self.d_len = d_len
        self.q_len = q_len
        self.A_len = 10
        logging.info("Embedding matrix shape:%d x %d" %
                     (len(embedding_matrix), embedding_dim))
        self.rnn_cell = LSTM if use_lstm else GRU
        self.cell_name = "LSTM" if use_lstm else "GRU"

        def bi_rnn(prefix, index, return_sequences):
            # One bidirectional recurrent layer named "<prefix>-<cell>-<index>".
            return Bidirectional(self.rnn_cell(
                units=hidden_size,
                name="{}-{}-{}".format(prefix, self.cell_name, index),
                kernel_initializer="glorot_uniform",
                recurrent_initializer="glorot_uniform",
                bias_initializer='zeros',
                return_sequences=return_sequences),
                                 merge_mode="concat",
                                 dtype="float32")

        # Model inputs
        q_input = Input(batch_shape=(None, self.q_len),
                        dtype="int32",
                        name="q_input")
        d_input = Input(batch_shape=(
            None,
            self.d_len,
        ),
                        dtype="int32",
                        name="d_input")
        context_mask = Input(batch_shape=(None, self.d_len),
                             dtype="float32",
                             name="context_mask")
        candidates_bi = Input(batch_shape=(None, self.A_len),
                              dtype="int32",
                              name="candidates_bi")

        # Question encoder
        # output shape: (None, max_q_length, embedding_dim)
        q_encode = Embedding(
            input_dim=self.vocab_size,
            output_dim=embedding_dim,
            weights=[embedding_matrix],
            mask_zero=True,
        )(q_input)
        for i in range(1, num_layers):
            q_encode = bi_rnn("q-encoder", i, True)(q_encode)
        # q_encoder output shape: (None, hidden_size * 2)
        # TODO: represent q by the hidden state of the last step
        q_encode = bi_rnn("q-encoder", num_layers, False)(q_encode)

        # Context document encoder
        # output shape: (None, max_d_length, embedding_dim)
        d_encode = Embedding(input_dim=self.vocab_size,
                             output_dim=embedding_dim,
                             weights=[embedding_matrix],
                             mask_zero=True,
                             input_length=self.d_len)(d_input)
        # d_encoder output shape: (None, max_d_length, hidden_size * 2)
        for i in range(1, num_layers + 1):
            d_encode = bi_rnn("d-encoder", i, True)(d_encode)

        # noinspection PyUnusedLocal
        def my_dot(x):
            """Attention dot product, original (slow) version."""
            c = [
                tf.reduce_sum(tf.multiply(x[0][:, inx, :], x[1]),
                              -1,
                              keep_dims=True) for inx in range(self.d_len)
            ]
            return tf.concat(c, -1)

        def my_dot_v2(x):
            """Attention dot product, fast version."""
            d_btf, q_bf = x
            res = K.batch_dot(tf.expand_dims(q_bf, -1), d_btf, (1, 2))
            return K.reshape(res, [-1, self.d_len])

        mem_attention_pre_soft_bt = Lambda(
            my_dot_v2, name="attention")([d_encode, q_encode])
        # NOTE(review): the mask is applied multiplicatively *before* the
        # softmax, so padded positions get logit 0 rather than being
        # excluded and still receive probability mass. Confirm this is
        # intended.
        mem_attention_pre_soft_masked_bt = Multiply(name="mask")(
            [mem_attention_pre_soft_bt, context_mask])
        mem_attention_bt = Activation(
            activation="softmax",
            name="softmax")(mem_attention_pre_soft_masked_bt)

        # Attention sum
        # TODO: Get rid of sentence-by-sentence processing?
        # TODO: Rewrite into matrix notation instead of scans?
        def sum_prob_of_word(word_ix, sentence_ixs, sentence_attention_probs):
            # Sum the attention of every position holding word id `word_ix`.
            word_ixs_in_sentence = tf.where(tf.equal(sentence_ixs, word_ix))
            return tf.reduce_sum(
                tf.gather(sentence_attention_probs, word_ixs_in_sentence))

        # noinspection PyUnusedLocal
        def sum_probs_single_sentence(prev, cur):
            # Scan over the candidates of one sample.
            candidate_indices_i, sentence_ixs_t, sentence_attention_probs_t = cur
            result = tf.scan(fn=lambda previous, x: sum_prob_of_word(
                x, sentence_ixs_t, sentence_attention_probs_t),
                             elems=[candidate_indices_i],
                             initializer=K.constant(0., dtype="float32"),
                             swap_memory=True)
            return result

        def sum_probs_batch(candidate_indices_bi, sentence_ixs_bt,
                            sentence_attention_probs_bt):
            # Scan over the samples of the batch.
            result = tf.scan(fn=sum_probs_single_sentence,
                             elems=[
                                 candidate_indices_bi, sentence_ixs_bt,
                                 sentence_attention_probs_bt
                             ],
                             initializer=K.variable([0] * self.A_len,
                                                    dtype="float32"),
                             swap_memory=True)
            return result

        # output shape: (None, i) i = max_candidate_length = 10
        y_hat = Lambda(lambda x: sum_probs_batch(x[0], x[1], x[2]),
                       name="attention_sum")(
                           [candidates_bi, d_input, mem_attention_bt])
        self.model = Model(
            inputs=[q_input, d_input, context_mask, candidates_bi],
            outputs=y_hat)
        plot_model(self.model,
                   to_file=__file__ + ".png",
                   show_shapes=True,
                   show_layer_names=True)
        self.model.summary()

    # noinspection PyUnusedLocal
    def train(self, train_data, valid_data, batch_size, epochs, opt_name, lr,
              grad_clip):
        """Train the model and save the weights after every epoch.

        :param train_data: (documents, questions, answers, candidates)
        :param valid_data: validation data in the same layout
        :param batch_size: mini-batch size
        :param epochs: maximum number of epochs
        :param opt_name: "SGD" or "ADAM"
        :param lr: learning rate
        :param grad_clip: value passed as the optimizer's clipvalue
        :return: the object returned by model.fit
        :raises NotImplementedError: for any other opt_name
        """

        def save_weight_on_epoch_end(epoch, e_logs):
            # The validation-accuracy key is 'val_acc' or 'val_accuracy'
            # depending on the Keras version; accept either instead of
            # aborting training with a KeyError.
            val_acc = e_logs.get('val_acc', e_logs.get('val_accuracy'))
            filename = "{}weight-epoch{}-{}-{}.h5".format(
                self.weight_path,
                time.strftime("%Y-%m-%d-(%H-%M)", time.localtime()), epoch,
                val_acc)
            self.model.save_weights(filepath=filename)

        checkpointer = LambdaCallback(on_epoch_end=save_weight_on_epoch_end)
        # tensorboard = TensorBoard(log_dir="./logs", histogram_freq=1, write_images=True)
        earlystopping = EarlyStopping(monitor="val_loss",
                                      patience=3,
                                      verbose=1)

        # Preprocess the inputs
        questions_ok, documents_ok, context_mask, candidates_ok, y_true = self.preprocess_input_sequences(
            train_data)
        v_questions, v_documents, v_context_mask, v_candidates, v_y_true = self.preprocess_input_sequences(
            valid_data)
        if opt_name == "SGD":
            optimizer = SGD(lr=lr, decay=1e-6, clipvalue=grad_clip)
        elif opt_name == "ADAM":
            optimizer = Adam(lr=lr, clipvalue=grad_clip)
        else:
            raise NotImplementedError("Other Optimizer Not Implemented.-_-||")
        self.model.compile(optimizer=optimizer,
                           loss="categorical_crossentropy",
                           metrics=["accuracy"])

        # Load previously trained weights, if any
        self.load_weight()
        data = {
            "q_input": questions_ok,
            "d_input": documents_ok,
            "context_mask": context_mask,
            "candidates_bi": candidates_ok
        }
        v_data = {
            "q_input": v_questions,
            "d_input": v_documents,
            "context_mask": v_context_mask,
            "candidates_bi": v_candidates
        }
        return self.model.fit(x=data,
                              y=y_true,
                              batch_size=batch_size,
                              epochs=epochs,
                              validation_data=(v_data, v_y_true),
                              callbacks=[checkpointer, earlystopping])

    def test(self, test_data, batch_size):
        """Predict on test_data and report the accuracy.

        A prediction counts as correct when the arg-max candidate is index
        0, which is where preprocess_input_sequences puts the label.

        :return: (number of correct predictions, accuracy)
        """
        # Preprocess the inputs
        questions_ok, documents_ok, context_mask, candidates_ok, y_true = self.preprocess_input_sequences(
            test_data)
        data = {
            "q_input": questions_ok,
            "d_input": documents_ok,
            "context_mask": context_mask,
            "candidates_bi": candidates_ok
        }
        y_pred = self.model.predict(x=data, batch_size=batch_size)
        acc_num = np.count_nonzero(
            np.equal(np.argmax(y_pred, axis=-1), np.zeros(len(y_pred))))
        # Avoid ZeroDivisionError when there is nothing to evaluate.
        test_acc = acc_num / len(y_pred) if len(y_pred) else 0.0
        logging.info("Test accuracy is {}".format(test_acc))
        return acc_num, test_acc

    def load_weight(self, weight_path=None):
        """Load '<prefix>weight.h5' by layer name if that file exists.

        :param weight_path: prefix to use instead of self.weight_path
        """
        weight_file = self.weight_path if not weight_path else weight_path
        if os.path.exists(weight_file + "weight.h5"):
            logging.info("Load pre-trained weights:{}".format(weight_file +
                                                              "weight.h5"))
            self.model.load_weights(filepath=weight_file + "weight.h5",
                                    by_name=True)

    @staticmethod
    def union_shuffle(data):
        """Shuffle the four parallel sequences of `data` in unison."""
        d, q, a, A = data
        c = list(zip(d, q, a, A))
        random.shuffle(c)
        return zip(*c)

    @staticmethod
    def _build_context_mask(d_lens, d_len):
        """Return a float32 (len(d_lens), d_len) mask, 1.0 where j < length.

        Computed with NumPy so that preprocessing does not add ops to the
        TensorFlow graph on every call.
        """
        lengths = np.asarray(d_lens, dtype="int64").reshape(-1, 1)
        return (np.arange(d_len)[None, :] < lengths).astype("float32")

    def preprocess_input_sequences(self, data, shuffle=True):
        """Turn raw data into fixed-length model inputs.

        Optionally shuffles, then pads/truncates every sequence (post) to
        its fixed length. y_true is a one-hot matrix of width self.A_len
        whose index 0 marks the correct answer.

        :return: (questions, documents, context_mask, candidates, y_true)
        """
        documents, questions, answer, candidates = self.union_shuffle(
            data) if shuffle else data
        d_lens = [len(i) for i in documents]

        questions_ok = pad_sequences(questions,
                                     maxlen=self.q_len,
                                     dtype="int32",
                                     padding="post",
                                     truncating="post")
        documents_ok = pad_sequences(documents,
                                     maxlen=self.d_len,
                                     dtype="int32",
                                     padding="post",
                                     truncating="post")
        context_mask = self._build_context_mask(d_lens, self.d_len)
        candidates_ok = pad_sequences(candidates,
                                      maxlen=self.A_len,
                                      dtype="int32",
                                      padding="post",
                                      truncating="post")
        y_true = np.zeros_like(candidates_ok)
        y_true[:, 0] = 1
        return questions_ok, documents_ok, context_mask, candidates_ok, y_true
def fit_cnn():
    """Train a multi-branch text CNN on the module-level embeddings.

    Reads `train_embed`, `train_labels`, `test_embed` and `test_labels`
    from module scope, adds a trailing channel axis, one-hot encodes the
    labels with `convert_one_hot`, trains for 300 epochs using the test
    split as validation data and saves the result to "model/cnn.h5".

    :return: the trained Keras model
    """
    X_train = train_embed
    y_train = np.array(train_labels)
    X_test = test_embed
    y_test = test_labels

    # Conv2D expects a trailing channel axis.
    X_train = X_train.reshape(
        (X_train.shape[0], X_train.shape[1], X_train.shape[2], 1))
    X_test = X_test.reshape(
        (X_test.shape[0], X_test.shape[1], X_test.shape[2], 1))
    y_train, y_test = convert_one_hot(y_train, y_test)

    sequence_length = train_embed.shape[1]  # 60
    embedding_dim = train_embed.shape[2]
    filter_sizes = [2, 3, 4, 5, 6, 7, 8]
    # NOTE(review): branches for filter sizes 6, 7 and 8 used to be built
    # but were never passed to Concatenate, so they were not part of the
    # model. Only the four branches that reach the output are built here;
    # widen this slice if all seven were meant to be used.
    active_filter_sizes = filter_sizes[:4]
    num_filters = 64
    drop = 0.6
    input_shape = train_embed[0].shape
    epochs = 300
    batch_size = 32

    inputs = Input(shape=(sequence_length, embedding_dim, 1), dtype='float32')
    #batch_norm = BatchNormalization(input_shape = input_shape)(inputs)

    # One convolution per n-gram width spanning the full embedding dimension.
    convolutions = [
        Conv2D(num_filters,
               kernel_size=(filter_size, embedding_dim),
               padding='valid',
               kernel_initializer='normal',
               activation='relu',
               input_shape=input_shape)(inputs)
        for filter_size in active_filter_sizes
    ]
    # Max-over-time pooling: the pool covers every valid conv position.
    pooled = [
        MaxPool2D(pool_size=(sequence_length - filter_size + 1, 1),
                  strides=(1, 1),
                  padding='valid')(convolution)
        for filter_size, convolution in zip(active_filter_sizes, convolutions)
    ]

    concatenated_tensor = Concatenate(axis=1)(pooled)
    flatten = Flatten()(concatenated_tensor)
    dropout = Dropout(drop)(flatten)
    output = Dense(units=3, activation='softmax')(dropout)

    model = Model(inputs=inputs, outputs=output)
    adam = Adam(lr=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    model.compile(optimizer=adam,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    print("Traning Model...")
    model.fit(X_train,
              y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(X_test, y_test))
    model.save("model/cnn.h5")
    return model
def start_training(self):
    """Train a character-level LSTM encoder-decoder and save it.

    Reads tab-separated "input<TAB>target" lines from self.data_path (at
    most self.num_samples of them), one-hot encodes them per character,
    trains with self.latent_dim, self.batch_size and self.epochs, and
    saves the model to './data/s2s2.h5'.

    :raises ValueError: if no sample could be read, or a line does not
        contain exactly one tab
    """
    # Read the whole file up front so the handle is closed immediately.
    with open(self.data_path, encoding='UTF-8') as data_file:
        lines = data_file.read().split('\n')

    # Vectorize the data.
    input_texts = []
    target_texts = []
    input_characters = set()
    target_characters = set()
    for line in lines[:min(self.num_samples, len(lines) - 1)]:
        input_text, target_text = line.split('\t')
        # We use "tab" as the "start sequence" character
        # for the targets, and "\n" as "end sequence" character.
        target_text = '\t' + target_text + '\n'
        input_texts.append(input_text)
        target_texts.append(target_text)
        input_characters.update(input_text)
        target_characters.update(target_text)

    if not input_texts:
        # Without this the failure would be an opaque "max() arg is an
        # empty sequence" a few lines below.
        raise ValueError(
            'No training samples found in {}'.format(self.data_path))

    input_characters = sorted(input_characters)
    target_characters = sorted(target_characters)
    num_encoder_tokens = len(input_characters)
    num_decoder_tokens = len(target_characters)
    max_encoder_seq_length = max(len(txt) for txt in input_texts)
    max_decoder_seq_length = max(len(txt) for txt in target_texts)

    print('Number of samples:', len(input_texts))
    print('Number of unique input tokens:', num_encoder_tokens)
    print('Number of unique output tokens:', num_decoder_tokens)
    print('Max sequence length for inputs:', max_encoder_seq_length)
    print('Max sequence length for outputs:', max_decoder_seq_length)

    input_token_index = {
        char: i for i, char in enumerate(input_characters)
    }
    target_token_index = {
        char: i for i, char in enumerate(target_characters)
    }

    encoder_input_data = np.zeros(
        (len(input_texts), max_encoder_seq_length, num_encoder_tokens),
        dtype='float32')
    decoder_input_data = np.zeros(
        (len(input_texts), max_decoder_seq_length, num_decoder_tokens),
        dtype='float32')
    decoder_target_data = np.zeros(
        (len(input_texts), max_decoder_seq_length, num_decoder_tokens),
        dtype='float32')

    for i, (input_text, target_text) in enumerate(
            zip(input_texts, target_texts)):
        for t, char in enumerate(input_text):
            encoder_input_data[i, t, input_token_index[char]] = 1.
        for t, char in enumerate(target_text):
            # decoder_target_data is ahead of decoder_input_data by one timestep
            decoder_input_data[i, t, target_token_index[char]] = 1.
            if t > 0:
                # decoder_target_data will be ahead by one timestep
                # and will not include the start character.
                decoder_target_data[i, t - 1, target_token_index[char]] = 1.

    # Define an input sequence and process it.
    encoder_inputs = Input(shape=(None, num_encoder_tokens))
    encoder = LSTM(self.latent_dim, return_state=True)
    encoder_outputs, state_h, state_c = encoder(encoder_inputs)
    # We discard `encoder_outputs` and only keep the states.
    encoder_states = [state_h, state_c]

    # Set up the decoder, using `encoder_states` as initial state.
    decoder_inputs = Input(shape=(None, num_decoder_tokens))
    # We set up our decoder to return full output sequences,
    # and to return internal states as well. We don't use the
    # return states in the training model, but we will use them in inference.
    decoder_lstm = LSTM(self.latent_dim,
                        return_sequences=True,
                        return_state=True)
    decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
                                         initial_state=encoder_states)
    decoder_dense = Dense(num_decoder_tokens, activation='softmax')
    decoder_outputs = decoder_dense(decoder_outputs)

    # Define the model that will turn
    # `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

    # Run training
    model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
    model.summary()
    tbCallBack = callbacks.TensorBoard(log_dir='./data/Graph',
                                       histogram_freq=0,
                                       write_graph=True,
                                       write_images=True)
    model.fit([encoder_input_data, decoder_input_data],
              decoder_target_data,
              batch_size=self.batch_size,
              epochs=self.epochs,
              validation_split=0.2,
              verbose=1,
              callbacks=[tbCallBack])

    # Save model
    model.save('./data/s2s2.h5')
# Fine-tune a 4-class softmax head on top of a frozen VGGFace ResNet-50.

# Hold out 20% of the samples; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=0)

# One-hot encode the labels for the 4 target classes.
y_train = keras.utils.to_categorical(y_train, 4)
y_test = keras.utils.to_categorical(y_test, 4)

resnet = VGGFace(model='resnet50',input_shape=(224, 224, 3))

# Take the output of the second-to-last layer and attach a new 4-way
# softmax classifier to it.
layer_name = resnet.layers[-2].name
out = resnet.get_layer(layer_name).output
out = Dense(4,activation='softmax')(out)
resnet_4 = Model(resnet.input, out)

# Freeze every layer except the newly added Dense head.
for layer in resnet_4.layers[:-1]:
    layer.trainable = False

resnet_4.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print (resnet_4.summary())

# NOTE(review): the held-out split is used both as validation data during
# fit and for the final evaluate call below.
resnet_4.fit(x_train, y_train,batch_size=16,epochs=5,validation_data=(x_test, y_test),shuffle=True)

# With a single 'accuracy' metric, scores is [loss, accuracy].
scores = resnet_4.evaluate(x_test, y_test, verbose=1)
print('Test accuracy:', scores[1])
# Build a three-output model from the c3d network; every output is trained
# against the same frame labels.
out1, out2, out3 = c3d(inputs)
model_c3d = Model(inputs=inputs, outputs=[out1, out2, out3])
# model_c3d.summary()
model_c3d.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# %% Network training
print('Not using data augmentation.')
logging.debug("Running training...")
hist = model_c3d.fit(X_train_c3d, [y_train_frame, y_train_frame, y_train_frame], batch_size=32, epochs=100, verbose=1, shuffle=True, callbacks=[mc])

# %% Prediction
logging.debug("Running test...")
# Weights are reloaded from a hard-coded absolute path before predicting.
model_c3d.load_weights('/home/kdh/Desktop/c3da/checkpoint/c3da_weights.h5')
proba_c3d = model_c3d.predict(X_test_c3d, batch_size=4, verbose=1)

# Only the third output (index 2) is used for the predicted classes.
y_pred = [np.argmax(prob) for prob in proba_c3d[2]]
y_true = [np.argmax(true) for true in y_test_frame]

# Count the correct predictions.
# NOTE(review): the comparison uses y_test, which is not defined in this
# fragment, while y_true computed just above is never read here - confirm
# which one is intended.
count = 0
for i in range(len(y_pred)):
    if y_test[i] == y_pred[i]:
        count += 1
# Freeze the first num_layers - 1 layers so that only the remaining
# layer(s) are updated during fine-tuning.
for layer in range(num_layers - 1):
    final_model.layers[layer].trainable = False

final_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# get training data
rootdir_train = '/training'
filenames = load_filenames('all_train', rootdir_train)
#print(filenames)
X, Y = compile_images(filenames, (250, 250, 3))

# fit updated model on senators
final_model.fit(X, Y, batch_size=8, epochs=10, verbose=1, validation_split=0.2)

# test
rootdir_test = '/test'
filenames_test = load_filenames('all_test', rootdir_test)
X_test, Y_test = compile_images(filenames_test, (250, 250, 3))
print("Test results: ", final_model.test_on_batch(X_test, Y_test))

# Print the ground truth next to the prediction for every test file.
# (This loop used to iterate over the undefined name `filnames_test`,
# which raised NameError after training had already finished.)
predictions = final_model.predict(X_test)
for i, f in enumerate(filenames_test):
    print(f, " with truth value: ", Y_test[i], predictions[i])

final_model.save("/output/transfer_modle120.h5")
# Report how long it took to build and compile the model.
print('model compile time: {}'.format(time.time() - start_time))
print('')

#############################################################################
# TRAINING
batch_size = 256
nb_epoch = 50

# Model saving callback: only the best checkpoint is kept
# (save_best_only=True).
checkpointer = ModelCheckpoint(filepath=WEIGHTS_PRESENCE_FILEPATH, verbose=1, save_best_only=True)

# Early stopping: stop once val_loss has not improved for 5 epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=5)

# NOTE(review): `nb_epoch` is the Keras 1 spelling of `epochs`; confirm the
# installed Keras version still accepts it.
history = model_presence_a.fit(data_images_train, data_presence_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=2, validation_data=(data_images_val, data_presence_val), shuffle=True, callbacks=[checkpointer, early_stopping])

# Persist the per-epoch metrics recorded by fit as JSON.
with open(HISTORY_PRESENCE_FILEPATH, 'w') as f_out:
    json.dump(history.history, f_out)
# Report how long it took to build and compile the model.
print('model compile time: {}'.format(time.time() - start_time))
print('')

#############################################################################
# TRAINING
batch_size = 48
nb_epoch = 100

# Model saving callback: only the best checkpoint is kept
# (save_best_only=True).
checkpointer = ModelCheckpoint(filepath=WEIGHTS_SEGMENT_FILEPATH, verbose=1, save_best_only=True)

# Early stopping: stop once val_loss has not improved for 5 epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=5)

# The segmentation model is trained on image/mask pairs.
# NOTE(review): `nb_epoch` is the Keras 1 spelling of `epochs`; confirm the
# installed Keras version still accepts it.
history = model_segment.fit(data_images_train, data_masks_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=2, shuffle=True, validation_data=(data_images_val, data_masks_val), callbacks=[checkpointer, early_stopping])

# Persist the per-epoch metrics recorded by fit as JSON.
with open(HISTORY_SEGMENT_FILEPATH, 'w') as f_out:
    json.dump(history.history, f_out)
def main(i, RNN_TYPE):
    """Train and evaluate a recurrent QA model on bAbI task number `i`.

    Looks for the "qa<i>_*_train.txt" / "qa<i>_*_test.txt" files under
    "<cwd>/tasks_1-20_v1-2/en/", trains the model, writes the training
    history to "__pre_history_<type>_<i>.csv" and the test loss/accuracy
    to "__test_<RNN_TYPE>_<i>.csv".

    :param i: bAbI task number
    :param RNN_TYPE: "gru", "recurrent" or "simplernn"; any other value
        selects an LSTM
    :raises IOError: if the train or test file of the task is not found
    """
    # RNN_TYPE -> (attribute of `recurrent`, history file tag). The class is
    # looked up lazily so that only the selected attribute has to exist.
    rnn_choices = {
        "gru": ("GRU", "gru"),
        "recurrent": ("Recurrent", "recurrent"),
        "simplernn": ("SimpleRNN", "simple"),
    }
    rnn_attr, tag = rnn_choices.get(RNN_TYPE, ("LSTM", "lstm"))
    RNN = getattr(recurrent, rnn_attr)
    file_name = "history_" + tag + "_" + str(i) + ".csv"

    EMBED_HIDDEN_SIZE = 100
    SENT_HIDDEN_SIZE = 100
    QUERY_HIDDEN_SIZE = 100
    BATCH_SIZE = 50
    EPOCHS = 100

    # Locate the train/test files of the requested task. When several files
    # match, the last one listed wins.
    base_file = os.getcwd() + "/tasks_1-20_v1-2/en/"
    test_file = ""
    train_file = ""
    for entry in os.listdir(base_file):
        if entry.startswith("qa" + str(i) + "_"):
            if entry.endswith("_test.txt"):
                test_file = entry
            elif entry.endswith("_train.txt"):
                train_file = entry
    print(train_file)
    print(test_file)
    if not train_file or not test_file:
        # Otherwise open() would be called on the directory itself.
        raise IOError("Could not find train/test files for task {} in {}".format(
            i, base_file))

    # The dataset can alternatively be obtained as the
    # 'babi-tasks-v1-2.tar.gz' archive from
    # https://s3.amazonaws.com/text-datasets/babi_tasks_1-20_v1-2.tar.gz

    # Close both files as soon as the stories have been parsed.
    with open(base_file + train_file) as f_train, \
            open(base_file + test_file) as f_test:
        train = get_stories(f_train)
        test = get_stories(f_test)

    vocab = set()
    for story, q, answer in train + test:
        vocab |= set(story + q + [answer])
    vocab = sorted(vocab)
    # check_existence(vocab)
    # get_word_vectors_from_pretr_embeddings(train, test, vocab)

    # Reserve 0 for masking via pad_sequences
    vocab_size = len(vocab) + 1
    print("Vocabulary size: ", vocab_size)
    word_idx = {word: index + 1 for index, word in enumerate(vocab)}
    story_maxlen = max(map(len, (x for x, _, _ in train + test)))
    query_maxlen = max(map(len, (x for _, x, _ in train + test)))

    x, xq, y = vectorize_stories(train, word_idx, story_maxlen, query_maxlen)
    tx, txq, ty = vectorize_stories(test, word_idx, story_maxlen, query_maxlen)

    print('vocab = {}'.format(vocab))
    print('x.shape = {}'.format(x.shape))
    print('xq.shape = {}'.format(xq.shape))
    print('y.shape = {}'.format(y.shape))
    print('story_maxlen, query_maxlen = {}, {}'.format(story_maxlen,
                                                       query_maxlen))

    print('Build model...')
    # NOTE(review): the pre-trained weights are fetched but never passed to
    # the Embedding layers below. The call is kept in case it has side
    # effects - confirm whether it was meant to initialise the embeddings.
    pre_trained_emb_weights = get_pre_trained_emb(vocab)

    sentence = layers.Input(shape=(story_maxlen,), dtype='float32')
    encoded_sentence = layers.Embedding(vocab_size, EMBED_HIDDEN_SIZE)(sentence)
    encoded_sentence = layers.Dropout(0.3)(encoded_sentence)

    question = layers.Input(shape=(query_maxlen,), dtype='float32')
    encoded_question = layers.Embedding(vocab_size, EMBED_HIDDEN_SIZE)(question)
    encoded_question = layers.Dropout(0.3)(encoded_question)
    encoded_question = RNN(EMBED_HIDDEN_SIZE)(encoded_question)
    # Repeat the question vector so it can be added to every story position.
    encoded_question = layers.RepeatVector(story_maxlen)(encoded_question)

    merged = layers.add([encoded_sentence, encoded_question])
    merged = RNN(EMBED_HIDDEN_SIZE)(merged)
    merged = layers.Dropout(0.3)(merged)
    preds = layers.Dense(vocab_size, activation='softmax')(merged)

    model = Model([sentence, question], preds)
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    print('Training')
    history = model.fit([x, xq], y,
                        batch_size=BATCH_SIZE,
                        epochs=EPOCHS,
                        validation_split=0.05)
    pandas.DataFrame(history.history).to_csv("__pre_"+file_name)

    loss, acc = model.evaluate([tx, txq], ty, batch_size=BATCH_SIZE)
    pandas.DataFrame([str(loss)+"_"+ str(acc)]).to_csv("__test_"+RNN_TYPE+"_"+str(i)+".csv")
    print('Test loss / test accuracy = {:.4f} / {:.4f}'.format(loss, acc))
def main(_):
    """Train a two-output CNN on CIFAR-10, save it and report test scores.

    Both outputs of `simple_cnn` are trained against the same labels.
    Hyper-parameters and paths (`num_classes`, `batch_size`, `epochs`,
    `data_augmentation`, `save_dir`, `model_name`) come from module scope.

    :param _: unused
    """
    # The data, shuffled and split between train and test sets:
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # Convert class vectors to binary class matrices.
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    # Scale pixel values to [0, 1].
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255

    inputs = Input(shape=(32, 32, 3))
    out1, out2 = simple_cnn(inputs)
    model = Model(inputs=inputs, outputs=[out1, out2])
    # model.summary()

    opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)
    model.compile(
        optimizer=opt,
        loss=['categorical_crossentropy', 'categorical_crossentropy'],
        metrics=['accuracy'])

    if not data_augmentation:
        print('Not using data augmentation.')
        model.fit(x_train, [y_train, y_train],
                  batch_size=batch_size,
                  epochs=epochs,
                  validation_data=(x_test, [y_test, y_test]),
                  shuffle=True)
    else:
        print('Using real-time data augmentation.')

        def train_generator(x, y, batch_size):
            # Yield augmented batches forever, duplicating the labels once
            # per model output.
            train_datagen = ImageDataGenerator(
                width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
                height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
                horizontal_flip=True)  # randomly flip images
            generator = train_datagen.flow(x, y, batch_size=batch_size)
            while 1:
                x_batch, y_batch = generator.next()
                yield (x_batch, [y_batch, y_batch])

        # Fit the model on the batches generated by datagen.flow().
        model.fit_generator(generator=train_generator(x_train, y_train,
                                                      batch_size),
                            steps_per_epoch=y_train.shape[0] // batch_size,
                            epochs=epochs,
                            validation_data=(x_test, [y_test, y_test]),
                            callbacks=[])

    # Save model and weights
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)
    model_path = os.path.join(save_dir, model_name)
    model.save(model_path)
    print('Saved trained model at %s ' % model_path)

    # Score trained model.
    scores = model.evaluate(x_test, [y_test, y_test], verbose=1)
    print('Test loss:', scores[0])
    # For a multi-output model `scores` is [total loss, per-output losses...,
    # per-output metrics...], so scores[1] is a loss, not an accuracy. Pair
    # the values with model.metrics_names to report the real accuracies.
    for metric_name, value in zip(model.metrics_names[1:], scores[1:]):
        if 'acc' in metric_name:
            print('Test accuracy (%s):' % metric_name, value)
# When we see that the validation loss stopped improving, so that we can start # learning faster and then get more precision rp_callback = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=0.000001, verbose=1) callbacks.append(rp_callback) # In[ ]: model.compile(optimizer=optimizer, loss=loss, metrics=metrics) model.fit(x=train_dataset, epochs=50, steps_per_epoch=len(train_gen), validation_data=valid_dataset, validation_steps=len(valid_gen), callbacks=callbacks) # In[ ]: # Save the model now = datetime.now().strftime('%b%d_%H-%M-%S') print(str(now)) model_name = os.path.join(models_dir, str(now)) classification_name = str(prediction_dir) + '/' +str(now) model.save(model_name)
def train_label_none_label_classification(label_folder, non_label_folder, model_file=None):
    """Train the label / non-label image classifier and save it.

    Builds a new 3-layer CNN (or loads `model_file` when given), trains it
    for 100 epochs on mean-centred, rescaled 28x28x3 images, checkpoints
    every improvement of the validation accuracy and finally writes
    'final_model.h5'.

    :param label_folder: folder with the "label" samples
    :param non_label_folder: folder with the "non-label" samples
    :param model_file: optional path of a saved model to continue from
    """
    c = Config()

    # Build or load model
    if model_file is not None:
        model = load_model(model_file)
    else:
        img_input = Input(shape=(28, 28, 3))
        # Alternatives: model_cnn_2_layer / model_cnn_3_layer expose the
        # same nn_classify_label_non_label(img_input) entry point.
        prediction = nn_cnn_3_layer.nn_classify_label_non_label(img_input)
        model = Model(inputs=img_input, outputs=prediction)
        model.compile(loss='categorical_crossentropy',
                      optimizer=RMSprop(),
                      metrics=['accuracy'])
    model.summary()

    # Load and normalize data
    x_train, y_train, x_test, y_test = load_train_validation_data(
        label_folder, non_label_folder)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    for images in (x_train, x_test):
        # Subtract the per-channel mean, then rescale; both in place.
        for channel in range(3):
            images[:, :, :, channel] -= c.img_channel_mean[channel]
        images /= 255

    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, 2)
    y_test = keras.utils.to_categorical(y_test, 2)

    # Save the network only when the validation accuracy improves
    # (monitor='val_acc', mode='max').
    checkpoint = ModelCheckpoint(
        "weights-improvement-{epoch:04d}-{val_acc:.4f}.hdf5",
        monitor='val_acc',
        verbose=1,
        save_best_only=True,
        mode='max')

    model.fit(x_train, y_train,
              batch_size=128,
              epochs=100,
              verbose=1,
              callbacks=[checkpoint],
              validation_data=(x_test, y_test))

    test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=0)[:2]
    print('Test loss:', test_loss)
    print('Test accuracy:', test_accuracy)
    model.save('final_model.h5')
# Multi-width text CNN written against the Keras 1 API (nb_filter,
# border_mode, merge, nb_epoch).
embedded_sequences = embedding_layer(sequence_input)

# One convolution branch per filter length; each branch is max-pooled over
# all of its valid positions and flattened.
cnns = []
for filter_length in filter_lengths:
    x = Conv1D(nb_filter=nb_filter, filter_length=filter_length, border_mode='valid', activation='relu', W_constraint=maxnorm(3), W_regularizer=l2(0.0001), subsample_length=1)(embedded_sequences)
    # A 'valid' convolution leaves MAX_SEQUENCE_LENGTH - filter_length + 1
    # positions, so this pool covers the whole sequence.
    # NOTE(review): assumes the embedded sequences have length
    # MAX_SEQUENCE_LENGTH - confirm against embedding_layer.
    x = MaxPooling1D(pool_length=MAX_SEQUENCE_LENGTH - filter_length + 1)(x)
    x = Flatten()(x)
    cnns.append(x)

# Concatenate the branch features and classify.
x = merge(cnns, mode='concat')
x = Dropout(0.2)(x)
x = Dense(128, activation='relu')(x)
preds = Dense(len(labels_index), activation='softmax')(x)

model = Model(sequence_input, preds)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# happy learning!
model.fit(x_train, y_train, validation_data=(x_val, y_val), nb_epoch=5, batch_size=128)
def ent_rel_pred_nn(cv_dat, drop, max_len, embedding_length, rdf2vec_model, rev_char_dict):
    """Train the two-headed LSTM model on one fold and return its macro F1.

    The network embeds the padded integer sequences, runs them through an LSTM
    and two tanh dense layers, and ends in two softmax heads: ``output_layer``
    (2 units) and ``output_layer_2`` (500 units, trained against the vectors
    returned by ``get_rdf_vecs_phrases``). Ten percent of the training data is
    split off (stratified) for validation and early stopping.

    Side effects: writes the model architecture as JSON to ``model_j`` and the
    weights to ``model_wgt``. Both names are expected to exist at module level;
    they are not defined in this function.

    :param cv_dat: 4-tuple ``(train_x, train_y, test_x, test_y)``
    :param drop: dropout rate used by both Dropout layers
    :param max_len: length sequences are padded to / model input length
    :param embedding_length: first argument of ``Embedding`` (its input
        dimension)
    :param rdf2vec_model: forwarded to ``get_rdf_vecs_phrases``
    :param rev_char_dict: forwarded to ``get_rdf_vecs_phrases``
    :return: macro-averaged F1 of the ``output_layer`` predictions on the
        test split
    """
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print("We are into the training block")
    train_x, train_y, test_x, test_y = cv_dat
    print("%s %s" % (train_x.shape, test_x.shape))

    print("getting validation data")
    train_x, val_x, train_y, val_y = train_test_split(
        train_x, train_y, test_size=0.1, random_state=666, stratify=train_y)
    print("Validation shapes:" + str(val_x.shape))

    print("Data distributions:")
    print(Counter([np.argmax(a) for a in train_y]))
    print(Counter([np.argmax(a) for a in test_y]))
    print(Counter([np.argmax(a) for a in val_y]))

    # get the rdf2vec_vectors for all
    # (computed from the sequences before they are padded below)
    train_rdf2vec = get_rdf_vecs_phrases(train_x, rdf2vec_model, rev_char_dict)
    val_rdf2vec = get_rdf_vecs_phrases(val_x, rdf2vec_model, rev_char_dict)

    # np.int was only an alias for the builtin int and no longer exists in
    # current NumPy releases; dtype=int selects the same dtype.
    train_x = np.array(sequence.pad_sequences(train_x, maxlen=max_len), dtype=int)
    test_x = np.array(sequence.pad_sequences(test_x, maxlen=max_len), dtype=int)
    val_x = np.array(sequence.pad_sequences(val_x, maxlen=max_len), dtype=int)

    print("Training the neural net now")
    print(train_x[0])

    # define the neural net model
    input_vec = Input(shape=(max_len, ), dtype='int32', name="inp_vec")
    embedded_l = Embedding(embedding_length, 128, mask_zero=False,
                           input_length=max_len, trainable=True)(input_vec)
    lstm_1 = LSTM(128, return_sequences=False, dropout_W=0.3, dropout_U=0.3)(embedded_l)
    dense_1 = Dense(512, activation='tanh', kernel_initializer="glorot_uniform")(lstm_1)
    drop_l = Dropout(drop)(dense_1)
    dense_2 = Dense(256, activation='tanh', kernel_initializer="glorot_uniform")(drop_l)
    drop_l_2 = Dropout(drop)(dense_2)
    output_l = Dense(2, activation='softmax', name="output_layer")(drop_l_2)
    output_rdf2vec = Dense(500, activation='softmax', name="output_layer_2")(drop_l_2)
    model = Model([input_vec], output=[output_l, output_rdf2vec])

    adam = Adam(lr=0.0001)

    # weighted categorical cross entropy
    # NOTE(review): ncce is constructed here but compile() below is given the
    # plain 'categorical_crossentropy' string, so the weights are never used.
    # Confirm which loss is intended before changing it.
    w_array = np.ones((2, 2))
    w_array[0, 1] = 1.2
    ncce = functools.partial(w_categorical_crossentropy, weights=w_array)
    ncce.__name__ = 'w_categorical_crossentropy'

    model.compile(loss='categorical_crossentropy',
                  optimizer=adam,
                  metrics=['accuracy'],
                  loss_weights={
                      'output_layer': 1.,
                      'output_layer_2': 0.4
                  })

    # Stop once the validation loss of the 2-class head stops improving.
    earlystop = EarlyStopping(monitor='val_output_layer_loss', min_delta=0.0001,
                              patience=3, verbose=1, mode='auto')
    callbacks_list = [earlystop]
    print(model.summary())

    model.fit([train_x], [train_y, train_rdf2vec],
              batch_size=128,
              epochs=30,
              verbose=1,
              shuffle=True,
              callbacks=callbacks_list,
              validation_data=[[val_x], [val_y, val_rdf2vec]])

    # Persist architecture and weights separately.
    model_json = model.to_json()
    with open(model_j, "w") as json_file:
        json_file.write(model_json)
    model.save_weights(model_wgt)
    print("Saved the model to disk.")

    # Only the first head (output_layer) is scored.
    test_predictions = model.predict([test_x], verbose=False)
    test_pred = [np.argmax(pred) for pred in test_predictions[0]]
    test_y = [np.argmax(label) for label in test_y]
    f1_value = f1_score(test_y, test_pred, average="macro")
    print(f1_value)
    return f1_value
# Transfer-learning model: base_model's input wired to the new prediction head.
model = Model(inputs=base_model.input, outputs=preds)

# Freeze the first 18 layers and make the layers from index 19 on trainable.
# NOTE(review): layer index 18 is touched by neither loop and keeps whatever
# `trainable` value it already had; confirm whether [18:] was intended.
for layer in model.layers[:18]:
    layer.trainable = False
for layer in model.layers[19:]:
    layer.trainable = True

tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train,
          batch_size=32,
          epochs=3,
          validation_split=0.3,
          callbacks=[tensorboard])
model.save('emotion_cnn_transfer-vgg.model')

# get the predictions for the test data, re-encoded as one-hot rows
predicted_classes = model.predict(X_test)
predicted_classes = predicted_classes.argmax(axis=-1)
predicted_classes = keras.utils.to_categorical(predicted_classes, num_classes)

# get the indices to be plotted
# The comparison is reduced per sample (row). An element-wise np.where on the
# one-hot matrices would return a row index for every matching cell, so wrong
# predictions would also show up in `correct` and every index would repeat.
# (assumes y_test is one-hot with shape (n_samples, num_classes); TODO confirm)
correct = np.where(np.all(predicted_classes == y_test, axis=-1))[0]
incorrect = np.where(np.any(predicted_classes != y_test, axis=-1))[0]

target_names = ["Class {}".format(i) for i in range(num_classes)]
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) batch_size = 128 nb_epoch = 10 data_augmentation = True # Model saving callback #checkpointer = ModelCheckpoint(filepath='stochastic_depth_cifar10.hdf5', verbose=1, save_best_only=True) if not data_augmentation: print('Not using data augmentation.') history = model.fit(x_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=1, validation_data=(x_test, y_test), shuffle=True, callbacks=[]) else: print('Using real-time data augmentation.') # realtime data augmentation datagen_train = ImageDataGenerator(featurewise_center=False, samplewise_center=False, featurewise_std_normalization=False, samplewise_std_normalization=False, zca_whitening=False, rotation_range=0, width_shift_range=0.125, height_shift_range=0.125,