def train_top_model():
    """Train the softmax classifier that sits on top of the bottleneck features.

    Reads the cached training/validation bottleneck features and labels from
    ``.npy`` files under ``output_dir``, fits a single ``Dense(5, softmax)``
    layer on them for ``top_epochs`` epochs, logs the per-epoch metrics to
    ``top_model_training.csv`` and stores the learned weights at
    ``top_model_weights_path``.
    """

    def load_array(filename):
        # Read through a context manager so the file handle is closed as soon
        # as the array is loaded (the handle was previously never closed).
        with open(output_dir + filename, 'rb') as array_file:
            return np.load(array_file)

    # Load the bottleneck features and labels
    train_features = load_array('bottleneck_features_train.npy')
    train_labels = load_array('bottleneck_labels_train.npy')
    validation_features = load_array('bottleneck_features_validation.npy')
    validation_labels = load_array('bottleneck_labels_validation.npy')

    # Create the top model for the inception V3 network, a single Dense layer
    # with softmax activation.
    top_input = Input(shape=train_features.shape[1:])
    top_output = Dense(5, activation='softmax')(top_input)
    model = Model(top_input, top_output)

    # Train the model using the bottleneck features and save the weights.
    model.compile(optimizer=SGD(lr=1e-4, momentum=0.9),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    csv_logger = CSVLogger(output_dir + 'top_model_training.csv')
    model.fit(train_features,
              train_labels,
              epochs=top_epochs,
              batch_size=batch_size,
              validation_data=(validation_features, validation_labels),
              callbacks=[csv_logger])
    model.save_weights(top_model_weights_path)
class SiameseModel:
    """Siamese LSTM that scores the similarity of two vector sequences.

    Both inputs are encoded by the same LSTM layer (shared weights). The two
    encodings are combined element-wise as ``1 - |left - right|`` and passed
    to a single ``tanh`` unit. The model is compiled with MSE loss, the
    Adadelta optimizer and the ``pearson_correlation`` metric.
    """

    def __init__(self, use_cudnn_lstm=True, plot_model_architecture=False):
        """Build and compile the network.

        Parameters
        ----------
        use_cudnn_lstm : bool
            Use ``layers.CuDNNLSTM`` instead of ``layers.LSTM``.
        plot_model_architecture : bool
            If True, write the architecture diagram to
            ``siamese_architecture.png``.
        """
        n_hidden = 50
        input_dim = 300

        # unit_forget_bias: Boolean. If True, add 1 to the bias of the forget
        # gate at initialization. Setting it to true will also force
        # bias_initializer="zeros". This is recommended in Jozefowicz et al.
        # he_normal: Gaussian initialization scaled by fan_in (He et al., 2014)
        # Use CuDNNLSTM instead of LSTM when requested, because it is faster.
        lstm_class = layers.CuDNNLSTM if use_cudnn_lstm else layers.LSTM
        lstm = lstm_class(n_hidden,
                          unit_forget_bias=True,
                          kernel_initializer='he_normal',
                          kernel_regularizer='l2',
                          name='lstm_layer')

        # Building the left branch of the model: inputs are variable-length
        # sequences of vectors of size `input_dim`.
        left_input = Input(shape=(None, input_dim), name='input_1')
        left_output = lstm(left_input)

        # Building the right branch of the model: when you call an existing
        # layer instance, you reuse its weights.
        right_input = Input(shape=(None, input_dim), name='input_2')
        right_output = lstm(right_input)

        # Builds the classifier on top
        l1_norm = lambda x: 1 - K.abs(x[0] - x[1])
        merged = layers.Lambda(function=l1_norm,
                               output_shape=lambda x: x[0],
                               name='L1_distance')(
                                   [left_output, right_output])
        # tanh output (sigmoid is the alternative noted by the author)
        predictions = layers.Dense(1,
                                   activation='tanh',
                                   name='Similarity_layer')(merged)

        # Instantiating and training the model: when you train such a model,
        # the weights of the LSTM layer are updated based on both inputs.
        self.model = Model([left_input, right_input], predictions)

        self.__compile()
        # summary() prints the table itself and returns None, so it must not
        # be wrapped in print() (that emitted a stray "None" line).
        self.model.summary()

        if plot_model_architecture:
            from tensorflow.python.keras.utils import plot_model
            plot_model(self.model, to_file='siamese_architecture.png')

    def __compile(self):
        """Compile ``self.model`` with MSE loss, Adadelta and Pearson metric."""
        # gradient clipping is not there in Adadelta implementation in keras
        optimizer = Adadelta()
        self.model.compile(loss='mse',
                           optimizer=optimizer,
                           metrics=[pearson_correlation])

    def fit(self,
            left_data,
            right_data,
            targets,
            validation_data,
            epochs=5,
            batch_size=128):
        """Train the model, plot the training curves and build the activation model.

        Parameters
        ----------
        left_data, right_data : inputs for the two branches
        targets : training targets
        validation_data : passed through to ``Model.fit``
        epochs : int
        batch_size : int
        """
        # The paper employs early stopping based on validation, but does not
        # mention its parameters, so no early-stopping callback is enabled.
        callbacks = []
        history = self.model.fit([left_data, right_data],
                                 targets,
                                 validation_data=validation_data,
                                 epochs=epochs,
                                 batch_size=batch_size,
                                 callbacks=callbacks)

        self.visualize_metric(history.history, 'loss')
        self.visualize_metric(history.history, 'pearson_correlation')
        self.load_activation_model()

    def visualize_metric(self, history_dic, metric_name):
        """Plot a training metric and, when present, its ``val_`` counterpart."""
        plt.plot(history_dic[metric_name])
        legend = ['train']
        if 'val_' + metric_name in history_dic:
            plt.plot(history_dic['val_' + metric_name])
            legend.append('test')
        plt.title('model ' + metric_name)
        plt.ylabel(metric_name)
        plt.xlabel('epoch')
        plt.legend(legend, loc='upper left')
        plt.show()

    def predict(self, left_data, right_data):
        """Return the model predictions for the given pair of inputs."""
        return self.model.predict([left_data, right_data])

    def evaluate(self, left_data, right_data, targets, batch_size=128):
        """Return the result of ``Model.evaluate`` on the given pairs."""
        return self.model.evaluate([left_data, right_data],
                                   targets,
                                   batch_size=batch_size)

    def load_activation_model(self):
        """Create a sub-model mapping the first input to the LSTM output."""
        self.activation_model = Model(
            inputs=self.model.input[0],
            outputs=self.model.get_layer('lstm_layer').output)

    def visualize_activation(self, data):
        """Show the LSTM activations for ``data`` as a heat map."""
        activations = self.activation_model.predict(data)
        plt.figure(figsize=(10, 100), dpi=80)
        plt.imshow(activations, cmap='Blues')
        plt.grid()
        # One tick per activation column (previously hard-coded to 50, the
        # value of n_hidden used in __init__).
        plt.xticks(ticks=range(0, activations.shape[1]))
        plt.yticks(ticks=range(0, data.shape[0]))
        plt.show()

    def visualize_specific_activation(self, data, dimension_idx):
        """Plot one activation dimension for every sample in ``data``.

        Raises
        ------
        ValueError
            If ``dimension_idx`` is not smaller than the number of activation
            dimensions.
        """
        activations = self.activation_model.predict(data)
        if dimension_idx >= activations.shape[1]:
            raise ValueError('dimension_idx must be less than %d' %
                             activations.shape[1])
        fig = plt.figure(figsize=(10, 1), dpi=80)
        ax = fig.add_subplot(111)
        plt.title('dimension_idx = %d' % dimension_idx)
        weights = activations[:, dimension_idx]
        plt.yticks(ticks=[0, 1])
        plt.plot(weights, np.zeros_like(weights), 'o')
        # Label every point with its 1-based sample index.
        for i, weight in enumerate(weights):
            ax.annotate((i + 1), (weight, 0))
        plt.show()

    def save(self, model_folder='./model/'):
        """Write the architecture (JSON) and weights (HDF5) to ``model_folder``.

        ``model_folder`` is concatenated with the file names, so it must end
        with a path separator.
        """
        # serialize model to JSON
        model_json = self.model.to_json()
        with open(model_folder + 'model.json', 'w') as json_file:
            json_file.write(model_json)
        # serialize weights to HDF5
        self.model.save_weights(model_folder + 'model.h5')
        print('Saved model to disk')

    def save_pretrained_weights(
            self, model_wieghts_path='./model/pretrained_weights.h5'):
        """Write only the model weights to ``model_wieghts_path``."""
        self.model.save_weights(model_wieghts_path)
        print('Saved pretrained weights to disk')

    def load(self, model_folder='./model/'):
        """Rebuild the model from ``model.json`` / ``model.h5`` in ``model_folder``."""
        # load json and create model; the context manager guarantees the file
        # is closed even if reading fails
        with open(model_folder + 'model.json', 'r') as json_file:
            loaded_model_json = json_file.read()
        loaded_model = model_from_json(loaded_model_json)
        # load weights into new model
        loaded_model.load_weights(model_folder + 'model.h5')
        print('Loaded model from disk')

        self.model = loaded_model
        # loaded model should be compiled
        self.__compile()
        self.load_activation_model()

    def load_pretrained_weights(
            self, model_wieghts_path='./model/pretrained_weights.h5'):
        """Load weights into the current model and recompile it."""
        # load weights into new model
        self.model.load_weights(model_wieghts_path)
        print('Loaded pretrained weights from disk')
        self.__compile()
class BaseModel(object):
    """Base Model Interface

    Methods
    ----------
    fit(train_data, valid_data, epochs, batchsize, **kwargs)
    predict(X)
    evaluate(X, y)

    Examples
    ----------
    >>> model = BaseModel("example", inference, "model.h5")
    >>> model.fit([X_train, y_train], [X_val, y_val])
    """

    def __init__(self, name, fn, model_path):
        """Constructor for BaseModel

        Parameters
        ----------
        name : str
            Name of this model
        fn : function
            Inference function, y = fn(X)
        model_path : str
            Path to a model.h5
        """
        X = Input(shape=[28, 28, 1])
        y = fn(X)

        self.model = Model(X, y, name=name)
        self.model.compile("adam", "categorical_crossentropy", ["accuracy"])
        self.model.summary()

        self.path = model_path
        self.name = name
        # Existing weights are not loaded automatically; call load() explicitly.

    def fit(self, train_data, valid_data, epochs=10, batchsize=128, **kwargs):
        """Training function

        Evaluate at each epoch against validation data
        Save the best model according to the validation loss

        Parameters
        ----------
        train_data : tuple, (X_train, y_train)
            X_train.shape == (N, H, W, C)
            y_train.shape == (N, N_classes)
        valid_data : tuple
            (X_val, y_val)
        epochs : int
            Number of epochs to train
        batchsize : int
            Minibatch size
        **kwargs
            Keywords arguments for `fit_generator`
        """
        import math

        callback_best_only = ModelCheckpoint(self.path, save_best_only=True)
        train_gen, val_gen = train_generator()

        X_train, y_train = train_data
        X_val, y_val = valid_data

        N = X_train.shape[0]
        # The placeholder was previously never filled in because the value
        # was passed as a second print() argument instead of to format().
        print("[DEBUG] N -> {}".format(X_train.shape))
        N_val = X_val.shape[0]

        # Step counts must be integers; round up so the last, partially
        # filled batch of each pass is still consumed.
        train_steps = math.ceil(N / batchsize)
        val_steps = math.ceil(N_val / batchsize)

        self.model.fit_generator(train_gen.flow(X_train, y_train, batchsize),
                                 steps_per_epoch=train_steps,
                                 validation_data=val_gen.flow(
                                     X_val, y_val, batchsize),
                                 validation_steps=val_steps,
                                 epochs=epochs,
                                 callbacks=[callback_best_only],
                                 **kwargs)

    def save(self):
        """Save weights

        Should not be used manually
        """
        self.model.save_weights(self.path)

    def freeze(self, export_dir):
        """Export the model as a SavedModel.

        The export goes to a sub-directory of `export_dir` named after the
        current Unix timestamp (whole seconds).
        """
        tf.saved_model.simple_save(
            K.get_session(),
            os.path.join(export_dir, str(int(time.time()))),
            inputs={'inputs': self.model.input},
            outputs={t.name: t
                     for t in self.model.outputs})

    def load(self):
        """Load weights from self.path if that file exists"""
        if os.path.isfile(self.path):
            self.model.load_weights(self.path)
            print("Model loaded")
        else:
            print("No model is found")

    def predict(self, X):
        """Return probabilities for each classes

        Parameters
        ----------
        X : array-like (N, H, W, C)

        Returns
        ----------
        y : array-like (N, N_classes)
            Probability array
        """
        return self.model.predict(X)

    def evaluate(self, X, y):
        """Return the result of `Model.evaluate` (loss and accuracy)

        Parameters
        ----------
        X : array-like (N, H, W, C)
        y : array-like (N, N_classes)

        Returns
        ----------
        Whatever `self.model.evaluate(X, y)` returns; the model is compiled
        with the "accuracy" metric.
        """
        return self.model.evaluate(X, y)
def main(cvset=0,
         n_features=5000,
         batch_size=1000,
         p_drop=0.5,
         latent_dim=2,
         n_epoch=5000,
         run_iter=0,
         exp_name='nagent',
         model_id='nagent_model'):
    """Train a masked autoencoder and save its weights and latent summary.

    Parameters
    ----------
    cvset : cross-validation set id, forwarded to ``dataIO`` and encoded in
        the output file name.
    n_features : number of features, forwarded to ``dataIO``.
    batch_size : minibatch size used for training.
    p_drop : dropout rate applied to the input.
    latent_dim : width of the bottleneck layer.
    n_epoch : number of training epochs.
    run_iter : run index, only used in the output file name.
    exp_name : experiment name, forwarded to ``dataIO``.
    model_id : prefix of the output file name.

    Side effects: writes a CSV training log under ``dir_pth['logs']`` and the
    weights (``.h5``) plus a ``-summary.mat`` file under ``dir_pth['result']``.
    """
    # Forward the requested cvset (it used to be hard-coded to 0 here while
    # still being written into the file id below).
    train_dict, val_dict, full_dict, dir_pth = dataIO(cvset=cvset,
                                                      n_features=n_features,
                                                      exp_name=exp_name,
                                                      train_size=25000)

    #Architecture parameters ------------------------------
    input_dim = train_dict['X'].shape[1]
    print(input_dim)
    fc_dim = 50

    fileid = model_id + \
        '_cv_' + str(cvset) + \
        '_ng_' + str(n_features) + \
        '_pd_' + str(p_drop) + \
        '_bs_' + str(batch_size) + \
        '_ld_' + str(latent_dim) + \
        '_ne_' + str(n_epoch) + \
        '_ri_' + str(run_iter)
    # Dots (e.g. from p_drop) are replaced so the id is safe as a file stem.
    fileid = fileid.replace('.', '-')
    print(fileid)

    n_agents = 1

    #Model definition -----------------------------------------------
    M = {}
    M['in_ae'] = Input(shape=(input_dim, ), name='in_ae')
    M['mask_ae'] = Input(shape=(input_dim, ), name='mask_ae')
    for i in range(n_agents):
        sfx = str(i)
        M['dr_ae_' + sfx] = Dropout(p_drop, name='dr_ae_' + sfx)(M['in_ae'])
        # Encoder
        M['fc01_ae_' + sfx] = Dense(fc_dim, activation='elu',
                                    name='fc01_ae_' + sfx)(M['dr_ae_' + sfx])
        M['fc02_ae_' + sfx] = Dense(fc_dim, activation='elu',
                                    name='fc02_ae_' + sfx)(M['fc01_ae_' + sfx])
        M['fc03_ae_' + sfx] = Dense(fc_dim, activation='elu',
                                    name='fc03_ae_' + sfx)(M['fc02_ae_' + sfx])
        M['fc04_ae_' + sfx] = Dense(fc_dim, activation='elu',
                                    name='fc04_ae_' + sfx)(M['fc03_ae_' + sfx])
        M['fc05_ae_' + sfx] = Dense(latent_dim, activation='linear',
                                    name='fc05_ae_' + sfx)(M['fc04_ae_' + sfx])
        # Latent representation: batch-normalized bottleneck without learned
        # scale or offset.
        M['ld_ae_' + sfx] = BatchNormalization(scale=False,
                                               center=False,
                                               epsilon=1e-10,
                                               momentum=0.,
                                               name='ld_ae_' + sfx)(
                                                   M['fc05_ae_' + sfx])
        # Decoder
        M['fc06_ae_' + sfx] = Dense(fc_dim, activation='elu',
                                    name='fc06_ae_' + sfx)(M['ld_ae_' + sfx])
        M['fc07_ae_' + sfx] = Dense(fc_dim, activation='elu',
                                    name='fc07_ae_' + sfx)(M['fc06_ae_' + sfx])
        # NOTE(review): the layer name 'fc08_ae_c<i>' breaks the naming
        # pattern; it is kept unchanged so previously saved weights still
        # match by name.
        M['fc08_ae_' + sfx] = Dense(fc_dim, activation='elu',
                                    name='fc08_ae_c' + sfx)(
                                        M['fc07_ae_' + sfx])
        M['fc09_ae_' + sfx] = Dense(fc_dim, activation='elu',
                                    name='fc09_ae_' + sfx)(M['fc08_ae_' + sfx])
        M['ou_ae_' + sfx] = Dense(input_dim, activation='linear',
                                  name='ou_ae_' + sfx)(M['fc09_ae_' + sfx])

    AE = Model(inputs=[M['in_ae'], M['mask_ae']],
               outputs=[M['ou_ae_' + str(i)] for i in range(n_agents)])

    def masked_mse(X, Y, mask):
        """Return a Keras loss yielding the mean of mask * (X - Y)**2.

        The loss tensor is built once from the graph tensors passed in; the
        returned function ignores its ``y_true`` / ``y_pred`` arguments.
        """
        loss_val = tf.reduce_mean(
            tf.multiply(tf.math.squared_difference(X, Y), mask))

        def masked_loss(y_true, y_pred):
            return loss_val

        return masked_loss

    #Create loss dictionary (each agent is scored on its own output; this
    #used to reference 'ou_ae_0' for every agent)
    loss_dict = {
        'ou_ae_' + str(i): masked_mse(M['in_ae'], M['ou_ae_' + str(i)],
                                      M['mask_ae'])
        for i in range(n_agents)
    }

    #Loss weights dictionary
    loss_wt_dict = {'ou_ae_' + str(i): 1.0 for i in range(n_agents)}

    #Add loss definitions to the model
    AE.compile(optimizer='adam', loss=loss_dict, loss_weights=loss_wt_dict)

    #Custom logging
    cb_obj = CSVLogger(filename=dir_pth['logs'] + fileid + '.csv')

    train_input_dict = {
        'in_ae': train_dict['X'],
        'mask_ae': train_dict['mask']
    }
    train_output_dict = {
        'ou_ae_' + str(i): train_dict['X']
        for i in range(n_agents)
    }

    val_input_dict = {'in_ae': val_dict['X'], 'mask_ae': val_dict['mask']}
    val_output_dict = {
        'ou_ae_' + str(i): val_dict['X']
        for i in range(n_agents)
    }

    #Model training
    start_time = timeit.default_timer()
    AE.fit(train_input_dict,
           train_output_dict,
           batch_size=batch_size,
           initial_epoch=0,
           epochs=n_epoch,
           validation_data=(val_input_dict, val_output_dict),
           verbose=2,
           callbacks=[cb_obj])

    elapsed = timeit.default_timer() - start_time
    print('-------------------------------')
    print('Training time:', elapsed)
    print('-------------------------------')

    #Save weights
    AE.save_weights(dir_pth['result'] + fileid + '-modelweights' + '.h5')

    #Generate summaries
    summary = {}
    for i in range(n_agents):
        encoder = Model(inputs=M['in_ae'], outputs=M['ld_ae_' + str(i)])
        # NOTE(review): the key 'z' is shared, so with more than one agent
        # only the last agent's latent codes would be kept.
        summary['z'] = encoder.predict(full_dict['X'])

    sio.savemat(dir_pth['result'] + fileid + '-summary.mat', summary)
cpu_relocation=False, cpu_merge=True) parallel_model.compile(optimizer=Adam(lr=1e-3), loss='mean_squared_error', metrics=[r_squared]) history = parallel_model.fit_generator( generator=training_generator, validation_data=validation_generator, epochs=epochs, use_multiprocessing=False, callbacks=[tensorboard], workers=4) # check weights # https://github.com/keras-team/keras/issues/11313 weights = keras.backend.batch_get_value(model.weights) parallel_weights = keras.backend.batch_get_value(parallel_model.weights) if all([np.all(w == ow) for w, ow in zip(weights, original_weights)]): print('Weights in the template model have not changed') else: print('Weights in the template model have changed') if all([np.all(w == pw) for w, pw in zip(weights, parallel_weights)]): print('Weights in the template and parallel model are equal') else: print('Weights in the template and parallel model are different') # save weights os.makedirs(weights_dir, exist_ok=True) model.save_weights(os.path.join(weights_dir, 'pretrained_weight.h5'))
def train_frcnn(options):
    """Train the Faster R-CNN RPN and classifier from command-line options.

    ``options`` supplies the attributes read below: ``parser``, ``network``,
    ``path``, ``horizontal_flips``, ``vertical_flips``, ``rot_90``,
    ``output_weight_path``, ``num_rois``, ``input_weight_path``,
    ``config_filename``, ``epoch_length`` and ``num_epochs``.

    The configuration object is pickled to ``options.config_filename``.
    Whenever the summed epoch loss improves, the classifier and RPN weights
    are written to ``C.model_path + "classifier.h5"`` and
    ``C.model_path + "rpn.h5"``.

    Raises
    ------
    ValueError
        If ``options.parser`` or ``options.network`` is not a supported value.
    """
    if options.parser == 'pascal_voc':
        from utils import voc_parser as get_data
    elif options.parser == 'simple':
        from utils import simple_parser as get_data
    else:
        raise ValueError(
            "Command line option parser must be one of 'pascal_voc' or 'simple'"
        )

    # pass the settings from the command line, and persist them in the config object
    C = Config()

    C.use_horizontal_flips = bool(options.horizontal_flips)
    C.use_vertical_flips = bool(options.vertical_flips)
    C.rot_90 = bool(options.rot_90)

    C.model_path = options.output_weight_path.format(options.network)
    C.num_rois = int(options.num_rois)

    if options.network == 'resnet50':
        C.network = 'resnet50'
        from utils import rpn_res as rpn
        from utils import classifier_res as classifier_func
        from utils import get_img_output_length_res as get_img_output_length
        from utils import nn_base_res as nn_base
    elif options.network == 'vgg':
        C.network = 'vgg'
        from utils import rpn_vgg as rpn
        from utils import classifier_vgg as classifier_func
        from utils import get_img_output_length_vgg as get_img_output_length
        from utils import nn_base_vgg as nn_base
    else:
        print('Not a valid model')
        raise ValueError('Not a valid model')

    # check if weight path was passed via command line
    if options.input_weight_path:
        C.base_net_weights = options.input_weight_path
    else:
        # set the path to weights based on backend and model
        C.base_net_weights = get_weight_path(options.network)

    all_imgs, classes_count, class_mapping = get_data(options.path)

    # Make sure a background class is always present.
    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    C.class_mapping = class_mapping

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))

    config_output_filename = options.config_filename

    with open(config_output_filename, 'wb') as config_f:
        pickle.dump(C, config_f)
        print(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(config_output_filename))

    train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval']
    val_imgs = [s for s in all_imgs if s['imageset'] == 'test']

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    data_gen_train = get_anchor_gt(train_imgs,
                                   classes_count,
                                   C,
                                   get_img_output_length,
                                   K.backend(),
                                   mode='train')
    # NOTE(review): the validation generator is created but not consumed in
    # this function.
    data_gen_val = get_anchor_gt(val_imgs,
                                 classes_count,
                                 C,
                                 get_img_output_length,
                                 K.backend(),
                                 mode='val')

    if K.backend() == "theano":
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    # `rpn` is rebound from the builder function to its outputs here.
    rpn = rpn(shared_layers, num_anchors)

    classifier = classifier_func(shared_layers,
                                 roi_input,
                                 C.num_rois,
                                 nb_classes=len(classes_count),
                                 trainable=True)

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    try:
        print('loading weights from {}'.format(C.base_net_weights))
        model_rpn.load_weights(C.base_net_weights + "rpn.h5", by_name=True)
        model_classifier.load_weights(C.base_net_weights + "classifier.h5",
                                      by_name=True)
    except Exception as e:
        # Fall back to a single weights file shared by both models.
        model_rpn.load_weights(C.base_net_weights, by_name=True)
        model_classifier.load_weights(C.base_net_weights, by_name=True)
        print('Exception: {}'.format(e))

    optimizer = Adam(lr=1e-5, decay=2e-7)
    optimizer_classifier = Adam(lr=1e-5, decay=2e-7)
    model_rpn.compile(
        optimizer=optimizer,
        loss=[rpn_loss_cls(num_anchors),
              rpn_loss_regr(num_anchors)])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[class_loss_cls,
              class_loss_regr(len(classes_count) - 1)],
        metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mae')

    epoch_length = options.epoch_length
    num_epochs = int(options.num_epochs)
    iter_num = 0

    # Columns: rpn_cls, rpn_regr, detector_cls, detector_regr, detector_acc
    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()

    # np.inf rather than the np.Inf alias, which NumPy 2.0 removed.
    best_loss = np.inf

    print('Starting training')

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:

                if len(rpn_accuracy_rpn_monitor
                       ) == epoch_length and C.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                        .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap the ground truth boxes. '
                            'Check RPN settings or keep training.')

                X, Y, img_data = next(data_gen_train)

                loss_rpn = model_rpn.train_on_batch(X, Y)

                P_rpn = model_rpn.predict_on_batch(X)

                R = rpn_to_roi(P_rpn[0],
                               P_rpn[1],
                               C,
                               K.backend(),
                               use_regr=True,
                               overlap_thresh=0.7,
                               max_boxes=300)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                X2, Y1, Y2, IouS = calc_iou(R, img_data, C, class_mapping)

                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                # The last class column flags a sample as negative
                # (presumably the background class — confirm against calc_iou).
                neg_samples = np.where(Y1[0, :, -1] == 1)
                pos_samples = np.where(Y1[0, :, -1] == 0)

                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0]
                else:
                    neg_samples = []

                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append((len(pos_samples)))

                if C.num_rois > 1:
                    if len(pos_samples) < C.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, C.num_rois // 2,
                            replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            C.num_rois - len(selected_pos_samples),
                            replace=False).tolist()
                    except ValueError:
                        # Not enough negatives to sample without
                        # replacement: allow repeats instead.
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            C.num_rois - len(selected_pos_samples),
                            replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(selected_neg_samples)
                    else:
                        sel_samples = random.choice(selected_pos_samples)

                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]

                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                progbar.update(iter_num + 1,
                               [('rpn_cls', losses[iter_num, 0]),
                                ('rpn_regr', losses[iter_num, 1]),
                                ('detector_cls', losses[iter_num, 2]),
                                ('detector_regr', losses[iter_num, 3])])

                iter_num += 1

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if C.verbose:
                        print(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        print(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        print('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() -
                                                        start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if C.verbose:
                            print(
                                f'Total loss decreased from {best_loss:.3f} to {curr_loss:.3f}, saving weights to '
                                f'{C.model_path}')
                        best_loss = curr_loss
                        model_classifier.save_weights(C.model_path +
                                                      "classifier.h5")
                        model_rpn.save_weights(C.model_path + "rpn.h5")

                    break

            except Exception as e:
                # Best effort: report the failure for this batch and move on
                # to the next one.
                print('Exception: {}'.format(e))
                continue

    print('Training complete, exiting.')
class textgenrnn:
    """Character- or word-level text-generating RNN wrapper.

    Holds the Keras model, its vocabulary and configuration, and offers
    helpers to generate text, train (from lists of texts or from files),
    save/load weights and encode texts as vectors.
    """

    META_TOKEN = '<s>'
    # Class-level defaults; every instance works on its own copies.
    config = {
        'rnn_layers': 2,
        'rnn_size': 128,
        'rnn_bidirectional': False,
        'max_length': 40,
        'max_words': 10000,
        'dim_embeddings': 100,
        'word_level': False,
        'single_text': False
    }
    default_config = config.copy()

    def __init__(self,
                 weights_path=None,
                 vocab_path=None,
                 config_path=None,
                 name="textgenrnn_tf"):
        """Load vocabulary, configuration and weights and build the model.

        Paths left as None fall back to the files bundled with the package
        (weights, vocab) or to the class defaults (config). ``name`` is
        stored in the configuration and used as prefix for saved files.
        """
        if weights_path is None:
            weights_path = resource_filename(__name__,
                                             'textgenrnn_weights.hdf5')

        if vocab_path is None:
            vocab_path = resource_filename(__name__, 'textgenrnn_vocab.json')

        if config_path is not None:
            with open(config_path, 'r', encoding='utf8',
                      errors='ignore') as json_file:
                self.config = json.load(json_file)
        else:
            # Copy so that setting 'name' below does not modify the dict
            # shared by all instances through the class attribute.
            self.config = self.config.copy()
        self.default_config = self.default_config.copy()

        self.config.update({'name': name})
        self.default_config.update({'name': name})

        with open(vocab_path, 'r', encoding='utf8',
                  errors='ignore') as json_file:
            self.vocab = json.load(json_file)

        self.tokenizer = Tokenizer(filters='', lower=False, char_level=True)
        self.tokenizer.word_index = self.vocab
        self.num_classes = len(self.vocab) + 1
        self.model = textgenrnn_model(self.num_classes,
                                      cfg=self.config,
                                      weights_path=weights_path)
        self.indices_char = dict((self.vocab[c], c) for c in self.vocab)

    def generate(self,
                 n=1,
                 return_as_list=False,
                 prefix=None,
                 temperature=[1.0, 0.5, 0.2, 0.2],
                 max_gen_length=300,
                 interactive=False,
                 top_n=3,
                 progress=True):
        """Generate ``n`` texts.

        The texts are printed unless ``return_as_list`` is True, in which
        case they are returned as a list (otherwise None is returned).
        The remaining arguments are passed through to ``textgenrnn_generate``.
        """
        gen_texts = []
        iterable = trange(n) if progress and n > 1 else range(n)
        for _ in iterable:
            gen_text, _ = textgenrnn_generate(
                self.model, self.vocab, self.indices_char, temperature,
                self.config['max_length'], self.META_TOKEN,
                self.config['word_level'],
                self.config.get('single_text', False), max_gen_length,
                interactive, top_n, prefix)
            if not return_as_list:
                print("{}\n".format(gen_text))
            gen_texts.append(gen_text)
        if return_as_list:
            return gen_texts

    def generate_samples(self, n=3, temperatures=[0.2, 0.5, 1.0], **kwargs):
        """Print ``n`` generated texts for each temperature in ``temperatures``."""
        for temperature in temperatures:
            print('#' * 20 + '\nTemperature: {}\n'.format(temperature) +
                  '#' * 20)
            self.generate(n,
                          temperature=temperature,
                          progress=False,
                          **kwargs)

    def train_on_texts(self,
                       texts,
                       context_labels=None,
                       batch_size=128,
                       num_epochs=50,
                       verbose=1,
                       new_model=False,
                       gen_epochs=1,
                       train_size=1.0,
                       max_gen_length=300,
                       validation=True,
                       dropout=0.0,
                       via_new_model=False,
                       save_epochs=0,
                       multi_gpu=False,
                       **kwargs):
        """Train the model on a list of texts.

        With ``new_model=True`` (and not already coming from
        ``train_new_model``) the call is delegated to ``train_new_model``.
        ``train_size`` < 1.0 holds out a random share of the sequences for
        validation when ``validation`` is True. The legacy keyword
        ``prop_keep`` overrides ``train_size``.
        """
        if new_model and not via_new_model:
            self.train_new_model(texts,
                                 context_labels=context_labels,
                                 num_epochs=num_epochs,
                                 gen_epochs=gen_epochs,
                                 train_size=train_size,
                                 batch_size=batch_size,
                                 dropout=dropout,
                                 validation=validation,
                                 save_epochs=save_epochs,
                                 multi_gpu=multi_gpu,
                                 **kwargs)
            return

        if context_labels:
            context_labels = LabelBinarizer().fit_transform(context_labels)

        if 'prop_keep' in kwargs:
            # Read the value from kwargs (a bare `prop_keep` name was
            # undefined and raised NameError).
            train_size = kwargs['prop_keep']

        if self.config['word_level']:
            texts = [text_to_word_sequence(text, filters='') for text in texts]

        # calculate all combinations of text indices + token indices
        indices_list = [
            np.meshgrid(np.array(i), np.arange(len(text) + 1))
            for i, text in enumerate(texts)
        ]
        indices_list = np.block(indices_list)

        # If a single text, there will be 2 extra indices, so remove them
        # Also remove first sequences which use padding
        if self.config['single_text']:
            indices_list = indices_list[self.config['max_length']:-2, :]

        indices_mask = np.random.rand(indices_list.shape[0]) < train_size

        if multi_gpu:
            num_gpus = len(K.tensorflow_backend._get_available_gpus())
            batch_size = batch_size * num_gpus

        gen_val = None
        val_steps = None
        if train_size < 1.0 and validation:
            indices_list_val = indices_list[~indices_mask, :]
            gen_val = generate_sequences_from_texts(texts, indices_list_val,
                                                    self, context_labels,
                                                    batch_size)
            val_steps = max(
                int(np.floor(indices_list_val.shape[0] / batch_size)), 1)

        indices_list = indices_list[indices_mask, :]

        num_tokens = indices_list.shape[0]
        assert num_tokens >= batch_size, "Fewer tokens than batch_size."

        level = 'word' if self.config['word_level'] else 'character'
        print("Training on {:,} {} sequences.".format(num_tokens, level))

        steps_per_epoch = max(int(np.floor(num_tokens / batch_size)), 1)

        gen = generate_sequences_from_texts(texts, indices_list, self,
                                            context_labels, batch_size)

        base_lr = 4e-3

        # scheduler function must be defined inline.
        def lr_linear_decay(epoch):
            return (base_lr * (1 - (epoch / num_epochs)))

        if context_labels is not None:
            if new_model:
                weights_path = None
            else:
                # Persist the current weights so the context-aware model
                # built below can start from them.
                weights_path = "{}_weights.hdf5".format(self.config['name'])
                self.save(weights_path)

            self.model = textgenrnn_model(self.num_classes,
                                          dropout=dropout,
                                          cfg=self.config,
                                          context_size=context_labels.shape[1],
                                          weights_path=weights_path)

        model_t = self.model

        if multi_gpu:
            # Do not locate model/merge on CPU since sample sizes are small.
            parallel_model = multi_gpu_model(self.model,
                                             gpus=num_gpus,
                                             cpu_merge=False)
            parallel_model.compile(loss='categorical_crossentropy',
                                   optimizer=RMSprop(lr=4e-3, rho=0.99))

            model_t = parallel_model
            print("Training on {} GPUs.".format(num_gpus))

        model_t.fit_generator(gen,
                              steps_per_epoch=steps_per_epoch,
                              epochs=num_epochs,
                              callbacks=[
                                  LearningRateScheduler(lr_linear_decay),
                                  generate_after_epoch(self, gen_epochs,
                                                       max_gen_length),
                                  save_model_weights(self, num_epochs,
                                                     save_epochs)
                              ],
                              verbose=verbose,
                              max_queue_size=10,
                              validation_data=gen_val,
                              validation_steps=val_steps)

        # Keep the text-only version of the model if using context labels
        if context_labels is not None:
            self.model = Model(inputs=self.model.input[0],
                               outputs=self.model.output[1])

    def train_new_model(self,
                        texts,
                        context_labels=None,
                        num_epochs=50,
                        gen_epochs=1,
                        batch_size=128,
                        dropout=0.0,
                        train_size=1.0,
                        validation=True,
                        save_epochs=0,
                        multi_gpu=False,
                        **kwargs):
        """Build a fresh vocabulary and model from ``texts`` and train it.

        ``kwargs`` override entries of the default configuration. The new
        vocabulary and configuration are written to ``<name>_vocab.json`` and
        ``<name>_config.json``. For word-level models the entries of
        ``texts`` are modified in place (spaces added around punctuation).
        """
        self.config = self.default_config.copy()
        self.config.update(**kwargs)

        print("Training new model w/ {}-layer, {}-cell {}LSTMs".format(
            self.config['rnn_layers'], self.config['rnn_size'],
            'Bidirectional ' if self.config['rnn_bidirectional'] else ''))

        # If training word level, must add spaces around each punctuation.
        # https://stackoverflow.com/a/3645946/9314418
        if self.config['word_level']:
            # '\\]' yields the same characters as the former '\]' without
            # relying on an invalid string escape.
            punct = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\\n\\t\'‘’“”’–—'
            for i in range(len(texts)):
                texts[i] = re.sub('([{}])'.format(punct), r' \1 ', texts[i])
                texts[i] = re.sub(' {2,}', ' ', texts[i])

        # Create text vocabulary for new texts
        # lowercase only when word-level; char-level keeps the original case
        self.tokenizer = Tokenizer(filters='',
                                   lower=self.config['word_level'],
                                   char_level=(not self.config['word_level']))
        self.tokenizer.fit_on_texts(texts)

        # Limit vocab to max_words
        max_words = self.config['max_words']
        self.tokenizer.word_index = {
            k: v
            for (k, v) in self.tokenizer.word_index.items() if v <= max_words
        }

        if not self.config.get('single_text', False):
            self.tokenizer.word_index[self.META_TOKEN] = len(
                self.tokenizer.word_index) + 1
        self.vocab = self.tokenizer.word_index
        self.num_classes = len(self.vocab) + 1
        self.indices_char = dict((self.vocab[c], c) for c in self.vocab)

        # Create a new, blank model w/ given params
        self.model = textgenrnn_model(self.num_classes,
                                      dropout=dropout,
                                      cfg=self.config)

        # Save the files needed to recreate the model
        with open('{}_vocab.json'.format(self.config['name']),
                  'w',
                  encoding='utf8') as outfile:
            json.dump(self.tokenizer.word_index, outfile, ensure_ascii=False)

        with open('{}_config.json'.format(self.config['name']),
                  'w',
                  encoding='utf8') as outfile:
            json.dump(self.config, outfile, ensure_ascii=False)

        self.train_on_texts(texts,
                            new_model=True,
                            via_new_model=True,
                            context_labels=context_labels,
                            num_epochs=num_epochs,
                            gen_epochs=gen_epochs,
                            train_size=train_size,
                            batch_size=batch_size,
                            dropout=dropout,
                            validation=validation,
                            save_epochs=save_epochs,
                            multi_gpu=multi_gpu,
                            **kwargs)

    def save(self, weights_path="textgenrnn_weights_saved.hdf5"):
        """Write the model weights to ``weights_path``."""
        self.model.save_weights(weights_path)

    def load(self, weights_path):
        """Rebuild the model with the current config and the given weights."""
        self.model = textgenrnn_model(self.num_classes,
                                      cfg=self.config,
                                      weights_path=weights_path)

    def reset(self):
        """Restore the default configuration and re-run ``__init__``."""
        self.config = self.default_config.copy()
        self.__init__(name=self.config['name'])

    def train_from_file(self,
                        file_path,
                        header=True,
                        delim="\n",
                        new_model=False,
                        context=None,
                        is_csv=False,
                        **kwargs):
        """Read texts (and optionally context labels) from a file and train."""
        context_labels = None
        if context:
            texts, context_labels = textgenrnn_texts_from_file_context(
                file_path)
        else:
            texts = textgenrnn_texts_from_file(file_path, header, delim,
                                               is_csv)

        print("{:,} texts collected.".format(len(texts)))
        if new_model:
            self.train_new_model(texts,
                                 context_labels=context_labels,
                                 **kwargs)
        else:
            self.train_on_texts(texts, context_labels=context_labels, **kwargs)

    def train_from_largetext_file(self, file_path, new_model=True, **kwargs):
        """Train on the whole content of ``file_path`` as one single text."""
        with open(file_path, 'r', encoding='utf8', errors='ignore') as f:
            texts = [f.read()]

        if new_model:
            self.train_new_model(texts, single_text=True, **kwargs)
        else:
            self.train_on_texts(texts, single_text=True, **kwargs)

    def generate_to_file(self, destination_path, **kwargs):
        """Generate texts and write them, one per line, to ``destination_path``."""
        texts = self.generate(return_as_list=True, **kwargs)
        # UTF-8 like every other file handled by this class, so generated
        # non-ASCII characters do not depend on the platform default.
        with open(destination_path, 'w', encoding='utf8') as f:
            for text in texts:
                f.write("{}\n".format(text))

    def encode_text_vectors(self,
                            texts,
                            pca_dims=50,
                            tsne_dims=None,
                            tsne_seed=None,
                            return_pca=False,
                            return_tsne=False):
        """Encode texts with the output of the model's 'attention' layer.

        Optionally reduces the vectors with PCA (``pca_dims``) and then t-SNE
        (``tsne_dims``). Returns the vectors, or a list
        ``[vectors, pca, tsne]`` restricted to the objects requested through
        ``return_pca`` / ``return_tsne``; a requested reducer that was not
        fitted is returned as None.
        """
        # if a single text, force it into a list:
        if isinstance(texts, str):
            texts = [texts]

        vector_output = Model(inputs=self.model.input,
                              outputs=self.model.get_layer('attention').output)
        encoded_vectors = []
        maxlen = self.config['max_length']
        for text in texts:
            if self.config['word_level']:
                text = text_to_word_sequence(text, filters='')
            text_aug = [self.META_TOKEN] + list(text[0:maxlen])
            encoded_text = textgenrnn_encode_sequence(text_aug, self.vocab,
                                                      maxlen)
            encoded_vector = vector_output.predict(encoded_text)
            encoded_vectors.append(encoded_vector)

        encoded_vectors = np.squeeze(np.array(encoded_vectors), axis=1)

        # Defined up front so the return_* flags never hit an unbound name.
        pca = None
        tsne = None

        if pca_dims is not None:
            assert len(texts) > 1, "Must use more than 1 text for PCA"
            pca = PCA(pca_dims)
            encoded_vectors = pca.fit_transform(encoded_vectors)

        if tsne_dims is not None:
            tsne = TSNE(tsne_dims, random_state=tsne_seed)
            encoded_vectors = tsne.fit_transform(encoded_vectors)

        return_objects = encoded_vectors
        if return_pca or return_tsne:
            return_objects = [return_objects]
        if return_pca:
            return_objects.append(pca)
        if return_tsne:
            return_objects.append(tsne)

        return return_objects

    def similarity(self, text, texts, use_pca=True):
        """Rank ``texts`` by cosine similarity to ``text``.

        Returns a list of ``(text, similarity)`` pairs sorted from most to
        least similar.
        """
        text_encoded = self.encode_text_vectors(text, pca_dims=None)
        if use_pca:
            texts_encoded, pca = self.encode_text_vectors(texts,
                                                          return_pca=True)
            # Project the query with the PCA fitted on the candidate texts.
            text_encoded = pca.transform(text_encoded)
        else:
            texts_encoded = self.encode_text_vectors(texts, pca_dims=None)

        cos_similairity = cosine_similarity(text_encoded, texts_encoded)[0]
        text_sim_pairs = list(zip(texts, cos_similairity))
        text_sim_pairs = sorted(text_sim_pairs, key=lambda x: -x[1])
        return text_sim_pairs
print("Error trying to load checkpoint.") print(error) x_data={'encoder_input':encoder_input_data, 'decoder_input':decoder_input_data} y_data={'decoder_output':decoder_output_data} model_train.fit(x=x_data,y=y_data,batch_size=512,validation_split=0.005,callbacks=callbacks) modelname1='MachineTranslationTrain' modelname2='MachineTranslationEncoder' modelname3='MachineTranslationDecoder' model_train.save('{}.keras'.format(modelname1)) model_encoder.save('{}.keras'.format(modelname2)) model_decoder.save('{}.keras'.format(modelname3)) with open('model_encoder.json', 'w', encoding='utf8') as f: f.write(model_encoder.to_json()) model_encoder.save_weights('model_encoder_weights.h5') with open('model_decoder.json', 'w', encoding='utf8') as f: f.write(model_decoder.to_json()) model_decoder.save_weights('model_decoder_weights.h5') with open('model_train.json', 'w', encoding='utf8') as f: f.write(model_train.to_json()) model_train.save_weights('model_train_weights.h5') #Translate Texts def translate(input_text,true_output_text=None): input_tokens=tokenizer_src.text_to_tokens(text=input_text,reverse=True,padding=True) initial_state=model_encoder.predict(input_tokens) max_tokens=tokenizer_dest.max_tokens shape=(1,max_tokens) decoder_input_data=np.zeros(shape=shape,dtype=np.int) token_int=token_start
def tune_model():
    """Fine-tune the upper Inception V3 layers together with the top classifier.

    Steps, in order:

    1. Build an ImageNet-initialised Inception V3 base without its top and
       with global average pooling.
    2. Put a 5-way softmax classifier on top and load its weights from
       ``top_model_weights_path``.
    3. Mark every layer that comes before ``mixed8`` as non-trainable.
    4. Print the validation performance measured before any tuning.
    5. Train on augmented images, logging each epoch to
       ``output_dir + 'model_tuning.csv'``.
    6. Save the resulting weights to ``tuned_weights_path``.

    All paths, image sizes, sample counts and epoch counts are read from
    module-level names.
    """
    # Convolutional base.
    backbone = inception_v3.InceptionV3(include_top=False,
                                        weights='imagenet',
                                        pooling='avg')
    print('Model loaded.')

    # Classifier head. Fine-tuning has to start from an already trained
    # classifier, hence the weights are loaded before the models are joined.
    head_input = Input(shape=backbone.output_shape[1:])
    head = Model(head_input, Dense(5, activation='softmax')(head_input))
    head.load_weights(top_model_weights_path)

    # Full network: base followed by the head.
    model = Model(inputs=backbone.inputs, outputs=head(backbone.outputs))

    # Freeze everything located before 'mixed8'; 'mixed8' itself and the
    # layers after it keep being updated.
    first_trainable = model.layers.index(model.get_layer(name='mixed8'))
    for frozen_layer in model.layers[:first_trainable]:
        frozen_layer.trainable = False

    # SGD with momentum and a very small learning rate.
    model.compile(loss='categorical_crossentropy',
                  optimizer=SGD(lr=1e-4, momentum=0.9),
                  metrics=['accuracy'])

    # Only the training images are augmented; both streams share the
    # Inception preprocessing function.
    augmenting_datagen = ImageDataGenerator(
        preprocessing_function=inception_v3.preprocess_input,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)
    plain_datagen = ImageDataGenerator(
        preprocessing_function=inception_v3.preprocess_input)

    flow_options = dict(target_size=(img_height, img_width),
                        batch_size=batch_size,
                        class_mode='categorical')
    train_flow = augmenting_datagen.flow_from_directory(
        train_data_dir, **flow_options)
    validation_flow = plain_datagen.flow_from_directory(
        validation_data_dir, **flow_options)

    validation_steps = nb_validation_samples // batch_size
    baseline = model.evaluate_generator(validation_flow, validation_steps)
    print('Model validation performance before fine-tuning:', baseline)

    # Fine-tune and persist the result.
    model.fit_generator(train_flow,
                        steps_per_epoch=nb_train_samples // batch_size,
                        epochs=tune_epochs,
                        validation_data=validation_flow,
                        validation_steps=validation_steps,
                        workers=4,
                        callbacks=[CSVLogger(output_dir + 'model_tuning.csv')])
    model.save_weights(tuned_weights_path)
'y_26', trainable=True, restore_output_weights=restore_output_weights)( x, **kwargs) x = Upsample(128, trainable=True)([x, x_52], **kwargs) x, fmap_52 = YOLOBlock(128, 3 * (5 + n_cls), 'y_52', trainable=True, restore_output_weights=restore_output_weights)( x, **kwargs) return fmap_52, fmap_26, fmap_13 return call if __name__ == '__main__': inputs = Input(shape=(416, 416, 3)) # model = yolo_v3(inputs) yolo_v3_net = YOLOv3Net(n_cls=80, restore_weights=True, trainable_backbone=True, use_spp=True, restore_output_weights=True) model = Model(inputs, outputs=yolo_v3_net(inputs)) model.save_weights('coco_init_weights_spp.h5') print( f'Restored {_weight_loader.cnt} of {len(_weight_loader.weights)} weights.' )
def main(batch_size=100, n_paired_per_batch=100, cvset=0, p_dropT=0.5,
         p_dropE=0.1, stdE=0.05, fc_dimT=[50, 50, 50, 50],
         fc_dimE=[60, 60, 60, 60], latent_dim=3, recon_strT=1.0,
         recon_strE=0.1, cpl_str=10.0, n_epoch=2000, steps_per_epoch=500,
         run_iter=0, model_id='crossval_noadaptloss',
         exp_name='patchseq_v2_noadapt'):
    """Train a two-arm coupled autoencoder and store weights and latent codes.

    Args:
        batch_size, n_paired_per_batch, steps_per_epoch: forwarded to the
            ``DatagenTE`` training generator.
        cvset, exp_name: forwarded to ``dataset_50fold`` to pick the data.
        p_dropT, p_dropE: dropout rate applied to the input of arm 0 (T) and
            arm 1 (E) respectively.
        stdE: standard deviation of the gaussian noise added to the arm 1
            input during the training phase.
        fc_dimT, fc_dimE: units of each encoder Dense layer per arm; the
            decoder uses the same list reversed.
        latent_dim: width of the latent layer of both arms.
        recon_strT, recon_strE: loss weights of the two reconstructions.
        cpl_str: loss weight of both coupling terms.
        n_epoch: number of training epochs.
        run_iter, model_id: only used to compose the output file id.

    Returns:
        None. Writes a CSV log, a weights file and a ``-summary`` mat file
        to the directories listed in ``dir_pth``.
    """
    (train_dat, val_dat, train_ind_T, train_ind_E,
     val_ind, dir_pth) = dataset_50fold(exp_name=exp_name, cvset=cvset)
    train_generator = DatagenTE(dataset=train_dat,
                                batch_size=batch_size,
                                n_paired_per_batch=n_paired_per_batch,
                                steps_per_epoch=steps_per_epoch)
    chkpt_save_period = 1e7

    # Architecture parameters ------------------------------
    input_dim = [train_dat['T'].shape[1], train_dat['E'].shape[1]]

    # The file id encodes the hyper-parameters; dots are replaced so that
    # float values do not look like file extensions.
    id_fields = [('_rT_', recon_strT), ('_rE_', recon_strE),
                 ('_cs_', cpl_str), ('_pdT_', p_dropT), ('_pdE_', p_dropE),
                 ('_sdE_', stdE), ('_bs_', batch_size),
                 ('_np_', n_paired_per_batch), ('_se_', steps_per_epoch),
                 ('_ne_', n_epoch), ('_cv_', cvset), ('_ri_', run_iter)]
    fileid = model_id + ''.join(tag + str(value) for tag, value in id_fields)
    fileid = fileid.replace('.', '-')
    print(fileid)

    out_actfcn = ['elu', 'linear']

    def add_gauss_noise(x):
        '''Add independent gaussian noise to every element of x (training phase only).'''
        noise = tf.random.normal(shape=tf.shape(x), mean=0., stddev=stdE,
                                 dtype=tf.float32)
        return tf.keras.backend.in_train_phase(x + noise, x)

    # Model inputs -----------------------------------------
    M = {}
    M['in_ae_0'] = Input(shape=(input_dim[0],), name='in_ae_0')
    M['in_ae_1'] = Input(shape=(input_dim[1],), name='in_ae_1')
    M['ispaired_ae_0'] = Input(shape=(1,), name='ispaired_ae_0')
    M['ispaired_ae_1'] = Input(shape=(1,), name='ispaired_ae_1')

    def build_arm(suffix, start_key, hidden_units, n_out, out_activation):
        '''Add encoder, latent and decoder layers of one arm to M, starting from M[start_key].'''
        prev = start_key
        for j, units in enumerate(hidden_units):
            key = 'fc{:02d}'.format(j) + suffix
            M[key] = Dense(units, activation='elu', name=key)(M[prev])
            prev = key

        pre_latent, latent = 'ldx' + suffix, 'ld' + suffix
        M[pre_latent] = Dense(latent_dim, activation='linear',
                              name=pre_latent)(M[prev])
        M[latent] = BatchNormalization(scale=False, center=False,
                                       epsilon=1e-10, momentum=0.99,
                                       name=latent)(M[pre_latent])

        prev = latent
        offset = len(hidden_units)
        for j, units in enumerate(reversed(hidden_units)):
            key = 'fc{:02d}'.format(j + offset) + suffix
            M[key] = Dense(units, activation='elu', name=key)(M[prev])
            prev = key

        out_key = 'ou' + suffix
        M[out_key] = Dense(n_out, activation=out_activation,
                           name=out_key)(M[prev])

    # Transcriptomics arm ----------------------------------
    M['dr_ae_0'] = Dropout(p_dropT, name='dr_ae_0')(M['in_ae_0'])
    build_arm('_ae_0', 'dr_ae_0', fc_dimT, input_dim[0], out_actfcn[0])

    # Electrophysiology arm --------------------------------
    M['no_ae_1'] = Lambda(add_gauss_noise, name='no_ae_1')(M['in_ae_1'])
    M['dr_ae_1'] = Dropout(p_dropE, name='dr_ae_1')(M['no_ae_1'])
    build_arm('_ae_1', 'dr_ae_1', fc_dimE, input_dim[1], out_actfcn[1])

    cplAE = Model(inputs=[M['in_ae_0'], M['in_ae_1'],
                          M['ispaired_ae_0'], M['ispaired_ae_1']],
                  outputs=[M['ou_ae_0'], M['ou_ae_1'],
                           M['ld_ae_0'], M['ld_ae_1']])

    def coupling_loss(zi, pairedi, zj, pairedj):
        '''Build a loss from the MSE of paired latent points scaled by the smallest variance of zj.

        The singular values are computed over all datapoints of zj, the MSE
        only over the datapoints flagged as paired.
        '''
        n_points = tf.shape(zi)[0]
        flags_i = tf.reshape(pairedi, [tf.shape(pairedi)[0], ])
        flags_j = tf.reshape(pairedj, [tf.shape(pairedj)[0], ])
        zi_paired = tf.boolean_mask(zi, tf.equal(flags_i, 1.0))
        zj_paired = tf.boolean_mask(zj, tf.equal(flags_j, 1.0))

        # Squared singular values of the centred zj, divided by (n - 1).
        centred_j = zj - tf.reduce_mean(zj, axis=0)
        singular_j = tf.linalg.svd(centred_j, compute_uv=False)
        raw_vars_j = tf.square(singular_j) / tf.cast(n_points - 1, tf.float32)

        # NaN entries are replaced by 0.1.
        nan_fill = tf.zeros_like(raw_vars_j) + tf.cast(1e-1, dtype=tf.float32)
        vars_j = tf.where(tf.math.is_nan(raw_vars_j), nan_fill, raw_vars_j)

        # The denominator is never allowed to drop below 0.01.
        paired_mse = tf.compat.v1.losses.mean_squared_error(zi_paired,
                                                            zj_paired)
        smallest_var = tf.maximum(tf.reduce_min(vars_j, axis=None),
                                  tf.cast(1e-2, dtype=tf.float32))
        L_ij = paired_mse / smallest_var

        def loss(y_true, y_pred):
            # Adaptive version: tf.multiply(tf.stop_gradient(L_ij), L_ij)
            return L_ij

        return loss

    # Losses and their weights, keyed by output layer name.
    loss_dict = {
        'ou_ae_0': mse,
        'ou_ae_1': mse,
        'ld_ae_0': coupling_loss(zi=M['ld_ae_0'], pairedi=M['ispaired_ae_0'],
                                 zj=M['ld_ae_1'], pairedj=M['ispaired_ae_1']),
        'ld_ae_1': coupling_loss(zi=M['ld_ae_1'], pairedi=M['ispaired_ae_1'],
                                 zj=M['ld_ae_0'], pairedj=M['ispaired_ae_0']),
    }
    loss_wt_dict = {
        'ou_ae_0': recon_strT,
        'ou_ae_1': recon_strE,
        'ld_ae_0': cpl_str,
        'ld_ae_1': cpl_str,
    }
    cplAE.compile(optimizer='adam', loss=loss_dict, loss_weights=loss_wt_dict)

    # Callbacks.
    checkpoint_path = (dir_pth['checkpoint'] + fileid +
                       '-checkpoint-' + '{epoch:04d}' + '.h5')
    checkpoint_cb = ModelCheckpoint(filepath=checkpoint_path,
                                    verbose=1,
                                    save_best_only=False,
                                    save_weights_only=True,
                                    mode='auto',
                                    period=chkpt_save_period)
    log_cb = CSVLogger(filename=dir_pth['logs'] + fileid + '.csv')

    # Validation data. The latent outputs get all-zero targets; the coupling
    # losses above ignore y_true.
    val_in = {'in_ae_0': val_dat['T'],
              'in_ae_1': val_dat['E'],
              'ispaired_ae_0': val_dat['T_ispaired'],
              'ispaired_ae_1': val_dat['E_ispaired']}
    val_out = {'ou_ae_0': val_dat['T'],
               'ou_ae_1': val_dat['E'],
               'ld_ae_0': np.zeros((val_dat['T'].shape[0], latent_dim)),
               'ld_ae_1': np.zeros((val_dat['E'].shape[0], latent_dim))}

    last_checkpoint_epoch = 0
    start_time = timeit.default_timer()
    cplAE.fit_generator(train_generator,
                        validation_data=(val_in, val_out),
                        epochs=n_epoch,
                        max_queue_size=100,
                        use_multiprocessing=False,
                        workers=1,
                        initial_epoch=last_checkpoint_epoch,
                        verbose=2,
                        callbacks=[checkpoint_cb, log_cb])
    elapsed = timeit.default_timer() - start_time

    separator = '-------------------------------'
    print(separator)
    print('Training time:', elapsed)
    print(separator)

    # Saving weights
    cplAE.save_weights(dir_pth['result'] + fileid + '-modelweights' + '.h5')

    matsummary = {'cvset': cvset,
                  'val_ind': val_ind,
                  'train_ind_T': train_ind_T,
                  'train_ind_E': train_ind_E}

    # Latent codes of the trained model: arm 0 encodes 'T', arm 1 encodes 'E'.
    for arm, modality in enumerate(('T', 'E')):
        tag = str(arm)
        encoder = Model(inputs=M['in_ae_' + tag], outputs=M['ld_ae_' + tag])
        matsummary['z_val_' + tag] = encoder.predict(
            {'in_ae_' + tag: val_dat[modality]})
        matsummary['z_train_' + tag] = encoder.predict(
            {'in_ae_' + tag: train_dat[modality]})

    sio.savemat(dir_pth['result'] + fileid + '-summary', matsummary)
y_data = {'decoder_output': decoder_output_data} validation_split = 10000 / len(encoder_input_data) print (validation_split) model_train.fit(x=x_data, y=y_data, batch_size=512, epochs=10, validation_split=validation_split, ) mark_start = 'starttt' mark_end = 'enddd' token_start = tokenizer_vitn.word_index[mark_start.strip()] token_end = tokenizer_vitn.word_index[mark_end.strip()] model_train.save_weights('nmt_train_model.h5') model_train.save('nmt_train_model.h5') def translate(input_text,true_output_text = None): input_tokens = tokenizer_eng.text_to_tokens(text=input_text,reverse=True,padding=True) initial_state = model_encoder.predict(input_tokens) max_tokens = tokenizer_vitn.max_tokens shape = (1, max_tokens) decoder_input_data = np.zeros(shape=shape, dtype=np.int) token_int = token_start output_text = '' count_tokens = 0 while token_int != token_end and count_tokens < max_tokens: decoder_input_data[0, count_tokens] = token_int
class JointEmbeddingModel:
    """Joint embedding of code (method name, API sequence, tokens) and descriptions.

    ``build`` creates four Keras models:

    * ``code_repr_model``: (methodname, apiseq, tokens) -> code vector of
      size ``hidden_dims``.
    * ``desc_repr_model``: desc -> description vector.
    * ``sim_model``: cosine similarity between the two vectors.
    * ``training_model``: ``max(1e-6, margin - sim(good) + sim(bad))`` for a
      matching and a non-matching description of the same code.
    """

    def __init__(self, config):
        """Copy the hyper-parameters from ``config`` and create the training inputs.

        Also makes sure the directory ``data_dir + 'model/' + model_name``
        exists.
        """
        self.data_dir = config.data_dir
        self.model_name = config.model_name
        self.methname_len = config.methname_len  # the max length of method name
        self.apiseq_len = config.apiseq_len
        self.tokens_len = config.tokens_len
        self.desc_len = config.desc_len

        self.vocab_size = config.n_words  # the size of vocab
        self.embed_dims = config.embed_dims
        self.lstm_dims = config.lstm_dims
        self.hidden_dims = config.hidden_dims

        self.margin = 0.05

        # File names (relative to data_dir) of pre-trained embedding weights,
        # or None for a randomly initialised embedding.
        self.init_embed_weights_methodname = config.init_embed_weights_methodname
        self.init_embed_weights_tokens = config.init_embed_weights_tokens
        self.init_embed_weights_desc = config.init_embed_weights_desc

        # Inputs of the training model.
        self.methodname = Input(shape=(self.methname_len, ), dtype='int32',
                                name='methodname')
        self.apiseq = Input(shape=(self.apiseq_len, ), dtype='int32',
                            name='apiseq')
        self.tokens = Input(shape=(self.tokens_len, ), dtype='int32',
                            name='tokens')
        self.desc_good = Input(shape=(self.desc_len, ), dtype='int32',
                               name='desc_good')
        self.desc_bad = Input(shape=(self.desc_len, ), dtype='int32',
                              name='desc_bad')

        # Create the path to store model info. exist_ok avoids the race
        # between an existence check and the creation; it raises
        # FileExistsError if the path exists but is not a directory.
        os.makedirs(self.data_dir + 'model/' + self.model_name, exist_ok=True)

    def _pretrained_embedding(self, weights_file):
        """Return ``[array]`` loaded from ``data_dir + weights_file``, or None if no file is given."""
        if weights_file is None:
            return None
        return [np.load(self.data_dir + weights_file)]

    def _sequence_repr(self, sequence, tag, **embedding_kwargs):
        """Encode an integer sequence: embedding -> BiLSTM -> max-pool over time -> tanh.

        Args:
            sequence: Keras tensor holding the token ids.
            tag: suffix used in every layer name of this branch.
            **embedding_kwargs: extra arguments for the Embedding layer
                (used to pass ``weights``).

        Returns:
            The tanh-activated concatenation of the max-pooled forward and
            backward LSTM outputs.
        """
        embedded = Embedding(input_dim=self.vocab_size,
                             output_dim=self.embed_dims,
                             mask_zero=False,
                             name='embedding_' + tag,
                             **embedding_kwargs)(sequence)
        embedded = Dropout(0.25, name='dropout_' + tag + '_embed')(embedded)

        # Two independent LSTMs, one reading the sequence backwards.
        fw_rnn = LSTM(self.lstm_dims,
                      recurrent_dropout=0.2,
                      return_sequences=True,
                      name='lstm_' + tag + '_fw')
        bw_rnn = LSTM(self.lstm_dims,
                      recurrent_dropout=0.2,
                      return_sequences=True,
                      go_backwards=True,
                      name='lstm_' + tag + '_bw')
        forward = fw_rnn(embedded)
        backward = bw_rnn(embedded)

        # The same Dropout layer instance is applied to both directions.
        rnn_dropout = Dropout(0.25, name='dropout_' + tag + '_rnn')
        forward = rnn_dropout(forward)
        backward = rnn_dropout(backward)

        # Max pooling over the time axis.
        maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                         output_shape=lambda x: (x[0], x[2]),
                         name='maxpooling_' + tag)
        pooled = Concatenate(name='concat_' + tag + '_lstm')(
            [maxpool(forward), maxpool(backward)])
        return Activation('tanh', name='active_' + tag)(pooled)

    def build(self):
        """Create ``code_repr_model``, ``desc_repr_model``, ``sim_model`` and ``training_model``."""
        # 1 -- CodeNN
        methodname = Input(shape=(self.methname_len, ), dtype='int32',
                           name='methodname')
        apiseq = Input(shape=(self.apiseq_len, ), dtype='int32',
                       name='apiseq')
        tokens = Input(shape=(self.tokens_len, ), dtype='int32',
                       name='tokens')

        methodname_repr = self._sequence_repr(
            methodname, 'methodname',
            weights=self._pretrained_embedding(
                self.init_embed_weights_methodname))
        # The API sequence embedding never uses pre-trained weights.
        apiseq_repr = self._sequence_repr(apiseq, 'apiseq')
        tokens_repr = self._sequence_repr(
            tokens, 'tokens',
            weights=self._pretrained_embedding(self.init_embed_weights_tokens))

        # fusion methodname, apiseq, tokens
        merge_methname_api = Concatenate(name='merge_methname_api')(
            [methodname_repr, apiseq_repr])
        merge_code_repr = Concatenate(name='merge_code_repr')(
            [merge_methname_api, tokens_repr])
        code_repr = Dense(self.hidden_dims, activation='tanh',
                          name='dense_coderepr')(merge_code_repr)

        self.code_repr_model = Model(inputs=[methodname, apiseq, tokens],
                                     outputs=[code_repr],
                                     name='code_repr_model')
        self.code_repr_model.summary()

        # 2 -- description
        desc = Input(shape=(self.desc_len, ), dtype='int32', name='desc')
        desc_repr = self._sequence_repr(
            desc, 'desc',
            weights=self._pretrained_embedding(self.init_embed_weights_desc))

        self.desc_repr_model = Model(inputs=[desc],
                                     outputs=[desc_repr],
                                     name='desc_repr_model')
        self.desc_repr_model.summary()

        # 3 -- cosine similarity
        code_repr = self.code_repr_model([methodname, apiseq, tokens])
        desc_repr = self.desc_repr_model([desc])
        cos_sim = Dot(axes=1, normalize=True,
                      name='cos_sim')([code_repr, desc_repr])

        sim_model = Model(inputs=[methodname, apiseq, tokens, desc],
                          outputs=[cos_sim],
                          name='sim_model')
        self.sim_model = sim_model
        self.sim_model.summary()

        # 4 -- build training model
        good_sim = sim_model(
            [self.methodname, self.apiseq, self.tokens, self.desc_good])
        bad_sim = sim_model(
            [self.methodname, self.apiseq, self.tokens, self.desc_bad])
        # Margin loss, floored at 1e-6.
        loss = Lambda(lambda x: K.maximum(1e-6, self.margin - x[0] + x[1]),
                      output_shape=lambda x: x[0],
                      name='loss')([good_sim, bad_sim])

        self.training_model = Model(inputs=[
            self.methodname, self.apiseq, self.tokens, self.desc_good,
            self.desc_bad
        ],
                                    outputs=[loss],
                                    name='training_model')
        self.training_model.summary()

    def compile(self, optimizer, **kwargs):
        """Compile all four models with ``optimizer``; ``kwargs`` go to every ``compile`` call."""
        self.code_repr_model.compile(loss='cosine_proximity',
                                     optimizer=optimizer,
                                     **kwargs)
        self.desc_repr_model.compile(loss='cosine_proximity',
                                     optimizer=optimizer,
                                     **kwargs)
        # The training model already outputs the loss value, so the loss
        # function returns y_pred; y_true is added and subtracted again and
        # therefore cancels out.
        self.training_model.compile(
            loss=lambda y_true, y_pred: y_pred + y_true - y_true,
            optimizer=optimizer,
            **kwargs)
        self.sim_model.compile(loss='binary_crossentropy',
                               optimizer=optimizer,
                               **kwargs)

    def fit(self, x, **kwargs):
        """Fit the training model on ``x`` against all-zero targets.

        One zero target is created per row of ``x[0]``.
        """
        y = np.zeros(shape=x[0].shape[:1], dtype=np.float32)
        return self.training_model.fit(x, y, **kwargs)

    def repr_code(self, x, **kwargs):
        """Return ``code_repr_model.predict(x, **kwargs)``."""
        return self.code_repr_model.predict(x, **kwargs)

    def repr_desc(self, x, **kwargs):
        """Return ``desc_repr_model.predict(x, **kwargs)``."""
        return self.desc_repr_model.predict(x, **kwargs)

    def predict(self, x, **kwargs):
        """Return ``sim_model.predict(x, **kwargs)``."""
        return self.sim_model.predict(x, **kwargs)

    def save(self, code_model_file, desc_model_file, **kwargs):
        """Save the weights of the code and description models to the given files."""
        self.code_repr_model.save_weights(code_model_file, **kwargs)
        self.desc_repr_model.save_weights(desc_model_file, **kwargs)

    def load(self, code_model_file, desc_model_file, **kwargs):
        """Load the weights of the code and description models from the given files."""
        self.code_repr_model.load_weights(code_model_file, **kwargs)
        self.desc_repr_model.load_weights(desc_model_file, **kwargs)
class Neural:
    """Small fully connected network scoring fixed-size windows of samples.

    The input has ``size_window_left + size_window_right + 1`` features and
    the output is a single sigmoid unit.
    """

    def __init__(self, size_window_left, size_window_right, number_samples,
                 threshold, number_epochs, learning_patterns_per_id,
                 optimizer_function, loss_function, dense_layers,
                 output_evolution_error_figures):
        """Store the configuration; the network is created by ``create_neural_network``.

        Args:
            size_window_left, size_window_right: window sizes that determine
                the input width.
            number_samples, learning_patterns_per_id: stored but not read by
                this class.
            threshold: score above which ``predict`` keeps a sample.
            number_epochs: epochs used by ``fit``.
            optimizer_function, loss_function: passed to ``compile``.
            dense_layers: total number of hidden Dense layers.
            output_evolution_error_figures: prefix of the saved figure path.
        """
        self.size_windows_left = size_window_left
        self.size_window_right = size_window_right
        self.number_samples = number_samples
        self.threshold = threshold
        self.number_epochs = number_epochs
        self.learning_patterns_per_id = learning_patterns_per_id
        self.optimizer_function = optimizer_function
        self.loss_function = loss_function
        self.output_evolution_error_figures = output_evolution_error_figures
        self.neural_network = None
        self.dense_layers = dense_layers

    def create_neural_network(self):
        """Build and compile the network, printing its summary."""
        input_size = Input(shape=(self.size_windows_left +
                                  self.size_window_right + 1,))
        # Please do not change this layer
        self.neural_network = Dense(20, )(input_size)
        self.neural_network = Dropout(0.2)(self.neural_network)

        for _ in range(self.dense_layers - 1):
            self.neural_network = Dense(20)(self.neural_network)
            self.neural_network = Dropout(0.5)(self.neural_network)

        # Please do not change this layer
        self.neural_network = Dense(1, activation='sigmoid')(self.neural_network)
        self.neural_network = Model(input_size, self.neural_network)
        self.neural_network.summary()
        self.neural_network.compile(optimizer=self.optimizer_function,
                                    loss=self.loss_function,
                                    metrics=['mean_squared_error'])

    def fit(self, x, y, x_validation, y_validation):
        """Train the network and save a figure with the error evolution.

        The network is evaluated once before training so that the figure
        starts with the untrained error.
        """
        first_test_training = self.neural_network.evaluate(x, y)
        first_test_validation = self.neural_network.evaluate(x_validation,
                                                             y_validation)
        history = self.neural_network.fit(
            x, y,
            epochs=self.number_epochs,
            validation_data=(x_validation, y_validation))
        self.plotter_error_evaluate(
            history.history['mean_squared_error'],
            history.history['val_mean_squared_error'],
            first_test_training,
            first_test_validation)

    def plotter_error_evaluate(self, mean_square_error_training,
                               mean_square_error_evaluate,
                               first_error_training, first_error_evaluate):
        """Plot training and validation error per epoch and save the figure as PDF.

        Args:
            mean_square_error_training: per-epoch training errors.
            mean_square_error_evaluate: per-epoch validation errors.
            first_error_training, first_error_evaluate: results of the
                evaluation done before training; element 1 is used as the
                first point of each curve.
        """
        # Build new lists instead of inserting into the arguments, so the
        # caller's history is left untouched.
        training_curve = [first_error_training[1],
                          *mean_square_error_training]
        validation_curve = [first_error_evaluate[1],
                            *mean_square_error_evaluate]

        # NOTE(review): the current pyplot figure is reused; calling this
        # twice presumably draws on top of the previous curves - confirm.
        matplotlib.pyplot.plot(training_curve, 'b', marker='^',
                               label="Treinamento")
        matplotlib.pyplot.plot(validation_curve, 'g', marker='o',
                               label="Validação")
        matplotlib.pyplot.legend(loc="upper right")
        matplotlib.pyplot.xlabel('Quantidade de épocas')
        matplotlib.pyplot.ylabel('Erro Médio')
        matplotlib.pyplot.savefig(
            self.output_evolution_error_figures + "fig_Mean_square_error_" +
            str(datetime.datetime.now()) + ".pdf")

    def predict_values(self, x):
        """Return the raw network output for ``x``."""
        return self.neural_network.predict(x)

    def save_models(self, model_architecture_file, model_weights_file):
        """Write the architecture as JSON and the weights to the given files."""
        model_json = self.neural_network.to_json()
        with open(model_architecture_file, "w") as json_file:
            json_file.write(model_json)
        self.neural_network.save_weights(model_weights_file)
        print("Saved model {} {}".format(model_architecture_file,
                                         model_weights_file))

    def load_models(self, model_architecture_file, model_weights_file):
        """Rebuild the network from a JSON architecture file and load its weights."""
        # The context manager closes the file even if reading fails.
        with open(model_architecture_file, 'r') as json_file:
            loaded_model_json = json_file.read()
        self.neural_network = model_from_json(loaded_model_json)
        self.neural_network.load_weights(model_weights_file)
        print("Loaded model {} {}".format(model_architecture_file,
                                          model_weights_file))

    @staticmethod
    def get_samples_vectorized(sample):
        """Return ``(values, centre)`` for one window.

        ``values`` holds element 2 of every row converted to float;
        ``centre`` is element 2 of row 5, returned unconverted.
        Presumably row 5 is the centre of the window - verify against the
        window sizes used by the caller.
        """
        sample_vectorized = [float(row[2]) for row in sample]
        return sample_vectorized, sample[5][2]

    def predict(self, x):
        """Return row 5 of every window that is considered positive.

        A window is kept when the network score exceeds ``self.threshold``
        or when its centre value exceeds 0.8.

        Args:
            x: sequence of windows accepted by ``get_samples_vectorized``.

        Returns:
            List with ``window[5]`` for every kept window, in input order.
        """
        if len(x) == 0:
            # Nothing to score; do not hand an empty batch to the network.
            return []

        x_axis = []
        y_axis = []
        for window in x:
            x_temp, y_temp = self.get_samples_vectorized(window)
            x_axis.append(x_temp)
            # The same value was already converted successfully while
            # vectorizing, so float() cannot fail here; converting makes the
            # comparison below valid for string inputs too.
            y_axis.append(float(y_temp))

        predicted = self.neural_network.predict(x_axis)

        return [
            window[5]
            for window, score, centre in zip(x, predicted, y_axis)
            if score > self.threshold or centre > 0.8
        ]
class A2C(Agent):
    """Advantage Actor-Critic (A2C)

    A2C is a synchronous version of A3C which gives equal or better performance.
    For more information on A2C refer to the OpenAI blog post: https://blog.openai.com/baselines-acktr-a2c/.
    The A3C algorithm is described in "Asynchronous Methods for Deep Reinforcement Learning" (Mnih et al., 2016)

    Since this algorithm is on-policy, it can and should be trained with multiple simultaneous environment instances.
    The parallelism decorrelates the agents' data into a more stationary process which aids learning.
    """

    def __init__(self,
                 model,
                 actions,
                 optimizer=None,
                 policy=None,
                 test_policy=None,
                 gamma=0.99,
                 instances=8,
                 nsteps=1,
                 value_loss=0.5,
                 entropy_loss=0.01):
        """Create the actor-critic model on top of ``model`` and compile it.

        Args:
            model: Keras model; the output of its last layer feeds both the
                actor and the critic head.
            actions: number of discrete actions (width of the actor head).
            optimizer: Keras optimizer; defaults to ``Adam(lr=3e-3)``.
            policy: training policy, either a single policy or a list
                indexed by instance. Defaults to one greedy policy followed
                by ``instances - 1`` gaussian eps-greedy policies whose eps
                values are evenly spaced in ``[0, 1)``.
            test_policy: policy used when ``self.training`` is False;
                defaults to ``Greedy()``.
            gamma: discount factor of the n-step return.
            instances: number of simultaneous environment instances.
            nsteps: length of the traces kept in memory.
            value_loss: coefficient of the value (critic) loss.
            entropy_loss: coefficient of the entropy term.
        """
        self.actions = actions
        self.optimizer = Adam(lr=3e-3) if optimizer is None else optimizer

        self.memory = memory.OnPolicy(steps=nsteps, instances=instances)

        if policy is None:
            # Create one policy per instance, with varying exploration
            # parameters. linspace yields exactly ``instances - 1`` values
            # (a float-step arange can be off by one) and the guard avoids
            # a division by zero when there is a single instance.
            n_explorers = instances - 1
            if n_explorers > 0:
                eps_values = np.linspace(0.0, 1.0, num=n_explorers,
                                         endpoint=False)
            else:
                eps_values = []
            self.policy = [Greedy()] + [
                GaussianEpsGreedy(eps, 0.1) for eps in eps_values
            ]
        else:
            self.policy = policy
        self.test_policy = Greedy() if test_policy is None else test_policy

        self.gamma = gamma
        self.instances = instances
        self.nsteps = nsteps
        self.value_loss = value_loss
        self.entropy_loss = entropy_loss
        self.training = True

        # Create output model layers based on number of actions
        raw_output = model.layers[-1].output
        actor = Dense(actions, activation='softmax')(
            raw_output)  # Actor (Policy Network)
        critic = Dense(1, activation='linear')(
            raw_output)  # Critic (Value Network)
        # Single output tensor: action probabilities followed by the value.
        output_layer = Concatenate()([actor, critic])
        self.model = Model(inputs=model.input, outputs=output_layer)

        def a2c_loss(targets_actions, y_pred):
            # Unpack input: column 0 is the target return, the remaining
            # columns are the one-hot mask of the action taken.
            targets, actions = targets_actions[:, 0], targets_actions[:, 1:]
            # Unpack prediction: probabilities first, value last.
            probs, values = y_pred[:, :-1], y_pred[:, -1]
            # Compute advantages and logprobabilities
            adv = targets - values
            logprob = tf.math.log(
                tf.reduce_sum(probs * actions, axis=1, keepdims=False) + 1e-10)
            # Compute composite loss
            loss_policy = -adv * logprob
            loss_value = self.value_loss * tf.square(adv)
            entropy = self.entropy_loss * tf.reduce_sum(
                probs * tf.math.log(probs + 1e-10), axis=1, keepdims=False)
            return tf.reduce_mean(loss_policy + loss_value + entropy)

        self.model.compile(optimizer=self.optimizer, loss=a2c_loss)

    def save(self, filename, overwrite=False):
        """Saves the model parameters to the specified file."""
        self.model.save_weights(filename, overwrite=overwrite)

    def act(self, state, instance=0):
        """Returns the action to be taken given a state.

        The actor outputs for ``state`` are handed to the training policy,
        or to the test policy when ``self.training`` is False. If the
        selected policy is a list, the entry at ``instance`` is used.
        """
        # Drop the last element, which is the critic value.
        actor_out = self.model.predict(np.array([state]))[0][:-1]
        chosen = self.policy if self.training else self.test_policy
        if isinstance(chosen, list):
            chosen = chosen[instance]
        return chosen.act(actor_out)

    def push(self, transition, instance=0):
        """Stores the transition in memory."""
        self.memory.put(transition, instance)

    def train(self, step):
        """Trains the agent for one step.

        Does nothing until the memory holds at least ``self.instances``
        entries. ``step`` is not used.
        """
        if len(self.memory) < self.instances:
            return

        (state_batch, action_batch, reward_batches, end_state_batch,
         not_done_mask) = self.memory.get()

        # Compute the value of the last next states; finished traces
        # (end state None) keep a value of zero.
        target_qvals = np.zeros(self.instances)
        non_final_last_next_states = [
            es for es in end_state_batch if es is not None
        ]

        if len(non_final_last_next_states) > 0:
            non_final_mask = [es is not None for es in end_state_batch]
            target_qvals[non_final_mask] = self.model.predict_on_batch(
                np.array(non_final_last_next_states))[:, -1].squeeze()

        # Compute n-step discounted return
        # If episode ended within any sampled nstep trace - zero out remaining rewards
        for n in reversed(range(self.nsteps)):
            rewards = np.array([b[n] for b in reward_batches])
            target_qvals *= np.array([t[n] for t in not_done_mask])
            target_qvals = rewards + (self.gamma * target_qvals)

        # Prepare loss data: target Q-values and actions taken (as a mask)
        ran = np.arange(self.instances)
        targets_actions = np.zeros((self.instances, self.actions + 1))
        targets_actions[ran, 0] = target_qvals
        targets_actions[ran, np.array(action_batch) + 1] = 1

        self.model.train_on_batch(np.array(state_batch), targets_actions)
class GAN():
    """Conditional GAN made of a generator, a discriminator and a stacked model.

    The generator maps an input image to a predicted image; the discriminator
    scores the channel-wise concatenation ``[image, generator_input]``; the
    "combined" model stacks generator + frozen discriminator and is trained
    with binary cross-entropy plus ``L1_loss`` weighted by ``train_yaml["lambda"]``.

    All hyper-parameters come from two dictionaries (``model_yaml`` for the
    architecture, ``train_yaml`` for data/training options).
    """

    def __init__(self, model_yaml, train_yaml):
        """Read the configuration, load the data and build the models.

        Args:
            model_yaml: dictionary with the model parameters
            train_yaml: dictionary with the training parameters
        """
        self.sigma_val = 0
        self.model_yaml = model_yaml
        self.img_rows = 28
        self.img_cols = 28
        self.channels = 1
        self.img_shape = (self.img_rows, self.img_cols, self.channels)
        if "dict_band_x" not in train_yaml:
            self.dict_band_X = None
            self.dict_band_label = None
            self.dict_rescale_type = None
        else:
            self.dict_band_X = train_yaml["dict_band_x"]
            self.dict_band_label = train_yaml["dict_band_label"]
            self.dict_rescale_type = train_yaml["dict_rescale_type"]
        # Read unconditionally: both attributes are used by every load_data call.
        self.s1bands = train_yaml["s1bands"]
        self.s2bands = train_yaml["s2bands"]

        # PATH
        self.model_name = model_yaml["model_name"]
        self.model_dir = train_yaml["training_dir"] + self.model_name + "/"
        self.this_training_dir = self.model_dir + "training_{}/".format(
            train_yaml["training_number"])
        self.saving_image_path = self.this_training_dir + "saved_training_images/"
        self.saving_logs_path = self.this_training_dir + "logs/"
        self.checkpoint_dir = self.this_training_dir + "checkpoints/"
        self.previous_checkpoint = train_yaml["load_model"]

        # TRAIN PARAMETER
        self.normalization = train_yaml["normalization"]
        self.epoch = train_yaml["epoch"]
        self.batch_size = train_yaml["batch_size"]
        self.learning_rate = train_yaml["lr"]
        self.fact_g_lr = train_yaml["fact_g_lr"]
        self.beta1 = train_yaml["beta1"]
        self.val_directory = train_yaml["val_directory"]
        self.fact_s2 = train_yaml["s2_scale"]
        self.fact_s1 = train_yaml["s1_scale"]
        self.data_X, self.data_y, self.scale_dict_train = load_data(
            train_yaml["train_directory"],
            x_shape=model_yaml["input_shape"],
            label_shape=model_yaml["dim_gt_image"],
            normalization=self.normalization,
            dict_band_X=self.dict_band_X,
            dict_band_label=self.dict_band_label,
            dict_rescale_type=self.dict_rescale_type,
            fact_s2=self.fact_s2,
            fact_s1=self.fact_s1,
            s2_bands=self.s2bands,
            s1_bands=self.s1bands,
            lim=train_yaml["lim_train_tile"])
        # The validation set is rescaled with the statistics of the train set
        # (dict_scale=self.scale_dict_train).
        self.val_X, self.val_Y, scale_dict_val = load_data(
            self.val_directory,
            x_shape=model_yaml["input_shape"],
            label_shape=model_yaml["dim_gt_image"],
            normalization=self.normalization,
            dict_band_X=self.dict_band_X,
            dict_band_label=self.dict_band_label,
            dict_rescale_type=self.dict_rescale_type,
            dict_scale=self.scale_dict_train,
            fact_s2=self.fact_s2,
            fact_s1=self.fact_s1,
            s2_bands=self.s2bands,
            s1_bands=self.s1bands,
            lim=train_yaml["lim_val_tile"])
        print("Loading the data done dataX {} dataY {}".format(
            self.data_X.shape, self.data_y.shape))
        self.gpu = train_yaml["n_gpu"]
        self.num_batches = self.data_X.shape[0] // self.batch_size
        self.im_saving_step = train_yaml["im_saving_step"]
        self.w_saving_step = train_yaml["weights_saving_step"]
        self.val_metric_step = train_yaml["metric_step"]

        # REDUCE THE DISCRIMINATOR PERFORMANCE
        self.val_lambda = train_yaml["lambda"]
        self.real_label_smoothing = tuple(train_yaml["real_label_smoothing"])
        self.fake_label_smoothing = tuple(train_yaml["fake_label_smoothing"])
        self.sigma_init = train_yaml["sigma_init"]
        self.sigma_step = train_yaml['sigma_step']
        self.sigma_decay = train_yaml["sigma_decay"]
        self.ite_train_g = train_yaml["train_g_multiple_time"]
        self.max_im = 10

        self.strategy = tf.distribute.MirroredStrategy()
        print('Number of devices: {}'.format(
            self.strategy.num_replicas_in_sync))
        self.buffer_size = self.data_X.shape[0]
        self.global_batch_size = self.batch_size * self.strategy.num_replicas_in_sync
        # Optimizers and models are created inside the distribution scope.
        with self.strategy.scope():
            self.d_optimizer = Adam(self.learning_rate, self.beta1)
            self.g_optimizer = Adam(self.learning_rate * self.fact_g_lr,
                                    self.beta1)
            self.build_model()
        self.model_writer = tf.summary.create_file_writer(
            self.saving_logs_path)

    def build_model(self):
        """Build and compile the discriminator, the generator and the combined model."""
        self.discriminator = self.build_discriminator(self.model_yaml)
        self.discriminator.compile(loss='binary_crossentropy',
                                   optimizer=self.d_optimizer,
                                   metrics=['accuracy'])
        self.generator = self.build_generator(self.model_yaml,
                                              is_training=True)
        print("Input G")
        g_input = Input(shape=(self.data_X.shape[1], self.data_X.shape[2],
                               self.data_X.shape[3]),
                        name="g_build_model_input_data")
        G = self.generator(g_input)
        print("G", G)
        # For the combined model we will only train the generator
        self.discriminator.trainable = False
        D_input = tf.concat([G, g_input], axis=-1)
        print("INPUT DISCRI ", D_input)
        # The discriminator takes generated images as input and determines validity
        D_output_fake = self.discriminator(D_input)
        # The combined model (stacked generator and discriminator)
        self.combined = Model(g_input, [D_output_fake, G],
                              name="Combined_model")
        self.combined.compile(loss=['binary_crossentropy', L1_loss],
                              loss_weights=[1, self.val_lambda],
                              optimizer=self.g_optimizer)
        # Fixed: the format string had no placeholder, so the names were never shown.
        print("[INFO] combined model loss are : {}".format(
            self.combined.metrics_names))

    def build_generator(self, model_yaml, is_training=True):
        """Build the generator: conv stack, ResNet blocks, conv stack, final conv.

        Args:
            model_yaml: dictionary with the architecture parameters.
            is_training: forwarded as ``trainable`` to every BatchNormalization layer.

        Returns:
            The (uncompiled) Keras ``Model`` named "Generator".
        """

        def build_resnet_block(block_input, block_id=0):
            """Define the ResNet block"""
            x = Conv2D(model_yaml["dim_resnet"],
                       model_yaml["k_resnet"],
                       padding=model_yaml["padding"],
                       strides=tuple(model_yaml["stride"]),
                       name="g_block_{}_conv1".format(block_id))(block_input)
            x = BatchNormalization(momentum=model_yaml["bn_momentum"],
                                   trainable=is_training,
                                   name="g_block_{}_bn1".format(block_id))(x)
            x = ReLU(name="g_block_{}_relu1".format(block_id))(x)
            x = Dropout(rate=model_yaml["do_rate"],
                        name="g_block_{}_do".format(block_id))(x)
            x = Conv2D(model_yaml["dim_resnet"],
                       model_yaml["k_resnet"],
                       padding=model_yaml["padding"],
                       strides=tuple(model_yaml["stride"]),
                       name="g_block_{}_conv2".format(block_id))(x)
            x = BatchNormalization(momentum=model_yaml["bn_momentum"],
                                   trainable=is_training,
                                   name="g_block_{}_bn2".format(block_id))(x)
            # Residual connection.
            x = Add(name="g_block_{}_add".format(block_id))([x, block_input])
            x = ReLU(name="g_block_{}_relu2".format(block_id))(x)
            return x

        img_input = Input(shape=(self.data_X.shape[1], self.data_X.shape[2],
                                 self.data_X.shape[3]),
                          name="g_input_data")

        if model_yaml["last_activation"] == "tanh":
            print("use tanh keras")
            last_activ = lambda x: tf.keras.activations.tanh(x)
        else:
            last_activ = model_yaml["last_activation"]
        x = img_input

        # build the blocks before the Resnet Blocks
        for i, param_lay in enumerate(model_yaml["param_before_resnet"]):
            x = Conv2D(param_lay[0],
                       param_lay[1],
                       strides=tuple(model_yaml["stride"]),
                       padding=model_yaml["padding"],
                       name="g_conv{}".format(i))(x)
            x = BatchNormalization(momentum=model_yaml["bn_momentum"],
                                   trainable=is_training,
                                   name="g_{}_bn".format(i))(x)
            x = ReLU(name="g_{}_lay_relu".format(i))(x)

        # add the Resnet blocks
        for j in range(model_yaml["nb_resnet_blocs"]):
            x = build_resnet_block(x, block_id=j)

        for i, param_lay in enumerate(model_yaml["param_after_resnet"]):
            x = Conv2D(param_lay[0],
                       param_lay[1],
                       strides=tuple(model_yaml["stride"]),
                       padding=model_yaml["padding"],
                       name="g_conv_after_resnetblock{}".format(i))(x)
            x = BatchNormalization(
                momentum=model_yaml["bn_momentum"],
                trainable=is_training,
                name="g_after_resnetblock{}_bn2".format(i))(x)
            x = ReLU(name="g_after_resnetblock_relu_{}".format(i))(x)

        # The last layer
        x = Conv2D(model_yaml["last_layer"][0],
                   model_yaml["last_layer"][1],
                   strides=tuple(model_yaml["stride"]),
                   padding=model_yaml["padding"],
                   name="g_final_conv",
                   activation=last_activ)(x)
        model_gene = Model(img_input, x, name="Generator")
        model_gene.summary()
        return model_gene

    def build_discriminator(self, model_yaml, is_training=True):
        """Build the fully-convolutional discriminator.

        The input shape is hard-coded to (256, 256, 12). Each layer described in
        ``model_yaml["dict_discri_archi"]`` is applied as explicit ``tf.pad``
        followed by a "valid" convolution; batch-norm is added after every
        layer except the first.

        Args:
            model_yaml: dictionary with the architecture parameters.
            is_training: forwarded as ``trainable`` to the BatchNormalization layers.

        Returns:
            The (uncompiled) Keras ``Model`` named "discriminator".
        """
        discri_input = Input(shape=tuple([256, 256, 12]), name="d_input")
        if model_yaml["d_activation"] == "lrelu":
            d_activation = lambda x: tf.nn.leaky_relu(
                x, alpha=model_yaml["lrelu_alpha"])
        else:
            d_activation = model_yaml["d_activation"]

        if model_yaml["add_discri_noise"]:
            x = GaussianNoise(self.sigma_val,
                              input_shape=self.model_yaml["dim_gt_image"],
                              name="d_GaussianNoise")(discri_input)
        else:
            x = discri_input

        layer_key = model_yaml["layer_key"]
        for i, layer_index in enumerate(model_yaml["dict_discri_archi"]):
            layer_val = model_yaml["dict_discri_archi"][layer_index]
            layer_param = dict(zip(layer_key, layer_val))
            pad = layer_param["padding"]
            # Pad only the two spatial axes; batch and channel axes are untouched.
            vpadding = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]])
            # The type of padding is defined in the yaml, more information in
            # https://www.tensorflow.org/api_docs/python/tf/pad
            x = tf.pad(
                x,
                vpadding,
                model_yaml["discri_opt_padding"],
                name="{}_padding_{}".format(
                    model_yaml["discri_opt_padding"], layer_index))
            x = Conv2D(layer_param["nfilter"],
                       layer_param["kernel"],
                       padding="valid",
                       activation=d_activation,
                       strides=(layer_param["stride"], layer_param["stride"]),
                       name="d_conv{}".format(layer_index))(x)
            if i > 0:
                x = BatchNormalization(momentum=model_yaml["bn_momentum"],
                                       trainable=is_training,
                                       name="d_bn{}".format(layer_index))(x)

        if model_yaml["d_last_activ"] == "sigmoid":
            x_final = tf.keras.layers.Activation('sigmoid',
                                                 name="d_last_activ")(x)
        else:
            x_final = x
        model_discri = Model(discri_input, x_final, name="discriminator")
        model_discri.summary()
        return model_discri

    def produce_noisy_input(self, input, sigma_val):
        """Optionally add Gaussian noise to a batch of ground-truth images.

        Args:
            input: batch of ground-truth images.
            sigma_val: standard deviation of the added noise.

        Returns:
            The noisy batch when ``model_yaml["add_discri_white_noise"]`` is
            set (passed through tanh when ``add_relu_after_noise`` is set),
            otherwise ``input`` unchanged.
        """
        if self.model_yaml["add_discri_white_noise"]:
            # GaussianNoise is a regularization layer that is only active in
            # training mode; called eagerly without training=True it returns
            # its input unchanged, so the noise was silently never applied.
            new_gt = GaussianNoise(sigma_val,
                                   input_shape=self.model_yaml["dim_gt_image"],
                                   name="d_inputGN")(input, training=True)
            if self.model_yaml["add_relu_after_noise"]:
                new_gt = tf.keras.layers.Activation(
                    lambda x: tf.keras.activations.tanh(x),
                    name="d_before_activ")(new_gt)
        else:
            new_gt = input
        return new_gt

    def define_callback(self):
        """Create the TensorBoard callback and attach it to the combined model."""
        self.g_tensorboard_callback = TensorBoard(
            log_dir=self.saving_logs_path,
            histogram_freq=0,
            batch_size=self.batch_size,
            write_graph=True,
            write_grads=True)
        self.g_tensorboard_callback.set_model(self.combined)

    def train_gpu(self):
        """Prepare a distributed training dataset.

        NOTE(review): this method looks unfinished - it restores the checkpoint
        (or creates the image directory) and builds the distributed dataset but
        never runs a training step. Use ``train`` to actually train.
        """
        valid = np.ones(
            (self.batch_size, 30, 30, 1))  # because of the shape of the discri
        print("valid shape {}".format(valid.shape))
        if self.previous_checkpoint is not None:
            print("LOADING the model from step {}".format(
                self.previous_checkpoint))
            self.load_from_checkpoint(self.previous_checkpoint)
        else:
            create_safe_directory(self.saving_image_path)

        train_dataset = tf.data.Dataset.from_tensor_slices(
            (self.data_X, self.data_y)).shuffle(self.batch_size).batch(
                self.global_batch_size)
        train_dist_dataset = self.strategy.experimental_distribute_dataset(
            train_dataset)

    def train(self):
        """Run the adversarial training loop.

        For every batch the discriminator is trained on a (noisy) real pair and
        on a generated pair, then the generator is trained through the combined
        model. Images, TensorBoard logs and weights are written according to
        ``im_saving_step``, ``metric_step`` and ``weights_saving_step``.
        """
        # Adversarial ground truths (30x30 because of the shape of the discri)
        valid = np.ones((self.global_batch_size, 30, 30, 1))
        fake = np.zeros((self.global_batch_size, 30, 30, 1))
        if self.previous_checkpoint is not None:
            print("LOADING the model from step {}".format(
                self.previous_checkpoint))
            start_epoch = int(self.previous_checkpoint) + 1
            self.load_from_checkpoint(self.previous_checkpoint)
        else:
            create_safe_directory(self.saving_image_path)
            start_epoch = 0

        # NOTE(review): the shuffle buffer is only one batch wide, so the
        # shuffling is very local; self.buffer_size looks like it was meant
        # to be used here - confirm before changing (memory impact).
        train_dataset = tf.data.Dataset.from_tensor_slices(
            (self.data_X, self.data_y)).shuffle(self.batch_size).batch(
                self.global_batch_size)

        sigma_val = self.sigma_init
        d_loss_real = [100, 100]  # init losses
        d_loss_fake = [100, 100]
        d_loss = [100, 100]
        l_val_name_metrics, l_val_value_metrics = [], []
        start_time = time.time()
        for epoch in range(start_epoch, self.epoch):
            for idx, (batch_input, batch_gt) in enumerate(train_dataset):
                # The last batch may be smaller than global_batch_size: cut the
                # label arrays to the real batch size to avoid a shape mismatch.
                n_samples = int(batch_input.shape[0])
                batch_valid = valid[:n_samples]
                batch_fake = fake[:n_samples]

                ## TRAIN THE DISCRIMINATOR
                d_noise_real = random.uniform(
                    self.real_label_smoothing[0],
                    self.real_label_smoothing[1])  # Add noise on the loss
                # NOTE(review): the fake labels are zeros, so multiplying them
                # by d_noise_fake has no effect - confirm the intended
                # smoothing of the fake labels.
                d_noise_fake = random.uniform(
                    self.fake_label_smoothing[0],
                    self.fake_label_smoothing[1])  # Add noise on the loss

                # Create a noisy gt images
                batch_new_gt = self.produce_noisy_input(batch_gt, sigma_val)
                # Generate a batch of new images
                gen_imgs = self.generator.predict(batch_input)
                D_input_real = tf.concat([batch_new_gt, batch_input], axis=-1)
                D_input_fake = tf.concat([gen_imgs, batch_input], axis=-1)
                print("shape d train")
                print(batch_valid.shape, D_input_fake.shape)

                d_loss_real = self.discriminator.train_on_batch(
                    D_input_real, d_noise_real * batch_valid)
                d_loss_fake = self.discriminator.train_on_batch(
                    D_input_fake, d_noise_fake * batch_fake)
                d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

                ## TRAIN THE GENERATOR
                g_loss = self.combined.train_on_batch(batch_input,
                                                      [batch_valid, batch_gt])

                # Plot the progress
                print("%d iter %d [D loss: %f, acc.: %.2f%%] [G loss: %f %f]" %
                      (epoch, self.num_batches * epoch + idx, d_loss[0],
                       100 * d_loss[1], g_loss[0], g_loss[1]))

                if epoch % self.im_saving_step == 0 and idx < self.max_im:
                    # to save some generated_images
                    gen_imgs = self.generator.predict(batch_input)
                    save_images(gen_imgs, self.saving_image_path, ite=idx)

            # LOGS to print in Tensorboard, written once per epoch with the
            # values of the last batch. NOTE(review): the original nesting of
            # this section could not be recovered with certainty - confirm.
            if epoch % self.val_metric_step == 0:
                l_val_name_metrics, l_val_value_metrics = self.val_metric()
            name_val_metric = [
                "val_{}".format(name) for name in l_val_name_metrics
            ]
            name_logs = self.combined.metrics_names + [
                "g_loss_tot", "d_loss_real", "d_loss_fake", "d_loss_tot",
                "d_acc_real", "d_acc_fake", "d_acc_tot"
            ]
            val_logs = g_loss + [
                g_loss[0] + 100 * g_loss[1], d_loss_real[0], d_loss_fake[0],
                d_loss[0], d_loss_real[1], d_loss_fake[1], d_loss[1]
            ]
            # The metrics
            l_name_metrics, l_value_metrics = compute_metric(
                batch_gt.numpy(), gen_imgs)
            assert len(val_logs) == len(
                name_logs
            ), "The name and value list of logs does not have the same lenght {} vs {}".format(
                name_logs, val_logs)
            # Fixed: elapsed time was computed as start - now (always negative).
            write_log_tf2(
                self.model_writer, name_logs + l_name_metrics +
                name_val_metric + ["time_in_sec"], val_logs +
                l_value_metrics + l_val_value_metrics +
                [time.time() - start_time], epoch)

            if epoch % self.sigma_step == 0:  # update sigma
                sigma_val = sigma_val * self.sigma_decay
            # save the models
            if epoch % self.w_saving_step == 0:
                self.save_model(epoch)

    def save_model(self, step):
        """Save the three architectures (once, as YAML) and their weights.

        Args:
            step: value inserted in the weight file names (``..._i{step}.h5``).
        """
        print("Saving model at {} step {}".format(self.checkpoint_dir, step))
        os.makedirs(self.checkpoint_dir, exist_ok=True)

        # The architecture files are written only if they do not exist yet.
        architectures = (
            ("model_generator.yaml", self.generator),
            ("model_combined.yaml", self.combined),
            ("model_discri.yaml", self.discriminator),
        )
        for file_name, model in architectures:
            yaml_path = "{}{}".format(self.checkpoint_dir, file_name)
            if not os.path.isfile(yaml_path):
                # Serialize before opening so a failure leaves no empty file.
                model_as_yaml = model.to_yaml()
                with open(yaml_path, "w") as yaml_file:
                    yaml_file.write(model_as_yaml)

        self.generator.save_weights("{}model_gene_i{}.h5".format(
            self.checkpoint_dir, step))
        self.discriminator.save_weights("{}model_discri_i{}.h5".format(
            self.checkpoint_dir, step))
        self.combined.save_weights("{}model_combined_i{}.h5".format(
            self.checkpoint_dir, step))

    def load_from_checkpoint(self, step):
        """Load the weights of the three models saved at ``step``.

        Raises:
            AssertionError: if the discriminator weight file does not exist.
        """
        discri_path = "{}model_discri_i{}.h5".format(self.checkpoint_dir, step)
        assert os.path.isfile(discri_path), "No file at {}".format(discri_path)
        self.discriminator.load_weights(discri_path)
        self.generator.load_weights("{}model_gene_i{}.h5".format(
            self.checkpoint_dir, step))
        self.combined.load_weights("{}model_combined_i{}.h5".format(
            self.checkpoint_dir, step))

    def load_generator(self, path_yaml, path_weight):
        """Rebuild a model from a YAML architecture file and load its weights.

        Args:
            path_yaml: path to the YAML architecture file.
            path_weight: path to the weight file.

        Returns:
            The loaded model.
        """
        # load YAML and create model (the file is closed even if read fails)
        with open(path_yaml, 'r') as yaml_file:
            loaded_model_yaml = yaml_file.read()
        loaded_model = model_from_yaml(loaded_model_yaml)
        # load weights into new model
        loaded_model.load_weights(path_weight)
        print("Loaded model from disk")
        return loaded_model

    def val_metric(self):
        """Predict on the validation set (one single batch) and compute the metrics.

        Returns:
            Whatever ``compute_metric`` returns for (validation labels,
            predictions); ``train`` unpacks it as (names, values).
        """
        test_dataset = tf.data.Dataset.from_tensor_slices(
            (self.val_X, self.val_Y)).batch(self.val_X.shape[0])
        # The batch size equals the validation-set size: one iteration expected.
        for x, y in test_dataset:
            val_pred = self.generator.predict(x)
            label = y
        return compute_metric(label.numpy(), val_pred)

    def predict_on_iter(self,
                        batch,
                        path_save,
                        l_image_id=None,
                        un_rescale=True):
        """Predict with the generator and optionally store the predictions.

        If ``path_save`` is an existing directory, nothing is predicted: the
        images already stored there are loaded and returned instead.

        Args:
            batch: either a string (path of the dataset directory to load) or
                an array holding the batch to predict on.
            path_save: where the predictions are stored; ``None`` disables
                both the cache lookup and the saving.
            l_image_id: names of the images, one per sample; defaults to the
                sample indexes when ``batch`` is an array.
            un_rescale: currently unused (the inverse rescaling is disabled).

        Returns:
            The predicted batch, or the data loaded from ``path_save``.
        """
        if isinstance(batch, str):
            # the param is a string: we load the batch from this directory
            l_image_id = find_image_indir(batch + XDIR, "npy")
            # Only the input array is needed. Indexing (instead of unpacking a
            # fixed number of values) works however many values load_data
            # returns - it returns three in __init__.
            batch = load_data(batch,
                              x_shape=self.model_yaml["input_shape"],
                              label_shape=self.model_yaml["dim_gt_image"],
                              normalization=self.normalization,
                              dict_band_X=self.dict_band_X,
                              dict_band_label=self.dict_band_label,
                              dict_rescale_type=self.dict_rescale_type,
                              dict_scale=self.scale_dict_train,
                              fact_s2=self.fact_s2,
                              fact_s1=self.fact_s1,
                              s2_bands=self.s2bands,
                              s1_bands=self.s1bands,
                              clip_s2=False)[0]
        elif l_image_id is None:
            print("We defined our own index for image name")
            l_image_id = list(range(batch.shape[0]))
        assert len(l_image_id) == batch.shape[
            0], "Wrong size of the name of the images is {} should be {} ".format(
                len(l_image_id), batch.shape[0])

        # path_save may be None (see the saving step below): only touch the
        # file system when a path was actually given.
        if path_save is not None:
            if os.path.isdir(path_save):
                print(
                    "[INFO] the directory where to store the image already exists"
                )
                data_array, path_tile, _ = load_from_dir(
                    path_save, self.model_yaml["dim_gt_image"])
                return data_array
            create_safe_directory(path_save)

        batch_res = self.generator.predict(batch)
        assert batch_res.shape[0] == batch.shape[
            0], "Wrong prediction should have shape {} but has shape {}".format(
                batch.shape, batch_res.shape)
        if path_save is not None:
            # we store the data at path_save
            for i in range(batch_res.shape[0]):
                # str(): the ids are integers when they were generated above.
                np.save(
                    "{}_image_{}".format(path_save,
                                         str(l_image_id[i]).split("/")[-1]),
                    batch_res[i, :, :, :])
        return batch_res
# Make sure the output directory of this model exists. An already existing
# directory is fine; any other failure is reported instead of being silently
# swallowed by a bare ``except``.
output_dir_for_model = os.getcwd() + os.sep + 'out' + os.sep + model_name
try:
    os.makedirs(output_dir_for_model, exist_ok=True)
except OSError as err:
    print("could not create {}: {}".format(output_dir_for_model, err))

# save features and labels (the files are closed even if a write fails)
with h5py.File(features_path, 'w') as h5f_data:
    h5f_data.create_dataset('dataset_1', data=np.array(features))

with h5py.File(labels_path, 'w') as h5f_label:
    h5f_label.create_dataset('dataset_1', data=np.array(le_labels))

# save model and weights
model_json = model.to_json()
with open(model_path + str(test_size) + ".json", "w") as json_file:
    json_file.write(model_json)

# save weights
model.save_weights(model_path + str(test_size) + ".h5")
print("saved model and weights to disk..")

print("features and labels saved..")

# end time
end = time.time()
print("end time - {}".format(
    datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))
class Seq2SeqAtt(object):
    """GloVe-based question-answering seq2seq model with an attention layer.

    The encoder is a bidirectional GRU over GloVe word embeddings, the decoder
    a GRU over one-hot target tokens; an ``AttentionLayer`` combines both
    before the softmax. ``create_model`` builds the training model plus two
    inference models (``encoder_model`` / ``decoder_model``) sharing its layers.
    """

    model_name = 'seq2seq-qa-glove'

    def __init__(self):
        self.model = None
        self.encoder_model = None
        self.decoder_model = None
        self.target_word2idx = None
        self.target_idx2word = None
        self.max_decoder_seq_length = None
        self.max_encoder_seq_length = None
        self.num_decoder_tokens = None
        self.glove_model = GloveModel()

    @staticmethod
    def get_architecture_file_path(model_dir_path):
        """Return the path of the JSON architecture file inside ``model_dir_path``."""
        return os.path.join(model_dir_path,
                            Seq2SeqAtt.model_name + '-architecture.json')

    @staticmethod
    def get_weight_file_path(model_dir_path):
        """Return the path of the HDF5 weight file inside ``model_dir_path``."""
        return os.path.join(model_dir_path,
                            Seq2SeqAtt.model_name + '-weights.h5')

    def load_glove_model(self, data_dir_path):
        """Load the GloVe embeddings from ``data_dir_path``."""
        self.glove_model.load_model(data_dir_path)

    def load_model(self, model_dir_path):
        """Restore the vocabularies, the configuration and the trained weights.

        Args:
            model_dir_path: directory holding the ``.npy`` and weight files.
        """
        # The .npy files hold pickled dictionaries: recent numpy versions
        # refuse to load them unless allow_pickle=True is given.
        # NOTE(security): unpickling executes arbitrary code - only load model
        # directories coming from a trusted source.
        prefix = model_dir_path + '/' + Seq2SeqAtt.model_name
        self.target_word2idx = np.load(prefix + '-target-word2idx.npy',
                                       allow_pickle=True).item()
        self.target_idx2word = np.load(prefix + '-target-idx2word.npy',
                                       allow_pickle=True).item()
        context = np.load(prefix + '-config.npy', allow_pickle=True).item()
        self.max_encoder_seq_length = context['input_max_seq_length']
        self.max_decoder_seq_length = context['target_max_seq_length']
        self.num_decoder_tokens = context['num_target_tokens']

        self.create_model()
        self.model.load_weights(
            Seq2SeqAtt.get_weight_file_path(model_dir_path))

    def create_model(self):
        """Build the training model and the encoder/decoder inference models.

        The models are created inside a TPU distribution strategy; the TPU
        address is read from the ``COLAB_TPU_ADDR`` environment variable.
        """
        resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
        tf.contrib.distribute.initialize_tpu_system(resolver)
        strategy = tf.contrib.distribute.TPUStrategy(resolver)
        with strategy.scope():
            hidden_size = 256
            enc_timesteps = self.max_encoder_seq_length
            dec_timesteps = self.max_decoder_seq_length
            print(f"embedding size: {self.glove_model.embedding_size}")

            encoder_inputs = Input(shape=(enc_timesteps,
                                          self.glove_model.embedding_size),
                                   name='encoder_inputs')
            decoder_inputs = Input(shape=(dec_timesteps,
                                          self.num_decoder_tokens),
                                   name='decoder_inputs')

            # Encoder GRU
            encoder_gru = Bidirectional(GRU(hidden_size,
                                            return_sequences=True,
                                            return_state=True,
                                            name='encoder_gru'),
                                        name='bidirectional_encoder')
            encoder_out, encoder_fwd_state, encoder_back_state = encoder_gru(
                encoder_inputs)

            # Set up the decoder GRU, using the concatenated forward/backward
            # encoder states as initial state (hence hidden_size * 2 units).
            decoder_gru = GRU(hidden_size * 2,
                              return_sequences=True,
                              return_state=True,
                              name='decoder_gru')
            decoder_out, decoder_state = decoder_gru(
                decoder_inputs,
                initial_state=Concatenate(axis=-1)(
                    [encoder_fwd_state, encoder_back_state]))

            # Attention layer
            attn_layer = AttentionLayer(name='attention_layer')
            attn_out, attn_states = attn_layer([encoder_out, decoder_out])

            # Concat attention input and decoder GRU output
            decoder_concat_input = Concatenate(
                axis=-1, name='concat_layer')([decoder_out, attn_out])

            # Dense layer
            dense = Dense(self.num_decoder_tokens,
                          activation='softmax',
                          name='softmax_layer')
            dense_time = TimeDistributed(dense, name='time_distributed_layer')
            decoder_pred = dense_time(decoder_concat_input)

            # Full model
            self.model = Model(inputs=[encoder_inputs, decoder_inputs],
                               outputs=decoder_pred)
            # Fixed: a missing comma between the two keyword arguments made
            # the whole file a SyntaxError.
            self.model.compile(
                optimizer=tf.train.RMSPropOptimizer(learning_rate=0.01),
                loss='categorical_crossentropy')
            self.model.summary()

            # ----- Inference models (one sample, one decoding step at a time)
            batch_size = 1

            # Encoder (Inference) model
            encoder_inf_inputs = Input(
                batch_shape=(batch_size, enc_timesteps,
                             self.glove_model.embedding_size),
                name='encoder_inf_inputs')
            (encoder_inf_out, encoder_inf_fwd_state,
             encoder_inf_back_state) = encoder_gru(encoder_inf_inputs)
            self.encoder_model = Model(
                inputs=encoder_inf_inputs,
                outputs=[encoder_inf_out, encoder_inf_fwd_state,
                         encoder_inf_back_state])

            # Decoder (Inference) model
            decoder_inf_inputs = Input(
                batch_shape=(batch_size, 1, self.num_decoder_tokens),
                name='decoder_word_inputs')
            # Fixed: this input receives the encoder output sequence, so its
            # time axis is enc_timesteps (it was dec_timesteps).
            encoder_inf_states = Input(
                batch_shape=(batch_size, enc_timesteps, 2 * hidden_size),
                name='encoder_inf_states')
            decoder_init_state = Input(
                batch_shape=(batch_size, 2 * hidden_size),
                name='decoder_init')

            decoder_inf_out, decoder_inf_state = decoder_gru(
                decoder_inf_inputs, initial_state=decoder_init_state)
            attn_inf_out, attn_inf_states = attn_layer(
                [encoder_inf_states, decoder_inf_out])
            decoder_inf_concat = Concatenate(
                axis=-1, name='concat')([decoder_inf_out, attn_inf_out])
            decoder_inf_pred = TimeDistributed(dense)(decoder_inf_concat)
            self.decoder_model = Model(
                inputs=[encoder_inf_states, decoder_init_state,
                        decoder_inf_inputs],
                outputs=[decoder_inf_pred, attn_inf_states,
                         decoder_inf_state])

    def fit(self,
            data_set,
            model_dir_path,
            epochs=None,
            batch_size=None,
            test_size=None,
            random_state=None,
            save_best_only=False,
            max_target_vocab_size=None):
        """Train the model on ``data_set`` and save architecture, weights and history.

        Args:
            data_set: dataset given to ``SQuADSeq2SeqEmbTupleSamples``.
            model_dir_path: directory where every output file is written.
            epochs: number of epochs (default 100).
            batch_size: batch size (default 64).
            test_size: fraction of the data kept for validation (default 0.2).
            random_state: seed of the train/validation split (default 42).
            save_best_only: forwarded to ``ModelCheckpoint``.
            max_target_vocab_size: size limit of the target vocabulary
                (default 5000).

        Returns:
            The Keras ``History`` object of the training run.
        """
        if batch_size is None:
            batch_size = 64
        if epochs is None:
            epochs = 100
        if test_size is None:
            test_size = 0.2
        if random_state is None:
            random_state = 42
        if max_target_vocab_size is None:
            max_target_vocab_size = 5000

        data_set_seq2seq = SQuADSeq2SeqEmbTupleSamples(
            data_set,
            self.glove_model.word2em,
            self.glove_model.embedding_size,
            max_target_vocab_size=max_target_vocab_size)
        # NOTE(review): load_model() reads files prefixed with
        # Seq2SeqAtt.model_name ('seq2seq-qa-glove'); confirm that save() with
        # the 'qa-glove-att' prefix produces matching file names.
        data_set_seq2seq.save(model_dir_path, 'qa-glove-att')

        x_train, x_test, y_train, y_test = data_set_seq2seq.split(
            test_size=test_size, random_state=random_state)

        print(len(x_train))
        print(len(x_test))

        self.max_encoder_seq_length = data_set_seq2seq.input_max_seq_length
        self.max_decoder_seq_length = data_set_seq2seq.target_max_seq_length
        self.num_decoder_tokens = data_set_seq2seq.num_target_tokens
        print(f'max_encoder_seq_length: {self.max_encoder_seq_length}')
        print(f'max_decoder_seq_length: {self.max_decoder_seq_length}')
        print(f'num_decoder_tokens: {self.num_decoder_tokens}')

        weight_file_path = self.get_weight_file_path(model_dir_path)
        architecture_file_path = self.get_architecture_file_path(
            model_dir_path)

        self.create_model()

        with open(architecture_file_path, 'w') as f:
            f.write(self.model.to_json())

        train_gen = generate_batch(data_set_seq2seq, x_train, y_train,
                                   batch_size)
        test_gen = generate_batch(data_set_seq2seq, x_test, y_test,
                                  batch_size)

        train_num_batches = len(x_train) // batch_size
        test_num_batches = len(x_test) // batch_size

        checkpoint = ModelCheckpoint(filepath=weight_file_path,
                                     save_best_only=save_best_only)

        history = self.model.fit_generator(generator=train_gen,
                                           steps_per_epoch=train_num_batches,
                                           epochs=epochs,
                                           verbose=1,
                                           validation_data=test_gen,
                                           validation_steps=test_num_batches,
                                           callbacks=[checkpoint])

        self.model.save_weights(weight_file_path)

        np.save(
            os.path.join(model_dir_path,
                         Seq2SeqAtt.model_name + '-history.npy'),
            history.history)

        return history

    def reply(self, paragraph, question):
        """Generate an answer to ``question`` given ``paragraph``.

        Decoding is greedy: at every step the most probable token is kept and
        fed back, until 'END' is produced or ``max_decoder_seq_length`` tokens
        were generated.

        Returns:
            The generated answer as a space-separated string.
        """
        input_emb = []
        input_text = paragraph.lower() + ' question ' + question.lower()
        for word in nltk.word_tokenize(input_text):
            if not in_white_list(word):
                continue
            input_emb.append(self.glove_model.encode_word(word))
        # NOTE(review): pad_sequences defaults to dtype='int32', which would
        # truncate float embeddings; left unchanged to stay consistent with
        # however generate_batch pads the training data - confirm.
        input_seq = pad_sequences([input_emb], self.max_encoder_seq_length)

        # The encoder model returns [sequence output, forward state, backward
        # state]; the decoder starts from the concatenation of both states,
        # exactly like the training graph built in create_model.
        encoder_out, fwd_state, back_state = self.encoder_model.predict(
            input_seq)
        decoder_state = np.concatenate([fwd_state, back_state], axis=-1)

        target_seq = np.zeros((1, 1, self.num_decoder_tokens))
        target_seq[0, 0, self.target_word2idx['START']] = 1
        answer_words = []
        target_text_len = 0
        terminated = False
        while not terminated:
            # Fixed: the decoder model expects [encoder outputs, decoder
            # state, current token] and returns [prediction, attention
            # weights, new decoder state]. The previous code fed the inputs
            # in the wrong order and handled the outputs as LSTM (h, c).
            output_tokens, _attn_weights, decoder_state = \
                self.decoder_model.predict(
                    [encoder_out, decoder_state, target_seq])

            sample_token_idx = np.argmax(output_tokens[0, -1, :])
            sample_word = self.target_idx2word[sample_token_idx]
            target_text_len += 1

            if sample_word != 'START' and sample_word != 'END':
                answer_words.append(sample_word)

            if (sample_word == 'END'
                    or target_text_len >= self.max_decoder_seq_length):
                terminated = True

            # The sampled token becomes the next decoder input.
            target_seq = np.zeros((1, 1, self.num_decoder_tokens))
            target_seq[0, 0, sample_token_idx] = 1

        return ' '.join(answer_words).strip()

    def test_run(self, ds, index=None):
        """Print the predicted and the actual answer of sample ``index`` of ``ds``."""
        if index is None:
            index = 0
        paragraph, question, actual_answer = ds.get_data(index)
        predicted_answer = self.reply(paragraph, question)
        print({'predict': predicted_answer, 'actual': actual_answer})
class Pix2Pix():
    """Pix2Pix-style conditional GAN with a Tiramisu generator.

    The generator translates a conditioning image B into an image A; a
    PatchGAN discriminator scores (A, B) pairs. Image file names and folders
    used by ``sample_images`` suggest a dehazing task.
    """

    def __init__(self):
        # Input shape
        self.img_rows = 256
        self.img_cols = 256
        self.channels = 3
        self.img_shape = (self.img_rows, self.img_cols, self.channels)

        # Configure data loader
        self.dataset_name = 'facades'
        self.data_loader = DataLoader(dataset_name=self.dataset_name,
                                      img_res=(self.img_rows, self.img_cols))

        # Calculate output shape of D (PatchGAN): four stride-2 layers
        patch = int(self.img_rows / 2**4)
        self.disc_patch = (patch, patch, 1)

        # Number of filters in the first layer of G and D
        self.gf = 64
        self.df = 64

        optimizer = Adam(0.0002, 0.5)

        # Build and compile the discriminator
        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss='mse',
                                   optimizer=optimizer,
                                   metrics=['accuracy'])

        # -------------------------
        #  Construct Computational
        #    Graph of Generator
        # -------------------------

        # Build the generator
        self.generator = self.build_generator()

        # Input images and their conditioning images
        img_A = Input(shape=self.img_shape)
        img_B = Input(shape=self.img_shape)

        # By conditioning on B generate a fake version of A
        fake_A = self.generator(img_B)

        # For the combined model we will only train the generator.
        # Fixed: the freeze has to be set on the discriminator model before the
        # combined model is compiled. The previous code set ``trainable`` on the
        # output tensor after compiling, which has no effect, so every
        # generator step also updated the discriminator.
        self.discriminator.trainable = False

        # Discriminators determines validity of translated images / condition pairs
        valid = self.discriminator([fake_A, img_B])

        self.combined = Model(inputs=[img_A, img_B], outputs=[valid, fake_A])
        self.combined.compile(loss=['mse', 'mae'],
                              loss_weights=[1, 100],
                              optimizer=optimizer)
        # self.combined.load_weights("Weights/199.h5")

    def build_generator(self):
        """Build the Tiramisu generator and print its summary."""
        layer_per_block = [4, 4, 4, 4, 4, 15, 4, 4, 4, 4, 4]
        tiramisu = Tiramisu(layer_per_block)
        tiramisu.summary()
        return tiramisu

    def build_discriminator(self):
        """Build the PatchGAN discriminator taking the pair [img_A, img_B]."""

        def d_layer(layer_input, filters, f_size=4, bn=True):
            """Discriminator layer"""
            d = Conv2D(filters, kernel_size=f_size, strides=2,
                       padding='same')(layer_input)
            d = LeakyReLU(alpha=0.2)(d)
            if bn:
                d = BatchNormalization(momentum=0.8)(d)
            return d

        img_A = Input(shape=self.img_shape)
        img_B = Input(shape=self.img_shape)

        # Concatenate image and conditioning image by channels to produce input
        combined_imgs = Concatenate(axis=-1)([img_A, img_B])

        d1 = d_layer(combined_imgs, self.df, bn=False)
        d2 = d_layer(d1, self.df * 2)
        d3 = d_layer(d2, self.df * 4)
        d4 = d_layer(d3, self.df * 8)

        validity = Conv2D(1, kernel_size=4, strides=1, padding='same')(d4)

        return Model([img_A, img_B], validity)

    def train(self, epochs, batch_size=1, sample_interval=50):
        """Train the GAN.

        Args:
            epochs: number of passes over the data loader.
            batch_size: number of image pairs per batch.
            sample_interval: sample images are written every
                ``sample_interval`` batches.
        """
        start_time = datetime.datetime.now()

        # save_weights fails when its target directory does not exist.
        os.makedirs("Weights", exist_ok=True)

        # Adversarial loss ground truths
        valid = np.ones((batch_size,) + self.disc_patch)
        fake = np.zeros((batch_size,) + self.disc_patch)

        for epoch in range(epochs):
            for batch_i, (imgs_A, imgs_B) in enumerate(
                    self.data_loader.load_batch(batch_size)):

                # ---------------------
                #  Train Discriminator
                # ---------------------
                print(imgs_A.shape)
                # Condition on B and generate a translated version
                fake_A = self.generator.predict(imgs_B)

                # Train the discriminators (original images = real / generated = Fake)
                d_loss_real = self.discriminator.train_on_batch(
                    [imgs_A, imgs_B], valid)
                d_loss_fake = self.discriminator.train_on_batch(
                    [fake_A, imgs_B], fake)
                d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

                # -----------------
                #  Train Generator
                # -----------------
                g_loss = self.combined.train_on_batch([imgs_A, imgs_B],
                                                      [valid, imgs_A])

                elapsed_time = datetime.datetime.now() - start_time
                # Plot the progress
                print("[Epoch %d/%d] [Batch %d/%d] [D loss: %f, acc: %3d%%] [G loss: %f] time: %s" % (
                    epoch, epochs, batch_i, self.data_loader.n_batches,
                    d_loss[0], 100 * d_loss[1], g_loss[0], elapsed_time))

                # If at save interval => save generated image samples
                if batch_i % sample_interval == 0:
                    self.sample_images(epoch, batch_i)

            # One weight file per epoch. NOTE(review): the original nesting of
            # this call could not be recovered with certainty - confirm.
            self.combined.save_weights("Weights/" + str(epoch) + ".h5")

    def img_to_frame(self, imgA, imgB, fakeA):
        """Assemble condition / generated / original images into one picture.

        The frame has three rows (one per title) and one column per image of
        the batch. Images are expected in [-1, 1] and are rescaled to [0, 255].

        Args:
            imgA: batch of original images.
            imgB: batch of conditioning images.
            fakeA: batch of generated images.

        Returns:
            The frame as a float array.
        """
        no_images = imgA.shape[0]
        img_height = imgA.shape[1]
        img_width = imgA.shape[2]
        pad = 20
        title_pad = 20
        pad_top = pad + title_pad
        titles = ['Condition', 'Generated', 'Original']
        # Fixed: the grid was no_images x no_images although there are always
        # three kinds of images, which only worked for batches of exactly 3.
        frame = np.zeros((len(titles) * (img_height + pad_top),
                          no_images * (img_width + pad), 3))

        gen_imgs = np.concatenate([imgB, fakeA, imgA])
        gen_imgs = 0.5 * gen_imgs + 0.5

        for r, title in enumerate(titles):
            for c in range(no_images):
                im = gen_imgs[r * no_images + c]
                y0 = r * (img_height + pad_top) + pad // 2
                x0 = c * (img_width + pad) + pad // 2
                frame[y0:y0 + img_height, x0:x0 + img_width, :] = im * 255
                frame = cv2.putText(frame, title, (x0, y0 - title_pad // 4),
                                    cv2.FONT_HERSHEY_COMPLEX, .5,
                                    (255, 255, 255))
        return frame

    def sample_images(self, epoch, batch_i):
        """Write sample predictions of three test images to the ``images/`` folders."""
        os.makedirs('images/%s' % self.dataset_name, exist_ok=True)
        os.makedirs('images/dehazed', exist_ok=True)
        os.makedirs('images/haze', exist_ok=True)
        os.makedirs('images/original', exist_ok=True)

        imgs_A, imgs_B, or_A, or_B = self.data_loader.load_data(
            batch_size=3, is_testing=True)
        fake_A = self.generator.predict(imgs_B)

        cv2.imwrite("images/dehazed"+"/"+"Img:"+str(epoch)+"_"+str(batch_i)+".jpg", (fake_A[0]*0.5+0.5)*255)
        cv2.imwrite("images/haze"+"/"+"Img:"+str(epoch)+"_"+str(batch_i)+".jpg", (or_B[0]*0.5+0.5)*255)
        cv2.imwrite("images/original"+"/"+"Img:"+str(epoch)+"_"+str(batch_i)+".jpg", (or_A[0]*0.5+0.5)*255)
        frame = self.img_to_frame(imgs_A, imgs_B, fake_A)

        cv2.imwrite("images/"+self.dataset_name+"/"+"Img:"+str(epoch)+"_"+str(batch_i)+".png", frame)
class DeepVelocity(object):
    """Keras network that scores a pair of states with a 2-way softmax.

    Each of the two states ("a" and "b") is described by three inputs: a
    structured vector, a latent vector and a 2-D screen.  All six inputs are
    tiled or reshaped into 64x64 feature maps, concatenated along the channel
    axis and fed through a stack of convolutions that ends in a softmax over
    two classes.  The assembled model is stored in ``self.probabilityNetwork``.

    NOTE: the tiling/reshape sizes below are hard-coded to 64x64, so the
    ``*_input_shape`` arguments must be compatible with those reshapes (the
    defaults are).
    """

    def __init__(self,
                 lr=0.00017654,
                 lat_input_shape=(64, ),
                 screen_input_shape=(64, 64, ),
                 structured_input_shape=(2, ),
                 verbose=False):
        """Build the (uncompiled) probability network.

        References:
            https://keras.io/getting-started/functional-api-guide/#multi-input-and-multi-output-models
            https://keras.io/getting-started/functional-api-guide/#shared-layers
            https://blog.keras.io/building-autoencoders-in-keras.html

        Args:
            lr: learning rate later handed to the optimizer in ``compile``.
            lat_input_shape: shape of each latent-vector input.
            screen_input_shape: shape of each screen input.
            structured_input_shape: shape of each structured input.
            verbose: when true, print the input shapes and the tensor shape
                after each stage of the network.
        """
        # Gross hack, change later?
        self.lr = lr

        def trace(*parts):
            # Shape tracing is debug output, so it is opt-in via ``verbose``.
            if verbose:
                print(*parts)

        # The original code printed tensors that were never defined here
        # (NameError as soon as verbose=True); report the configured shapes.
        trace("Network structured input shape is", structured_input_shape)
        trace("Network screen input shape is", screen_input_shape)
        trace("Network latent input shape is", lat_input_shape)

        # Create the two state encoding legs
        structured_input_a = Input(shape=structured_input_shape)
        lat_input_a = Input(shape=lat_input_shape)
        screen_input_a = Input(shape=screen_input_shape, )

        structured_input_b = Input(shape=structured_input_shape)
        lat_input_b = Input(shape=lat_input_shape)
        screen_input_b = Input(shape=screen_input_shape)

        eng_state_a = [structured_input_a, lat_input_a, screen_input_a]
        eng_state_b = [structured_input_b, lat_input_b, screen_input_b]

        # We want to broadcast the structured input (x, y) into their own
        # channels, each with the same dimension as the screen input.
        # We can then concatenate, then convolve over the whole tensor.
        x = RepeatVector(64 * 64)(structured_input_a)
        structured_output_a = Reshape((64, 64, 2))(x)
        x = RepeatVector(64 * 64)(structured_input_b)
        structured_output_b = Reshape((64, 64, 2))(x)

        # Similar with the latent vector, except it will simply be repeated
        # column wise.
        x = RepeatVector(64)(lat_input_a)
        lat_output_a = Reshape((64, 64, 1))(x)
        x = RepeatVector(64)(lat_input_b)
        lat_output_b = Reshape((64, 64, 1))(x)

        # The screen is the correct shape, just add a channel dimension.
        screen_output_a = Reshape((64, 64, 1))(screen_input_a)
        screen_output_b = Reshape((64, 64, 1))(screen_input_b)

        x = concatenate([
            screen_output_a, structured_output_a, lat_output_a,
            screen_output_b, structured_output_b, lat_output_b
        ], axis=-1)
        trace("Hello, World!", x.shape)

        # (filters, max-pool afterwards?) for each conv/batch-norm/relu stage.
        conv_plan = ((16, False), (32, True), (64, False), (128, True),
                     (256, False), (512, True), (1024, False))
        for stage, (filters, pool) in enumerate(conv_plan, start=1):
            x = Conv2D(filters, (3, 3))(x)
            x = BatchNormalization()(x)
            x = Activation('relu')(x)
            if pool:
                x = MaxPooling2D(2)(x)
            trace(str(stage), x.shape)

        # 1x1 convolution down to two channels, pooled to a single position,
        # then softmax over the channel axis gives the two class scores.
        x = Conv2D(2, (1, 1))(x)
        x = Activation('linear')(x)
        x = AveragePooling2D()(x)
        trace("8", x.shape)
        x = Activation("softmax")(x)
        trace("9", x.shape)
        prob_output = Reshape((2, ))(x)
        trace("10", prob_output.shape)

        self.probabilityNetwork = Model(inputs=eng_state_a + eng_state_b,
                                        outputs=[prob_output])

    def compile(self):
        """Compile the network with Nadam and categorical cross-entropy."""
        optimizer = Nadam(lr=self.lr,
                          beta_1=0.9,
                          beta_2=0.999,
                          epsilon=1e-08,
                          schedule_decay=0.004)
        self.probabilityNetwork.compile(
            optimizer=optimizer,
            loss='categorical_crossentropy',
            metrics=['acc', 'mse', 'categorical_crossentropy'])

    def save_weights(self, path):
        """Write the network weights to ``path``."""
        self.probabilityNetwork.save_weights(path)

    def load(self, path):
        """Load weights from ``path``, resolved relative to this module's directory.

        Returns ``self`` so calls can be chained.
        """
        loc = os.path.join(self.path(), path)
        print("Loading weights", loc)
        self.probabilityNetwork.load_weights(loc)
        return self

    def save_model(self, path):
        """Save the whole model (architecture and weights) to ``path``."""
        self.probabilityNetwork.save(path)

    def load_model(self, path):
        """Replace the network with the model stored at ``path``; returns ``self``.

        Unlike ``load``, ``path`` is used as given (not made relative to the
        module directory).
        """
        # Inside the method body ``load_model`` resolves to the module-level
        # loader, not to this method.
        self.probabilityNetwork = load_model(path)
        return self

    def path(self):
        """Return the directory that contains this source file."""
        return os.path.dirname(os.path.realpath(__file__))
# Train the autoencoder to reconstruct its own input.
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
autoencoder.fit(x_train, x_train,
                epochs=50,
                batch_size=256,
                shuffle=True,
                validation_data=(x_test, x_test))

# Round-trip the test set through the encoder and decoder halves.
encoded_imgs = encoder.predict(x_test)
decoded_imgs = decoder.predict(encoded_imgs)

# Save model: architecture as YAML, weights as HDF5, both under a
# timestamped base name.
model_name = logs_path + "/ae" + datetime.datetime.now().strftime("%Y%m%d%H%M%S")
with open(model_name + ".yaml", "w") as model_yaml:
    model_yaml.write(autoencoder.to_yaml())
autoencoder.save_weights(model_name + ".h5")
# Parenthesised single-argument form prints the same text on Python 2 and 3
# (the bare ``print "..."`` statement is a SyntaxError on Python 3).
print("Model saved as '" + model_name + "'")

# n = 10
# plt.figure( figsize=(20,4) )
# for i in range(n):
#     ax = plt.subplot( 2, n, i+1 )
#     plt.imshow( x_test[i].reshape(28,28) )
#     plt.gray()
#     ax.get_xaxis().set_visible( False )
#     ax.get_yaxis().set_visible( False )
#     ax = plt.subplot(2, n, i+1+n )
#     plt.imshow( decoded_imgs[i].reshape(28,28) )
# vae.load_weights(args.weights)
# else:
# train the autoencoder
total_records = len(gen_records)

# A record belongs to the training split when its 'train' flag equals True;
# every other record is counted as validation.
num_train = sum(
    1 for record in gen_records.values()
    if record['train'] == True)  # noqa: E712 - keep the exact legacy comparison
num_val = total_records - num_train

print("train: %d, val: %d" % (num_train, num_val))
print('total records: %d' % (total_records))

steps_per_epoch = num_train // cfg.BATCH_SIZE
val_steps = num_val // cfg.BATCH_SIZE

# Pass the validation generator itself together with its step count.  The
# previous ``validation_data=(val_gen, None)`` made Keras treat the generator
# as an (x, y) array pair, and ``val_steps`` was computed but never used.
vae.fit_generator(train_gen,
                  epochs=epochs,
                  steps_per_epoch=steps_per_epoch,
                  validation_data=val_gen,
                  validation_steps=val_steps)

vae.save_weights('vae_cnn_mnist.h5')

plot_results(models, data, batch_size=batch_size, model_name="vae_cnn")
def main(batch_size=150,
         p_drop=0.4,
         latent_dim=2,
         cpl_fn='minvar',
         cpl_str=1e-3,
         n_epoch=500,
         run_iter=0,
         model_id='cnn',
         exp_name='MNIST'):
    """Train a two-arm coupled convolutional autoencoder and save its results.

    Both arms read the same 28x28x1 input, reconstruct it, and expose a
    ``latent_dim``-sized representation; a coupling loss ties the two latent
    representations together.  Training is logged to CSV, the weights are
    written to an ``.h5`` file, and the latent codes of the training and
    validation sets (plus their labels) are stored in a ``.mat`` summary.

    Args:
        batch_size: mini-batch size used for training.
        p_drop: dropout rate applied after the first pooling layer.
        latent_dim: size of each arm's latent representation.
        cpl_fn: coupling option, one of 'mse', 'mseBN', 'fullcov', 'minvar'.
        cpl_str: loss weight applied to the coupling terms.
        n_epoch: number of training epochs.
        run_iter: run index, only used in the output file name.
        model_id: model tag, only used in the output file name.
        exp_name: experiment name handed to ``dataIO``.

    Raises:
        ValueError: if ``cpl_fn`` is not one of the supported options.
    """
    # Fail fast, before any data is loaded.  Previously an unknown option
    # slipped past ``assert type(cpl_fn)`` (always true) and only surfaced as
    # a KeyError while the model was being built.
    valid_cpl_fns = ('mse', 'mseBN', 'fullcov', 'minvar')
    if cpl_fn not in valid_cpl_fns:
        raise ValueError('Unknown cpl_fn %r; expected one of %s' %
                         (cpl_fn, ', '.join(valid_cpl_fns)))

    fileid = (model_id +
              '_cf_' + cpl_fn +
              '_cs_' + str(cpl_str) +
              '_pd_' + str(p_drop) +
              '_bs_' + str(batch_size) +
              '_ld_' + str(latent_dim) +
              '_ne_' + str(n_epoch) +
              '_ri_' + str(run_iter))
    fileid = fileid.replace('.', '-')

    train_dat, train_lbl, val_dat, val_lbl, dir_pth = dataIO(exp_name=exp_name)

    # Architecture parameters ------------------------------
    n_arms = 2
    fc_dim = 49

    # Model definition -------------------------------------
    # M maps layer names to their output tensors.
    M = {}
    M['in_ae'] = Input(shape=(28, 28, 1), name='in_ae')
    for i in range(n_arms):
        arm = '_ae_' + str(i)  # per-arm suffix, e.g. 'co1' + arm == 'co1_ae_0'

        # Encoder
        M['co1' + arm] = Conv2D(10, (3, 3),
                                activation='relu',
                                padding='same',
                                name='co1' + arm)(M['in_ae'])
        M['mp1' + arm] = MaxPooling2D((2, 2),
                                      padding='same',
                                      name='mp1' + arm)(M['co1' + arm])
        M['dr1' + arm] = Dropout(rate=p_drop,
                                 name='dr1' + arm)(M['mp1' + arm])
        M['fl1' + arm] = Flatten(name='fl1' + arm)(M['dr1' + arm])
        M['fc01' + arm] = Dense(fc_dim, activation='relu',
                                name='fc01' + arm)(M['fl1' + arm])
        M['fc02' + arm] = Dense(fc_dim, activation='relu',
                                name='fc02' + arm)(M['fc01' + arm])
        M['fc03' + arm] = Dense(fc_dim, activation='relu',
                                name='fc03' + arm)(M['fc02' + arm])

        # Latent layer: plain linear for 'mse', otherwise linear followed by
        # a batch normalization without learned scale/offset.
        if cpl_fn == 'mse':
            M['ld' + arm] = Dense(latent_dim, activation='linear',
                                  name='ld' + arm)(M['fc03' + arm])
        else:
            M['fc04' + arm] = Dense(latent_dim, activation='linear',
                                    name='fc04' + arm)(M['fc03' + arm])
            M['ld' + arm] = BatchNormalization(scale=False,
                                               center=False,
                                               epsilon=1e-10,
                                               momentum=0.99,
                                               name='ld' + arm)(M['fc04' + arm])

        # Decoder
        M['fc05' + arm] = Dense(fc_dim, activation='relu',
                                name='fc05' + arm)(M['ld' + arm])
        M['fc06' + arm] = Dense(fc_dim, activation='relu',
                                name='fc06' + arm)(M['fc05' + arm])
        M['fc07' + arm] = Dense(fc_dim * 4, activation='relu',
                                name='fc07' + arm)(M['fc06' + arm])
        # fc_dim * 4 == 196 == 14 * 14
        M['re1' + arm] = Reshape((14, 14, 1),
                                 name='re1' + arm)(M['fc07' + arm])
        M['us1' + arm] = UpSampling2D((2, 2),
                                      name='us1' + arm)(M['re1' + arm])
        M['co2' + arm] = Conv2D(10, (3, 3),
                                activation='relu',
                                padding='same',
                                name='co2' + arm)(M['us1' + arm])
        M['ou' + arm] = Conv2D(1, (3, 3),
                               activation='sigmoid',
                               padding='same',
                               name='ou' + arm)(M['co2' + arm])

    cplAE = Model(inputs=M['in_ae'],
                  outputs=[M['ou_ae_' + str(i)] for i in range(n_arms)] +
                  [M['ld_ae_' + str(i)] for i in range(n_arms)])

    if cpl_fn in ('mse', 'mseBN'):
        cpl_fn_loss = mse
    elif cpl_fn == 'fullcov':
        cpl_fn_loss = fullcov
    else:  # 'minvar' -- the only remaining option after validation above
        cpl_fn_loss = minvar

    # Create loss dictionary
    loss_dict = {
        'ou_ae_0': mse(M['in_ae'], M['ou_ae_0']),
        'ou_ae_1': mse(M['in_ae'], M['ou_ae_1']),
        'ld_ae_0': cpl_fn_loss(M['ld_ae_0'], M['ld_ae_1']),
        'ld_ae_1': cpl_fn_loss(M['ld_ae_1'], M['ld_ae_0'])
    }

    # Loss weights dictionary
    loss_wt_dict = {
        'ou_ae_0': 1.0,
        'ou_ae_1': 1.0,
        'ld_ae_0': cpl_str,
        'ld_ae_1': cpl_str
    }

    # Add loss definitions to the model
    cplAE.compile(optimizer='adam', loss=loss_dict, loss_weights=loss_wt_dict)

    # Data feed.  The latent outputs have no real targets, so placeholders of
    # the right shape are supplied; zeros are used instead of np.empty so no
    # uninitialised memory (possibly NaN/inf) is ever fed to Keras.
    train_input_dict = {'in_ae': train_dat}
    val_input_dict = {'in_ae': val_dat}
    train_output_dict = {
        'ou_ae_0': train_dat,
        'ou_ae_1': train_dat,
        'ld_ae_0': np.zeros((train_dat.shape[0], latent_dim)),
        'ld_ae_1': np.zeros((train_dat.shape[0], latent_dim))
    }
    val_output_dict = {
        'ou_ae_0': val_dat,
        'ou_ae_1': val_dat,
        'ld_ae_0': np.zeros((val_dat.shape[0], latent_dim)),
        'ld_ae_1': np.zeros((val_dat.shape[0], latent_dim))
    }

    log_cb = CSVLogger(filename=dir_pth['logs'] + fileid + '.csv')

    # Train model
    cplAE.fit(train_input_dict,
              train_output_dict,
              validation_data=(val_input_dict, val_output_dict),
              batch_size=batch_size,
              initial_epoch=0,
              epochs=n_epoch,
              verbose=2,
              shuffle=True,
              callbacks=[log_cb])

    # Saving weights
    cplAE.save_weights(dir_pth['result'] + fileid + '-modelweights' + '.h5')

    # Trained model prediction: latent codes of each arm for both data splits.
    matsummary = {}
    for i in range(n_arms):
        encoder = Model(inputs=M['in_ae'], outputs=M['ld_ae_' + str(i)])
        matsummary['z_val_' + str(i)] = encoder.predict({'in_ae': val_dat})
        matsummary['z_train_' + str(i)] = encoder.predict({'in_ae': train_dat})

    matsummary['train_lbl'] = train_lbl
    matsummary['val_lbl'] = val_lbl
    sio.savemat(dir_pth['result'] + fileid + '-summary.mat', matsummary)
def calc_steps(data_len, batchsize):
    """Return how many batches of ``batchsize`` cover ``data_len`` items.

    This is a ceiling division, so a final partial batch counts as a step.
    """
    return (data_len + batchsize - 1) // batchsize


# Calculate the steps per epoch
train_steps = calc_steps(len(train_path), 8)
val_steps = calc_steps(len(val_path), 8)

# Writes a checkpoint file named after the epoch number and validation loss.
checkpointer = ModelCheckpoint('cp-{epoch:02d}-{val_loss:.4f}-od-resnet50.h5', verbose=1)

# Train the model
history = model.fit_generator(
    traingen,
    steps_per_epoch=train_steps,
    epochs=20,  # Change this to a larger number to train for longer
    validation_data=valgen,
    validation_steps=val_steps,
    verbose=1,
    max_queue_size=5,  # Change this number based on memory restrictions
    # The checkpoint callback was previously created but never registered,
    # so no intermediate checkpoints were written.
    callbacks=[checkpointer]
)

model.save('outlier_detector_resnet50.h5')
model.save_weights('model_weights.h5')

# Save the model architecture
with open('model_architecture.json', 'w') as f:
    f.write(model.to_json())
class CartoonGAN():
    """Keras CartoonGAN: generator, discriminator, VGG content loss and training loop.

    Typical use is ``gan = CartoonGAN(args); gan.compile_model(); gan.train()``.
    After ``compile_model`` the attributes ``generator`` and ``discriminator``
    hold the built Keras models (they replace the builder methods of the same
    name on the instance), and ``train_generator`` holds the combined model
    used to update the generator.
    """

    def __init__(self, args):
        """Copy the run configuration from ``args`` (an argparse-style namespace)."""
        self.model_name = 'CartoonGAN'
        self.batch_size = args.batch_size
        self.epochs = args.epochs
        self.gpu = args.gpu_num
        self.image_channels = args.image_channels
        self.image_size = args.image_size
        self.init_epoch = args.init_epoch
        self.log_dir = args.log_dir
        self.lr = args.lr
        self.model_dir = args.model_dir
        self.weight = args.weight

    # method for generator
    def generator(self):
        """Build and return the generator model (image in, tanh image out)."""
        input_shape = [self.image_size, self.image_size, self.image_channels]
        input_img = Input(shape=input_shape, name="input")

        # first block
        x = ReflectionPadding2D(3)(input_img)
        x = Conv2D(64, (7, 7), strides=1, use_bias=True, padding='valid',
                   name="conv1")(x)
        x = InstanceNormalization(name="norm1")(x)
        x = Activation("relu")(x)

        # down-convolution
        channel = 128
        for i in range(2):
            x = Conv2D(channel, (3, 3), strides=2, use_bias=True,
                       padding='same', name="conv{}_1".format(i + 2))(x)
            x = Conv2D(channel, (3, 3), strides=1, use_bias=True,
                       padding='same', name="conv{}_2".format(i + 2))(x)
            x = InstanceNormalization(name="norm{}".format(i + 2))(x)
            x = Activation("relu")(x)
            channel = channel * 2

        # residual blocks
        x_res = x
        for i in range(8):
            x = ReflectionPadding2D(1)(x)
            x = Conv2D(256, (3, 3), strides=1, use_bias=True, padding='valid',
                       name="conv{}_1".format(i + 4))(x)
            x = InstanceNormalization(name="norm{}_1".format(i + 4))(x)
            x = Activation("relu")(x)
            x = ReflectionPadding2D(1)(x)
            x = Conv2D(256, (3, 3), strides=1, use_bias=True, padding='valid',
                       name="conv{}_2".format(i + 4))(x)
            x = InstanceNormalization(name="norm{}_2".format(i + 4))(x)
            x = Add()([x, x_res])
            x_res = x

        # up-convolution (``channel`` carries over from the down-convolution
        # loop and is halved once per stage)
        for i in range(2):
            x = Conv2DTranspose(channel // 2, 3, 2, padding="same",
                                output_padding=1,
                                name="deconv{}_1".format(i + 1))(x)
            x = Conv2D(channel // 2, (3, 3), strides=1, use_bias=True,
                       padding="same", name="deconv{}_2".format(i + 1))(x)
            x = InstanceNormalization(name="norm_deconv" + str(i + 1))(x)
            x = Activation("relu")(x)
            channel = channel // 2

        # last block
        x = ReflectionPadding2D(3)(x)
        x = Conv2D(3, (7, 7), strides=1, use_bias=True, padding="valid",
                   name="deconv3")(x)
        x = Activation("tanh")(x)

        model = Model(input_img, x, name='Cartoon_Generator')
        return model

    # method for discriminator
    def discriminator(self):
        """Build and return the discriminator model (sigmoid score map output)."""
        input_shape = [self.image_size, self.image_size, self.image_channels]
        input_img = Input(shape=input_shape, name="input")

        # first block
        x = Conv2D(32, (3, 3), strides=1, use_bias=True, padding='same',
                   name="conv1")(input_img)
        x = LeakyReLU(alpha=0.2)(x)

        # block loop: each pass halves the spatial size (stride 2)
        channel = 64
        for i in range(2):
            x = Conv2D(channel, (3, 3), strides=2, use_bias=True,
                       padding='same', name="conv{}_1".format(i + 2))(x)
            x = LeakyReLU(alpha=0.2)(x)
            x = Conv2D(channel * 2, (3, 3), strides=1, use_bias=True,
                       padding='same', name="conv{}_2".format(i + 2))(x)
            x = InstanceNormalization()(x)
            x = LeakyReLU(alpha=0.2)(x)
            channel = channel * 2

        # last block
        x = Conv2D(256, (3, 3), strides=1, use_bias=True, padding='same',
                   name="conv4")(x)
        x = InstanceNormalization()(x)
        x = LeakyReLU(alpha=0.2)(x)
        x = Conv2D(1, (3, 3), strides=1, use_bias=True, padding='same',
                   activation='sigmoid', name="conv5")(x)

        model = Model(input_img, x, name='Cartoon_Discriminator')
        return model

    # vgg loss function
    def vgg_loss(self, y_true, y_pred):
        """Content loss: absolute difference between VGG19 'block4_conv4' features.

        A fresh ImageNet VGG19 feature extractor is created on every call.
        """
        # get vgg model
        input_shape = [self.image_size, self.image_size, self.image_channels]
        img_input = Input(shape=input_shape, name="vgg_input")
        vgg19 = tf.keras.applications.vgg19.VGG19(weights='imagenet')
        vggmodel = Model(inputs=vgg19.input,
                         outputs=vgg19.get_layer('block4_conv4').output)
        x = vggmodel(img_input)
        vgg = Model(img_input, x, name='VGG_for_Feature_Extraction')

        # get l1 loss for the content loss
        y_true = vgg(y_true)
        y_pred = vgg(y_pred)
        content_loss = tf.losses.absolute_difference(y_true, y_pred)
        return content_loss

    def _with_multi_gpu(self, model):
        """Return a multi-GPU version of ``model``, or ``model`` itself on failure."""
        try:
            return multi_gpu_model(model, gpus=self.gpu)
        except Exception:
            # Best effort: keep the plain model when multi-GPU wrapping is not
            # possible.  ``Exception`` (rather than a bare ``except``) lets
            # KeyboardInterrupt/SystemExit propagate.
            return model

    # compile each model
    def compile_model(self):
        """Build the three models, compile them and attach the TensorBoard writers."""
        # init summary writer for tensorboard
        self.callback1 = TensorBoard(self.log_dir + '/discriminator')
        self.callback2 = TensorBoard(self.log_dir + '/generator')
        self.callback3 = TensorBoard(self.log_dir + '/generated_images')

        # model stuff
        input_shape = [self.image_size, self.image_size, self.image_channels]
        adam1 = Adam(lr=self.lr)
        adam2 = Adam(lr=self.lr * 2)

        # init and add multi-gpu support.  Each network is built exactly once
        # and then optionally wrapped.  The builders are looked up on the
        # class because the assignments below replace them on the instance.
        self.discriminator = self._with_multi_gpu(
            type(self).discriminator(self))
        self.generator = self._with_multi_gpu(type(self).generator(self))

        # compile discriminator
        self.discriminator.compile(loss='binary_crossentropy', optimizer=adam1)

        # compile generator
        input_tensor = Input(shape=input_shape)
        generated_catroon_tensor = self.generator(input_tensor)
        self.discriminator.trainable = False  # for here we only train the generator
        discriminator_output = self.discriminator(generated_catroon_tensor)
        self.train_generator = self._with_multi_gpu(
            Model(input_tensor,
                  outputs=[generated_catroon_tensor, discriminator_output]))
        self.train_generator.compile(
            loss=[self.vgg_loss, 'binary_crossentropy'],
            loss_weights=[float(self.weight), 1.0],
            optimizer=adam2)

        # set callback model
        self.callback1.set_model(self.discriminator)
        self.callback2.set_model(self.train_generator)
        self.callback3.set_model(self.train_generator)

    # method for training process
    def train(self):
        """Run the training loop; ``compile_model`` must have been called first.

        Epochs before ``init_epoch`` only update the generator through the
        combined model; later epochs alternate discriminator and generator
        updates.  Weights are saved every 50 epochs and the final generator
        is saved to 'CartoonGan_generator.h5'.
        """
        # start training
        flip = False
        variance = 1 / 127.5
        start_time = time.time()
        for epoch in range(1, self.epochs + 1):
            # create batch generator at each epoch
            batch_generator = DataGenerator(image_size=self.image_size,
                                            batch_size=self.batch_size)
            batch_end = len(batch_generator)
            print('Epoch {}'.format(epoch))

            # start training for each batch
            for idx, (photo, cartoon, smooth_cartoon,
                      index) in enumerate(batch_generator):
                # these two tensors measure the output of generator and discriminator
                real = np.ones((self.batch_size, ) + (64, 64, 1))
                fake = np.zeros((self.batch_size, ) + (64, 64, 1))

                # check if it is the end of an epoch
                if index + 1 == batch_end:
                    break

                # global step used for the TensorBoard summaries
                step = idx + (epoch + 1) * len(batch_generator)

                # initial training or start training
                if epoch < self.init_epoch:
                    g_loss = self.train_generator.train_on_batch(
                        photo, [photo, real])
                    generated_img = self.generator.predict(photo)
                    print(
                        "Batch %d (initial training for generator), g_loss: %.5f, with time: %4.4f"
                        % (idx, g_loss[2], time.time() - start_time))
                    start_time = time.time()
                    write_log(self.callback2, 'g_loss', g_loss[2], step)
                    if idx % 20 == 0:
                        write_images(self.callback3, generated_img,
                                     'generated_imgs', step)
                    if epoch % 20 == 0 and K.eval(
                            self.train_generator.optimizer.lr) > 0.0001:
                        K.set_value(
                            self.train_generator.optimizer.lr,
                            K.eval(self.train_generator.optimizer.lr) * 0.99)
                else:
                    # add noise to the input of discriminator; the noise scale
                    # decays slowly until it reaches its floor
                    if variance > 0.00001:
                        variance = variance * 0.9999
                    # one 2-D noise map per batch, shared by all three channels
                    gaussian = np.random.normal(
                        0, variance, (cartoon.shape[1], cartoon.shape[2]))
                    cartoon[:, :, :, 0] = cartoon[:, :, :, 0] + gaussian
                    cartoon[:, :, :, 1] = cartoon[:, :, :, 1] + gaussian
                    cartoon[:, :, :, 2] = cartoon[:, :, :, 2] + gaussian
                    gaussian = np.random.normal(
                        0, variance, (cartoon.shape[1], cartoon.shape[2]))
                    smooth_cartoon[:, :, :, 0] = smooth_cartoon[:, :, :, 0] + gaussian
                    smooth_cartoon[:, :, :, 1] = smooth_cartoon[:, :, :, 1] + gaussian
                    smooth_cartoon[:, :, :, 2] = smooth_cartoon[:, :, :, 2] + gaussian

                    # generate cartoonized images
                    generated_img = self.generator.predict(photo)

                    # to certain probability: flip the label of discriminator
                    if idx % 9 == 0 or np.random.uniform(0, 1) < 0.05:
                        real = fake
                        fake = fake + 1
                        flip = True

                    # train discriminator and adversarial loss
                    real_loss = self.discriminator.train_on_batch(
                        cartoon, real)
                    smooth_loss = self.discriminator.train_on_batch(
                        smooth_cartoon, fake)
                    fake_loss = self.discriminator.train_on_batch(
                        generated_img, fake)
                    d_loss = (real_loss + smooth_loss + fake_loss) / 3

                    # train generator (undo the label flip first)
                    if flip:
                        real = fake
                        fake = fake - 1
                        flip = False
                    g_loss = self.train_generator.train_on_batch(
                        photo, [photo, real])
                    print(
                        "Batch %d, d_loss: %.5f, g_loss: %.5f, with time: %4.4f"
                        % (idx, d_loss, g_loss[2], time.time() - start_time))
                    start_time = time.time()

                    # add losses to writer
                    write_log(self.callback1, 'd_loss', d_loss, step)
                    write_log(self.callback2, 'g_loss', g_loss[2], step)
                    if idx % 20 == 0:
                        write_images(self.callback3, generated_img,
                                     'generated_imgs', step)

                    # change learning rate
                    if epoch % 20 == 0 and K.eval(
                            self.discriminator.optimizer.lr) > 0.0001:
                        K.set_value(
                            self.discriminator.optimizer.lr,
                            K.eval(self.discriminator.optimizer.lr) * 0.95)
                    if epoch % 20 == 0 and K.eval(
                            self.train_generator.optimizer.lr) > 0.0001:
                        K.set_value(
                            self.train_generator.optimizer.lr,
                            K.eval(self.train_generator.optimizer.lr) * 0.95)

            # save model
            if epoch % 50 == 0:
                self.generator.save_weights(
                    self.model_dir + '/' +
                    'CartoonGan_generator_epoch_{}.h5'.format(epoch))
                self.discriminator.save_weights(
                    self.model_dir + '/' +
                    'CartoonGan_discriminator_epoch_{}.h5'.format(epoch))
                self.train_generator.save_weights(
                    self.model_dir + '/' +
                    'CartoonGan_train_generator_epoch_{}.h5'.format(epoch))

        print('Done!')
        self.generator.save('CartoonGan_generator.h5')
class PerceptualModel(NNInterface):
    """VGG16-based model exposing a reference and a target network.

    An ImageNet VGG16 is loaded once; ``ref_model`` and ``tar_model`` are both
    Sequential models assembled from that network's layer objects.
    """

    def __init__(self):
        super().__init__()
        self.__model = vgg16.VGG16(weights='imagenet')
        self.ref_model = self.get_dropout_model(0)
        self.tar_model = self.get_dropout_model(0)
        # ``summary()`` prints by itself and returns None, so wrapping it in
        # print() only added a stray "None" line.
        self.tar_model.summary()

    def get_features_model(self, layer_name):
        """Return a model mapping the VGG input to the output of ``layer_name``."""
        layer = self.__model.get_layer(layer_name).output
        model = Model(self.__model.input, outputs=layer)
        return model

    def call(self, x, training=True, ref=True):
        """Apply VGG16 preprocessing to ``x`` and run the selected network.

        Args:
            x: input batch, passed through ``vgg16.preprocess_input``.
            training: forwarded to the underlying model call.
            ref: use ``ref_model`` when true, otherwise ``tar_model``.
        """
        x = vgg16.preprocess_input(x)
        if ref:
            return self.ref_model(x, training=training)
        return self.tar_model(x, training=training)

    def compute_output_shape(self, input_shape):
        """Delegate the output-shape computation to the wrapped VGG16 model."""
        return self.__model.compute_output_shape(input_shape)

    def freeze_layers(self, freeze_idx):
        """Mark the first ``freeze_idx`` layers non-trainable and print every layer's state."""
        for i, layer in enumerate(self.__model.layers):
            if freeze_idx > i:
                layer.trainable = False

        for layer in self.__model.layers:
            print("layer {} is trainable {}".format(layer.name,
                                                    layer.trainable))

    def add_dropout(self):
        """Rebuild the wrapped model with a Dropout(0.5) between its last three layers' first pair.

        The dropout is inserted after the third-from-last layer; the last two
        layers are then re-applied on top of it.
        """
        # Store the fully connected layers
        fc1 = self.__model.layers[-3]
        fc2 = self.__model.layers[-2]
        predictions = self.__model.layers[-1]

        # Reconnect the layers with a single dropout after fc1
        x = Dropout(0.5)(fc1.output)
        x = fc2(x)
        predictors = predictions(x)

        # Create a new model (local name avoids shadowing the ``input`` builtin)
        model_input = self.__model.input
        self.__model = Model(model_input, predictors)

    def get_dropout_model(self, dropout_num):
        """Return a Sequential copy of the layer stack with optional dropout.

        Args:
            dropout_num: 0 adds no dropout, 1 adds dropout after the layer
                named "fc1", 2 or more also adds it after "fc2".

        The layers themselves are reused, not cloned.
        """
        model = tf.keras.Sequential()
        dropout1 = Dropout(0.5)
        dropout2 = Dropout(0.5)
        for layer in self.__model.layers:
            model.add(layer)
            if layer.name == "fc1" and dropout_num > 0:
                model.add(dropout1)
            if layer.name == "fc2" and dropout_num > 1:
                model.add(dropout2)
        return model

    def save_model(self, iter_num, output_path):
        """Save the wrapped model's weights under ``<output_path>/ckpts``."""
        output_path = os.path.join(output_path, "ckpts")
        checkpoint_path = "weights_after_{}_iterations".format(iter_num)
        self.__model.save_weights(os.path.join(output_path, checkpoint_path))

    def load_model(self, ckpt_path):
        """Load weights for the wrapped model from ``ckpt_path``."""
        self.__model.load_weights(ckpt_path)
callbacks=[], verbose=1) # --------------------------------------------------------------------------------------------------------------------- # -------------------------------------- # EXPORT MODEL ARCHITECTURE AND WEIGHTS | # -------------------------------------- # export model structure to json file: model_struct_json = model.to_json() filename = filepattern('model_allfreeze_', '.json') with open(filename, 'w') as f: f.write(model_struct_json) # export weights to an hdf5 file: w_filename = filepattern('weights_allfreeze_', '.h5') model.save_weights(w_filename) # --------------------------------------------------------------------------------------------------------------------- # ------------------------------------------------------------- # VISUALIZE BASE ARCHITECTURE TO DECIDE WHICH LAYERS TO FREEZE | # ------------------------------------------------------------- # PUT BREAKPOINT HERE!!!!!!!!!!!!!!! print(list(show_architecture(base))) # INSERT DEBUGGER BREAKPOINT DIRECTLY ON THE NEXT COMMAND TO VIEW THE ARCHITECTURE AT RUNTIME # --------------------------------------------------------------------------------------------------------------------- # ------------------------ # STOP NEPTUNE EXPERIMENT | # ------------------------ npt.stop()
def main():
    """Run 5-fold cross validation of the VGG16-based crowd counting model.

    For every fold a fresh model is built and trained on the multiscale
    pyramid of the other four folds (together with the ranking images), then
    evaluated on the held-out fold.  The per-fold test model is written as
    JSON + HDF5, and per-fold / average MAE and MSE are written into a
    timestamped results folder.
    """
    n_fold = 5
    n_pyramid_levels = 5

    # Counting Dataset
    counting_dataset_path = 'counting_data_UCF'
    counting_dataset = list()
    train_labels = {}
    val_labels = {}
    for im_path in glob.glob(os.path.join(counting_dataset_path, '*.jpg')):
        counting_dataset.append(im_path)
        img = image.load_img(im_path)
        gt_file = im_path.replace('.jpg', '_ann.mat')
        # PIL reports size as (width, height); the loader is handed
        # (height, width), exactly as before (only the names were swapped).
        width, height = img.size
        dmap, crowd_number = load_gt_from_mat(gt_file, (height, width))
        train_labels[im_path] = dmap
        val_labels[im_path] = crowd_number
    counting_dataset_pyramid, train_labels_pyramid = multiscale_pyramid(
        counting_dataset, train_labels)

    # Ranking Dataset
    ranking_dataset_path = 'ranking_data'
    ranking_dataset = list(
        glob.glob(os.path.join(ranking_dataset_path, '*.jpg')))

    # randomize the order of images before splitting
    np.random.shuffle(counting_dataset)
    split_size = int(round(len(counting_dataset) / n_fold))
    splits_list = list()
    for t in range(n_fold):
        start = t * split_size
        # The last fold runs to the end of the list so that no image is
        # silently dropped when the dataset size is not a multiple of n_fold.
        stop = start + split_size if t < n_fold - 1 else len(counting_dataset)
        splits_list.append(counting_dataset[start:stop])

    mae_sum = 0.0
    mse_sum = 0.0

    # create folder to save results, named 'Results-YYYY-MM-DD-HH.MM'
    results_folder = datetime.datetime.now().strftime('Results-%Y-%m-%d-%H.%M')
    os.makedirs(results_folder, exist_ok=True)

    # 5-fold cross validation
    epochs = int(round(iterations / iterations_per_epoch))
    for fold in range(n_fold):
        print('\nFold ' + str(fold))

        # Model
        model = VGG16(include_top=False, weights='imagenet')
        transfer_layer = model.get_layer('block5_conv3')
        conv_model = Model(inputs=[model.input],
                           outputs=[transfer_layer.output],
                           name='vgg_partial')

        counting_input = Input(shape=(224, 224, 3),
                               dtype='float32',
                               name='counting_input')
        ranking_input = Input(shape=(224, 224, 3),
                              dtype='float32',
                              name='ranking_input')
        x = conv_model([counting_input, ranking_input])
        counting_output = Conv2D(1, (3, 3),
                                 strides=(1, 1),
                                 padding='same',
                                 data_format=None,
                                 dilation_rate=(1, 1),
                                 activation='relu',
                                 use_bias=True,
                                 kernel_initializer='glorot_uniform',
                                 bias_initializer='zeros',
                                 kernel_regularizer=None,
                                 bias_regularizer=None,
                                 activity_regularizer=None,
                                 kernel_constraint=None,
                                 bias_constraint=None,
                                 name='counting_output')(x)

        # The ranking output is computed using SUM pool. Here I use
        # GlobalAveragePooling2D followed by a multiplication by 14^2 to do
        # this.
        ranking_output = Lambda(
            lambda i: 14.0 * 14.0 * i,
            name='ranking_output')(GlobalAveragePooling2D(
                name='global_average_pooling2d')(counting_output))
        train_model = Model(inputs=[counting_input, ranking_input],
                            outputs=[counting_output, ranking_output])
        train_model.summary()

        # l2 weight decay
        # NOTE(review): these attributes are assigned after the layers were
        # built -- confirm that Keras actually picks the regularizers up.
        for layer in train_model.layers:
            if hasattr(layer, 'kernel_regularizer'):
                layer.kernel_regularizer = regularizers.l2(5e-4)
            elif layer.name == 'vgg_partial':
                for l in layer.layers:
                    if hasattr(l, 'kernel_regularizer'):
                        l.kernel_regularizer = regularizers.l2(5e-4)

        # lr starts at 0.0; the LearningRateScheduler callback below drives it.
        optimizer = SGD(lr=0.0, decay=0.0, momentum=0.9, nesterov=False)
        loss = {
            'counting_output': euclideanDistanceCountingLoss,
            'ranking_output': pairwiseRankingHingeLoss
        }
        loss_weights = [1.0, 0.0]
        train_model.compile(optimizer=optimizer,
                            loss=loss,
                            loss_weights=loss_weights)

        # counting validation split / counting train split
        split_val = splits_list[fold]
        split_train = list(
            itertools.chain.from_iterable(splits_list[:fold] +
                                          splits_list[fold + 1:]))

        # counting validation split labels
        split_val_labels = {k: val_labels[k] for k in split_val}

        # Flatten the pyramid: every training image contributes all of its
        # levels, in level order.
        counting_dataset_pyramid_split = []
        train_labels_pyramid_split = []
        for key in split_train:
            for level in range(n_pyramid_levels):
                counting_dataset_pyramid_split.append(
                    counting_dataset_pyramid[key][level])
            for level in range(n_pyramid_levels):
                train_labels_pyramid_split.append(
                    train_labels_pyramid[key][level])

        # Shuffle samples and labels with the same permutation.
        index_shuf = np.arange(len(counting_dataset_pyramid_split))
        np.random.shuffle(index_shuf)
        counting_dataset_pyramid_split_shuf = [
            counting_dataset_pyramid_split[i] for i in index_shuf
        ]
        train_labels_pyramid_split_shuf = [
            train_labels_pyramid_split[i] for i in index_shuf
        ]

        train_generator = DataGenerator(counting_dataset_pyramid_split_shuf,
                                        train_labels_pyramid_split_shuf,
                                        ranking_dataset, **params)
        lrate = LearningRateScheduler(step_decay)
        callbacks_list = [lrate]
        train_model.fit_generator(generator=train_generator,
                                  epochs=epochs,
                                  callbacks=callbacks_list)

        # test images: reuse the trained layers in a single-input model whose
        # output is the sum of the predicted map over its spatial axes
        tmp_model = train_model.get_layer('vgg_partial')
        test_input = Input(shape=(None, None, 3),
                           dtype='float32',
                           name='test_input')
        new_input = tmp_model(test_input)
        co = train_model.get_layer('counting_output')(new_input)
        test_output = Lambda(lambda i: K.sum(i, axis=(1, 2)),
                             name='test_output')(co)
        test_model = Model(inputs=[test_input], outputs=[test_output])

        predictions = np.empty((len(split_val), 1))
        y_validation = np.empty((len(split_val), 1))
        for i, val_path in enumerate(split_val):
            img = image.load_img(val_path, target_size=(224, 224))
            img_to_array = image.img_to_array(img)
            img_to_array = preprocess_input(img_to_array)
            img_to_array = np.expand_dims(img_to_array, axis=0)

            pred_test = test_model.predict(img_to_array)
            predictions[i] = pred_test
            y_validation[i] = split_val_labels[val_path]

        mean_abs_err = mae(predictions, y_validation)
        mean_sqr_err = mse(predictions, y_validation)

        # serialize model to JSON
        model_json = test_model.to_json()
        model_json_name = "test_model_" + str(fold) + ".json"
        with open(model_json_name, "w") as json_file:
            json_file.write(model_json)
        # serialize weights to HDF5
        model_h5_name = "test_model_" + str(fold) + ".h5"
        test_model.save_weights(model_h5_name)
        print("Saved model to disk")

        print('\n######################')
        print('Results on TEST SPLIT:')
        print(' MAE: {}'.format(mean_abs_err))
        print(' MSE: {}'.format(mean_sqr_err))
        # ``s`` is the start timestamp defined outside this function.
        print("Took %f seconds" % (time.time() - s))
        path1 = results_folder + '/test_split_results_fold-' + str(
            fold) + '.txt'
        # The file handle gets its own name; it used to rebind the fold
        # loop variable.
        with open(path1, 'w') as fold_file:
            fold_file.write('mae: %f,\nmse: %f, \nTook %f seconds' %
                            (mean_abs_err, mean_sqr_err, time.time() - s))

        mae_sum = mae_sum + mean_abs_err
        mse_sum = mse_sum + mean_sqr_err

    print('\n################################')
    print('Average Results on TEST SPLIT:')
    print(' AVE MAE: {}'.format(mae_sum / n_fold))
    print(' AVE MSE: {}'.format(mse_sum / n_fold))
    print("Took %f seconds" % (time.time() - s))
    path2 = results_folder + '/test_split_results_avg.txt'
    with open(path2, 'w') as avg_file:
        avg_file.write('avg_mae: %f, \navg_mse: %f, \nTook %f seconds' %
                       (mae_sum / n_fold, mse_sum / n_fold, time.time() - s))
class DEC(object):
    """Clustering model: an encoder with a ``ClusteringLayer`` on top.

    NOTE(review): "DEC" presumably stands for Deep Embedded Clustering --
    confirm against the project documentation.
    """

    def __init__(self, dims, n_clusters=10, alpha=1.0, init='glorot_uniform'):
        """Build the encoder and attach the clustering layer to its output.

        Args:
            dims: Sequence of layer sizes forwarded to ``autoencoder``;
                ``dims[0]`` is stored as ``input_dim``.
            n_clusters: Number of clusters given to ``ClusteringLayer``.
            alpha: Stored on the instance; not used elsewhere in this class.
            init: Initializer forwarded to ``autoencoder``.
        """
        super(DEC, self).__init__()

        self.dims = dims
        self.input_dim = dims[0]
        self.n_stacks = len(self.dims) - 1

        self.n_clusters = n_clusters
        self.alpha = alpha
        self.encoder = autoencoder(self.dims, init=init)

        # prepare DEC model: encoder input -> clustering layer output
        clustering_layer = ClusteringLayer(self.n_clusters, name='clustering')(
            self.encoder.output)
        self.model = Model(inputs=self.encoder.input, outputs=clustering_layer)

    def load_weights(self, weights):
        """Load weights of the DEC model from the file path ``weights``."""
        self.model.load_weights(weights)

    def extract_features(self, x):
        """Return the encoder's output for ``x``."""
        return self.encoder.predict(x)

    def predict(self, x):
        """Return, per sample, the index of the largest clustering output."""
        q = self.model.predict(x, verbose=0)
        return q.argmax(1)

    @staticmethod
    def target_distribution(q):
        """Compute the auxiliary target distribution from ``q``.

        ``q`` is squared element-wise and divided by its column sums; each
        row of the result is then normalized so that it sums to one.
        """
        weight = q**2 / q.sum(0)
        return (weight.T / weight.sum(1)).T

    @staticmethod
    def _next_batch_index(index, batch_size, n_samples):
        """Return the mini-batch index that follows batch ``index``.

        Wraps around to 0 as soon as the following batch would start at or
        beyond ``n_samples``, so an empty slice is never selected (this also
        holds when ``n_samples`` is an exact multiple of ``batch_size``).
        """
        following = index + 1
        return following if following * batch_size < n_samples else 0

    def compile(self, optimizer='sgd', loss='kld'):
        """Compile the underlying model with the given optimizer and loss."""
        self.model.compile(optimizer=optimizer, loss=loss)

    def fit(self,
            x,
            y=None,
            maxiter=2e4,
            batch_size=256,
            tol=1e-3,
            update_interval=140,
            save_dir='./results/temp'):
        """Initialize cluster centers with k-means, then train batch by batch.

        Args:
            x: Input samples; indexed along the first axis.
            y: Optional reference labels. When given, ``metrics.acc``,
                ``metrics.nmi`` and ``metrics.ari`` are logged at every
                update.
            maxiter: Maximum number of training iterations (one batch each).
            batch_size: Number of samples per training batch.
            tol: Training stops when the fraction of samples whose predicted
                label changed between two consecutive updates is below this.
            update_interval: Number of iterations between two updates of the
                target distribution.
            save_dir: Directory receiving ``dec_log.csv`` and the weight
                files. It is not created here.

        Returns:
            The labels predicted at the most recent update (or the k-means
            labels if no update ran).
        """
        n_samples = x.shape[0]

        print('Update interval', update_interval)
        # At least one batch per epoch: avoids a zero interval (and a
        # modulo-by-zero below) when there are fewer samples than batch_size.
        save_interval = max(1, int(n_samples / batch_size)) * 5  # 5 epochs
        print('Save interval', save_interval)

        # Step 1: initialize cluster centers using k-means
        print('Initializing cluster centers with k-means.')
        kmeans = KMeans(n_clusters=self.n_clusters, n_init=20)
        y_pred = kmeans.fit_predict(self.encoder.predict(x))
        y_pred_last = np.copy(y_pred)
        self.model.get_layer(name='clustering').set_weights(
            [kmeans.cluster_centers_])

        # Step 2: deep clustering
        import csv

        loss = 0
        index = 0
        # Batches are taken in this fixed order; the array is not shuffled.
        index_array = np.arange(n_samples)

        # The context manager closes the log even if training raises.
        with open(save_dir + '/dec_log.csv', 'w') as logfile:
            logwriter = csv.DictWriter(
                logfile, fieldnames=['iter', 'acc', 'nmi', 'ari', 'loss'])
            logwriter.writeheader()

            for ite in range(int(maxiter)):
                if ite % update_interval == 0:
                    q = self.model.predict(x, verbose=0)
                    # update the auxiliary target distribution p
                    p = self.target_distribution(q)

                    # evaluate the clustering performance
                    y_pred = q.argmax(1)
                    if y is not None:
                        acc = np.round(metrics.acc(y, y_pred), 5)
                        nmi = np.round(metrics.nmi(y, y_pred), 5)
                        ari = np.round(metrics.ari(y, y_pred), 5)
                        loss = np.round(loss, 5)
                        logdict = dict(iter=ite,
                                       acc=acc,
                                       nmi=nmi,
                                       ari=ari,
                                       loss=loss)
                        logwriter.writerow(logdict)
                        print(
                            'Iter %d: acc = %.5f, nmi = %.5f, ari = %.5f' %
                            (ite, acc, nmi, ari), ' ; loss=', loss)

                    # check stop criterion
                    delta_label = np.sum(y_pred != y_pred_last).astype(
                        np.float32) / y_pred.shape[0]
                    y_pred_last = np.copy(y_pred)
                    if ite > 0 and delta_label < tol:
                        print('delta_label ', delta_label, '< tol ', tol)
                        print('Reached tolerance threshold. Stopping training.')
                        break

                # train on batch
                idx = index_array[index * batch_size:min(
                    (index + 1) * batch_size, n_samples)]
                loss = self.model.train_on_batch(x=x[idx], y=p[idx])
                index = self._next_batch_index(index, batch_size, n_samples)

                # save intermediate model
                if ite % save_interval == 0:
                    print('saving model to:',
                          save_dir + '/DEC_model_' + str(ite) + '.h5')
                    self.model.save_weights(save_dir + '/DEC_model_' +
                                            str(ite) + '.h5')

        # save the trained model
        print('saving model to:', save_dir + '/DEC_model_final.h5')
        self.model.save_weights(save_dir + '/DEC_model_final.h5')

        return y_pred
def _main(args):
    """Convert a Darknet ``.cfg``/``.weights`` pair into a Keras ``.h5`` file.

    The config is parsed section by section; a Keras layer is created for
    every supported section and, for convolutional sections, its parameters
    are read sequentially from the weights file.

    Args:
        args: Object exposing ``config_path``, ``weights_path``,
            ``output_path``, ``weights_only`` and ``plot_model``.

    Raises:
        AssertionError: If a path has an unexpected extension, or a
            ``shortcut``/``upsample`` section uses an unsupported setting.
        ValueError: On an unknown activation or unsupported section type.
    """
    config_path = os.path.expanduser(args.config_path)
    weights_path = os.path.expanduser(args.weights_path)
    assert config_path.endswith('.cfg'), '{} is not a .cfg file'.format(
        config_path)
    assert weights_path.endswith(
        '.weights'), '{} is not a .weights file'.format(weights_path)

    output_path = os.path.expanduser(args.output_path)
    assert output_path.endswith(
        '.h5'), 'output path {} is not a .h5 file'.format(output_path)
    output_root = os.path.splitext(output_path)[0]

    # Load weights and config.
    print('Loading weights.')
    # The context manager closes the weights file even if parsing fails.
    with open(weights_path, 'rb') as weights_file:
        major, minor, revision = np.ndarray(shape=(3, ),
                                            dtype='int32',
                                            buffer=weights_file.read(12))
        # The "seen" field is read as int64 or int32 depending on the
        # version numbers found in the header.
        if (major * 10 + minor) >= 2 and major < 1000 and minor < 1000:
            seen = np.ndarray(shape=(1, ),
                              dtype='int64',
                              buffer=weights_file.read(8))
        else:
            seen = np.ndarray(shape=(1, ),
                              dtype='int32',
                              buffer=weights_file.read(4))
        print('Weights Header: ', major, minor, revision, seen)

        print('Parsing Darknet config.')
        unique_config_file = unique_config_sections(config_path)
        cfg_parser = configparser.ConfigParser()
        cfg_parser.read_file(unique_config_file)

        print('Creating Keras model.')
        input_layer = Input(shape=(None, None, 3))
        prev_layer = input_layer
        all_layers = []

        weight_decay = float(cfg_parser['net_0']['decay']
                             ) if 'net_0' in cfg_parser.sections() else 5e-4
        count = 0  # number of 4-byte values consumed from the weights file
        out_index = []  # indices into all_layers used as model outputs
        for section in cfg_parser.sections():
            print('Parsing section {}'.format(section))
            if section.startswith('convolutional'):
                filters = int(cfg_parser[section]['filters'])
                size = int(cfg_parser[section]['size'])
                stride = int(cfg_parser[section]['stride'])
                pad = int(cfg_parser[section]['pad'])
                activation = cfg_parser[section]['activation']
                batch_normalize = 'batch_normalize' in cfg_parser[section]

                padding = 'same' if pad == 1 and stride == 1 else 'valid'

                # Setting weights.
                # Darknet serializes convolutional weights as:
                # [bias/beta, [gamma, mean, variance], conv_weights]
                prev_layer_shape = K.int_shape(prev_layer)

                weights_shape = (size, size, prev_layer_shape[-1], filters)
                darknet_w_shape = (filters, weights_shape[2], size, size)
                # np.prod replaces the np.product alias removed in NumPy 2.0.
                weights_size = np.prod(weights_shape)

                print('conv2d', 'bn' if batch_normalize else ' ', activation,
                      weights_shape)

                conv_bias = np.ndarray(shape=(filters, ),
                                       dtype='float32',
                                       buffer=weights_file.read(filters * 4))
                count += filters

                if batch_normalize:
                    bn_weights = np.ndarray(
                        shape=(3, filters),
                        dtype='float32',
                        buffer=weights_file.read(filters * 12))
                    count += 3 * filters

                    bn_weight_list = [
                        bn_weights[0],  # scale gamma
                        conv_bias,  # shift beta
                        bn_weights[1],  # running mean
                        bn_weights[2]  # running var
                    ]

                conv_weights = np.ndarray(
                    shape=darknet_w_shape,
                    dtype='float32',
                    buffer=weights_file.read(weights_size * 4))
                count += weights_size

                # DarkNet conv_weights are serialized Caffe-style:
                # (out_dim, in_dim, height, width)
                # We would like to set these to Tensorflow order:
                # (height, width, in_dim, out_dim)
                conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])
                # With batch normalization the bias is carried by the
                # BatchNormalization layer (as beta) instead of the Conv2D.
                conv_weights = [conv_weights] if batch_normalize else [
                    conv_weights, conv_bias
                ]

                # Handle activation: 'leaky' is added as a separate layer
                # below, so the Conv2D itself never gets an activation.
                act_fn = None
                if activation not in ('leaky', 'linear'):
                    raise ValueError(
                        'Unknown activation function `{}` in section {}'.
                        format(activation, section))

                # Create Conv2D layer
                if stride > 1:
                    # Darknet uses left and top padding instead of 'same' mode
                    prev_layer = ZeroPadding2D(((1, 0), (1, 0)))(prev_layer)
                conv_layer = (Conv2D(filters, (size, size),
                                     strides=(stride, stride),
                                     kernel_regularizer=l2(weight_decay),
                                     use_bias=not batch_normalize,
                                     weights=conv_weights,
                                     activation=act_fn,
                                     padding=padding))(prev_layer)

                if batch_normalize:
                    conv_layer = (BatchNormalization(
                        weights=bn_weight_list))(conv_layer)
                prev_layer = conv_layer

                if activation == 'linear':
                    all_layers.append(prev_layer)
                elif activation == 'leaky':
                    act_layer = LeakyReLU(alpha=0.1)(prev_layer)
                    prev_layer = act_layer
                    all_layers.append(act_layer)

            elif section.startswith('route'):
                ids = [
                    int(i) for i in cfg_parser[section]['layers'].split(',')
                ]
                layers = [all_layers[i] for i in ids]
                if len(layers) > 1:
                    print('Concatenating route layers:', layers)
                    concatenate_layer = Concatenate()(layers)
                    all_layers.append(concatenate_layer)
                    prev_layer = concatenate_layer
                else:
                    skip_layer = layers[0]  # only one layer to route
                    all_layers.append(skip_layer)
                    prev_layer = skip_layer

            elif section.startswith('maxpool'):
                size = int(cfg_parser[section]['size'])
                stride = int(cfg_parser[section]['stride'])
                all_layers.append(
                    MaxPooling2D(pool_size=(size, size),
                                 strides=(stride, stride),
                                 padding='same')(prev_layer))
                prev_layer = all_layers[-1]

            elif section.startswith('shortcut'):
                index = int(cfg_parser[section]['from'])
                activation = cfg_parser[section]['activation']
                assert activation == 'linear', 'Only linear activation supported.'
                all_layers.append(Add()([all_layers[index], prev_layer]))
                prev_layer = all_layers[-1]

            elif section.startswith('upsample'):
                stride = int(cfg_parser[section]['stride'])
                assert stride == 2, 'Only stride=2 supported.'
                all_layers.append(UpSampling2D(stride)(prev_layer))
                prev_layer = all_layers[-1]

            elif section.startswith('yolo'):
                # The layer preceding a yolo section becomes a model output;
                # a None placeholder keeps all_layers aligned with sections.
                out_index.append(len(all_layers) - 1)
                all_layers.append(None)
                prev_layer = all_layers[-1]

            elif section.startswith('net'):
                pass

            else:
                raise ValueError(
                    'Unsupported section header type: {}'.format(section))

        # Whatever is still unread was not consumed by any layer; count it
        # in whole 4-byte values so it is reported as an integer.
        remaining_weights = len(weights_file.read()) // 4

    # Create and save model.
    if not out_index:
        out_index.append(len(all_layers) - 1)
    model = Model(inputs=input_layer,
                  outputs=[all_layers[i] for i in out_index])
    # summary() prints by itself; wrapping it in print() only adds "None".
    model.summary()
    if args.weights_only:
        model.save_weights(output_path)
        print('Saved Keras weights to {}'.format(output_path))
    else:
        model.save(output_path)
        print('Saved Keras model to {}'.format(output_path))

    # Check to see if all weights have been read.
    print('Read {} of {} from Darknet weights.'.format(
        count, count + remaining_weights))
    if remaining_weights > 0:
        print('Warning: {} unused weights'.format(remaining_weights))

    if args.plot_model:
        plot(model, to_file='{}.png'.format(output_root), show_shapes=True)
        print('Saved model plot to {}.png'.format(output_root))