def _inspect_model(
    model: Model,
    generate_train: Iterable,
    generate_valid: Iterable,
    batch_size: int,
    training_steps: int,
    inspect_show_labels: bool,
    image_path: str,
) -> Model:
    """Collect statistics on model inference and training times.

    Arguments:
        model: the model to inspect
        generate_train: training data generator function
        generate_valid: validation data generator function
        batch_size: size of the mini-batches
        training_steps: number of optimization steps to take
        inspect_show_labels: if True, show layer labels on the architecture diagram
        image_path: file path of the architecture diagram

    Returns:
        The slightly optimized keras model
    """
    if image_path:
        _plot_dot_model_in_color(
            model_to_dot(model, show_shapes=inspect_show_labels, expand_nested=True),
            image_path, inspect_show_labels)

    # Time a short training run.
    t0 = time.time()
    _ = model.fit(generate_train, steps_per_epoch=training_steps, validation_steps=1,
                  validation_data=generate_valid)
    t1 = time.time()
    n = batch_size * training_steps
    train_speed = (t1 - t0) / n
    logging.info(
        f'Spent {(t1 - t0):0.2f} seconds training, samples trained on: {n}, '
        f'per-sample training speed: {train_speed:0.3f} seconds.'
    )

    # Time a short inference run.
    t0 = time.time()
    inference_steps = max(1, training_steps // 8)
    _ = model.predict(generate_valid, steps=inference_steps, verbose=1)
    t1 = time.time()
    n_inferred = batch_size * inference_steps
    inference_speed = (t1 - t0) / n_inferred
    logging.info(
        f'Spent {(t1 - t0):0.2f} seconds predicting, samples inferred: {n_inferred}, '
        f'per-sample inference speed: {inference_speed:0.4f} seconds.'
    )
    return model
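# A minimal, hypothetical usage sketch for _inspect_model. The toy data, model,
# batch size, and step count below are illustrative assumptions, not taken from
# the original code; image_path='' skips the graphviz diagram.
import numpy as np
import tensorflow as tf

x = np.random.rand(256, 32).astype('float32')
y = np.random.randint(0, 2, size=(256, 1))
train_data = tf.data.Dataset.from_tensor_slices((x, y)).batch(32).repeat()
valid_data = tf.data.Dataset.from_tensor_slices((x, y)).batch(32).repeat()

toy_model = tf.keras.Sequential(
    [tf.keras.layers.Dense(1, activation='sigmoid', input_shape=(32,))])
toy_model.compile(optimizer='adam', loss='binary_crossentropy')

toy_model = _inspect_model(toy_model, train_data, valid_data,
                           batch_size=32, training_steps=8,
                           inspect_show_labels=False, image_path='')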
def train(self, path, rowLimit, metricDict: dict = {}, distributedConfig=None):
    if distributedConfig is None:
        # Single-worker case: no distribution strategy. The save/cleanup helpers
        # below are assumed to accept None.
        strategy = None
        trainDataset, testDataset, trainSplit = self.prepareToTrain(distributedConfig, path, rowLimit)
    else:
        strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
        with strategy.scope():
            trainDataset, testDataset, trainSplit = self.prepareToTrain(distributedConfig, path, rowLimit)

    try:
        model_to_dot(self.model, show_shapes=True).write(path=self.modelStructurePath, prog='dot', format='png')
    except Exception:
        print('Could not plot model in dot format:', sys.exc_info()[0])

    self.model.summary()

    if distributedConfig is None:
        history = self.model.fit(trainDataset, validation_data=testDataset, epochs=self.epochs)
    else:
        history = self.model.fit(
            trainDataset,
            validation_data=testDataset,
            epochs=self.epochs,
            # steps_per_epoch must be an integer
            steps_per_epoch=int(len(trainDataset) / self.epochs / self.getNumberOfWorkers(distributedConfig)),
            validation_steps=self.validationSteps)

    self.model.save(self.getModelSaveLocation(strategy))
    self.plot(history, metricDict)

    print("Evaluating trained model...")
    _, val = train_test_split(trainSplit, test_size=0.2)
    valDataset = self.bootstrapDataset(val, shuffle=False)
    evaluatedMetric = list(self.model.evaluate(valDataset, steps=self.validationSteps))
    self.clearSlaveTempDir(strategy)
    return {'result': 'completed', 'metrics': evaluatedMetric}
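# For the multi-worker branch above, MultiWorkerMirroredStrategy reads the
# cluster layout from the TF_CONFIG environment variable. A minimal, hypothetical
# two-worker configuration (host names and port are placeholders; each worker
# sets its own task index):
import json
import os

os.environ["TF_CONFIG"] = json.dumps({
    "cluster": {"worker": ["host1:12345", "host2:12345"]},
    "task": {"type": "worker", "index": 0},
})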
data_input2_vali = np.expand_dims(images_vali[:, :, :, 1], axis=-1)
data_input3_vali = np.expand_dims(images_vali[:, :, :, 2], axis=-1)
data_input4_vali = np.expand_dims(images_vali[:, :, :, 3], axis=-1)
data_output_vali = labels_vali

model = model(data_input1, data_input2, data_input3, data_input4, data_output)
model.summary()
if not os.path.exists('model.png'):
    plot_model(model, to_file='model.png', show_shapes=True, show_layer_names=True)
SVG(model_to_dot(model).create(prog='dot', format='svg'))

#%%
# model = keras.models.Sequential()
# model.add(keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(width_image, width_image, 1), name='conv1'))
# model.add(keras.layers.MaxPooling2D((2, 2), name='maxpool1'))
# model.add(keras.layers.Conv2D(64, (3, 3), activation='relu', name='conv2'))
# model.add(keras.layers.MaxPooling2D((2, 2), name='maxpool2'))
# model.add(keras.layers.Conv2D(64, (3, 3), activation='relu', name='conv3'))
#
# """Let's display the architecture of our model so far."""
#
# model.summary()
#
# """Above, you can see that the output of every Conv2D and MaxPooling2D layer is a
# 3D tensor of shape (height, width, channels). The width and height dimensions tend
# to shrink as you go deeper in the network. The number of output channels for each
# Conv2D layer is controlled by the first argument (e.g., 32 or 64). Typically, as
# the width and height shrink, you can afford (computationally) to add more output
# channels in each Conv2D layer."""
def main():
    args = read_args()
    dataset, dev_dataset, test_dataset = load_dataset(args.dataset_dir, args.batch_size)
    nlabels = dataset[TARGET_COL].unique().shape[0]

    # It's important to always use the same one-hot length
    one_hot_columns = {
        one_hot_col: dataset[one_hot_col].max()
        for one_hot_col in ['Gender', 'Color1', 'Color2', 'Color3', 'Type']
    }
    embedded_columns = {
        embedded_col: dataset[embedded_col].max() + 1
        for embedded_col in ['Breed1', 'Breed2', 'State']
    }
    numeric_columns = ['Age', 'Fee']

    # TODO (optional) put these three types of columns in the same dictionary with "column types"
    X_train, y_train = process_features(dataset, one_hot_columns, numeric_columns, embedded_columns)
    direct_features_input_shape = (X_train['direct_features'].shape[1],)
    X_dev, y_dev = process_features(dev_dataset, one_hot_columns, numeric_columns, embedded_columns)
    X_test, y_test = process_features(test_dataset, one_hot_columns, numeric_columns, embedded_columns, test=True)

    # Create the tensorflow Dataset
    batch_size = 32
    # TODO shuffle the train dataset! (see the sketch after this function)
    train_ds = tf.data.Dataset.from_tensor_slices(
        (X_train['direct_features'], y_train)).batch(batch_size)
    dev_ds = tf.data.Dataset.from_tensor_slices(
        (X_dev['direct_features'], y_dev)).batch(batch_size)
    test_ds = tf.data.Dataset.from_tensor_slices(
        X_test['direct_features']).batch(batch_size)

    # TODO: Build the Keras model
    model = Sequential([
        Dense(10, input_shape=direct_features_input_shape, activation='relu',
              kernel_regularizer=regularizers.l2(0.001)),
        Dropout(0.3),
        Dense(5, activation='softmax')
    ])
    print(model.summary())
    SVG(model_to_dot(model, dpi=72).create(prog='dot', format='svg'))

    model.compile(loss='categorical_crossentropy',
                  optimizer=tf.keras.optimizers.Adam(learning_rate=0.005, amsgrad=True),
                  metrics=['accuracy'])

    # TODO: Fit the model
    model.fit(train_ds, epochs=args.epochs, validation_data=dev_ds, verbose=1)

    # Prediction
    predictions = model.predict(test_ds)
    print(predictions)

    mlflow.set_experiment(args.experiment_name)
    '''
    with mlflow.start_run(nested=True):
        # Log model hyperparameters first
        mlflow.log_param('hidden_layer_size', args.hidden_layer_sizes)
        mlflow.log_param('embedded_columns', embedded_columns)
        mlflow.log_param('one_hot_columns', one_hot_columns)
        # mlflow.log_param('numerical_columns', numerical_columns)  # Not using these yet
        mlflow.log_param('epochs', args.epochs)

        # Train
        # history = model.fit(train_ds, epochs=args.epochs)
        # TODO: analyze history to see if model converges/overfits

        # TODO: Evaluate the model, calculating the metrics.
        # Option 1: Use the model.evaluate() method. For this, the model must be
        # already compiled with the metrics.
        # performance = model.evaluate(X_test, y_test)
        loss, accuracy = 0, 0
        # loss, accuracy = model.evaluate(dev_ds)
        print("*** Dev loss: {} - accuracy: {}".format(loss, accuracy))
        mlflow.log_metric('loss', loss)
        mlflow.log_metric('accuracy', accuracy)

        # Option 2: Use the model.predict() method and calculate the metrics using
        # sklearn. We recommend this, because you can store the predictions if
        # you need more analysis later. Also, if you calculate the metrics on a
        # notebook, then you can compare multiple classifiers.
        predictions = 'No prediction yet'
        # predictions = model.predict(test_ds)

        # TODO: Convert predictions to classes
        # TODO: Save the results for submission
        # ...
        print(predictions)
    '''
    print('All operations completed')
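# One way to address the "shuffle the train dataset" TODO inside main():
# apply tf.data's shuffle transformation before batching. A self-contained
# sketch with toy data (the buffer size of 1000 is an arbitrary assumption).
import numpy as np
import tensorflow as tf

features = np.random.rand(100, 8).astype('float32')
targets = np.random.randint(0, 5, size=(100,))
shuffled_train_ds = (tf.data.Dataset.from_tensor_slices((features, targets))
                     .shuffle(buffer_size=1000)
                     .batch(32))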
# Define Model
model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=outputs, name='training_model')

# Compile
model.compile(optimizer=Adam(learning_rate=learning_late), loss=sparse_categorical_crossentropy)

# Display Model Summary
from IPython.display import SVG
from tensorflow.keras.utils import model_to_dot

# You need to install graphviz! (sudo apt install graphviz or brew install graphviz)
SVG(model_to_dot(model, show_shapes=True, dpi=65).create(prog='dot', format='svg'))

# In[16]:

from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(keywords, sentences, test_size=0.33)

# In[17]:

# Teacher forcing: the decoder input is the target sequence shifted by one step.
history = model.fit([x_train, y_train[:, :-1]],
                    y_train[:, 1:],
                    epochs=epochs,
                    batch_size=batch_size,
def main(settings: dict, log_dst: PathLike):
    source_features = Path(settings["use_csv"])
    with open(project_dir / "setting.yml") as f:
        config = yaml.safe_load(f)
    img_dst = Path(config["destination"]) / "img"
    model_dst = Path(config["log_dst"]) / ".tmp_models"
    logdst = logdir(log_dst)

    df = pd.read_csv(source_features)
    n_data = len(df)
    labels = to_categorical(df["class"].values)
    weights = calc_loss_weight.calc_class_weight(np.argmax(labels, axis=1),
                                                 settings["use_weight_method"])
    images = load_images(map(lambda x: img_dst / f"{x}.png", df["accession"]))
    seq_lens = df["seq_len"].values.reshape([-1, 1])
    at_gc_rates = df["at_gc_rate"].values.reshape([-1, 1])
    # Per-base composition; the last column was df["T"] in the original, which is
    # almost certainly a typo for df["C"].
    atgc = np.stack([df["A"].values, df["T"].values, df["G"].values, df["C"].values]).T

    if settings["seq_len"]["enable"]:
        if settings["seq_len"]["rescale"]:
            if settings["seq_len"]["way"] == "standardization":
                scaler = StandardScaler()
                # assign the result; fit_transform does not modify its input in place
                seq_lens = scaler.fit_transform(seq_lens)
                settings["seq_len"]["mean"] = float(scaler.mean_)
                settings["seq_len"]["var"] = float(scaler.var_)
            elif settings["seq_len"]["way"] == "normalization":
                seq_lens = minmax_scale(seq_lens)
                settings["seq_len"]["max"] = int(max(seq_lens))
                settings["seq_len"]["min"] = int(min(seq_lens))
    else:
        seq_lens = np.zeros(seq_lens.shape)

    if settings["atgc_rate"]["enable"]:
        if settings["atgc_rate"]["rescale"]:
            if settings["atgc_rate"]["way"] == "standardization":
                scaler = StandardScaler()
                at_gc_rates = scaler.fit_transform(at_gc_rates)
                settings["atgc_rate"]["mean"] = float(scaler.mean_)
                settings["atgc_rate"]["var"] = float(scaler.var_)
            elif settings["atgc_rate"]["way"] == "normalization":
                at_gc_rates = minmax_scale(at_gc_rates)
                settings["atgc_rate"]["max"] = float(max(at_gc_rates))
                settings["atgc_rate"]["min"] = float(min(at_gc_rates))
    else:
        at_gc_rates = np.zeros(at_gc_rates.shape)

    if settings["atgc"]["enable"]:
        if settings["atgc"]["rescale"]:
            if settings["atgc"]["way"] == "standardization":
                scaler = StandardScaler()
                atgc = scaler.fit_transform(atgc)
                settings["atgc"]["mean"] = float(scaler.mean_)
                settings["atgc"]["var"] = float(scaler.var_)
            elif settings["atgc"]["way"] == "normalization":
                atgc = minmax_scale(atgc)
                settings["atgc"]["max"] = float(max(atgc))
                settings["atgc"]["min"] = float(min(atgc))
    else:
        atgc = np.zeros(atgc.shape)

    n_class = len(labels[0])
    skf = StratifiedKFold(settings["KFold"])
    raw_labels = df["class"].values
    study_log = {"acc": [], "f1": []}

    for i, (train_index, test_index) in enumerate(skf.split(images, df["raw_class"].values), 1):
        trial_dst = logdst / f"trial_{i}"
        trial_dst.mkdir()

        ml_model = model.construct_model(n_class)
        model.show_model(ml_model, model_dst / "model.pdf")
        model_to_dot(ml_model, show_shapes=True).write(str(trial_dst / "model.svg"), format="svg")

        train_images, test_images = images[train_index], images[test_index]
        train_labels, test_labels = labels[train_index], labels[test_index]
        train_seq_lens, test_seq_lens = seq_lens[train_index], seq_lens[test_index]
        train_at_gc_rates, test_at_gc_rates = at_gc_rates[train_index], at_gc_rates[test_index]
        train_atgc, test_atgc = atgc[train_index], atgc[test_index]
        test_raw_labels = raw_labels[test_index]

        csv_log = CSVLogger(trial_dst / "logger.csv")
        tensor_board = TensorBoard(log_dir=trial_dst,
                                   write_graph=False,
                                   write_images=True,
                                   histogram_freq=1)
        f1cb = callbacks.F1Callback_(
            ml_model, [test_images, test_seq_lens, test_atgc, test_at_gc_rates], test_labels)
        history = History()
        checkpoint = ModelCheckpoint(Path(config["log_dst"]) / ".tmp_models" / "model_{epoch:02d}.hdf5",
                                     monitor="val_loss",
                                     save_weights_only=True)

        if n_class == 2:
            ml_model.compile(loss=[focal_loss.binary_focal_loss(alpha=.25, gamma=2)],
                             metrics=["accuracy"],
                             optimizer=Adam())
        else:
            ml_model.compile(loss=[
                focal_loss.categorical_focal_loss(alpha=[[.25 for _ in range(n_class)]], gamma=2)
            ],
                             metrics=["accuracy"],
                             optimizer=Adam())
        # if n_class == 2:
        #     ml_model.compile(loss="binary_crossentropy",
        #                      metrics=["accuracy"],
        #                      optimizer=Adam())
        # else:
        #     ml_model.compile(loss="categorical_crossentropy",
        #                      metrics=["accuracy"],
        #                      optimizer=Adam())

        history = ml_model.fit(
            [train_images, train_seq_lens, train_atgc, train_at_gc_rates],
            train_labels,
            validation_data=([test_images, test_seq_lens, test_atgc, test_at_gc_rates],
                             test_labels),
            epochs=100,
            batch_size=settings["batch"],
            callbacks=[csv_log, tensor_board, f1cb, checkpoint],
            class_weight=weights)
        # study_log[f"trial_{i}"] = history.history.copy()
        # TODO: also plot the F1 score
        # f1s = f1cb.f1s
        history.history["f1score"] = f1cb.f1s
        visualize.visualize_history(history.history, "study_log", trial_dst)

        # load the model checkpoint with the best F1 score
        ml_model.load_weights(logdst.parent / ".tmp_models" / f"model_{np.argmax(f1cb.f1s) + 1:02}.hdf5")
        loss, acc, *_ = ml_model.evaluate(
            [test_images, test_seq_lens, test_atgc, test_at_gc_rates], test_labels, verbose=1)
        pred_labels = np.argmax(ml_model.predict(
            [test_images, test_seq_lens, test_atgc, test_at_gc_rates]), axis=1)
        test_labels = np.argmax(test_labels, axis=1)

        settings["results"].append({
            f"trial_{i}": {
                "Accuracy": float(acc),
                "F1 score": float(max(f1cb.f1s))
                # "micro_f1": float(f1_score(test_labels, pred_labels, average="micro"))
            }
        })
        study_log["acc"].append(float(acc))
        study_log["f1"].append(float(max(f1cb.f1s)))

        visualize.plot_cmx(test_labels,
                           pred_labels,
                           get_sorted_class(df["class"].values),
                           title="cmx",
                           dst=trial_dst)

        with open(trial_dst / "report.txt", "w") as f:
            print(classification_report(test_labels,
                                        pred_labels,
                                        target_names=get_sorted_class(raw_labels),
                                        zero_division=0),
                  file=f)

    with open(logdst / "weight.json", "w") as f:
        json.dump(
            {str(k): weights[i] for i, k in enumerate(get_sorted_class(raw_labels))},
            f, indent=2)

    settings["average"] = {
        "Accuracy": sum(study_log["acc"]) / len(study_log["acc"]),
        "F1 score": sum(study_log["f1"]) / len(study_log["f1"])
    }
    with open(logdst / "status.json", "w") as f:
        json.dump(settings, f, indent=2, ensure_ascii=False)

    visualize.visualize_all_cmxs(
        settings["results"],
        [logdst / f"trial_{i}" / "cmx.png" for i in range(1, settings["KFold"] + 1)],
        logdst)
D.add(Conv2D(512, (5, 5), padding='same'))
D.add(LeakyReLU(0.2))
D.add(MaxPooling2D(pool_size=(2, 2), strides=2))
D.add(Dropout(0.3))
D.add(Flatten())
D.add(Dense(256))
D.add(LeakyReLU(0.2))
D.add(Dropout(0.3))
D.add(Dense(1, activation='sigmoid'))

# `lr` is the legacy argument name; recent Keras versions use `learning_rate`.
adam = Adam(lr=0.0002, beta_1=0.5)
D.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])

# %matplotlib inline
SVG(model_to_dot(D, show_shapes=True).create(prog='dot', format='svg'))

# Commented out IPython magic to ensure Python compatibility.
G = Sequential()
G.add(Dense(512, input_dim=100))
G.add(LeakyReLU(0.2))
G.add(Dense(128 * 7 * 7))
G.add(LeakyReLU(0.2))
G.add(BatchNormalization())
# Note: (128, 7, 7) assumes channels-first data; with Keras' default channels_last
# image format this would need to be Reshape((7, 7, 128)).
G.add(Reshape((128, 7, 7), input_shape=(128 * 7 * 7,)))
G.add(UpSampling2D(size=(2, 2)))
G.add(Conv2D(64, (5, 5), padding='same', activation='tanh'))
G.add(UpSampling2D(size=(2, 2)))
G.add(Conv2D(1, (5, 5), padding='same', activation='tanh'))

adam = Adam(lr=0.0002, beta_1=0.5)
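# The excerpt stops after defining D and G separately. A minimal sketch (not taken
# from the original source) of the usual next step: freeze the discriminator and
# stack G -> D into a combined model used to train the generator.
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

D.trainable = False                      # only G's weights update through the combined model
GAN = Sequential([G, D])
GAN.compile(loss='binary_crossentropy',
            optimizer=Adam(learning_rate=0.0002, beta_1=0.5))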