def get_head_concat_model(DATA):
    # shape: (sequence length,)
    # first input
    input_creative_id = Input(shape=(None,), name='creative_id')
    x1 = Embedding(input_dim=NUM_creative_id + 1,
                   output_dim=128,
                   weights=[DATA['creative_id_emb']],
                   trainable=args.not_train_embedding,
                   input_length=LEN_creative_id,
                   mask_zero=True)(input_creative_id)

    input_ad_id = Input(shape=(None,), name='ad_id')
    x2 = Embedding(input_dim=NUM_ad_id + 1,
                   output_dim=128,
                   weights=[DATA['ad_id_emb']],
                   trainable=args.not_train_embedding,
                   input_length=LEN_ad_id,
                   mask_zero=True)(input_ad_id)

    input_product_id = Input(shape=(None,), name='product_id')
    x3 = Embedding(input_dim=NUM_product_id + 1,
                   output_dim=128,
                   weights=[DATA['product_id_emb']],
                   trainable=args.not_train_embedding,
                   input_length=LEN_product_id,
                   mask_zero=True)(input_product_id)

    input_advertiser_id = Input(shape=(None,), name='advertiser_id')
    x4 = Embedding(input_dim=NUM_advertiser_id + 1,
                   output_dim=128,
                   weights=[DATA['advertiser_id_emb']],
                   trainable=args.not_train_embedding,
                   input_length=LEN_advertiser_id,
                   mask_zero=True)(input_advertiser_id)

    input_industry = Input(shape=(None,), name='industry')
    x5 = Embedding(input_dim=NUM_industry + 1,
                   output_dim=128,
                   weights=[DATA['industry_emb']],
                   trainable=args.not_train_embedding,
                   input_length=LEN_industry,
                   mask_zero=True)(input_industry)

    input_product_category = Input(shape=(None,), name='product_category')
    x6 = Embedding(input_dim=NUM_product_category + 1,
                   output_dim=128,
                   weights=[DATA['product_category_emb']],
                   trainable=args.not_train_embedding,
                   input_length=LEN_product_category,
                   mask_zero=True)(input_product_category)

    x = Concatenate(axis=2)([x1, x2, x3, x4, x5, x6])
    for _ in range(args.num_lstm):
        x = Bidirectional(LSTM(128, return_sequences=True))(x)
    x = layers.GlobalMaxPooling1D()(x)
    # x = layers.GlobalAveragePooling1D()(x)

    output_gender = Dense(2, activation='softmax', name='gender')(x)
    output_age = Dense(10, activation='softmax', name='age')(x)

    model = Model([
        input_creative_id, input_ad_id, input_product_id, input_advertiser_id,
        input_industry, input_product_category
    ], [output_gender, output_age])

    model.compile(
        optimizer=optimizers.Adam(1e-4),
        loss={'gender': losses.CategoricalCrossentropy(from_logits=False),
              'age': losses.CategoricalCrossentropy(from_logits=False)},
        loss_weights=[0.5, 0.5],
        metrics=['accuracy'])
    model.summary()
    return model
def infer_loss(self):
    if not self.num_classes:
        return None
    if self.num_classes == 2 or self.multi_label:
        return losses.BinaryCrossentropy()
    return losses.CategoricalCrossentropy()
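# --- Editor's illustration (not part of the original snippets): a minimal, hedged
# sketch of how an infer_loss-style helper can feed compile(). The `ClassifierHead`
# class and its attributes are hypothetical stand-ins for whatever object defines
# num_classes and multi_label in the original code base.
from tensorflow.keras import layers, losses, models


class ClassifierHead:
    def __init__(self, num_classes, multi_label=False):
        self.num_classes = num_classes
        self.multi_label = multi_label

    def infer_loss(self):
        if not self.num_classes:
            return None
        if self.num_classes == 2 or self.multi_label:
            return losses.BinaryCrossentropy()
        return losses.CategoricalCrossentropy()


head = ClassifierHead(num_classes=10)
model = models.Sequential([layers.Dense(10, activation="softmax", input_shape=(4,))])
model.compile(optimizer="adam", loss=head.infer_loss(), metrics=["accuracy"])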
def main(train_dir):
    # Hyper-parameters
    train_epochs = 200
    batch_size = 2
    learning_rate = 1e-5
    beta_1 = 0.9

    # Model folder and names
    model_name = "{}px_{}py_{}e_{}b_{}lr_{}b1".format(HEIGHT, WIDTH, train_epochs,
                                                      batch_size, learning_rate, beta_1)
    model_file_name = "{}.h5".format(model_name)
    model_dir = os.path.join(train_dir, model_name)

    # Getting filenames from the dataset
    image_names, segmentation_names = image_filenames('data')

    # Divide into train and validation sets.
    len_data = len(image_names)
    train_start_idx, train_end_idx = (0, len_data // 100 * 80)
    val_start_idx, val_end_idx = (len_data // 100 * 80, len_data - 1)
    preprocess_train = preprocess
    preprocess_val = preprocess

    # Get image tensors from the filenames
    train_set = dataset_from_filenames(
        image_names[train_start_idx:train_end_idx],
        segmentation_names[train_start_idx:train_end_idx],
        preprocess=preprocess_train,
        batch_size=batch_size)

    # Get the validation tensors
    val_set = dataset_from_filenames(
        image_names[val_start_idx:val_end_idx],
        segmentation_names[val_start_idx:val_end_idx],
        batch_size=batch_size,
        preprocess=preprocess_val,
        shuffle=False)

    model = unet.unet((HEIGHT, WIDTH, 3), SEGMENTATION_CLASSES)
    loss_fn = losses.CategoricalCrossentropy()
    optimizer = optimizers.Adam(lr=learning_rate, beta_1=beta_1)

    print("Summaries are written to '%s'." % model_dir)
    writer = tf.summary.create_file_writer(model_dir, flush_millis=3000)
    summary_interval = 10

    train_loss = metrics.Mean()
    train_iou = metrics.MeanIoU(num_classes=SEGMENTATION_CLASSES)
    train_precision = metrics.Precision()
    train_recall = metrics.Recall()
    train_accuracy = metrics.CategoricalAccuracy()

    val_loss = metrics.Mean()
    val_iou = metrics.MeanIoU(num_classes=SEGMENTATION_CLASSES)
    val_precision = metrics.Precision()
    val_recall = metrics.Recall()
    val_accuracy = metrics.CategoricalAccuracy()

    step = 0
    start_training = start = time.time()
    for epoch in range(train_epochs):
        print("Training epoch: %d" % epoch)
        for image, y in train_set:
            with tf.GradientTape() as tape:
                y_pred = model(image)
                loss = loss_fn(y, y_pred)
            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))

            # Update metrics and step
            train_loss.update_state(loss)
            train_iou.update_state(y, y_pred)
            train_precision.update_state(y, y_pred)
            train_recall.update_state(y, y_pred)
            train_accuracy.update_state(y, y_pred)
            step += 1

            # Probability threshold used to binarize predictions for visualization
            activation = 1 / SEGMENTATION_CLASSES
            if step % summary_interval == 0:
                duration = time.time() - start
                print("step %d. sec/batch: %g. Train loss: %g" %
                      (step, duration / summary_interval, train_loss.result().numpy()))

                # Write summaries to TensorBoard
                with writer.as_default():
                    tf.summary.scalar("train_loss", train_loss.result(), step=step)
                    tf.summary.scalar("train_iou", train_iou.result(), step=step)
                    tf.summary.scalar("train_precision", train_precision.result(), step=step)
                    tf.summary.scalar("train_recall", train_recall.result(), step=step)
                    tf.summary.scalar("train_accuracy", train_accuracy.result(), step=step)
                    vis = vis_mask(image, y_pred >= activation)
                    tf.summary.image("train_image", vis, step=step)

                # Reset metrics and time
                train_loss.reset_states()
                train_iou.reset_states()
                train_precision.reset_states()
                train_recall.reset_states()
                train_accuracy.reset_states()
                start = time.time()

        # Do validation after each epoch
        for i, (image, y) in enumerate(val_set):
            y_pred = model(image)
            loss = loss_fn(y, y_pred)
            val_loss.update_state(loss)
            val_iou.update_state(y, y_pred)
            val_precision.update_state(y, y_pred)
            val_recall.update_state(y, y_pred)
            val_accuracy.update_state(y, y_pred)
            with writer.as_default():
                vis = vis_mask(image, y_pred >= activation)
                tf.summary.image("val_image_batch_%d" % i, vis, step=step,
                                 max_outputs=batch_size)

        with writer.as_default():
            tf.summary.scalar("val_loss", val_loss.result(), step=step)
            tf.summary.scalar("val_iou", val_iou.result(), step=step)
            tf.summary.scalar("val_precision", val_precision.result(), step=step)
            tf.summary.scalar("val_recall", val_recall.result(), step=step)
            tf.summary.scalar("val_accuracy", val_accuracy.result(), step=step)

        val_loss.reset_states()
        val_iou.reset_states()
        val_precision.reset_states()
        val_recall.reset_states()
        val_accuracy.reset_states()

    print("Finished training %d epochs in %g minutes." %
          (train_epochs, (time.time() - start_training) / 60))

    # save a model which we can later load by tf.keras.models.load_model(model_path)
    model_path = os.path.join(model_dir, model_file_name)
    print("Saving model to '%s'." % model_path)
    model.save(model_path)
    model.summary()
class CustomLoss(losses_module.Loss): pass class CustomMetric(metrics_module.AUC): pass @pytest.mark.parametrize( "obj", [ "categorical_crossentropy", "CategoricalCrossentropy", losses_module.categorical_crossentropy, losses_module.CategoricalCrossentropy, losses_module.CategoricalCrossentropy(), ], ) def test_loss_invariance(obj): """Test to make sure loss_name returns same string no matter which object is passed (str, function, class, type)""" assert loss_name(obj) == "categorical_crossentropy" @pytest.mark.parametrize("obj", [CustomLoss, CustomLoss()]) def test_custom_loss(obj): assert loss_name(obj) == "custom_loss" @pytest.mark.parametrize( "obj",
'''
mnist = datasets.mnist
(x_train, t_train), (x_test, t_test) = mnist.load_data()
x_train = (x_train.reshape(-1, 784) / 255).astype(np.float32)
x_test = (x_test.reshape(-1, 784) / 255).astype(np.float32)
t_train = np.eye(10)[t_train].astype(np.float32)
t_test = np.eye(10)[t_test].astype(np.float32)

'''
2. Build the model
'''
model = DNN(200, 10)

'''
3. Train the model
'''
criterion = losses.CategoricalCrossentropy()
optimizer = optimizers.SGD(learning_rate=0.01)
train_loss = metrics.Mean()
train_acc = metrics.CategoricalAccuracy()


def compute_loss(t, y):
    return criterion(t, y)


def train_step(x, t):
    with tf.GradientTape() as tape:
        preds = model(x)
        loss = compute_loss(t, preds)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    train_loss(loss)
    train_acc(t, preds)
def __init__(self):
    super().__init__(
        loss=losses.CategoricalCrossentropy(),
        optimizer=optimizers.Adam(0.001),
        # Note: metrics.Accuracy() compares raw values element-wise; with one-hot
        # targets and softmax outputs, metrics.CategoricalAccuracy() is usually intended.
        metrics=[metrics.Accuracy()],
    )
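# --- Editor's note (assumption, not from the source): with one-hot targets and
# softmax outputs, metrics.Accuracy() checks element-wise equality of raw values
# and usually reports close to 0, whereas metrics.CategoricalAccuracy() compares
# argmax indices. A quick check:
import tensorflow as tf
from tensorflow.keras import metrics

y_true = tf.constant([[0., 1., 0.], [1., 0., 0.]])
y_pred = tf.constant([[0.1, 0.8, 0.1], [0.7, 0.2, 0.1]])

plain_acc = metrics.Accuracy()
plain_acc.update_state(y_true, y_pred)
cat_acc = metrics.CategoricalAccuracy()
cat_acc.update_state(y_true, y_pred)
print(plain_acc.result().numpy(), cat_acc.result().numpy())  # 0.0 vs 1.0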
class single_in_single_out(tf.keras.Model):
    # Custom (subclassed) network
    def __init__(self, number_classes=10):
        super(single_in_single_out, self).__init__(name="my_model")
        self.number_classes = number_classes
        self.dense_1 = layers.Dense(64, activation="relu")
        self.dense_2 = layers.Dense(number_classes, activation="softmax")

    def call(self, inputs):
        x = self.dense_1(inputs)
        x = self.dense_2(x)
        return x


model = single_in_single_out(number_classes=10)
loss_object = losses.CategoricalCrossentropy()
optimizer = optimizers.SGD(1e-3)

data = random_sample((1000, 64))
labels = random_sample((1000, 10))
batch_size = 64
# Build a dataset object pairing each sample with its label
train_dataset = tf.data.Dataset.from_tensor_slices((data, labels))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

epochs = 5
for epoch in range(epochs):
    print("Start of epoch %d" % (epoch, ))
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
def __init__(self, model_name, klass_name, embedding_matrix, embedding_size=EMBEDDING_SIZE, input_length=MAX_DOCUMENT_LENGTH): self.klass_name = klass_name self.model = models.Sequential(name=f'{model_name}-model') self.model.add( layers.Embedding( embedding_matrix.shape[0], embedding_size, input_length=input_length, embeddings_initializer=initializers.Constant(embedding_matrix), trainable=False)) # model.add(layers.Embedding(len(tokenizer.word_index)+1, embedding_size, input_length=MAX_DOCUMENT_LENGTH)) # for trainable embedding layer self.model.add(layers.Dropout(0.1)) self.model.add( layers.Convolution1D( 16, kernel_size=4, activation='relu', strides=1, padding='same', kernel_constraint=constraints.MaxNorm(max_value=3))) self.model.add(layers.Dropout(0.5)) self.model.add( layers.Convolution1D( 12, kernel_size=8, activation='relu', strides=2, padding='same', kernel_constraint=constraints.MaxNorm(max_value=3))) self.model.add(layers.Dropout(0.5)) self.model.add( layers.Convolution1D( 8, kernel_size=16, activation='relu', strides=2, padding='same', kernel_constraint=constraints.MaxNorm(max_value=3))) self.model.add(layers.Dropout(0.5)) self.model.add(layers.Flatten()) self.model.add( layers.Dense(128, activation='relu', kernel_constraint=constraints.MaxNorm(max_value=3))) self.model.add(layers.Dropout(0.5)) self.model.add( layers.Dense(64, activation='relu', kernel_constraint=constraints.MaxNorm(max_value=3))) self.model.add(layers.Dropout(0.5)) self.model.add( layers.Dense(2, activation='softmax', kernel_constraint=constraints.MaxNorm(max_value=3))) self.model.compile( optimizer=optimizers.Adam(), #learning_rate=0.001), loss=losses.CategoricalCrossentropy(from_logits=False), metrics=[ metrics.CategoricalAccuracy(), metrics.Recall(class_id=0), metrics.Precision(class_id=0) ])
img = cv2.imread("PokemonDataset/" + row[3], cv2.IMREAD_GRAYSCALE) img = cv2.resize(img, (32, 32)) x_train_l.append(img.reshape((32, 32, 1))) y_new = np.zeros(493) y_new[row[4] - 1] = 1 y_train_l.append(y_new) x_train = np.array(x_train_l) y_train = np.array(y_train_l) model = models.Sequential([ layers.Conv2D(256, (3, 3), input_shape=(32, 32, 1), padding="same", activation="relu"), layers.MaxPool2D((2, 2), padding="same"), layers.Conv2D(128, (3, 3), padding="same", activation="relu"), layers.MaxPool2D((2, 2), padding="same"), layers.Conv2D(64, (3, 3), padding="same", activation="relu"), layers.MaxPool2D((2, 2), padding="same"), layers.Conv2D(32, (3, 3), padding="same", activation="relu"), layers.Flatten(), layers.Dense(200), layers.Dense(493, activation="softmax") ]) model.compile(loss=losses.CategoricalCrossentropy(), metrics=["acc"]) history = model.fit(np.divide(x_train, 255), y_train, epochs=50) model.save("best_classifier_493.h5", save_format="h5")
x1 = layers.GlobalMaxPooling2D()(x1)
x2 = layers.Conv1D(3, 3)(timeseries_input)
x2 = layers.GlobalMaxPooling1D()(x2)
x = layers.concatenate([x1, x2])
score_output = layers.Dense(1, name="score_output")(x)
class_output = layers.Dense(5, name="class_output")(x)

# Build the model (object)
model = tf.keras.Model(inputs=[image_input, timeseries_input],
                       outputs=[score_output, class_output])

loss_score_object = losses.MeanSquaredError()
loss_class_object = losses.CategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam()

# Build the data
img_data = random_sample(size=(100, 32, 32, 3))
ts_data = random_sample(size=(100, 20, 10))
score_targets = random_sample(size=(100, 1))
class_targets = random_sample(size=(100, 5))

# One parameter-update step using GradientTape
with tf.GradientTape() as tape:
    [score_predict, class_predict] = model({
        "img_input": img_data,
        "ts_input": ts_data
def main():
    dataset_path = './dataset/'
    train_dir = os.path.join(dataset_path, 'train')
    val_dir = os.path.join(dataset_path, 'validation')
    weights_path = "./model_weights/DenseNet.h5"
    width = height = 224
    channel = 3
    batch_size = 32
    num_classes = 5
    epochs = 20
    lr = 0.0003
    growth_rate = 12
    reduction = 0.5
    is_train = False

    if is_train:
        dropout_rate = 0.2
    else:
        dropout_rate = None

    # Use GPU 0; set to -1 to run without a GPU
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    # Let the GPU allocate memory on demand instead of reserving all of it
    gpus = tf.config.experimental.list_physical_devices("GPU")
    if gpus:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)

    # Load the data
    train_image, train_label = read_data(train_dir)
    val_image, val_label = read_data(val_dir)
    train_step = len(train_label) // batch_size
    val_step = len(val_label) // batch_size
    train_dataset = make_datasets(train_image, train_label, batch_size, mode='train')
    val_dataset = make_datasets(val_image, val_label, batch_size, mode='validation')

    # Build the model
    model = DenseNet121(height, width, channel, num_classes,
                        growth_rate=growth_rate,
                        reduction=reduction,
                        dropout_rate=dropout_rate)
    model.compile(loss=losses.CategoricalCrossentropy(from_logits=False),
                  optimizer=optimizers.Adam(learning_rate=lr),
                  metrics=["accuracy"])

    if is_train:
        # Train the model
        model_train(model, train_dataset, val_dataset, epochs, train_step, val_step,
                    weights_path)
    else:
        # Run prediction
        model_predict(model, weights_path, height, width)
numberOfEpochs = int(input("Enter the number of epochs: "))
epochs.append(numberOfEpochs)
batchSize = int(input("Enter the batch size: "))
batches.append(batchSize)
learningRate = float(input("Enter the learning rate: "))
learning_rates.append(learningRate)
print(hidden_units)
print(epochs)
print(batches)
print(learning_rates)

# construction of the classification model
classification = build_model(model_file, fc_nodes)
classification.compile(loss=losses.CategoricalCrossentropy(),
                       optimizer=RMSprop(learning_rate=learningRate),
                       metrics=[metrics.CategoricalAccuracy('accuracy')])
# classification.compile(loss=losses.CategoricalCrossentropy(), optimizer='adam',
#                        metrics=[metrics.CategoricalAccuracy('accuracy')])
FC_train = classification.fit(training_set,
                              training_labels,
                              validation_split=0.1,
                              batch_size=batchSize,
                              epochs=numberOfEpochs,
                              verbose=1)
print("The training of the fully connected layer with", fc_nodes, "nodes is finished")
print("It's time to train the whole model now")

# the encoder layers also become trainable
for layer in classification.layers:
    layer.trainable = True
def build_network():
    # First create a list containing the convolutional layers
    conv_layers = [
        # Conv-Conv-Pooling unit 1
        # 64 3x3 kernels, output the same size as the input
        layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        # Halve height and width
        layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),

        # Conv-Conv-Pooling unit 2: 128 output channels, height and width halved
        layers.Conv2D(128, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.Conv2D(128, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),

        # Conv-Conv-Pooling unit 3: 256 output channels, height and width halved
        layers.Conv2D(256, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.Conv2D(256, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),

        # Conv-Conv-Pooling unit 4: 512 output channels, height and width halved
        layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),

        # Conv-Conv-Pooling unit 5: 512 output channels, height and width halved
        layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same')
    ]

    fc_layers = [
        layers.Flatten(),
        layers.Dense(256, activation=tf.nn.relu),
        layers.Dense(128, activation=tf.nn.relu),
        layers.Dense(10, activation=None),
    ]

    conv_layers.extend(fc_layers)
    network = Sequential(conv_layers)
    network.build(input_shape=[None, 32, 32, 3])
    network.summary()
    network.compile(
        optimizer=optimizers.Adam(lr=1e-4),
        loss=losses.CategoricalCrossentropy(from_logits=True),
        metrics=['accuracy']  # use accuracy as the evaluation metric
    )
    return network
def main(): # set GPU memory os.environ["CUDA_VISIBLE_DEVICES"] = "0" config = tf.compat.v1.ConfigProto() config.gpu_options.allow_growth = True sess = tf.compat.v1.Session(config=config) path_1 = "../data/embedding_line" path_2 = "../data/embedding_lda" path_3 = "../data/features" embeddings_samples, ori_emb_samples, original_features, labels, ot, trips, pattern = fetch_features( path_1, path_2, path_3, -1) labels_one_hot = to_categorical(labels, num_classes=166) split = int(len(labels) * 0.9) train_X = [ embeddings_samples[:split], ori_emb_samples[:split], original_features[:split] ] train_y = labels_one_hot[:split] test_X = [ embeddings_samples[split:], ori_emb_samples[split:], original_features[split:] ] test_y = labels_one_hot[split:] ot_test = ot[split:] input_1 = Input(shape=embeddings_samples.shape[1:]) input_2 = Input(shape=ori_emb_samples.shape[1:]) layer_1 = MyLayer(1)([input_1, input_2]) input_3 = Input(shape=original_features.shape[1:]) final_feature = layers.concatenate([layer_1, input_3]) bn_final_feature = BatchNormalization(axis=1)(final_feature) dense1 = Dense(128, activation='relu')(bn_final_feature) dense2 = Dense(64, activation='relu')(dense1) dense3 = Dense(64, activation='relu')(dense2) conv1 = Conv1D(filters=128, kernel_size=1, activation='relu')(dense3) bn_conv1 = BatchNormalization()(conv1) # dense1 = Dense(128, activation='relu')(bn_conv1) conv2 = Conv1D(filters=64, kernel_size=1, activation='relu')(bn_conv1) bn_conv2 = BatchNormalization()(conv2) # dense2 = Dense(64, activation='relu')(bn_conv2) conv3 = Conv1D(filters=64, kernel_size=1, activation='relu')(bn_conv2) # dense3 = Dense(32, activation='relu')(conv3) # conv4 = Conv1D(filters=20, kernel_size=1, activation='relu')(conv3) # conv5 = Conv1D(filters=20, kernel_size=1, activation='relu')(conv4) conv6 = Conv1D(filters=1, kernel_size=1, activation='relu')(conv3) flaten_conv_output = tf.reshape(conv6, shape=(-1, 166)) # dense_1 = Dense(64, activation='relu')(flaten_conv_output) # dense_2 = Dense(64, activation='relu')(dense_1) # dense_3 = Dense(166, activation='relu')(dense_2) output = Softmax()(flaten_conv_output) model = Model(inputs=[input_1, input_2, input_3], outputs=output) model.summary() my_optimizer = optimizers.Adam(learning_rate=1e-5) my_loss = losses.CategoricalCrossentropy() model.compile(optimizer=my_optimizer, loss=my_loss, metrics=[metrics.categorical_accuracy]) history = model.fit(x=train_X, y=train_y, validation_split=0.2, shuffle=True, batch_size=512, epochs=100, verbose=2) plt.title('Model loss') plt.plot(history.history['loss']) plt.plot(history.history['val_loss']) plt.ylabel('Loss') plt.xlabel('Epoch') plt.legend(['Train', 'Val'], loc='upper right') plt.show() plt.title('Model accuracy') plt.plot(history.history['categorical_accuracy']) plt.plot(history.history['val_categorical_accuracy']) plt.ylabel('Accuracy') plt.xlabel('Epoch') plt.legend(['Train', 'Val'], loc='upper left') plt.show() pred = model.predict(test_X) pred_argmax = np.argmax(pred, axis=1) true_argmax = np.argmax(test_y, axis=1) count = sum([ 1 if pred_argmax[i] == true_argmax[i] else 0 for i in range(len(pred)) ]) print("\nTest %d samples, accuracy: %.2f%%" % (len(pred), count / len(pred) * 100)) ot_acc, ot_num = [0] * 12, [0] * 12 for i in range(len(pred)): ot_num[ot_test[i]] += 1 if pred_argmax[i] == true_argmax[i]: ot_acc[ot_test[i]] += 1 ot_acc = [ round(ot_acc[i] / ot_num[i], 4) if ot_num[i] else 0 for i in range(len(ot_num)) ] print("ot-acc distribution") print(ot_acc)
def build_and_compile(self, local_model_name, local_settings, local_hyperparameters): try: # keras,tf session/random seed reset/fix # kb.clear_session() # tf.compat.v1.reset_default_graph() np.random.seed(11) tf.random.set_seed(2) # load hyperparameters units_layer_1 = local_hyperparameters['units_layer_1'] units_layer_2 = local_hyperparameters['units_layer_2'] units_layer_3 = local_hyperparameters['units_layer_3'] units_layer_4 = local_hyperparameters['units_layer_4'] units_dense_layer_4 = local_hyperparameters['units_dense_layer_4'] units_final_layer = local_hyperparameters['units_final_layer'] activation_1 = local_hyperparameters['activation_1'] activation_2 = local_hyperparameters['activation_2'] activation_3 = local_hyperparameters['activation_3'] activation_4 = local_hyperparameters['activation_4'] activation_dense_layer_4 = local_hyperparameters[ 'activation_dense_layer_4'] activation_final_layer = local_hyperparameters[ 'activation_final_layer'] dropout_layer_1 = local_hyperparameters['dropout_layer_1'] dropout_layer_2 = local_hyperparameters['dropout_layer_2'] dropout_layer_3 = local_hyperparameters['dropout_layer_3'] dropout_layer_4 = local_hyperparameters['dropout_layer_4'] dropout_dense_layer_4 = local_hyperparameters[ 'dropout_dense_layer_4'] input_shape_y = local_hyperparameters['input_shape_y'] input_shape_x = local_hyperparameters['input_shape_x'] nof_channels = local_hyperparameters['nof_channels'] stride_y_1 = local_hyperparameters['stride_y_1'] stride_x_1 = local_hyperparameters['stride_x_1'] kernel_size_y_1 = local_hyperparameters['kernel_size_y_1'] kernel_size_x_1 = local_hyperparameters['kernel_size_x_1'] kernel_size_y_2 = local_hyperparameters['kernel_size_y_2'] kernel_size_x_2 = local_hyperparameters['kernel_size_x_2'] kernel_size_y_3 = local_hyperparameters['kernel_size_y_3'] kernel_size_x_3 = local_hyperparameters['kernel_size_x_3'] kernel_size_y_4 = local_hyperparameters['kernel_size_y_4'] kernel_size_x_4 = local_hyperparameters['kernel_size_x_4'] pool_size_y_1 = local_hyperparameters['pool_size_y_1'] pool_size_x_1 = local_hyperparameters['pool_size_x_1'] pool_size_y_2 = local_hyperparameters['pool_size_y_2'] pool_size_x_2 = local_hyperparameters['pool_size_x_2'] pool_size_y_3 = local_hyperparameters['pool_size_y_3'] pool_size_x_3 = local_hyperparameters['pool_size_x_3'] pool_size_y_4 = local_hyperparameters['pool_size_y_4'] pool_size_x_4 = local_hyperparameters['pool_size_x_4'] optimizer_function = local_hyperparameters['optimizer'] optimizer_learning_rate = local_hyperparameters['learning_rate'] epsilon_adam = local_hyperparameters['epsilon_adam'] if optimizer_function == 'adam': optimizer_function = optimizers.Adam( learning_rate=optimizer_learning_rate, epsilon=epsilon_adam) elif optimizer_function == 'ftrl': optimizer_function = optimizers.Ftrl(optimizer_learning_rate) elif optimizer_function == 'sgd': optimizer_function = optimizers.SGD(optimizer_learning_rate) elif optimizer_function == 'rmsp': optimizer_function = optimizers.RMSprop( optimizer_learning_rate, epsilon=epsilon_adam) optimizer_function = tf.train.experimental.enable_mixed_precision_graph_rewrite( optimizer_function) loss_1 = local_hyperparameters['loss_1'] loss_2 = local_hyperparameters['loss_2'] loss_3 = local_hyperparameters['loss_3'] label_smoothing = local_hyperparameters['label_smoothing'] losses_list = [] union_settings_losses = [loss_1, loss_2, loss_3] if 'CategoricalCrossentropy' in union_settings_losses: losses_list.append( losses.CategoricalCrossentropy( 
label_smoothing=label_smoothing)) if 'BinaryCrossentropy' in union_settings_losses: losses_list.append(losses.BinaryCrossentropy()) if 'CategoricalHinge' in union_settings_losses: losses_list.append(losses.CategoricalHinge()) if 'KLD' in union_settings_losses: losses_list.append(losses.KLDivergence()) if 'customized_loss_function' in union_settings_losses: losses_list.append(customized_loss()) if 'customized_loss_t2' in union_settings_losses: losses_list.append(customized_loss_t2) if "Huber" in union_settings_losses: losses_list.append(losses.Huber()) metrics_list = [] metric1 = local_hyperparameters['metrics1'] metric2 = local_hyperparameters['metrics2'] union_settings_metrics = [metric1, metric2] if 'auc_roc' in union_settings_metrics: metrics_list.append(metrics.AUC()) if 'customized_metric_auc_roc' in union_settings_metrics: metrics_list.append(customized_metric_auc_roc()) if 'CategoricalAccuracy' in union_settings_metrics: metrics_list.append(metrics.CategoricalAccuracy()) if 'CategoricalHinge' in union_settings_metrics: metrics_list.append(metrics.CategoricalHinge()) if 'BinaryAccuracy' in union_settings_metrics: metrics_list.append(metrics.BinaryAccuracy()) if local_settings['use_efficientNetB2'] == 'False': type_of_model = '_custom' if local_hyperparameters['regularizers_l1_l2_1'] == 'True': l1_1 = local_hyperparameters['l1_1'] l2_1 = local_hyperparameters['l2_1'] activation_regularizer_1 = regularizers.l1_l2(l1=l1_1, l2=l2_1) else: activation_regularizer_1 = None if local_hyperparameters['regularizers_l1_l2_2'] == 'True': l1_2 = local_hyperparameters['l1_2'] l2_2 = local_hyperparameters['l2_2'] activation_regularizer_2 = regularizers.l1_l2(l1=l1_2, l2=l2_2) else: activation_regularizer_2 = None if local_hyperparameters['regularizers_l1_l2_3'] == 'True': l1_3 = local_hyperparameters['l1_3'] l2_3 = local_hyperparameters['l2_3'] activation_regularizer_3 = regularizers.l1_l2(l1=l1_3, l2=l2_3) else: activation_regularizer_3 = None if local_hyperparameters['regularizers_l1_l2_4'] == 'True': l1_4 = local_hyperparameters['l1_4'] l2_4 = local_hyperparameters['l2_4'] activation_regularizer_4 = regularizers.l1_l2(l1=l1_4, l2=l2_4) else: activation_regularizer_4 = None if local_hyperparameters[ 'regularizers_l1_l2_dense_4'] == 'True': l1_dense_4 = local_hyperparameters['l1_dense_4'] l2_dense_4 = local_hyperparameters['l2_dense_4'] activation_regularizer_dense_layer_4 = regularizers.l1_l2( l1=l1_dense_4, l2=l2_dense_4) else: activation_regularizer_dense_layer_4 = None # building model classifier_ = tf.keras.models.Sequential() # first layer classifier_.add( layers.Input(shape=(input_shape_y, input_shape_x, nof_channels))) # classifier_.add(layers.ZeroPadding2D(padding=((0, 1), (0, 1)))) classifier_.add( layers.Conv2D( units_layer_1, kernel_size=(kernel_size_y_1, kernel_size_x_1), strides=(stride_y_1, stride_x_1), activity_regularizer=activation_regularizer_1, activation=activation_1, padding='same', kernel_initializer=tf.keras.initializers. 
VarianceScaling(scale=2., mode='fan_out', distribution='truncated_normal'))) classifier_.add(layers.BatchNormalization(axis=-1)) classifier_.add(layers.Activation(tf.keras.activations.swish)) classifier_.add(layers.GlobalAveragePooling2D()) classifier_.add(layers.Dropout(dropout_layer_1)) # LAYER 1.5 classifier_.add( layers.Conv2D( units_layer_1, kernel_size=(kernel_size_y_1, kernel_size_x_1), input_shape=(input_shape_y, input_shape_x, nof_channels), strides=(stride_y_1, stride_x_1), activity_regularizer=activation_regularizer_1, activation=activation_1, padding='same', kernel_initializer=tf.keras.initializers. VarianceScaling(scale=2., mode='fan_out', distribution='truncated_normal'))) classifier_.add(layers.BatchNormalization(axis=-1)) classifier_.add(layers.Activation(tf.keras.activations.swish)) classifier_.add(layers.GlobalAveragePooling2D()) classifier_.add(layers.Dropout(dropout_layer_1)) # second layer classifier_.add( layers.Conv2D( units_layer_2, kernel_size=(kernel_size_y_2, kernel_size_x_2), activity_regularizer=activation_regularizer_2, activation=activation_2, padding='same', kernel_initializer=tf.keras.initializers. VarianceScaling(scale=2., mode='fan_out', distribution='truncated_normal'))) classifier_.add(layers.BatchNormalization(axis=-1)) classifier_.add(layers.Activation(tf.keras.activations.swish)) classifier_.add(layers.GlobalAveragePooling2D()) classifier_.add(layers.Dropout(dropout_layer_2)) # LAYER 2.5 classifier_.add( layers.Conv2D( units_layer_2, kernel_size=(kernel_size_y_2, kernel_size_x_2), activity_regularizer=activation_regularizer_2, activation=activation_2, padding='same', kernel_initializer=tf.keras.initializers. VarianceScaling(scale=2., mode='fan_out', distribution='truncated_normal'))) classifier_.add(layers.BatchNormalization(axis=-1)) classifier_.add(layers.Activation(tf.keras.activations.swish)) classifier_.add(layers.GlobalAveragePooling2D()) classifier_.add(layers.Dropout(dropout_layer_2)) # third layer classifier_.add( layers.Conv2D( units_layer_3, kernel_size=(kernel_size_y_3, kernel_size_x_3), activity_regularizer=activation_regularizer_3, activation=activation_3, padding='same', kernel_initializer=tf.keras.initializers. VarianceScaling(scale=2., mode='fan_out', distribution='truncated_normal'))) classifier_.add(layers.BatchNormalization(axis=-1)) classifier_.add(layers.Activation(tf.keras.activations.swish)) classifier_.add(layers.GlobalAveragePooling2D()) classifier_.add(layers.Dropout(dropout_layer_3)) # LAYER 3.5 classifier_.add( layers.Conv2D( units_layer_3, kernel_size=(kernel_size_y_3, kernel_size_x_3), activity_regularizer=activation_regularizer_3, activation=activation_3, padding='same', kernel_initializer=tf.keras.initializers. VarianceScaling(scale=2., mode='fan_out', distribution='truncated_normal'))) classifier_.add(layers.BatchNormalization(axis=-1)) classifier_.add(layers.Activation(tf.keras.activations.swish)) classifier_.add(layers.GlobalAveragePooling2D()) classifier_.add(layers.Dropout(dropout_layer_3)) # fourth layer classifier_.add( layers.Conv2D( units_layer_4, kernel_size=(kernel_size_y_4, kernel_size_x_4), activity_regularizer=activation_regularizer_4, activation=activation_4, padding='same', kernel_initializer=tf.keras.initializers. 
VarianceScaling(scale=2., mode='fan_out', distribution='truncated_normal'))) classifier_.add(layers.BatchNormalization(axis=-1)) classifier_.add(layers.Activation(tf.keras.activations.swish)) classifier_.add(layers.GlobalAveragePooling2D()) classifier_.add(layers.Dropout(dropout_layer_4)) # Full connection and final layer classifier_.add( layers.Dense(units=units_final_layer, activation=activation_final_layer)) # Compile model classifier_.compile(optimizer=optimizer_function, loss=losses_list, metrics=metrics_list) elif local_settings['use_efficientNetB2'] == 'True': type_of_model = '_EfficientNetB2' # pretrained_weights = ''.join([local_settings['models_path'], # local_hyperparameters['weights_for_training_efficientnetb2']]) classifier_pretrained = tf.keras.applications.EfficientNetB2( include_top=False, weights='imagenet', input_tensor=None, input_shape=(input_shape_y, input_shape_x, 3), pooling=None, classifier_activation=None) # classifier_pretrained.save_weights(''.join([local_settings['models_path'], # 'pretrained_efficientnetb2_weights.h5'])) # # classifier_receptor = tf.keras.applications.EfficientNetB2(include_top=False, weights=None, # input_tensor=None, # input_shape=(input_shape_y, # input_shape_x, 1), # pooling=None, # classifier_activation=None) # # classifier_receptor.load_weights(''.join([local_settings['models_path'], # 'pretrained_efficientnetb2_weights.h5']), by_name=True) # # classifier_pretrained = classifier_receptor if local_settings['nof_classes'] == 2 or local_hyperparameters[ 'use_bias_always'] == 'True': # if two classes, log(pos/neg) = log(0.75/0.25) = 0.477121254719 bias_initializer = tf.keras.initializers.Constant( local_hyperparameters['bias_initializer']) else: # assuming balanced classes... bias_initializer = tf.keras.initializers.Constant(0) effnb2_model = models.Sequential(classifier_pretrained) effnb2_model.add(layers.GlobalAveragePooling2D()) effnb2_model.add(layers.Dropout(dropout_dense_layer_4)) # effnb2_model.add(layers.Dense(units=units_dense_layer_4, activation=activation_dense_layer_4, # kernel_initializer=tf.keras.initializers.VarianceScaling(scale=0.333333333, # mode='fan_out', # distribution='uniform'), # bias_initializer=bias_initializer)) # effnb2_model.add(layers.Dropout(dropout_dense_layer_4)) effnb2_model.add( layers.Dense(units_final_layer, activation=activation_final_layer, kernel_initializer=tf.keras.initializers. 
VarianceScaling(scale=0.333333333, mode='fan_out', distribution='uniform'), bias_initializer=bias_initializer)) classifier_ = effnb2_model if local_settings[ 'use_local_pretrained_weights_for_retraining'] != 'False': classifier_.load_weights(''.join([ local_settings['models_path'], local_settings[ 'use_local_pretrained_weights_for_retraining'] ])) for layer in classifier_.layers[0].layers: layer.trainable = True # if 'excite' in layer.name: # layer.trainable = True # if 'top_conv' in layer.name: # layer.trainable = True # if 'project_conv' in layer.name: # layer.trainable = True classifier_.build(input_shape=(input_shape_y, input_shape_x, nof_channels)) classifier_.compile(optimizer=optimizer_function, loss=losses_list, metrics=metrics_list) # Summary of model classifier_.summary() # save_model classifier_json = classifier_.to_json() with open(''.join([local_settings['models_path'], local_model_name, type_of_model, '_classifier_.json']), 'w') \ as json_file: json_file.write(classifier_json) json_file.close() classifier_.save(''.join([ local_settings['models_path'], local_model_name, type_of_model, '_classifier_.h5' ])) classifier_.save(''.join([ local_settings['models_path'], local_model_name, type_of_model, '/' ]), save_format='tf') print('model architecture saved') # output png and pdf with model, additionally saves a json file model_name_analyzed.json if local_settings['model_analyzer'] == 'True': model_architecture = model_structure() model_architecture_review = model_architecture.analize( ''.join( [local_model_name, type_of_model, '_classifier_.h5']), local_settings, local_hyperparameters) except Exception as e: print('error in build or compile of customized model') print(e) classifier_ = None logger.error(str(e), exc_info=True) return classifier_
def setup_network(self, lr=1e-4): self.model.add( layers.Conv2D(64, (3, 3), strides=(1, 1), activation="relu", padding="same", kernel_initializer="uniform", data_format="channels_last", input_shape=(self.width, self.height, self.channel))) self.model.add( layers.Conv2D(64, (3, 3), strides=(1, 1), activation="relu", padding="same", kernel_initializer="uniform", data_format="channels_last")) self.model.add(layers.MaxPooling2D((2, 2))) self.model.add( layers.Conv2D(128, (3, 3), strides=(1, 1), activation="relu", padding="same", kernel_initializer="uniform", data_format="channels_last")) self.model.add(layers.MaxPooling2D((2, 2))) self.model.add( layers.Conv2D(128, (3, 3), strides=(1, 1), activation="relu", padding="same", kernel_initializer="uniform", data_format="channels_last")) self.model.add(layers.MaxPooling2D((2, 2))) self.model.add( layers.Conv2D(256, (3, 3), strides=(1, 1), activation="relu", padding="same", kernel_initializer="uniform", data_format="channels_last")) self.model.add( layers.Conv2D(256, (3, 3), strides=(1, 1), activation="relu", padding="same", kernel_initializer="uniform", data_format="channels_last")) self.model.add(layers.MaxPooling2D((2, 2))) self.model.add( layers.Conv2D(512, (3, 3), strides=(1, 1), activation="relu", padding="same", kernel_initializer="uniform", data_format="channels_last")) self.model.add( layers.Conv2D(512, (3, 3), strides=(1, 1), activation="relu", padding="same", kernel_initializer="uniform", data_format="channels_last")) self.model.add(layers.MaxPooling2D((2, 2))) self.model.add(layers.Flatten()) self.model.add( layers.Dense(4096, activation="relu", kernel_regularizer=regularizers.l2(0.1))) self.model.add(layers.Dense(4096, activation="relu")) self.model.add(layers.Dense(1000, activation="relu")) self.model.add(layers.Dense(2, activation="softmax")) self.model.compile(optimizer=optimizers.Adam(lr=lr), loss=losses.CategoricalCrossentropy(), metrics=['accuracy']) print(self.model.summary())
def main(): # set GPU memory os.environ["CUDA_VISIBLE_DEVICES"] = "0" config = tf.compat.v1.ConfigProto() config.gpu_options.allow_growth = True sess = tf.compat.v1.Session(config=config) X, y, ot = fetch_data("../data/features") y = to_categorical(y, num_classes=166) print(X.shape, y.shape) X_train, X_test, y_train, y_test, _, ot_test = train_test_split( X, y, ot, test_size=0.1) # print(X_train.shape, y_train.shape) input_1 = Input(shape=X.shape[1:]) dense = Dense(128, activation='relu')(input_1) dense_1 = Dense(64, activation='relu')(dense) dense_2 = Dense(64, activation='relu')(dense_1) dense_3 = Dense(166, activation='relu')(dense_2) output = Softmax()(dense_3) model = Model(inputs=input_1, outputs=output) model.summary() my_optimizer = optimizers.Adam(learning_rate=1e-5) my_loss = losses.CategoricalCrossentropy() model.compile(optimizer=my_optimizer, loss=my_loss, metrics=[metrics.categorical_accuracy]) history = model.fit(X_train, y_train, batch_size=512, validation_split=0.2, shuffle=True, epochs=100, verbose=2) plt.title('Model loss') plt.plot(history.history['loss']) plt.plot(history.history['val_loss']) plt.ylabel('Loss') plt.xlabel('Epoch') plt.legend(['Train', 'Val'], loc='upper right') plt.show() plt.title('Model accuracy') plt.plot(history.history['categorical_accuracy']) plt.plot(history.history['val_categorical_accuracy']) plt.ylabel('Accuracy') plt.xlabel('Epoch') plt.legend(['Train', 'Val'], loc='upper left') plt.show() pred = model.predict(x=X_test) pred_argmax = np.argmax(pred, axis=1) true_argmax = np.argmax(y_test, axis=1) count = sum([ 1 if pred_argmax[i] == true_argmax[i] else 0 for i in range(len(pred)) ]) print("\nTest samples: %d, accuracy: %.2f%%" % (len(pred), count / len(pred) * 100)) ot_acc, ot_num = [0] * 12, [0] * 12 for i in range(len(pred)): ot_num[ot_test[i]] += 1 if pred_argmax[i] == true_argmax[i]: ot_acc[ot_test[i]] += 1 ot_acc = [ round(ot_acc[i] / ot_num[i], 4) if ot_num[i] else 0 for i in range(len(ot_num)) ] print("ot-acc distribution") print(ot_acc)
optimizer_learning_rate = model_hyperparameters['learning_rate']
if optimizer_function == 'adam':
    optimizer_function = optimizers.Adam(optimizer_learning_rate)
    optimizer_function = tf.train.experimental.enable_mixed_precision_graph_rewrite(
        optimizer_function)
elif optimizer_function == 'ftrl':
    optimizer_function = optimizers.Ftrl(optimizer_learning_rate)
elif optimizer_function == 'sgd':
    optimizer_function = optimizers.SGD(optimizer_learning_rate)

losses_list = []
loss_1 = model_hyperparameters['loss_1']
loss_2 = model_hyperparameters['loss_2']
loss_3 = model_hyperparameters['loss_3']
union_settings_losses = [loss_1, loss_2, loss_3]
if 'CategoricalCrossentropy' in union_settings_losses:
    losses_list.append(losses.CategoricalCrossentropy())
if 'CategoricalHinge' in union_settings_losses:
    losses_list.append(losses.CategoricalHinge())
if 'LogCosh' in union_settings_losses:
    losses_list.append(losses.LogCosh())  # instantiate the loss, not the class
if 'customized_loss_function' in union_settings_losses:
    losses_list.append(customized_loss())

metrics_list = []
metric1 = model_hyperparameters['metrics1']
metric2 = model_hyperparameters['metrics2']
union_settings_metrics = [metric1, metric2]
if 'auc_roc' in union_settings_metrics:
    metrics_list.append(metrics.AUC())
if 'CategoricalAccuracy' in union_settings_metrics:
    metrics_list.append(metrics.CategoricalAccuracy())
if 'CategoricalHinge' in union_settings_metrics:
def __init__(self, is_one_hot, scope='SFTMXE'):
    super(SftmXE, self).__init__(scope)
    if is_one_hot:
        self.cost = losses.CategoricalCrossentropy(from_logits=True,
                                                   reduction=losses.Reduction.SUM)
    else:
        self.cost = losses.SparseCategoricalCrossentropy(from_logits=True,
                                                         reduction=losses.Reduction.SUM)
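# --- Editor's illustration (assumption, not from the source): Reduction.SUM returns
# the summed per-example loss instead of the batch mean, so the caller is expected
# to handle any averaging (for example, dividing by a global batch size) elsewhere.
import tensorflow as tf
from tensorflow.keras import losses

logits = tf.constant([[2.0, 0.5, 0.1], [0.2, 1.5, 0.3]])
onehot = tf.constant([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])

mean_loss = losses.CategoricalCrossentropy(from_logits=True)(onehot, logits)
sum_loss = losses.CategoricalCrossentropy(
    from_logits=True, reduction=losses.Reduction.SUM)(onehot, logits)
print(mean_loss.numpy(), sum_loss.numpy())  # SUM is 2x the mean for this batch of 2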
    tf.summary.scalar('validation_loss', test_loss.result(), step=optimizer.iterations)
    tf.summary.scalar('train_accuracy', train_accuracy.result(), step=optimizer.iterations)
    tf.summary.scalar('validation_accuracy', test_accuracy.result(), step=optimizer.iterations)

    if test_loss.result() < best_test_loss:
        best_test_loss = test_loss.result()
        model.save_weights("../logs/model/mobile_net.h5")


if __name__ == '__main__':
    epochs = 50
    batch_size = 2
    lr = 0.0001

    # Custom loss, optimizer, and accuracy objects
    loss_object = losses.CategoricalCrossentropy(from_logits=False)
    optimizer = optimizers.Adam(learning_rate=lr)
    train_loss = metrics.Mean(name='train_loss')
    train_accuracy = metrics.CategoricalAccuracy(name='train_accuracy')

    # Custom loss and accuracy trackers for validation
    test_loss = metrics.Mean(name='test_loss')
    test_accuracy = metrics.CategoricalAccuracy(name='test_accuracy')

    cfg.data_pretreatment = 'normal'
    reader = ClassifierDataRead("../config/train.txt", cfg.input_shape, batch_size)
    train_path, valid_path = reader.read_data_and_split_data()
    train_datasets = reader.make_datasets(train_path, "train")
    valid_datasets = reader.make_datasets(valid_path, "valid")
def get_age_model(DATA): feed_forward_size = 2048 max_seq_len = 150 model_dim = 256 + 256 + 64 + 32 + 8 + 16 input_creative_id = Input(shape=(max_seq_len, ), name='creative_id') x1 = Embedding( input_dim=NUM_creative_id + 1, output_dim=256, # weights=[DATA['creative_id_emb']], # trainable=args.not_train_embedding, # trainable=False, input_length=150, mask_zero=True)(input_creative_id) # encodings = PositionEncoding(model_dim)(x1) # encodings = Add()([embeddings, encodings]) input_ad_id = Input(shape=(max_seq_len, ), name='ad_id') x2 = Embedding( input_dim=NUM_ad_id + 1, output_dim=256, # weights=[DATA['ad_id_emb']], # trainable=args.not_train_embedding, # trainable=False, input_length=150, mask_zero=True)(input_ad_id) input_product_id = Input(shape=(max_seq_len, ), name='product_id') x3 = Embedding( input_dim=NUM_product_id + 1, output_dim=32, # weights=[DATA['product_id_emb']], # trainable=args.not_train_embedding, # trainable=False, input_length=150, mask_zero=True)(input_product_id) input_advertiser_id = Input(shape=(max_seq_len, ), name='advertiser_id') x4 = Embedding( input_dim=NUM_advertiser_id + 1, output_dim=64, # weights=[DATA['advertiser_id_emb']], # trainable=args.not_train_embedding, # trainable=False, input_length=150, mask_zero=True)(input_advertiser_id) input_industry = Input(shape=(max_seq_len, ), name='industry') x5 = Embedding( input_dim=NUM_industry + 1, output_dim=16, # weights=[DATA['industry_emb']], trainable=True, # trainable=False, input_length=150, mask_zero=True)(input_industry) input_product_category = Input(shape=(max_seq_len, ), name='product_category') x6 = Embedding( input_dim=NUM_product_category + 1, output_dim=8, # weights=[DATA['product_category_emb']], trainable=True, # trainable=False, input_length=150, mask_zero=True)(input_product_category) # (bs, 100, 128*2) encodings = layers.Concatenate(axis=2)([x1, x2, x3, x4, x5, x6]) # (bs, 100) masks = tf.equal(input_creative_id, 0) # (bs, 100, 128*2) attention_out = MultiHeadAttention( 8, 79)([encodings, encodings, encodings, masks]) # Add & Norm attention_out += encodings attention_out = LayerNormalization()(attention_out) # Feed-Forward ff = PositionWiseFeedForward(model_dim, feed_forward_size) ff_out = ff(attention_out) # Add & Norm # ff_out (bs, 100, 128),但是attention_out是(bs,100,256) ff_out += attention_out encodings = LayerNormalization()(ff_out) encodings = GlobalMaxPooling1D()(encodings) encodings = Dropout(0.2)(encodings) # output_gender = Dense(2, activation='softmax', name='gender')(encodings) output_age = Dense(10, activation='softmax', name='age')(encodings) model = Model(inputs=[ input_creative_id, input_ad_id, input_product_id, input_advertiser_id, input_industry, input_product_category ], outputs=[output_age]) model.compile( optimizer=optimizers.Adam(2.5e-4), loss={ # 'gender': losses.CategoricalCrossentropy(from_logits=False), 'age': losses.CategoricalCrossentropy(from_logits=False) }, # loss_weights=[0.4, 0.6], metrics=['accuracy']) return model
def Classifier(shape_, args): def cbr(x, out_layer, kernel, stride, dilation): x = Conv1D(out_layer, kernel_size=kernel, dilation_rate=dilation, strides=stride, padding="same")(x) x = BatchNormalization()(x) x = Activation("relu")(x) return x def wave_block(x, filters, kernel_size, n): dilation_rates = [2**i for i in range(n)] x = Conv1D(filters=filters, kernel_size=1, padding='same')(x) res_x = x for dilation_rate in dilation_rates: tanh_out = Conv1D(filters=filters, kernel_size=kernel_size, padding='same', activation='tanh', dilation_rate=dilation_rate)(x) sigm_out = Conv1D(filters=filters, kernel_size=kernel_size, padding='same', activation='sigmoid', dilation_rate=dilation_rate)(x) x = Multiply()([tanh_out, sigm_out]) x = Conv1D(filters=filters, kernel_size=1, padding='same')(x) res_x = Add()([res_x, x]) return res_x #Returns a list of convolution softmax heads depending on the number of #multitask predictions desired def Multitask_Head(fork, num_preds): if num_preds == 0: return [] heads = [] for i in range(num_preds): pred = cbr(fork, 32, 7, 1, 1) pred = BatchNormalization()(pred) pred = Dropout(0.2)(pred) pred = Dense(11, activation='softmax', name='multout_{}'.format(i + 1))(pred) heads.append(pred) return heads #Returns the weights of the heads for the classifier. multi_weight is the # weight given to each multitask prediction. def Get_Weights(num_losses, multi_weight): if num_losses == 1: return [1.] else: return [1. - multi_weight * (num_losses - 1) ] + [multi_weight for i in range(num_losses - 1)] inp = Input(shape=shape_) x = cbr(inp, 64, 7, 1, 1) #Commented for faster prototyping. Get rid of comments when actually submitting code x = BatchNormalization()(x) x = wave_block(x, 16, 3, 12) x = BatchNormalization()(x) x = wave_block(x, 32, 3, 8) x = BatchNormalization()(x) x = wave_block(x, 64, 3, 4) x = BatchNormalization()(x) x = wave_block(x, 128, 3, 1) x = cbr(x, 32, 7, 1, 1) x = BatchNormalization()(x) x = wave_block(x, 64, 3, 1) fork = cbr(x, 32, 7, 1, 1) if args['Rnn'] == True: fork = Bidirectional(LSTM(64, return_sequences=True))(fork) fork = Bidirectional(LSTM(64, return_sequences=True))(fork) fork = Bidirectional(LSTM(64, return_sequences=True))(fork) multitask_list = Multitask_Head(fork, len(args['Multitask'])) x = BatchNormalization()(fork) x = Dropout(0.2)(x) out = Dense(11, activation='softmax', name='out')(x) outputs = [out] + multitask_list model = models.Model(inputs=inp, outputs=outputs) opt = Adam(lr=args['LR']) losses_ = [losses.CategoricalCrossentropy() for i in range(len(outputs))] loss_weights_ = Get_Weights(len(losses_), args['Multi_Weights']) model.compile(loss=losses_, optimizer=opt, metrics=['accuracy'], loss_weights=loss_weights_) return model
# y_encoding = "onehot" # y_encoding = "label" # to be used binary cross-entropy if params.y_encoding == "onehot": if index_col_name in data.columns: # Using Yitan's T/V/E splits # print(te_meta[["index", "Group", "grp_name", "Response"]]) ytr = pd.get_dummies(tr_meta[args.target[0]].values) yvl = pd.get_dummies(vl_meta[args.target[0]].values) yte = pd.get_dummies(te_meta[args.target[0]].values) else: ytr = y_onehot.iloc[tr_id, :].reset_index(drop=True) yvl = y_onehot.iloc[vl_id, :].reset_index(drop=True) yte = y_onehot.iloc[te_id, :].reset_index(drop=True) loss = losses.CategoricalCrossentropy() elif params.y_encoding == "label": if index_col_name in data.columns: # Using Yitan's T/V/E splits ytr = tr_meta[args.target[0]].values yvl = vl_meta[args.target[0]].values yte = te_meta[args.target[0]].values loss = losses.BinaryCrossentropy() else: ytr = ydata_label[tr_id] yvl = ydata_label[vl_id] yte = ydata_label[te_id] loss = losses.SparseCategoricalCrossentropy() else:
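# --- Editor's sketch (not from the source): the encodings above pair with different
# losses: one-hot rows with CategoricalCrossentropy, integer labels with
# SparseCategoricalCrossentropy, and a single 0/1 target with BinaryCrossentropy.
# The two calls below produce the same value because they describe the same targets.
import numpy as np
import tensorflow as tf
from tensorflow.keras import losses

probs = tf.constant([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1]])
onehot = tf.constant([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])
labels = np.array([0, 1])

print(losses.CategoricalCrossentropy()(onehot, probs).numpy())
print(losses.SparseCategoricalCrossentropy()(labels, probs).numpy())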
def compile_fit(self, model_input, q_train_padded, a_train_padded, y_q_label_df, y_a_label_df, y_q_classify_list, y_q_classify_dict, y_a_classify_list, y_a_classify_dict, epoch_num=3): """ This function is used to switch between numrical. The switch controled by hyperparameters self.TYPE When self.TYPE == 'num', input will be q_train_padded and y_q_label_df (others are same) Meanwhile, switch to ['MSE'] as loss and ['mse', 'mae'] as metrics When self.TYPE == 'classify', input will be q_train_padded and y_q_classify_list[0] etc. Meanwhile, swith to ['categorical_crossentropy'] as loss and ['accuracy'] as metrics """ start_time = time() print("*" * 40, "Start {} Processing".format(model_input._name), "*" * 40) # loss_fun = 'categorical_crossentropy' # loss_fun = 'MSE' #MeanSquaredError # loss_fun = ' METRICS = [ metrics.TruePositives(name='tp'), metrics.FalsePositives(name='fp'), metrics.TrueNegatives(name='tn'), metrics.FalseNegatives(name='fn'), metrics.CategoricalAccuracy(name='accuracy'), metrics.Precision(name='precision'), metrics.Recall(name='recall'), metrics.AUC(name='auc'), # F1Score(num_classes = int(y_train.shape[1]), name='F1') ] loss_fun = None metrics_fun = None # becase large data input, we want to process automaticaly. So set this arugs to choose # question process or answer process automatically if self.PART == 'q': print("Start processing question part") # start to decide complie parameters if self.TYPE == 'num': print("Start numerical output") # call split X_train, X_val, y_train, y_val = self.split_data( q_train_padded, y_q_label_df, test_size=0.2) loss_fun = losses.MeanSquaredError() metrics_fun = ['mse', 'mae'] elif self.TYPE == 'classify': print("Start classify output") X_train, X_val, y_train, y_val = self.split_data( q_train_padded, y_q_classify_list[0], test_size=0.2) loss_fun = losses.CategoricalCrossentropy() metrics_fun = METRICS else: print("UNKNOW self.TYPE") elif self.PART == 'a': print("Start processing answer part") if self.TYPE == 'num': print("Start numerical output") # call split X_train, X_val, y_train, y_val = self.split_data( a_train_padded, y_a_label_df, test_size=0.2) loss_fun = losses.MeanSquaredError() metrics_fun = ['mse', 'mae'] elif self.TYPE == 'classify': print("Start classify output") X_train, X_val, y_train, y_val = self.split_data( a_train_padded, y_a_classify_list[0], test_size=0.2) loss_fun = losses.CategoricalCrossentropy() metrics_fun = METRICS else: print("UNKNOW self.TYPE") learning_rate = 1e-3 opt_adam = optimizers.Adam(lr=learning_rate, decay=1e-5) model_input.compile(loss=loss_fun, optimizer=opt_adam, metrics=metrics_fun) # batch_size is subjected to my GPU and GPU memory, after testing, 32 is reasonable value size. # If vector bigger, this value should dercrease history = model_input.fit( X_train, y_train, validation_data=(X_val, y_val), epochs=epoch_num, batch_size=16, verbose=1, callbacks=[PredictCallback(X_val, y_val, model_input)]) # spearmanr_list = PredictCallback(X_val, y_val, model_input).spearmanr_list # dic = ['loss', 'accuracy', 'val_loss','val_accuracy'] history_dict = [x for x in history.history] # model_input.predict(train_features[:10]) cost_time = round((time() - start_time), 4) print("*" * 40, "End {} with {} seconds".format(model_input._name, cost_time), "*" * 40, end='\n\n') return history, model_input
    layers.Dense(120, activation='relu'),  # fully connected layer, 120 units
    layers.Dense(84, activation='relu'),   # fully connected layer, 84 units
    layers.Dense(10)                       # fully connected layer, 10 units
])
# Build the network once, giving the input shape; the 4 is an arbitrary batch size
network.build(input_shape=(4, 28, 28, 1))
# Print the network summary
network.summary()

# %%
# Import the loss and optimizer modules
from tensorflow.keras import losses, optimizers

# Create the loss class; call the instance directly when computing the loss
criteon = losses.CategoricalCrossentropy(from_logits=True)

# %%
# Open a gradient-recording context
with tf.GradientTape() as tape:
    # Insert the channel dimension => [b, 28, 28, 1]
    x = tf.expand_dims(x, axis=3)
    # Forward pass to get the predicted distribution over 10 classes, [b, 784] => [b, 10]
    out = network(x)
    # One-hot encode the true labels, [b] => [b, 10]
    y_onehot = tf.one_hot(y, depth=10)
    # Compute the cross-entropy loss (a scalar)
    loss = criteon(y_onehot, out)
# Compute gradients automatically
grads = tape.gradient(loss, network.trainable_variables)
# Update parameters automatically
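# --- Editor's illustration (assumption, not from the source): from_logits=True lets
# the loss apply the softmax internally, which is numerically more stable than adding
# a softmax layer and using from_logits=False; both agree on well-scaled inputs.
import tensorflow as tf
from tensorflow.keras import losses

logits = tf.constant([[2.0, 1.0, 0.1]])
y_true = tf.constant([[1.0, 0.0, 0.0]])

loss_from_logits = losses.CategoricalCrossentropy(from_logits=True)(y_true, logits)
loss_from_probs = losses.CategoricalCrossentropy()(y_true, tf.nn.softmax(logits))
print(loss_from_logits.numpy(), loss_from_probs.numpy())  # approximately equal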
def run_train_cycle(train: pd.DataFrame, splits: int, feats: list, nn_epochs: int,
                    nn_batch_size: int, seed: int, lr: float, save_dir: str,
                    version: int, n_classes: int, augs: list):
    """
    Wavenet training cycle. Runs GroupKFold cross-validation. Saves a model for each fold.

    :param train: DataFrame with training data.
    :param splits: Number of folds in CV.
    :param feats: List of features for training.
    :param nn_epochs: Number of epochs to train.
    :param nn_batch_size: Batch size.
    :param seed: Random seed.
    :param lr: Learning rate.
    :param save_dir: Directory for storing models and OOF predictions.
    :param version: Model version. Specified in nn.py.
    :param n_classes: Number of classes.
    :param augs: Augmentation pipeline. Format is specified in augs.py.
    :return:
    """
    seed_everything(seed)
    K.clear_session()
    config = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1,
                                      inter_op_parallelism_threads=1)
    sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=config)
    tf.compat.v1.keras.backend.set_session(sess)

    oof_ = np.zeros((len(train), n_classes))
    target = ['open_channels']
    group = train['group']

    # Setup GroupKFold validation
    kf = GroupKFold(n_splits=splits)
    splits = [x for x in kf.split(train, train[target], group)]

    # Find batches corresponding to validation splits
    new_splits = []
    for sp in splits:
        new_split = []
        tr_idx = np.unique(group[sp[0]])
        new_split.append(tr_idx)
        new_split.append(np.unique(group[sp[1]]))
        new_split.append(sp[1])
        new_splits.append(new_split)

    tr = pd.concat([pd.get_dummies(train.open_channels), train[['group']]], axis=1)
    tr.columns = ['target_' + str(i) for i in range(n_classes)] + ['group']
    target_cols = ['target_' + str(i) for i in range(n_classes)]
    train_tr = np.array(list(tr.groupby('group').apply(
        lambda x: x[target_cols].values))).astype(np.float32)
    train = np.array(list(train.groupby('group').apply(lambda x: x[feats].values)))

    # Train <splits> models
    for n_fold, (tr_idx, val_idx, val_orig_idx) in enumerate(new_splits[0:], start=0):
        train_x, train_y = train[tr_idx], train_tr[tr_idx]
        valid_x, valid_y = train[val_idx], train_tr[val_idx]
        print(f'Our training dataset shape is {train_x.shape}')
        print(f'Our validation dataset shape is {valid_x.shape}')

        # Data generators
        train_gen = DataGenerator(train_x, train_y, batch_size=nn_batch_size,
                                  shuffle=True, mode='train', augs=augs)
        val_gen = DataGenerator(valid_x, valid_y, batch_size=nn_batch_size,
                                shuffle=False, mode='val', augs=None)

        # Early stopping configuration
        e_s = tf.keras.callbacks.EarlyStopping(
            monitor="val_loss",
            patience=25,
            verbose=1,
            restore_best_weights=True,
        )
        gc.collect()
        shape_ = (None, train_x.shape[2])

        # Model
        opt = Adam(lr=lr)
        loss = losses.CategoricalCrossentropy()
        model = get_model(version=version, shape=shape_, n_classes=n_classes,
                          loss=loss, opt=opt)

        # Learning scheduler is used
        cb_lr_schedule = LearningRateScheduler(lambda x: lr_schedule(x, lr))
        model.fit_generator(
            generator=train_gen,
            epochs=nn_epochs,
            callbacks=[cb_lr_schedule, MacroF1(model, valid_x, valid_y), e_s],
            verbose=2,
            validation_data=val_gen
        )

        # Save weights to disc
        model.save(os.path.join(save_dir, f"wavenet_fold_{n_fold}.h5"))

        # Write OOF predictions and compute F1 score for the fold
        preds_f = model.predict(valid_x)
        f1_score_ = f1_score(np.argmax(valid_y, axis=2).reshape(-1),
                             np.argmax(preds_f, axis=2).reshape(-1),
                             average='macro')
        print(f'Training fold {n_fold} completed. macro f1 score: {f1_score_:1.5f}')
        preds_f = preds_f.reshape(-1, preds_f.shape[-1])
        oof_[val_orig_idx, :] += preds_f

    # Save OOF array and compute overall OOF score
    np.save(os.path.join(save_dir, "train_wavenet_proba.npy"), oof_)
    f1_score_ = f1_score(np.argmax(train_tr, axis=2).reshape(-1),
                         np.argmax(oof_, axis=1),
                         average='macro')
    print(f'Training completed. oof macro f1 score: {f1_score_:1.5f}')
early_stopping = EarlyStopping(monitor='val_accuracy', min_delta=0.001, patience=5) # Learning Rate Reducer learn_control = ReduceLROnPlateau(monitor='val_accuracy', patience=3, verbose=1, factor=0.2, min_lr=1e-7) tic = time.time() # stage 1 newnet.compile(optimizer=optimizers.Adam(lr=1e-4), loss=losses.CategoricalCrossentropy(from_logits=True), metrics=['accuracy']) history = newnet.fit(db_train, validation_data=db_val, validation_freq=1, verbose=2, epochs=5, callbacks=[learn_control, early_stopping]) # stage 2 newnet.trainable = True newnet.compile(optimizer=optimizers.Adam(lr=1e-5), loss=losses.CategoricalCrossentropy(from_logits=True), metrics=['accuracy'])
name="class_output")(fc1) dense2 = layers.Dense(1, activation='sigmoid', name="bounding_box")( fc1) # later change this into bounding box regression values = model.predict(image) values1 = maxpoolmodel.predict(image) region_array = np.asarray([[[0.0, 0.0, 1.0, 1.0]]], dtype='float32') roimodel = tf.keras.Model(inputs=(feature_input, roi_input), outputs=(dense1, dense2)) roimodel.compile( optimizer=optimizers.RMSprop(1e-3), loss={ "bounding_box": losses.MeanSquaredError(), "class_output": losses.CategoricalCrossentropy(), }, metrics={ "bounding_box": [ metrics.MeanAbsolutePercentageError(), metrics.MeanAbsoluteError(), ], "class_output": [metrics.CategoricalAccuracy()], }, ) roimodel.summary() values = values.reshape( 1, 1, 5, 5, 1280) # take into account batch size which is first input region_array = region_array.reshape(1, 1, 1, 4) output2 = np.array([1]) output1 = np.zeros(5 * 5 * 1280)