Example #1
        lambda x, y: ({
            "input_1": real_process_img_test(x['input_1']),
            "input_2": real_process_img_test(x['input_2'])
        }, process_label(y))).batch(FLAGS.batch_size).prefetch(
            FLAGS.batch_size)
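    # the map above preprocesses both image inputs and the label, then batches and prefetches the test dataset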
    print('test tf.data loaded')

    weight_decay = 0.0005

    print('Build network')
    model = network(weight_decay, FLAGS.batch_size)
    # print(model.summary())

    lr = 0.001

    optim = optimizers.Adam(learning_rate=lr)

    model.compile(loss='binary_crossentropy', optimizer=optim, metrics=['acc'])
    checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath=FLAGS.save,
                                                    mode='max',
                                                    monitor='val_acc',
                                                    verbose=2,
                                                    save_best_only=True)

    history = model.fit(x=ds,
                        epochs=100,
                        verbose=1,
                        callbacks=[checkpoint],
                        validation_data=test_ds,
                        steps_per_epoch=500,
                        validation_steps=100)
Example #2
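# Note: write_grads, embeddings_layer_names and embeddings_data are accepted only by
# older Keras / TF 1.x versions of the TensorBoard callback; TF 2.x removed them.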
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir="logs",
    histogram_freq=0,
    write_graph=True,
    write_grads=False,
    write_images=True,
    embeddings_freq=0,
    embeddings_layer_names=None,
    embeddings_metadata=None,
    embeddings_data=None,
    update_freq="epoch")
training_callbacks.append(tensorboard_callback)

# Compile the model.
lr = 0.0001
adam = optimizers.Adam(lr=lr)
model.compile(optimizer=adam, loss="mse", metrics=["mae"])

batch_size = 128
epochs = 500
model.fit(dataset_training.batch(batch_size),
          validation_data=dataset_validate.batch(batch_size),
          epochs=epochs,
          callbacks=training_callbacks)

# Save the model.
print("Saving and uploading weights...")
path = "gapnet_weights.h5"
model.save_weights(path)
run.upload_file(name="gapnet_weights.h5", path_or_stream=path)
Example #3
        Conv2D(512, (3, 3),
               activation='relu',
               padding='same',
               kernel_initializer='he_normal',
               kernel_regularizer=l2(1e-4)))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    # model.add(Dense(256,activation='relu'))
    return model
    # fix random seed for reproducibility


seed = 29
np.random.seed(seed)
optmz = optimizers.Adam(lr=0.001)
# define the deep learning model


def LSTMModel():
    model = Sequential()
    model.add(
        TimeDistributed(ConvModel(),
                        input_shape=(frames, cropHeight, cropWidth, 1)))
    #    model.add(LSTM(2048,
    #         return_sequences=True,
    #         dropout=0.25,
    #         recurrent_dropout=0.25))
    #    model.add(LSTM(512,
    #         return_sequences=True,
    #         dropout=0.25,
Example #4

def prepare_mnist_features_and_labels(x, y):
    x = tf.cast(x, tf.float32) / 255.0
    y = tf.cast(y, tf.int64)
    return x, y


model = keras.Sequential([
    layers.Reshape(target_shape=(28 * 28,), input_shape=(28, 28)),
    layers.Dense(100, activation='relu'),
    layers.Dense(100, activation='relu'),
    layers.Dense(10),
])

optimizer = optimizers.Adam()


# Use the @tf.function (AutoGraph) decorator to pre-compile the method into a TensorFlow graph
@tf.function
def compute_loss(logits, labels):
    return tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                       labels=labels))


@tf.function
def compute_accuracy(logits, labels):
    predictions = tf.argmax(logits, axis=1)
    return tf.reduce_mean(tf.cast(tf.equal(predictions, labels), tf.float32))
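

# Hedged sketch (not part of the original snippet): one way compute_loss and
# compute_accuracy could be wired into a custom training step for the model above.
@tf.function
def train_one_step(x, y):
    with tf.GradientTape() as tape:
        logits = model(x)
        loss = compute_loss(logits, y)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss, compute_accuracy(logits, y)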
Example #5
def main():

    # [b, 32, 32, 3] => [b, 1, 1, 512]
    conv_net = Sequential(conv_layers)  # part 1: the convolutional layers

    fc_net = Sequential([
        layers.Dense(256, activation=tf.nn.relu),    # part 2: the fully connected layers
        layers.Dense(128, activation=tf.nn.relu),
        layers.Dense(100, activation=None),
    ])

    conv_net.build(input_shape=[None, 32, 32, 3])
    fc_net.build(input_shape=[None, 512])    # the input of part 2 is the output of part 1
    optimizer = optimizers.Adam(lr=1e-4)

    # [1, 2] + [3, 4] => [1, 2, 3, 4]
    variables = conv_net.trainable_variables + fc_net.trainable_variables  # parameters that need gradients

    for epoch in range(50):

        for step, (x,y) in enumerate(train_db):

            with tf.GradientTape() as tape:
                # [b, 32, 32, 3] => [b, 1, 1, 512]
                out = conv_net(x)
                # flatten, => [b, 512]
                out = tf.reshape(out, [-1, 512])
                # [b, 512] => [b, 100]
                logits = fc_net(out)
                # [b] => [b, 100]
                y_onehot = tf.one_hot(y, depth=100)
                # compute loss
                loss = tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True)
                loss = tf.reduce_mean(loss)

            grads = tape.gradient(loss, variables)
            optimizer.apply_gradients(zip(grads, variables))

            if step % 100 == 0:
                print(epoch, step, 'loss:', float(loss))



        total_num = 0
        total_correct = 0
        for x,y in test_db:

            out = conv_net(x)
            out = tf.reshape(out, [-1, 512])
            logits = fc_net(out)
            prob = tf.nn.softmax(logits, axis=1)
            pred = tf.argmax(prob, axis=1)
            pred = tf.cast(pred, dtype=tf.int32)

            correct = tf.cast(tf.equal(pred, y), dtype=tf.int32)
            correct = tf.reduce_sum(correct)

            total_num += x.shape[0]
            total_correct += int(correct)

        acc = total_correct / total_num
        print(epoch, 'acc:', acc)
Example #6
def vpg(env_fn, model_fn, model_kwargs, pi_lr=1e-2, v_lr=1e-2, gamma=0.99, epochs=50,
        batch_size=5000, seed=0, render=False, render_last=False, logger_kwargs=dict(),
        save_freq=10, overwrite_save=True, preprocess_fn=None, obs_dim=None):
    """Vanilla Policy Gradient

    Arguments:
    ----------
    env_fn : A function which creates a copy of OpenAI Gym environment
    model_fn : function for creating the policy gradient models to use
        (see models module for more info)
    model_kwargs : any kwargs to pass into model function
    pi_lr : learning rate for policy network update
    v_lr : learning rate for value network update
    epochs : number of epochs to train for
    batch_size : max batch size for epoch
    seed : random seed
    render : whether to render environment or not
    render_last : whether to render environment after final epoch
    logger_kwargs : dictionary of keyword arguments for logger
    save_freq : number of epochs between model saves (always saves at least at the end of training)
    overwrite_save : whether to overwrite last saved model or save in new dir
    preprocess_fn : the preprocessing function for observations (if None, no preprocessing is
        done apart from reshaping for discrete observation spaces)
    obs_dim : dimensions for observations (if None then dimensions extracted from environment
        observation space)
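
    A commented usage sketch follows the function body.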
    """
    print("Setting seeds")
    tf.random.set_seed(seed)
    np.random.seed(seed)

    print("Initializing logger")
    logger = log.Logger(**logger_kwargs)
    logger.save_config(locals())

    if preprocess_fn is None:
        preprocess_fn = preprocess.preprocess_obs

    print("Initializing environment")
    env = env_fn()

    if obs_dim is None:
        obs_dim = env.observation_space.shape
    num_actions = utils.get_dim_from_space(env.action_space)
    act_dim = env.action_space.shape

    print("Initializing Replay Buffer")
    buf = PGReplayBuffer(obs_dim, act_dim, batch_size, gamma=gamma, adv_fn="gae")

    print("Building network")
    pi_model, pi_fn, v_model, v_fn = model_fn(env, **model_kwargs)

    print_model_summary({"Actor": pi_model, "Critic": v_model})

    print("Setup training ops - actor")
    pi_train_op = optimizers.Adam(learning_rate=pi_lr)

    @tf.function
    def policy_loss(a_pred, a_taken, a_adv):
        action_mask = tf.one_hot(tf.cast(a_taken, tf.int32), num_actions)
        log_probs = tf.reduce_sum(action_mask * tf.nn.log_softmax(a_pred), axis=1)
        return -tf.reduce_mean(log_probs * a_adv)

    print("Setup training ops - critic")
    v_train_op = optimizers.Adam(learning_rate=v_lr)

    @tf.function
    def value_loss(o_val, o_ret):
        return tf.reduce_mean((o_ret - o_val)**2)

    @tf.function
    def get_grads(batch_obs, batch_acts, batch_rets, batch_adv):
        with tf.GradientTape(persistent=True) as tape:
            a_pred = pi_model(batch_obs)
            o_val = v_model(batch_obs)
            pi_loss = policy_loss(a_pred, batch_acts, batch_adv)
            v_loss = value_loss(o_val, batch_rets)
        pi_grads = tape.gradient(pi_loss, pi_model.trainable_variables)
        v_grads = tape.gradient(v_loss, v_model.trainable_variables)
        return pi_loss, pi_grads, v_loss, v_grads

    @tf.function
    def apply_gradients(pi_grads, v_grads):
        pi_train_op.apply_gradients(zip(pi_grads, pi_model.trainable_variables))
        v_train_op.apply_gradients(zip(v_grads, v_model.trainable_variables))

    @tf.function
    def update(batch_obs, batch_acts, batch_rets, batch_adv):
        pi_loss, pi_grads, v_loss, v_grads = get_grads(
            batch_obs, batch_acts, batch_rets, batch_adv)
        apply_gradients(pi_grads, v_grads)
        return pi_loss, v_loss

    print("Setting up model saver")
    logger.setup_tf_model_saver(pi_model, env, "pg", v_model)

    def train_one_epoch():
        o, r, d = env.reset(), 0, False
        finished_rendering_this_epoch = False
        batch_ep_lens, batch_ep_rets = [], []
        ep_len, ep_ret = 0, 0
        t = 0

        while True:
            if not finished_rendering_this_epoch and render:
                env.render()

            o = preprocess_fn(o, env)
            a = pi_fn(o)
            v_t = v_fn(o)
            buf.store(o, a, r, v_t)
            o, r, d, _ = env.step(a)

            ep_len += 1
            ep_ret += r
            t += 1

            if d or (t == batch_size):
                # set last_val as final reward or value of final state
                # since we may end epoch not at terminal state
                if d:
                    last_val = r
                    # only save completed episodes for reporting
                    batch_ep_lens.append(ep_len)
                    batch_ep_rets.append(ep_ret)
                else:
                    o = preprocess_fn(o, env)
                    last_val = v_fn(o)
                buf.finish_path(last_val)

                o, r, d = env.reset(), 0, False
                finished_rendering_this_epoch = True
                ep_len, ep_ret = 0, 0
                if t == batch_size:
                    break

        batch_obs, batch_acts, batch_adv, batch_rets, batch_vals = buf.get()
        pi_loss, v_loss = update(batch_obs, batch_acts, batch_rets, batch_adv)
        return pi_loss.numpy(), v_loss.numpy(), batch_ep_rets, batch_ep_lens

    total_training_time = 0
    total_episodes = 0
    for i in range(epochs):
        epoch_start = time.time()
        results = train_one_epoch()

        epoch_time = time.time() - epoch_start
        total_training_time += epoch_time
        avg_return = np.mean(results[2])
        total_episodes += len(results[3])

        logger.log_tabular("epoch", i)
        logger.log_tabular("pi_loss", results[0])
        logger.log_tabular("v_loss", results[1])
        logger.log_tabular("avg_return", avg_return)
        logger.log_tabular("avg_ep_lens", np.mean(results[3]))
        logger.log_tabular("epoch_time", epoch_time)
        logger.log_tabular("total_eps", total_episodes)
        logger.log_tabular("total_time", total_training_time)
        logger.dump_tabular()

        if (save_freq != 0 and i % save_freq == 0) or i == epochs-1:
            itr = None if overwrite_save else i
            logger.save_model(itr)

    if render_last:
        input("Press enter to view final policy in action")
        final_ret = 0
        o, r, d = env.reset(), 0, False
        finished_rendering_this_epoch = False
        while not finished_rendering_this_epoch:
            env.render()
            o = preprocess_fn(o, env)
            a = pi_fn(o)
            o, r, d, _ = env.step(a)
            final_ret += r
            if d:
                finished_rendering_this_epoch = True
        print("Final return: %.3f" % (final_ret))
Example #7
drop_prob = 0.6

# determined in preprocessing, NOT hyperparameter
frames_per_annotation = 4
multi_input_gen_train = uf.multi_input_generator(speech_train_x, video_train_x,
                                                 train_target, SEQ_LENGTH,
                                                 batch_size,
                                                 frames_per_annotation)
multi_input_gen_val = uf.multi_input_generator(speech_valid_x, video_valid_x,
                                               validation_target, SEQ_LENGTH,
                                               batch_size,
                                               frames_per_annotation)

opt = optimizers.Adam(lr=0.0001,
                      beta_1=0.9,
                      beta_2=0.999,
                      epsilon=None,
                      decay=0.0,
                      amsgrad=False)


#custom loss
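# batch_CCC turns the concordance correlation coefficient from uf.CCC into a loss:
# it is divided by the batch size, then subtracted from 1 so that maximising CCC minimises the loss.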
def batch_CCC(y_true, y_pred):
    CCC = uf.CCC(y_true, y_pred)
    CCC = CCC / float(batch_size)
    CCC = 1 - CCC
    return CCC


time_dim = frames_per_annotation * SEQ_LENGTH
features_dim = speech_train_x.shape[1]
Example #8
def get_age_model(DATA):

    feed_forward_size = 2048
    max_seq_len = 150
    model_dim = 256 + 256 + 64 + 32 + 8 + 16

    input_creative_id = Input(shape=(max_seq_len, ), name='creative_id')
    x1 = Embedding(
        input_dim=NUM_creative_id + 1,
        output_dim=256,
        weights=[DATA['creative_id_emb']],
        trainable=args.not_train_embedding,
        #    trainable=False,
        input_length=150,
        mask_zero=True)(input_creative_id)
    # encodings = PositionEncoding(model_dim)(x1)
    # encodings = Add()([embeddings, encodings])

    input_ad_id = Input(shape=(max_seq_len, ), name='ad_id')
    x2 = Embedding(
        input_dim=NUM_ad_id + 1,
        output_dim=256,
        weights=[DATA['ad_id_emb']],
        trainable=args.not_train_embedding,
        #    trainable=False,
        input_length=150,
        mask_zero=True)(input_ad_id)

    input_product_id = Input(shape=(max_seq_len, ), name='product_id')
    x3 = Embedding(
        input_dim=NUM_product_id + 1,
        output_dim=32,
        weights=[DATA['product_id_emb']],
        trainable=args.not_train_embedding,  #
        #    trainable=False,
        input_length=150,
        mask_zero=True)(input_product_id)

    input_advertiser_id = Input(shape=(max_seq_len, ), name='advertiser_id')
    x4 = Embedding(
        input_dim=NUM_advertiser_id + 1,
        output_dim=64,
        weights=[DATA['advertiser_id_emb']],
        trainable=args.not_train_embedding,  #
        #    trainable=False,
        input_length=150,
        mask_zero=True)(input_advertiser_id)

    input_industry = Input(shape=(max_seq_len, ), name='industry')
    x5 = Embedding(
        input_dim=NUM_industry + 1,
        output_dim=16,
        weights=[DATA['industry_emb']],
        trainable=args.not_train_embedding,
        #    trainable=False,
        input_length=150,
        mask_zero=True)(input_industry)

    input_product_category = Input(shape=(max_seq_len, ),
                                   name='product_category')
    x6 = Embedding(
        input_dim=NUM_product_category + 1,
        output_dim=8,
        weights=[DATA['product_category_emb']],
        trainable=args.not_train_embedding,
        #    trainable=False,
        input_length=150,
        mask_zero=True)(input_product_category)

    # (bs, 100, 128*2)
    encodings = layers.Concatenate(axis=2)([x1, x2, x3, x4, x5, x6])
    # (bs, 100)
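    # True at padded positions (creative_id == 0); used as the attention mask below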
    masks = tf.equal(input_creative_id, 0)

    # (bs, 100, 128*2)
    attention_out = MultiHeadAttention(
        8, 79)([encodings, encodings, encodings, masks])

    # Add & Norm
    attention_out += encodings
    attention_out = LayerNormalization()(attention_out)
    # Feed-Forward
    ff = PositionWiseFeedForward(model_dim, feed_forward_size)
    ff_out = ff(attention_out)
    # Add & Norm
    # ff_out is (bs, 100, 128), but attention_out is (bs, 100, 256)
    ff_out += attention_out
    encodings = LayerNormalization()(ff_out)
    encodings = GlobalMaxPooling1D()(encodings)
    encodings = Dropout(0.2)(encodings)

    # output_gender = Dense(2, activation='softmax', name='gender')(encodings)
    output_age = Dense(10, activation='softmax', name='age')(encodings)

    model = Model(inputs=[
        input_creative_id, input_ad_id, input_product_id, input_advertiser_id,
        input_industry, input_product_category
    ],
                  outputs=[output_age])

    model.compile(
        optimizer=optimizers.Adam(2.5e-4),
        loss={
            # 'gender': losses.CategoricalCrossentropy(from_logits=False),
            'age': losses.CategoricalCrossentropy(from_logits=False)  #
        },
        # loss_weights=[0.4, 0.6],
        metrics=['accuracy'])
    return model
Example #9
def main():

    # [b, 32, 32, 3] => [b, 1, 1, 512]
    conv_net = Sequential(conv_layers)

    conv_net.build(input_shape=[None, 32, 32, 3])
    fc_net.build(input_shape=[None, 128])
    optimizer = optimizers.Adam(lr=1e-4)

    # [1, 2] + [3, 4] => [1, 2, 3, 4]; gradients are needed for both the conv net and the fc net, so combine both variable lists
    variables = conv_net.trainable_variables + fc_net.trainable_variables

    for epoch in range(50):

        for step, (x, y) in enumerate(train_db):

            with tf.GradientTape() as tape:
                # [b, 32, 32, 3] => [b, 1, 1, 512]
                out = conv_net(x)
                # print(out.shape)
                # flatten, => [b, 512]
                out = tf.reshape(out, [-1, 128])

                # fully connected layers
                # [b, 512] => [b, 100]
                logits = fc_net(out)

                # print(logits.shape)
                # [b] => [b, 100]
                y_onehot = tf.one_hot(y, depth=10)
                # compute loss
                loss = tf.losses.categorical_crossentropy(y_onehot,
                                                          logits,
                                                          from_logits=True)
                loss = tf.reduce_mean(loss)

            grads = tape.gradient(loss, variables)
            optimizer.apply_gradients(zip(grads, variables))

            if step % 100 == 0:
                print(epoch, step, 'loss:', float(loss))

            with summary_writer.as_default():
                tf.summary.scalar("loss", float(loss), step=epoch)

        total_num = 0
        total_correct = 0
        for x, y in test_db:

            out = conv_net(x)
            out = tf.reshape(out, [-1, 128])
            logits = fc_net(out)
            prob = tf.nn.softmax(logits, axis=1)
            pred = tf.argmax(prob, axis=1)
            pred = tf.cast(pred, dtype=tf.int32)

            correct = tf.cast(tf.equal(pred, y), dtype=tf.int32)
            correct = tf.reduce_sum(correct)

            total_num += x.shape[0]
            total_correct += int(correct)

        acc = total_correct / total_num
        print(epoch, 'acc:', acc)

        with summary_writer.as_default():
            tf.summary.scalar("acc", acc, step=epoch)
Example #10
np.random.seed(0)
np.random.shuffle(my_data)

features_init = my_data[:,1:]
features = features_init / features_init.max(axis=0)
labels = my_data[:,0] 
labels -= np.ones(len(labels))

train_labels, train_data = labels[0:2000], features[0:2000,:]
test_labels, test_data = labels[2000:], features[2000:,:]

#tf_data = tf.convert_to_tensor(my_data)
#tf.random.shuffle(tf_data, seed = 1)
from tensorflow.keras import optimizers

adam = optimizers.Adam(lr=0.05)

model = keras.Sequential([
    keras.layers.Dense(1024, activation=tf.nn.relu),
    #keras.layers.Dropout(0.5),
    keras.layers.Dense(1024, activation=tf.nn.relu),
    #keras.layers.Dropout(0.5),
    keras.layers.Dense(256, activation=tf.nn.relu),
    keras.layers.Dense(128, activation=tf.nn.relu),
    keras.layers.Dense(64, activation=tf.nn.relu),
    #keras.layers.Dropout(0.2),
    keras.layers.Dense(22, activation=tf.nn.softmax)
])

model.compile(optimizer=adam,
              loss='sparse_categorical_crossentropy',
Example #11
# Load pre-process data
model_path = os.path.join(config.checkpoint_dir, "model_weight")
if config.is_train:
    nx, ny = input_setup(config=config)

    data_dir = checkpoint_dir(config)

    input_, label_ = read_data(data_dir)

    train_dataset = tf.data.Dataset.from_tensor_slices((input_, label_))
    train_dataset = train_dataset.shuffle(1000).batch(256)

    # Build the model
    model = SRCNN(config, padding='valid')
    model.compile(optimizer=optimizers.Adam(config.learning_rate),
                  loss='mse',
                  metrics=['mae'])

    # Train the model
    history = model.fit(train_dataset, epochs=config.epoch)
    model.save_weights(model_path)
else:
    nx, ny = input_setup(config=config)

    data_dir = checkpoint_dir(config)

    # input_, label_ = read_data(data_dir)
    test_data, test_label = read_test_data(config)

    # In testing mode, set padding='same' so the output keeps its size
Example #12
def train_model(dataset_name,
                model,
                normalize_data=True,
                batch_size=4,
                val_split=None,
                test_split=None,
                monitor_test=False,
                balance_loss=True,
                opt=None,
                monitor='loss',
                save_dir=None,
                experiment_idx=None,
                use_lsuv=False):

    save_dir = save_dir or './results/{}/'.format(model.name.split('_')[0])
    experiment_name = '{}_{}'.format(dataset_name,
                                     '_'.join(model.name.split('_')[1:]))
    if experiment_idx is not None:
        experiment_name += '_{}'.format(experiment_idx)

    # load data
    X_train, y_train, X_val, y_val, X_test, y_test = load_dataset(
        dataset_name,
        val_split=val_split,
        test_split=test_split,
        normalize_data=normalize_data)

    num_train, len_streams, num_features = X_train.shape
    num_val = X_val.shape[0] if val_split is not None else None
    num_test = X_test.shape[0]
    num_classes = np.unique(y_train).size

    batch_size = max(min(batch_size, int(num_train / 10.)), 4)

    y_train_1hot = utils.to_categorical(y_train)
    y_val_1hot = utils.to_categorical(y_val) if val_split is not None else None
    y_test_1hot = utils.to_categorical(y_test)

    opt = opt or optimizers.Adam(1e-3, clipvalue=1.)  #

    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])

    if use_lsuv:
        init_batch_size = 100
        idx_init = np.random.choice(num_train,
                                    size=(init_batch_size),
                                    replace=False)
        X_init = X_train[idx_init]
        y_init = y_train[idx_init]

        model = seq2tens.utils.LSUVReinitializer(model, X_init)

    sample_weight = compute_sample_weight('balanced',
                                          y_train) if balance_loss else None

    # set callbacks

    class TimeHistory(callbacks.Callback):
        def on_train_begin(self, logs={}):
            self.times = []

        def on_epoch_begin(self, epoch, logs={}):
            self.epoch_time_start = time.time()

        def on_epoch_end(self, epoch, logs={}):
            self.times.append(time.time() - self.epoch_time_start)

    class ReduceLRBacktrack(callbacks.ReduceLROnPlateau):
        def __init__(self, best_path, *args, **kwargs):
            super(ReduceLRBacktrack, self).__init__(*args, **kwargs)
            self.best_path = best_path

        def on_epoch_end(self, epoch, logs=None):
            current = logs.get(self.monitor)
            if current is None:
                logging.warning(
                    'Reduce LR on plateau conditioned on metric `%s` '
                    'which is not available. Available metrics are: %s',
                    self.monitor, ','.join(list(logs.keys())))
            if not self.monitor_op(current, self.best):  # not new best
                if not self.in_cooldown() and float(
                        tf.keras.backend.get_value(
                            self.model.optimizer.lr)) > self.min_lr:
                    if self.wait + 1 >= self.patience:  # going to reduce lr
                        # load best model so far
                        print("Backtracking to best model before reducting LR")
                        self.model.load_weights(self.best_path)
            super().on_epoch_end(epoch, logs)  # actually reduce LR

    class EarlyStoppingByLossVal(callbacks.Callback):
        def __init__(self, monitor='val_loss', value=0.00001, verbose=0):
            super(EarlyStoppingByLossVal, self).__init__()
            self.monitor = monitor
            self.value = value
            self.verbose = verbose

        def on_epoch_end(self, epoch, logs={}):
            current = logs.get(self.monitor)
            if current is None:
                warnings.warn(
                    "Early stopping requires %s available!" % self.monitor,
                    RuntimeWarning)
                return

            if current < self.value:
                if self.verbose > 0:
                    print("Epoch %05d: early stopping THR" % epoch)
                self.model.stop_training = True

    es = callbacks.EarlyStopping(monitor=monitor,
                                 patience=300,
                                 restore_best_weights=True,
                                 verbose=1)
    es_loss = EarlyStoppingByLossVal(monitor=monitor, value=1e-8)
    weights_file = os.path.join(save_dir, experiment_name + '.hdf5')
    mc = callbacks.ModelCheckpoint(weights_file,
                                   monitor=monitor,
                                   save_best_only=True,
                                   save_weights_only=True,
                                   verbose=1)
    #     reduce_lr = ReduceLRBacktrack(weights_file, monitor=monitor, patience=50, factor=1/np.sqrt(2.), min_lr=1e-4, verbose=1)
    reduce_lr = callbacks.ReduceLROnPlateau(monitor=monitor,
                                            patience=50,
                                            factor=1 / np.sqrt(2.),
                                            min_lr=1e-4,
                                            verbose=1)

    fit_time = TimeHistory()

    callback_list = [fit_time, mc, es, es_loss, reduce_lr]

    val_data = (X_val, y_val_1hot) if val_split is not None else (
        X_test, y_test_1hot) if monitor_test else None

    history = model.fit(X_train,
                        y_train_1hot,
                        batch_size=batch_size,
                        epochs=10000,
                        callbacks=callback_list,
                        verbose=1,
                        validation_data=val_data,
                        shuffle=True,
                        sample_weight=sample_weight)

    model.load_weights(weights_file)

    # evaluate model performance
    history = history.history
    history['time'] = fit_time.times
    history['results'] = {}
    write_to_txt = ''
    if val_split is not None:
        ##  evaluate on validation set
        y_val_pred = np.argmax(model.predict(X_val), axis=1)
        val_acc = accuracy_score(y_val, y_val_pred)
        val_report = classification_report(y_val, y_val_pred)

        history['results']['val_acc'] = val_acc
        history['results']['val_report'] = val_report

        write_to_txt += 'Val. acc.: {:.3f}\n'.format(val_acc)
        write_to_txt += 'Val. report:\n{}\n'.format(val_report)

        print('Val. acc.: {:.3f}'.format(val_acc))
        print('Val. report:\n{}\n'.format(val_report))

    ## evaluate on test set
    y_test_pred = np.argmax(model.predict(X_test), axis=1)
    test_acc = accuracy_score(y_test, y_test_pred)
    test_report = classification_report(y_test, y_test_pred)

    history['results']['test_acc'] = test_acc
    history['results']['test_report'] = test_report

    write_to_txt += 'Test acc.: {:.3f}\n'.format(test_acc)
    write_to_txt += 'Test report:\n{}\n'.format(test_report)

    print('Test acc.: {:.3f}'.format(test_acc))
    print('Test report:\n{}\n'.format(test_report))

    pkl_file = os.path.join(save_dir, experiment_name + '.pkl')
    with open(pkl_file, 'wb') as f:
        pickle.dump(history, f)

    txt_file = os.path.join(save_dir, experiment_name + '.txt')
    with open(txt_file, 'w') as f:
        f.write(write_to_txt)

    return
Example #13
    # print(layer.name)
    layer.trainable = False

# Check trainable status, i.e. frozen layers
for i, layer in enumerate(model.layers):
    print("Layer:", i, " layer name:", layer.name, " Trainable Status: ", layer.trainable)

"""# Compile and Train the Model"""

from tensorflow.keras import optimizers

# Customise the optimiser

# sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
sgd = optimizers.SGD(lr=0.0001, momentum=0.90, decay=5 ** (-4), nesterov=True)
adam = optimizers.Adam(learning_rate=0.000001, beta_1=0.9, beta_2=0.999, epsilon=1e-07)

# Compile the model 
# Try with adam with custom sgd (SGD+nesterov) and choose the best
# default adam: lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0.
# For Loss: check with mse, binary crossentropy, kullback_leibler_divergence

model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])

# Check the final model Summary: Check non trainable parameters
model.summary()

"""# Start Training"""

# Fit the model
hist1 = model.fit(X, Y, validation_split=0.2, shuffle=True, batch_size=12, epochs=60,
Example #14
    to_lower_case = bert_layer.resolved_object.do_lower_case.numpy()
    tokenizer = BertTokenizer(vocabulary_file, to_lower_case)

    # configuration
    train_rows = 66253
    valid_rows = 11692
    print_interval = 500
    batch_size = 32

    embedding_dim = 300
    cnn_filters = 50
    dnn_units = 256
    dropout_rate = 0.3

    loss = 'mse'
    opti = optimizers.Adam(0.0005, decay=1e-6)

    epochs = 15
    batch_size = 32
    useState = False

    # max length of all the train/valid tokenized inputs
    max_length = 71

    # create text model
    text_model = TEXT_MODEL(vocabulary_size=len(tokenizer.vocab),
                            embedding_dimensions=embedding_dim,
                            cnn_filters=cnn_filters,
                            dnn_units=dnn_units,
                            dropout_rate=dropout_rate,
                            useState=useState,
Example #15
                     name=name + "_Output")(x)

    return models.Model(input, x)


input_shape = imgs_tr.shape[1:]
alpha = 1

mobile = build_mobilenet(input_shape=input_shape,
                         num_classes=num_classes,
                         alpha=1,
                         name="Mobile")
mobile.summary()

loss = 'binary_crossentropy' if num_classes == 1 else 'categorical_crossentropy'
mobile.compile(optimizer=optimizers.Adam(), loss=loss, metrics=['accuracy'])

# %%
# Training Network
epochs = 100
batch_size = 16

history = mobile.fit(imgs_tr,
                     labs_tr,
                     epochs=epochs,
                     batch_size=batch_size,
                     validation_data=(imgs_val, labs_val))

plt.figure(figsize=(10, 4))
plt.subplot(121)
plt.title("Loss graph")
Example #16
def make_model(loss,
               fact=1,
               k_size=3,
               in_shape=(32, 32, 1),
               dil_rate=2,
               pad='same',
               num_bins=None):
    model = tf.keras.Sequential()
    # 2-3 convolution layers with relu followed by batch norm.

    # conv1_1
    model.add(
        layers.Conv2D(activation='relu',
                      filters=int(64 / fact),
                      strides=1,
                      name='conv1_1',
                      input_shape=in_shape,
                      kernel_size=k_size,
                      padding=pad,
                      data_format="channels_last"))
    # conv1_2
    model.add(
        layers.Conv2D(activation='relu',
                      filters=int(64 / fact),
                      strides=2,
                      name='conv1_2',
                      kernel_size=k_size,
                      padding=pad,
                      data_format="channels_last"))
    model.add(layers.BatchNormalization())

    # conv2_1
    model.add(
        layers.Conv2D(activation='relu',
                      filters=int(128 / fact),
                      strides=1,
                      name='conv2_1',
                      kernel_size=k_size,
                      padding=pad,
                      data_format="channels_last"))
    # conv2_2
    model.add(
        layers.Conv2D(activation='relu',
                      filters=int(128 / fact),
                      strides=2,
                      name='conv2_2',
                      kernel_size=k_size,
                      padding=pad,
                      data_format="channels_last"))
    model.add(layers.BatchNormalization())

    # conv3_1
    model.add(
        layers.Conv2D(activation='relu',
                      filters=int(256 / fact),
                      strides=1,
                      name='conv3_1',
                      kernel_size=k_size,
                      padding=pad,
                      data_format="channels_last"))
    # conv3_2
    model.add(
        layers.Conv2D(activation='relu',
                      filters=int(256 / fact),
                      strides=1,
                      name='conv3_2',
                      kernel_size=k_size,
                      padding=pad,
                      data_format="channels_last"))
    # conv3_3
    model.add(
        layers.Conv2D(activation='relu',
                      filters=int(256 / fact),
                      strides=2,
                      name='conv3_3',
                      kernel_size=k_size,
                      padding=pad,
                      data_format="channels_last"))
    model.add(layers.BatchNormalization())

    # conv4_1
    model.add(
        layers.Conv2D(activation='relu',
                      filters=int(512 / fact),
                      strides=1,
                      name='conv4_1',
                      kernel_size=k_size,
                      padding=pad,
                      data_format="channels_last"))
    # conv4_2
    model.add(
        layers.Conv2D(activation='relu',
                      filters=int(512 / fact),
                      strides=1,
                      name='conv4_2',
                      kernel_size=k_size,
                      padding=pad,
                      data_format="channels_last"))
    # conv4_3
    model.add(
        layers.Conv2D(activation='relu',
                      filters=int(512 / fact),
                      strides=1,
                      name='conv4_3',
                      kernel_size=k_size,
                      padding=pad,
                      data_format="channels_last"))
    model.add(layers.BatchNormalization())

    # conv5_1
    model.add(
        layers.Conv2D(activation='relu',
                      filters=int(512 / fact),
                      strides=1,
                      dilation_rate=dil_rate,
                      name='conv5_1',
                      kernel_size=k_size,
                      padding=pad,
                      data_format="channels_last"))
    # conv5_2
    model.add(
        layers.Conv2D(activation='relu',
                      filters=int(512 / fact),
                      strides=1,
                      dilation_rate=dil_rate,
                      name='conv5_2',
                      kernel_size=k_size,
                      padding=pad,
                      data_format="channels_last"))
    # conv5_3
    model.add(
        layers.Conv2D(activation='relu',
                      filters=int(512 / fact),
                      strides=1,
                      dilation_rate=dil_rate,
                      name='conv5_3',
                      kernel_size=k_size,
                      padding=pad,
                      data_format="channels_last"))
    model.add(layers.BatchNormalization())

    # conv6_1
    model.add(
        layers.Conv2D(activation='relu',
                      filters=int(512 / fact),
                      strides=1,
                      dilation_rate=dil_rate,
                      name='conv6_1',
                      kernel_size=k_size,
                      padding=pad,
                      data_format="channels_last"))
    # conv6_2
    model.add(
        layers.Conv2D(activation='relu',
                      filters=int(512 / fact),
                      strides=1,
                      dilation_rate=dil_rate,
                      name='conv6_2',
                      kernel_size=k_size,
                      padding=pad,
                      data_format="channels_last"))
    # conv6_3
    model.add(
        layers.Conv2D(activation='relu',
                      filters=int(512 / fact),
                      strides=1,
                      dilation_rate=dil_rate,
                      name='conv6_3',
                      kernel_size=k_size,
                      padding=pad,
                      data_format="channels_last"))
    model.add(layers.BatchNormalization())

    # conv7_1
    model.add(layers.UpSampling2D(size=(2, 2)))
    model.add(
        layers.Conv2D(activation='relu',
                      filters=int(256 / fact),
                      strides=1,
                      name='conv7_1',
                      kernel_size=k_size,
                      padding=pad,
                      data_format="channels_last"))
    # model.add(layers.Conv2D(activation='relu', filters=int(512/fact), strides=1, name='conv7_1', kernel_size=k_size, padding='same', data_format="channels_last"))
    # conv7_2
    model.add(
        layers.Conv2D(activation='relu',
                      filters=int(256 / fact),
                      strides=1,
                      name='conv7_2',
                      kernel_size=k_size,
                      padding=pad,
                      data_format="channels_last"))
    # conv7_3
    model.add(
        layers.Conv2D(activation='relu',
                      filters=int(256 / fact),
                      strides=1,
                      name='conv7_3',
                      kernel_size=k_size,
                      padding=pad,
                      data_format="channels_last"))
    model.add(layers.BatchNormalization())

    # conv8_1
    model.add(layers.UpSampling2D(size=(2, 2)))
    model.add(
        layers.Conv2D(activation='relu',
                      filters=int(128 / fact),
                      strides=1,
                      name='conv8_1',
                      kernel_size=k_size,
                      padding=pad,
                      data_format="channels_last"))
    # conv8_2
    model.add(
        layers.Conv2D(activation='relu',
                      filters=int(128 / fact),
                      strides=1,
                      name='conv8_2',
                      kernel_size=k_size,
                      padding=pad,
                      data_format="channels_last"))
    # conv8_3
    model.add(
        layers.Conv2D(activation='relu',
                      filters=int(128 / fact),
                      strides=1,
                      name='conv8_3',
                      kernel_size=k_size,
                      padding=pad,
                      data_format="channels_last"))
    #model.add(layers.BatchNormalization())

    # 1x1 convolution.
    #model.add(layers.Conv2D(activation='relu', filters=int(128 / fact), strides=1, name='conv8_3L', kernel_size=1,
    #                        padding=pad, data_format="channels_last"))
    #model.add(layers.BatchNormalization())
    model.add(layers.UpSampling2D(size=(2, 2)))

    if loss is euclidean_loss:
        """ regression version (Euclidean loss) """
        model.add(
            layers.Conv2D(activation='relu',
                          filters=2,
                          kernel_size=1,
                          padding=pad))
    elif loss is loss_classification:
        """ report version (multinomial classificaion loss) """
        model.add(
            layers.Conv2D(activation='relu',
                          filters=num_bins,
                          strides=1,
                          kernel_size=1,
                          padding=pad))

    model.compile(loss=loss,
                  optimizer=optimizers.Adam(lr=0.001,
                                            beta_1=0.9,
                                            beta_2=0.99,
                                            decay=0.001))  #compile

    return model
Example #17
def train(args):

    root_dataset = args.dataset

    k_train = args.k_train  #999
    augmentation = args.augmentation  #True
    batch_size = args.batch_size  #768
    epochs = args.epochs  #100

    y_train = np.empty((0, 80), dtype=float)
    y_val = np.empty((0, 80), dtype=float)
    X_train = np.empty((0, 80), dtype=float)
    X_val = np.empty((0, 80), dtype=float)



    for top, dirs, files in tqdm(os.walk(root_dataset + 'train/noisy'), \
                                 desc='train',total=799):
        for nm in files[:k_train]:
            file_name = os.path.join(top, nm)
            data_n = np.load(file_name)
            data_c = np.load(file_name.replace('train/noisy', 'train/clean'))

            if data_n.shape[1] == 80 and data_n.shape[0] > 20 \
            and data_c.shape[1] == 80 and data_c.shape[0] > 20 \
            and data_n.shape[0] == data_c.shape[0]: #filter

                X_train = np.append(X_train, data_n, axis=0)
                if augmentation:
                    X_train = np.append(X_train, np.flip(data_n, 1),
                                        axis=0)  #aug

                y_train = np.append(y_train, data_c, axis=0)

                if augmentation:
                    y_train = np.append(y_train, np.flip(data_c, 1),
                                        axis=0)  #aug

    for top, dirs, files in tqdm(os.walk(root_dataset + 'val/noisy'), \
                                 desc='val', total=150):
        for nm in files[:k_train]:
            file_name = os.path.join(top, nm)
            data_n = np.load(file_name)
            data_c = np.load(file_name.replace('val/noisy', 'val/clean'))

            if data_n.shape[1] == 80 and data_n.shape[0] > 20 \
            and data_c.shape[1] == 80 and data_c.shape[0] > 20 \
            and data_n.shape[0] == data_c.shape[0]: #filter

                #X_val = np.append(X_val,data_n,axis=0)
                #X_val = np.append(X_val,np.flip(data_n,1),axis=0) #aug

                #y_val = np.append(y_val, data_c,axis=0)
                #y_val = np.append(y_val,np.flip(data_c,1),axis=0) #aug

                #for shuffle
                X_train = np.append(X_train, data_n, axis=0)
                if augmentation:
                    X_train = np.append(X_train, np.flip(data_n, 1),
                                        axis=0)  #aug

                y_train = np.append(y_train, data_c, axis=0)

                if augmentation:
                    y_train = np.append(y_train, np.flip(data_c, 1),
                                        axis=0)  #aug


    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, \
                                        test_size=0.14, random_state=87)

    K_test = int(np.ceil(X_train.shape[0] / 200))

    #train/test
    X_test = X_train[:K_test]
    y_test = y_train[:K_test]
    X_train = X_train[K_test:]
    y_train = y_train[K_test:]

    input_dim = X_train.shape[1]
    output_dim = y_train.shape[1]

    num_hidden = [2049, 500, 180]
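    # mirrored bottleneck MLP: 2049 -> 500 -> 180 -> 500 -> 2049 -> output_dim,
    # with batch normalization and dropout between blocks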

    ILayer1 = Input(shape=(input_dim, ), name="ILayer")
    ILayer2 = BatchNormalization(axis=1, momentum=0.6)(ILayer1)

    #H1
    HLayer1_1 = Dense(num_hidden[0], activation='relu', \
                      name="HLayer1", kernel_initializer=he_normal(seed=20))(ILayer2)
    HLayer1_2 = BatchNormalization(axis=1, momentum=0.55)(HLayer1_1)
    HLayer1_3 = Dropout(0.1)(HLayer1_2)

    #H2
    HLayer2_1 = Dense(num_hidden[1], activation='relu', \
                      name="HLayer2", kernel_initializer=he_normal(seed=60))(HLayer1_3)
    HLayer2_2 = BatchNormalization(axis=1, momentum=0.55)(HLayer2_1)

    #H3
    HLayer3_1 = Dense(num_hidden[2], activation='relu', \
                      name="HLayer3", kernel_initializer=he_normal(seed=120))(HLayer2_2)

    HLayer3_2 = BatchNormalization(axis=1, momentum=0.55)(HLayer3_1)
    HLayer3_2 = Dropout(0.1)(HLayer3_2)
    #H2_R
    HLayer2__1 = Dense(num_hidden[1], activation='relu', \
                       name="HLayer2_R", kernel_initializer=he_normal(seed=60))(HLayer3_2)
    HLayer2__2 = BatchNormalization(axis=1, momentum=0.55)(HLayer2__1)

    #H1_R
    HLayer1__1 = Dense(num_hidden[0], activation='relu', \
                       name="HLayer1_R", kernel_initializer=he_normal(seed=20))(HLayer2__2)
    HLayer1__2 = BatchNormalization(axis=1, momentum=0.55)(HLayer1__1)
    HLayer1__3 = Dropout(0.1)(HLayer1__2)

    OLayer = Dense(output_dim,  \
                        name="OLayer",kernel_initializer=he_normal(seed=62))(HLayer1__3)

    model = Model(inputs=[ILayer1], outputs=[OLayer])
    opt = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-7, \
                          decay=0.0001, amsgrad=False)
    #compile
    model.compile(loss='mse', optimizer=opt)

    plot_model(model, to_file='model.png', show_shapes=True, \
               show_layer_names=True)
    model.summary()

    tensorboard = TensorBoard(log_dir="logs", histogram_freq=0, \
                              write_graph=True, write_images=True)
    print("Fit")
    hist = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, \
                     verbose=1, validation_data=([X_val], [y_val]),
                     callbacks=[tensorboard])

    plt.figure(figsize=(10, 8))
    plt.plot(hist.history['loss'], label='Loss')
    plt.plot(hist.history['val_loss'], label='Val_Loss')
    plt.legend(loc='best')
    plt.title('Train/Val Loss')
    plt.savefig('train_val_Loss.png')
    #plt.show()

    results = model.evaluate(X_test, y_test, batch_size=len(y_test))
    print('Test loss: %.3f' % results)

    #save model
    model_json = model.to_json()
    with open("model/model.json", 'w') as f:
        f.write(model_json)
    model.save_weights("model/model.h5")
    print("Done")
Example #18

model = create_model(input_shape=(height, width, colors), n_out=n_classes)

from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.initializers import glorot_uniform

for layer in model.layers[:based_model_last_block_layer_number]:
    layer.trainable = False
for layer in model.layers[based_model_last_block_layer_number:]:
    layer.trainable = True

from tensorflow.keras import metrics

metric_list = ['accuracy']
optimizer = optimizers.Adam(lr=warmup_learning_rate)
model.compile(optimizer=optimizer,
              loss="categorical_crossentropy",
              metrics=metric_list)
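# Warm-up: only the layers from based_model_last_block_layer_number onwards are trainable
# here; the model is recompiled below with the full learning_rate for the main training phase.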

rlrop = ReduceLROnPlateau(monitor='val_loss',
                          mode='min',
                          patience=rlrop_patience,
                          factor=decay_drop,
                          min_lr=1e-6,
                          verbose=1)

from tensorflow.keras import metrics
metric_list = ['accuracy']
optimizer = optimizers.Adam(lr=learning_rate)
model.compile(optimizer=optimizer,
Example #19
    print(*[x.name + '\n' for x in EA.trainable_variables])

"""### Training F and P on labeled source data"""

# Trainable layers for EAFP training with fixed embeddings
log.info(f' Current ckpt : {current_ckpt}')
if opt.last_ckpt == current_ckpt:
    log.info(f' Running ckpt {current_ckpt}')
    print("Trainable EAFP layers : ")
    print(*[x.name + '\n' for x in EAFP.trainable_variables])

# Training F and P : sparse categorical
if opt.last_ckpt == current_ckpt:
    log.info('Training EAFP model with src data with fixed embeddings...')
    TRAIN = True
    EAFP.compile(optimizer=optimizers.Adam(opt.learning_rate), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    epochs = 20
    for epoch in trange(epochs):
        batch_no = 0
        for (inputs, lengths, labels) in train_src:
            log.info(f"Training on batch no : {batch_no}"); batch_no += 1
            print(scce(labels, EAFP.predict([inputs, lengths])))
            history = EAFP.fit(x=[inputs, lengths], y=labels, epochs=1)
else:
    log.info(f' Skipping to ckpt {opt.last_ckpt}')

"""### Evaluation of Sentiment Classifier"""

# P results : Unseen target data loss and  accuracy
log.info(f' Current ckpt : {current_ckpt}')
if opt.last_ckpt == current_ckpt:
Example #20
def train(model, data, args):
    """
    Training a CapsuleNet
    :param model: the CapsuleNet model
    :param data: a tuple containing training and testing data, like `((x_train, y_train), (x_test, y_test))`
    :param args: arguments
    :return: The trained model
    """
    # unpacking the data
    (x_train, y_train), (x_test, y_test) = data

    # callbacks
    log = callbacks.CSVLogger(args.save_dir + '/log.csv')
    tb = callbacks.TensorBoard(log_dir=args.save_dir + '/tensorboard-logs',
                               batch_size=args.batch_size,
                               histogram_freq=int(args.debug))
    checkpoint = callbacks.ModelCheckpoint(args.save_dir +
                                           '/weights-{epoch:02d}.h5',
                                           monitor='val_capsnet_acc',
                                           save_best_only=True,
                                           save_weights_only=True,
                                           verbose=1)
    lr_decay = callbacks.LearningRateScheduler(
        schedule=lambda epoch: args.lr * (args.lr_decay**epoch))

    # compile the model
    model.compile(optimizer=optimizers.Adam(lr=args.lr),
                  loss=[margin_loss, 'mse'],
                  loss_weights=[1., args.lam_recon],
                  metrics={'capsnet': 'accuracy'})
    """
    # Training without data augmentation:
    model.fit([x_train, y_train], [y_train, x_train], batch_size=args.batch_size, epochs=args.epochs,
              validation_data=[[x_test, y_test], [y_test, x_test]], callbacks=[log, tb, checkpoint, lr_decay])
    """

    # Begin: Training with data augmentation ---------------------------------------------------------------------#
    def train_generator(x, y, batch_size, shift_fraction=0.):
        train_datagen = ImageDataGenerator(
            width_shift_range=shift_fraction,
            height_shift_range=shift_fraction)  # shift up to 2 pixel for MNIST
        generator = train_datagen.flow(x, y, batch_size=batch_size)
        while 1:
            x_batch, y_batch = generator.next()
            yield ([x_batch, y_batch], [y_batch, x_batch])
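            # each batch is ([image, label], [label, image]): labels train the capsnet head,
            # images are the reconstruction targets for the decoder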

    # Training with data augmentation. If shift_fraction=0., also no augmentation.
    model.fit_generator(
        generator=train_generator(x_train, y_train, args.batch_size,
                                  args.shift_fraction),
        steps_per_epoch=int(y_train.shape[0] / args.batch_size),
        epochs=args.epochs,
        validation_data=[[x_test, y_test], [y_test, x_test]],
        callbacks=[log, tb, checkpoint, lr_decay])
    # End: Training with data augmentation -----------------------------------------------------------------------#

    model.save_weights(args.save_dir + '/trained_model.h5')
    print('Trained model saved to \'%s/trained_model.h5\'' % args.save_dir)

    from utils import plot_log
    plot_log(args.save_dir + '/log.csv', show=True)

    return model

Example #21
import tensorflow as tf
from tensorflow.keras import datasets, layers, models, callbacks, Model, optimizers, Input, utils
from tensorflow.keras.layers import Conv1D, Dropout, AveragePooling1D, Flatten, Dense, concatenate, SpatialDropout1D
from scipy import stats
from random import randint
import sys

# check of inputs and setting a few base parameters
if SEED == 0:
    SEED = randint(0, sys.maxsize)
print('seed:', SEED)
tf.random.set_seed(SEED)
if OPT == 'adam':
    optimizer = optimizers.Adam(LEARN)
elif OPT == 'rmsprop':
    optimizer = optimizers.RMSprop(LEARN)
else:
    raise Exception(f'unsupported optimizer: {OPT}')

#inputs
#one hot
inputs = list()
if TYPE.find('C') > -1:
    input_c = Input(shape=(
        eLENGTH,
        eDEPTH,
    ), name="input_onehot")
    inputs.append(input_c)
Example #22
    def cp_resnet_moa_train_pred(self):
        
        print("Is GPU Available?")
        if (len(tf.config.list_physical_devices('GPU')) > 0) & (tf.test.is_built_with_cuda()):
            print("\n Yes, GPU is available!!")
        else:
            print("No, GPU is not available!!")
            
        no_of_compts = 25
        label_smoothing_alpha = 0.0005
        P_MIN = label_smoothing_alpha
        P_MAX = 1 - P_MIN
        NFOLDS = 5
        ##dir names
        model_file_name = "cp_resnet"
        model_dir_name = "cp_resnet_model"
        trn_pred_name = 'cp_train_preds_resnet'
        tst_pred_name = 'cp_test_preds_resnet'
        model_file_name,model_dir_name,trn_pred_name,tst_pred_name = \
        check_if_shuffle_data(self.shuffle, model_file_name, model_dir_name, trn_pred_name, tst_pred_name)
        model_dir = os.path.join(self.data_dir, model_dir_name)
        os.makedirs(model_dir, exist_ok=True)
        
        # Setup file names
        if self.shuffle:
            if self.subsample:
                input_train_file = os.path.join(self.data_dir, "train_shuffle_lvl4_data_subsample.csv.gz")
                input_test_file = os.path.join(self.data_dir, "test_lvl4_data_subsample.csv.gz")
            else:
                input_train_file = os.path.join(self.data_dir, "train_shuffle_lvl4_data.csv.gz")
                input_test_file = os.path.join(self.data_dir, "test_lvl4_data.csv.gz")
        else:
            if self.subsample:
                input_train_file = os.path.join(self.data_dir, "train_lvl4_data_subsample.csv.gz")
                input_test_file = os.path.join(self.data_dir, "test_lvl4_data_subsample.csv.gz")
            else:
                input_train_file = os.path.join(self.data_dir, "train_lvl4_data.csv.gz")
                input_test_file = os.path.join(self.data_dir, "test_lvl4_data.csv.gz")
        
        if self.subsample:
            input_target_file = os.path.join(self.data_dir, 'target_labels_subsample.csv')
        else:
            input_target_file = os.path.join(self.data_dir, 'target_labels.csv')

        df_train = pd.read_csv(input_train_file, compression='gzip',low_memory = False)
        df_test = pd.read_csv(input_test_file, compression='gzip',low_memory = False)
        df_targets = pd.read_csv(input_target_file)
        
        metadata_cols = ['Metadata_broad_sample', 'Metadata_pert_id', 'Metadata_Plate', 'Metadata_Well', 
                         'Metadata_broad_id', 'Metadata_moa', 'broad_id', 'pert_iname', 'moa', 'replicate_name', 
                         'Metadata_dose_recode']
        
        target_cols = df_targets.columns[1:]
        df_train_x, df_train_y, df_test_x, df_test_y = split_data(df_train, df_test, metadata_cols, target_cols)
        df_train = drug_stratification(df_train,NFOLDS,target_cols,col_name='replicate_name',cpd_freq_num=24)
        
        oof_preds = np.zeros(df_train_y.shape)
        y_pred = np.zeros(df_test_y.shape)
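        # accumulate out-of-fold predictions for the training set; test predictions are averaged over the NFOLDS folds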
        for fold in range(NFOLDS):
            
            x_fold_trn, y_fold_trn, x_fold_val, y_fold_val, df_tst_x_copy, val_idx, no_of_feats = \
            preprocess(fold, df_train, df_train_x, df_train_y, df_test_x,no_of_compts)
            model_path = os.path.join(model_dir, model_file_name + f"_FOLD{fold}_model.h5")
            
            input_, answer5 = resnet_model(df_train_y, no_of_feats)
            model_nn = tf.keras.Model([input_], answer5)
            model_nn.compile(optimizer=optimizers.Adam(learning_rate=self.LEARNING_RATE),
                             loss=losses.BinaryCrossentropy(label_smoothing=label_smoothing_alpha),
                             metrics=logloss)
            early_stopping = EarlyStopping(min_delta=1e-5,monitor='val_loss',patience=10, verbose=0,
                                           mode='min', restore_best_weights=True)
            check_point = ModelCheckpoint(model_path, save_best_only=True,verbose=0, mode="min")
            reduce_lr = ReduceLROnPlateau(factor=0.5, patience=4,verbose=0, mode="auto")
            history = model_nn.fit([x_fold_trn], y_fold_trn, epochs=self.EPOCHS, batch_size=self.BATCH_SIZE,
                                   validation_data=([x_fold_val], y_fold_val),
                                   callbacks=[check_point, early_stopping, reduce_lr])
            
            model_nn = tf.keras.models.load_model(model_path,custom_objects={'logloss': logloss})
            val_old = model_nn.predict(x_fold_val)
            val_metric_old = mean_logloss(val_old, y_fold_val)
            print('Before loop: validation_loss =', val_metric_old)
            
            #---------Freeze and Unfreeze model weights to improve model training------
            model_nn = freeze_unfreeze_model_weights(model_nn, x_fold_trn, y_fold_trn, x_fold_val, y_fold_val, 
                                                     val_metric_old, self.BATCH_SIZE, model_path)
            
            # OOF(Out-of-fold) Predictions and Score #
            val_preds = model_nn.predict([x_fold_val])
            fold_val_score = mean_logloss(val_preds, y_fold_val)
            oof_preds[val_idx, :] += val_preds
            print('Fold:', fold, 'score:', fold_val_score)
            
            ##Test Prediction
            test_preds = model_nn.predict([df_tst_x_copy])
            y_pred += test_preds / NFOLDS
            print('\n')
            
        df_oofs = pd.DataFrame(oof_preds, columns=df_train_y.columns)
        df_preds = pd.DataFrame(y_pred, columns=df_test_y.columns)
        
        model_eval_results(df_train_y, oof_preds, df_test, df_test_y, df_preds, target_cols)
        save_to_csv(df_preds, self.model_pred_dir, f"{tst_pred_name}{self.output_file_indicator}.csv")
        save_to_csv(df_oofs, self.model_pred_dir, f"{trn_pred_name}{self.output_file_indicator}.csv.gz", compress="gzip")
        print("\n All is set, Train and Test predictions have been read as csv files into the model predictions directory!!")
Beispiel #23
0
warnings.filterwarnings("ignore")
from tensorflow.keras.models import Model
from tensorflow.keras import optimizers, applications
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, Input

tf.random.set_random_seed(42)

IMAGES = 20

print('\nGenerating image data...')
t = time()
images = np.random.random((IMAGES, 224, 224, 3))
labels = np.random.randint(0, 9, (IMAGES, 1))
print(f'It took {(time()-t):.2f} s.\n')

input_tensor = Input(shape=(224, 224, 3))
base_model = applications.ResNet50(include_top=False,
                                   input_tensor=input_tensor)
x = GlobalAveragePooling2D()(base_model.output)
final_output = Dense(10, activation='softmax')(x)
model = Model(input_tensor, final_output)

model.compile(optimizer=optimizers.Adam(lr=1e-3),
              loss="sparse_categorical_crossentropy",
              metrics=['accuracy'])
print(f'Training 5 epochs of 10-class ResNet50 on ({IMAGES}, 224, 224, 3)...')
t = time()
model.fit(images, labels, batch_size=2, epochs=5)
print(f'Training took {(time()-t):.2f} s.\n')
Beispiel #24
0
                  size_filter2,
                  padding='same',
                  activation='relu'))
cnn.add(
    Convolution2D(filtrosConv3,
                  size_filter2,
                  padding='same',
                  activation='relu'))
cnn.add(MaxPool2D(pool_size=size_pool))
cnn.add(Flatten())
cnn.add(Dense(256, activation='relu'))
cnn.add(Dropout(0.5))

cnn.add(Dense(nums_class, activation='softmax'))
cnn.compile(loss='categorical_crossentropy',
            optimizer=optimizers.Adam(lr=lr),
            metrics=['accuracy'])

history = cnn.fit_generator(train_gen,
                            epochs=epochs,
                            validation_data=val_gen,
                            use_multiprocessing=True)

#Visualization of results
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()
Beispiel #25
0
        train_test_split(x_train, t_train, test_size=0.2)

    x_train = pad_sequences(x_train, maxlen=maxlen, padding='pre')
    x_val = pad_sequences(x_val, maxlen=maxlen, padding='pre')
    x_test = pad_sequences(x_test, maxlen=maxlen, padding='pre')

    '''
    2. Build the model
    '''
    model = BiRNN(num_words, 128)

    '''
    3. Train the model
    '''
    criterion = losses.BinaryCrossentropy()
    optimizer = optimizers.Adam(learning_rate=0.001,
                                beta_1=0.9, beta_2=0.999, amsgrad=True)
    train_loss = metrics.Mean()
    train_acc = metrics.BinaryAccuracy()
    val_loss = metrics.Mean()
    val_acc = metrics.BinaryAccuracy()

    def compute_loss(t, y):
        return criterion(t, y)

    def train_step(x, t):
        with tf.GradientTape() as tape:
            preds = model(x)
            loss = compute_loss(t, preds)
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        train_loss(loss)
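
The snippet above stops inside train_step, before the metric updates and the epoch loop appear. A self-contained sketch of how such a tape-based step and the Mean/BinaryAccuracy trackers are typically driven; the tiny embedding model and random data below are stand-ins for the BiRNN and padded sequences of the original, not part of it:

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, losses, metrics, optimizers

# Stand-in data and model (the original uses padded text sequences and a BiRNN)
x = np.random.randint(0, 1000, size=(256, 20))
t = np.random.randint(0, 2, size=(256, 1)).astype("float32")
model = tf.keras.Sequential([
    layers.Embedding(1000, 16),
    layers.Bidirectional(layers.LSTM(8)),
    layers.Dense(1, activation='sigmoid'),
])

criterion = losses.BinaryCrossentropy()
optimizer = optimizers.Adam(learning_rate=0.001)
train_loss = metrics.Mean()
train_acc = metrics.BinaryAccuracy()


def train_step(xb, tb):
    with tf.GradientTape() as tape:
        preds = model(xb, training=True)
        loss = criterion(tb, preds)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    train_loss(loss)      # running mean of the batch losses
    train_acc(tb, preds)  # running binary accuracy


ds = tf.data.Dataset.from_tensor_slices((x, t)).shuffle(256).batch(32)
for epoch in range(3):
    train_loss.reset_states()
    train_acc.reset_states()
    for xb, tb in ds:
        train_step(xb, tb)
    print('epoch {}: loss={:.4f}, acc={:.4f}'.format(
        epoch + 1, float(train_loss.result()), float(train_acc.result())))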
Beispiel #26
0
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling3D(pool_size=(2, 2, 2)))
model.add(
    layers.Conv3D(64, kernel_size=(3, 3, 3), strides=(1, 1, 1), activation='relu', kernel_initializer='he_uniform'))
model.add(layers.SpatialDropout3D(0.2))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling3D(pool_size=(2, 2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(512, activation='relu', kernel_initializer='he_uniform'))
model.add(layers.Dense(128, activation='relu', kernel_initializer='he_uniform'))
model.add(layers.Dense(NUM_LABELS, activation='softmax'))

model.summary()

model.compile(loss=losses.sparse_categorical_crossentropy,
              optimizer=optimizers.Adam(lr=learning_rate, beta_1=0.4, beta_2=0.8), metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=batch_size, epochs=no_epochs, validation_split=0.1)

X_test = np.load(os.path.join(SUBJ_DIR, 'test_features.npy'), allow_pickle=True)
X_test = np.transpose(X_test, (0, 2, 3, 4, 1))
y_test = np.load(os.path.join(SUBJ_DIR, 'test_labels.npy'), allow_pickle=True)
y_test = le.transform(y_test)

print('Test data is loaded: %s %s' % (X_test.shape, y_test.shape))

pred = model.predict(X_test)
print(accuracy(np.argmax(pred, axis=1), y_test))

plt.imshow(X_train[0, :, :, 20, 0])
plt.show()
plt.imshow(X_train[0, :, 20, :, 0])
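
The accuracy helper used above is not shown; a plausible stand-in (an assumption, not the original implementation):

import numpy as np


def accuracy(y_pred_labels, y_true):
    # fraction of predictions that exactly match the integer labels
    return np.mean(np.asarray(y_pred_labels) == np.asarray(y_true))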
Beispiel #27
0

def preprocess(x, y):

    x = tf.convert_to_tensor(x, dtype=tf.float32)
    y = tf.convert_to_tensor(y, dtype=tf.int32)

    return x, y


model = Sequential([layers.Dense(3, activation=tf.nn.leaky_relu)])
# model.build(input_shape=[None, 13])
# print(model.trainable_variables[0])
# print(model.trainable_variables[1])

optimizer = optimizers.Adam(lr=1e-3)

batchsz = 20
attribute, label = load_data('D:\\Jupyter\\数值分析课设2\\wine.csv')
num = int(len(attribute) * 0.8)

idx = tf.range(len(attribute))
idx = tf.random.shuffle(idx)

x_train, y_train = tf.gather(attribute, idx[:num]), tf.gather(label, idx[:num])
x_test, y_test = tf.gather(attribute, idx[num:]), tf.gather(label, idx[num:])

db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
db_train = db_train.shuffle(1000).map(preprocess).batch(batchsz)

db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
Beispiel #28
0
def run(args):
    split_on = "none" if args.split_on is (None or "none") else args.split_on

    # Create project dir (if it doesn't exist)
    # import ipdb; ipdb.set_trace()
    prjdir = cfg.MAIN_PRJDIR / args.prjname
    os.makedirs(prjdir, exist_ok=True)

    # Create outdir (using the loaded hyperparamters) or
    # use content (model) from an existing run
    fea_strs = ["use_tile"]
    args_dict = vars(args)
    fea_names = "_".join(
        [k.split("use_")[-1] for k in fea_strs if args_dict[k] is True])
    prm_file_path = prjdir / f"params_{fea_names}.json"
    if prm_file_path.exists() is False:
        shutil.copy(
            fdir / f"../default_params/default_params_{fea_names}.json",
            prm_file_path)
    params = Params(prm_file_path)

    if args.rundir is not None:
        outdir = Path(args.rundir).resolve()
        assert outdir.exists(), f"The {outdir} doesn't exist."
        print_fn = print
    else:
        # outdir = create_outdir(prjdir, args)
        outdir = prjdir / f"{params.base_image_model}_finetuned"

        # Save hyper-parameters
        params.save(outdir / "params.json")

        # Logger
        lg = Logger(outdir / "logger.log")
        print_fn = get_print_func(lg.logger)
        print_fn(f"File path: {fdir}")
        print_fn(f"\n{pformat(vars(args))}")

    # Load dataframe (annotations)
    annotations_file = cfg.DATA_PROCESSED_DIR / args.dataname / cfg.SF_ANNOTATIONS_FILENAME
    dtype = {"image_id": str, "slide": str}
    data = pd.read_csv(annotations_file,
                       dtype=dtype,
                       engine="c",
                       na_values=["na", "NaN"],
                       low_memory=True)
    # data = data.astype({"image_id": str, "slide": str})
    print_fn(data.shape)

    # print_fn("\nFull dataset:")
    # if args.target[0] == "Response":
    #     print_groupby_stat_rsp(data, split_on="Group", print_fn=print_fn)
    # else:
    #     print_groupby_stat_ctype(data, split_on="Group", print_fn=print_fn)
    print_groupby_stat_ctype(data, split_on="Group", print_fn=print_fn)

    # Drop slide dups
    fea_columns = ["slide"]
    data = data.drop_duplicates(subset=fea_columns)

    # Aggregate non-responders to balance the responders
    # import ipdb; ipdb.set_trace()
    # n_samples = data["ctype"].value_counts().min()
    n_samples = 30
    dfs = []
    for ctype, count in data['ctype'].value_counts().items():
        aa = data[data.ctype == ctype]
        if aa.shape[0] > n_samples:
            aa = aa.sample(n=n_samples)
        dfs.append(aa)
    data = pd.concat(dfs, axis=0).reset_index(drop=True)
    print_groupby_stat_ctype(data, split_on="Group", print_fn=print_fn)

    te_size = 0.15
    itr, ite = train_test_split(np.arange(data.shape[0]),
                                test_size=te_size,
                                shuffle=True,
                                stratify=data["ctype_label"].values)
    tr_meta_ = data.iloc[itr, :].reset_index(drop=True)
    te_meta = data.iloc[ite, :].reset_index(drop=True)

    vl_size = 0.10
    itr, ivl = train_test_split(np.arange(tr_meta_.shape[0]),
                                test_size=vl_size,
                                shuffle=True,
                                stratify=tr_meta_["ctype_label"].values)
    tr_meta = tr_meta_.iloc[itr, :].reset_index(drop=True)
    vl_meta = tr_meta_.iloc[ivl, :].reset_index(drop=True)

    print_groupby_stat_ctype(tr_meta, split_on="Group", print_fn=print_fn)
    print_groupby_stat_ctype(vl_meta, split_on="Group", print_fn=print_fn)
    print_groupby_stat_ctype(te_meta, split_on="Group", print_fn=print_fn)

    print_fn(tr_meta.shape)
    print_fn(vl_meta.shape)
    print_fn(te_meta.shape)

    # Determine tfr_dir (the path to TFRecords)
    tfr_dir = (cfg.DATADIR / args.tfr_dir_name).resolve()
    pred_tfr_dir = (cfg.DATADIR / args.pred_tfr_dir_name).resolve()
    label = f"{params.tile_px}px_{params.tile_um}um"
    tfr_dir = tfr_dir / label
    pred_tfr_dir = pred_tfr_dir / label

    # Scalers for each feature set
    ge_scaler, dd1_scaler, dd2_scaler = None, None, None

    ge_cols = [c for c in data.columns if c.startswith("ge_")]
    dd1_cols = [c for c in data.columns if c.startswith("dd1_")]
    dd2_cols = [c for c in data.columns if c.startswith("dd2_")]

    if args.scale_fea:
        if args.use_ge and len(ge_cols) > 0:
            ge_scaler = get_scaler(data[ge_cols])
        if args.use_dd1 and len(dd1_cols) > 0:
            dd1_scaler = get_scaler(data[dd1_cols])
        if args.use_dd2 and len(dd2_cols) > 0:
            dd2_scaler = get_scaler(data[dd2_cols])

    # --------------------------
    # Obtain T/V/E tfr filenames
    # --------------------------
    # List of sample names for T/V/E
    tr_smp_names = list(tr_meta[args.id_name].values)
    vl_smp_names = list(vl_meta[args.id_name].values)
    te_smp_names = list(te_meta[args.id_name].values)

    # TFRecords filenames
    train_tfr_files = get_tfr_files(tfr_dir, tr_smp_names)
    val_tfr_files = get_tfr_files(tfr_dir, vl_smp_names)
    if args.eval is True:
        assert pred_tfr_dir.exists(), f"Dir {pred_tfr_dir} is not found."
        # test_tfr_files = get_tfr_files(tfr_dir, te_smp_names)  # use same tfr_dir for eval
        test_tfr_files = get_tfr_files(pred_tfr_dir, te_smp_names)
        # print_fn("Total samples {}".format(len(train_tfr_files) + len(val_tfr_files) + len(test_tfr_files)))

    assert sorted(tr_smp_names) == sorted(tr_meta[args.id_name].values.tolist(
    )), "Sample names in the tr_smp_names and tr_meta don't match."
    assert sorted(vl_smp_names) == sorted(vl_meta[args.id_name].values.tolist(
    )), "Sample names in the vl_smp_names and vl_meta don't match."
    assert sorted(te_smp_names) == sorted(te_meta[args.id_name].values.tolist(
    )), "Sample names in the te_smp_names and te_meta don't match."

    # -------------------------------
    # Class weight
    # -------------------------------
    tile_cnts = pd.read_csv(tfr_dir / "tile_counts_per_slide.csv")
    tile_cnts.insert(
        loc=0,
        column="tfr_abs_fname",
        value=tile_cnts["tfr_fname"].map(lambda s: str(tfr_dir / s)))
    cat = tile_cnts[tile_cnts["tfr_abs_fname"].isin(train_tfr_files)]

    # import ipdb; ipdb.set_trace()
    ### ap --------------
    # if args.target[0] not in cat.columns:
    #     tile_cnts = tile_cnts[tile_cnts["smp"].isin(tr_meta["smp"])]
    df = tr_meta[["smp", args.target[0]]]
    cat = cat.merge(df, on="smp", how="inner")
    ### ap --------------

    cat = cat.groupby(args.target[0]).agg({
        "smp": "nunique",
        "max_tiles": "sum",
        "n_tiles": "sum",
        "slide": "nunique"
    }).reset_index()
    categories = {}
    for i, row_data in cat.iterrows():
        dct = {
            "num_samples": row_data["smp"],
            "num_tiles": row_data["n_tiles"]
        }
        categories[row_data[args.target[0]]] = dct

    class_weight = calc_class_weights(
        train_tfr_files,
        class_weights_method=params.class_weights_method,
        categories=categories)

    # --------------------------
    # Build tf.data objects
    # --------------------------
    tf.keras.backend.clear_session()

    # import ipdb; ipdb.set_trace()
    if args.use_tile:

        # -------------------------------
        # Parsing funcs
        # -------------------------------
        # import ipdb; ipdb.set_trace()
        if args.target[0] == "Response":
            # Response
            parse_fn = parse_tfrec_fn_rsp
            parse_fn_train_kwargs = {
                "use_tile": args.use_tile,
                "use_ge": args.use_ge,
                "use_dd1": args.use_dd1,
                "use_dd2": args.use_dd2,
                "ge_scaler": ge_scaler,
                "dd1_scaler": dd1_scaler,
                "dd2_scaler": dd2_scaler,
                "id_name": args.id_name,
                "augment": params.augment,
                "application": params.base_image_model,
                # "application": None,
            }
        else:
            # Ctype
            parse_fn = parse_tfrec_fn_ctype
            parse_fn_train_kwargs = {
                "use_tile": args.use_tile,
                "use_ge": args.use_ge,
                "ge_scaler": ge_scaler,
                "id_name": args.id_name,
                "augment": params.augment,
                "target": args.target[0]
            }

        parse_fn_non_train_kwargs = parse_fn_train_kwargs.copy()
        parse_fn_non_train_kwargs["augment"] = False

        # ----------------------------------------
        # Number of tiles/examples in each dataset
        # ----------------------------------------
        # import ipdb; ipdb.set_trace()
        tr_tiles = tile_cnts[tile_cnts[args.id_name].isin(
            tr_smp_names)]["n_tiles"].sum()
        vl_tiles = tile_cnts[tile_cnts[args.id_name].isin(
            vl_smp_names)]["n_tiles"].sum()
        te_tiles = tile_cnts[tile_cnts[args.id_name].isin(
            te_smp_names)]["n_tiles"].sum()

        eval_batch_size = 4 * params.batch_size
        tr_steps = tr_tiles // params.batch_size
        vl_steps = vl_tiles // eval_batch_size
        te_steps = te_tiles // eval_batch_size

        # -------------------------------
        # Create TF datasets
        # -------------------------------
        print("\nCreating TF datasets.")

        # Training
        # import ipdb; ipdb.set_trace()
        train_data = create_tf_data(
            batch_size=params.batch_size,
            deterministic=False,
            include_meta=False,
            interleave=True,
            n_concurrent_shards=params.n_concurrent_shards,  # 32, 64
            parse_fn=parse_fn,
            prefetch=1,  # 2
            repeat=True,
            seed=None,  # cfg.seed,
            shuffle_files=True,
            shuffle_size=params.shuffle_size,  # 8192
            tfrecords=train_tfr_files,
            **parse_fn_train_kwargs)

        # Determine feature shapes from data
        bb = next(train_data.__iter__())

        # Infer dims of features from the data
        # import ipdb; ipdb.set_trace()
        if args.use_ge:
            ge_shape = bb[0]["ge_data"].numpy().shape[1:]
        else:
            ge_shape = None

        if args.use_dd1:
            dd_shape = bb[0]["dd1_data"].numpy().shape[1:]
        else:
            dd_shape = None

        # Print keys and dims
        for i, item in enumerate(bb):
            print(f"\nItem {i}")
            if isinstance(item, dict):
                for k in item.keys():
                    print(f"\t{k}: {item[k].numpy().shape}")
            elif isinstance(item.numpy(), np.ndarray):
                print(item)

        # Evaluation (val, test, train)
        create_tf_data_eval_kwargs = {
            "batch_size": eval_batch_size,
            "include_meta": False,
            "interleave": False,
            "parse_fn": parse_fn,
            "prefetch": None,  # 2
            "repeat": False,
            "seed": None,
            "shuffle_files": False,
            "shuffle_size": None,
        }
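        # These eval kwargs give a single, un-shuffled pass over the TFRecords so the
        # predictions come back in a deterministic order.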

        # import ipdb; ipdb.set_trace()
        create_tf_data_eval_kwargs.update({
            "tfrecords": val_tfr_files,
            "include_meta": False
        })
        val_data = create_tf_data(**create_tf_data_eval_kwargs,
                                  **parse_fn_non_train_kwargs)

    # ----------------------
    # Prep for training
    # ----------------------
    # import ipdb; ipdb.set_trace()

    # -------------
    # Train model
    # -------------
    model = None

    # import ipdb; ipdb.set_trace()
    if args.train is True:

        # Callbacks list
        monitor = "val_loss"
        callbacks = keras_callbacks(outdir,
                                    monitor=monitor,
                                    save_best_only=params.save_best_only,
                                    patience=params.patience)

        # Mixed precision
        if params.use_fp16:
            print_fn("\nTrain with mixed precision")
            if int(tf.keras.__version__.split(".")[1]) == 4:  # TF 2.4
                from tensorflow.keras import mixed_precision
                policy = mixed_precision.Policy("mixed_float16")
                mixed_precision.set_global_policy(policy)
            elif int(tf.keras.__version__.split(".")[1]) == 3:  # TF 2.3
                from tensorflow.keras.mixed_precision import experimental as mixed_precision
                policy = mixed_precision.Policy("mixed_float16")
                mixed_precision.set_policy(policy)
            print_fn("Compute dtype: %s" % policy.compute_dtype)
            print_fn("Variable dtype: %s" % policy.variable_dtype)

        # ----------------------
        # Define model
        # ----------------------
        # import ipdb; ipdb.set_trace()

        from tensorflow.keras.layers import Input, Dense, Dropout, Activation, BatchNormalization
        from tensorflow.keras import layers
        from tensorflow.keras import losses
        from tensorflow.keras import optimizers
        from tensorflow.keras.models import Sequential, Model, load_model

        # trainable = True
        trainable = False
        # from_logits = True
        from_logits = False
        fit_verbose = 1
        pretrain = params.pretrain
        pooling = params.pooling
        n_classes = len(sorted(tr_meta[args.target[0]].unique()))

        model_inputs = []
        merge_inputs = []

        if args.use_tile:
            image_shape = (cfg.IMAGE_SIZE, cfg.IMAGE_SIZE, 3)
            tile_input_tensor = tf.keras.Input(shape=image_shape,
                                               name="tile_image")

            base_img_model = tf.keras.applications.Xception(include_top=False,
                                                            weights=pretrain,
                                                            input_shape=None,
                                                            input_tensor=None,
                                                            pooling=pooling)

            print_fn(
                f"\nNumber of layers in the base image model ({params.base_image_model}): {len(base_img_model.layers)}"
            )
            print_fn("Trainable variables: {}".format(
                len(base_img_model.trainable_variables)))
            print_fn("Shape of trainable variables at {}: {}".format(
                0, base_img_model.trainable_variables[0].shape))
            print_fn("Shape of trainable variables at {}: {}".format(
                -1, base_img_model.trainable_variables[-1].shape))

            print_fn("\nFreeze base model.")
            base_img_model.trainable = trainable  # Freeze the base_img_model
            print_fn("Trainable variables: {}".format(
                len(base_img_model.trainable_variables)))

            print_fn("\nPrint some layers")
            print_fn("Name of layer {}: {}".format(
                0, base_img_model.layers[0].name))
            print_fn("Name of layer {}: {}".format(
                -1, base_img_model.layers[-1].name))

            # training=False makes the base model run in inference mode so
            # that batchnorm layers are not updated during the fine-tuning stage.
            # x_tile = base_img_model(tile_input_tensor)
            x_tile = base_img_model(tile_input_tensor, training=False)
            # x_tile = base_img_model(tile_input_tensor, training=trainable)
            model_inputs.append(tile_input_tensor)

            # x_tile = Dense(params.dense1_img, activation=tf.nn.relu, name="dense1_img")(x_tile)
            # x_tile = Dense(params.dense2_img, activation=tf.nn.relu, name="dense2_img")(x_tile)
            # x_tile = BatchNormalization(name="batchnorm_im")(x_tile)
            merge_inputs.append(x_tile)
            del tile_input_tensor, x_tile

        # Merge towers
        if len(merge_inputs) > 1:
            mm = layers.Concatenate(axis=1, name="merger")(merge_inputs)
        else:
            mm = merge_inputs[0]

        # Dense layers of the top classifier
        mm = Dense(params.dense1_top, activation=tf.nn.relu,
                   name="dense1_top")(mm)
        # mm = BatchNormalization(name="batchnorm_top")(mm)
        # mm = Dropout(params.dropout1_top)(mm)

        # Output
        output = Dense(n_classes, activation=tf.nn.relu, name="logits")(mm)
        if from_logits is False:
            output = Activation(tf.nn.softmax, dtype="float32",
                                name="softmax")(output)

        # Assemble final model
        model = Model(inputs=model_inputs, outputs=output)

        metrics = [
            tf.keras.metrics.SparseCategoricalAccuracy(name="CatAcc"),
            tf.keras.metrics.SparseCategoricalCrossentropy(
                from_logits=from_logits, name="CatCrossEnt")
        ]

        if params.optimizer == "SGD":
            optimizer = optimizers.SGD(learning_rate=params.learning_rate,
                                       momentum=0.9,
                                       nesterov=True)
        elif params.optimizer == "Adam":
            optimizer = optimizers.Adam(learning_rate=params.learning_rate)

        loss = tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=from_logits)

        model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

        # import ipdb; ipdb.set_trace()
        print_fn("\nBase model")
        base_img_model.summary(print_fn=print_fn)
        print_fn("\nFull model")
        model.summary(print_fn=print_fn)
        print_fn("Trainable variables: {}".format(
            len(model.trainable_variables)))

        print_fn(f"Train steps:      {tr_steps}")
        # print_fn(f"Validation steps: {vl_steps}")

        # ------------
        # Train
        # ------------
        # import ipdb; ipdb.set_trace()
        # tr_steps = 10  # tr_tiles // params.batch_size // 15  # for debugging
        print_fn("\n{}".format(yellow("Train")))
        timer = Timer()
        history = model.fit(x=train_data,
                            validation_data=val_data,
                            steps_per_epoch=tr_steps,
                            validation_steps=vl_steps,
                            class_weight=class_weight,
                            epochs=params.epochs,
                            verbose=fit_verbose,
                            callbacks=callbacks)
        # del train_data, val_data
        timer.display_timer(print_fn)
        plot_prfrm_metrics(history,
                           title="Train stage",
                           name="tn",
                           outdir=outdir)
        model = load_best_model(outdir)  # load best model

        # Save trained model
        print_fn("\nSave trained model.")
        model.save(outdir / "best_model_trained")

        create_tf_data_eval_kwargs.update({
            "tfrecords": test_tfr_files,
            "include_meta": True
        })
        test_data = create_tf_data(**create_tf_data_eval_kwargs,
                                   **parse_fn_non_train_kwargs)

        # Calc hits
        te_tile_preds = calc_tile_preds(test_data, model=model, outdir=outdir)
        te_tile_preds = te_tile_preds.sort_values(["image_id", "tile_id"],
                                                  ascending=True)
        hits_tn = calc_hits(te_tile_preds, te_meta)
        hits_tn.to_csv(outdir / "hits_tn.csv", index=False)

        # ------------
        # Finetune
        # ------------
        # import ipdb; ipdb.set_trace()
        print_fn("\n{}".format(green("Finetune")))
        unfreeze_top_layers = 50
        # Unfreeze the top layers of the base model: re-enable the base model's own
        # trainable flag (otherwise Keras keeps all of its weights non-trainable),
        # then re-freeze everything below the top block.
        base_img_model.trainable = True
        for layer in base_img_model.layers[:-unfreeze_top_layers]:
            layer.trainable = False
        for layer in base_img_model.layers[-unfreeze_top_layers:]:
            print_fn("{}: (trainable={})".format(layer.name, layer.trainable))
        print_fn("Trainable variables: {}".format(
            len(model.trainable_variables)))

        model.compile(
            loss=loss,
            optimizer=optimizers.Adam(learning_rate=params.learning_rate / 10),
            metrics=metrics)

        callbacks = keras_callbacks(outdir,
                                    monitor=monitor,
                                    save_best_only=params.save_best_only,
                                    patience=params.patience,
                                    name="finetune")

        total_epochs = history.epoch[-1] + params.finetune_epochs
        timer = Timer()
        history_fn = model.fit(x=train_data,
                               validation_data=val_data,
                               steps_per_epoch=tr_steps,
                               validation_steps=vl_steps,
                               class_weight=class_weight,
                               epochs=total_epochs,
                               initial_epoch=history.epoch[-1] + 1,
                               verbose=fit_verbose,
                               callbacks=callbacks)
        del train_data, val_data
        plot_prfrm_metrics(history_fn,
                           title="Finetune stage",
                           name="fn",
                           outdir=outdir)
        timer.display_timer(print_fn)

        # Save trained model
        print_fn("\nSave finetuned model.")
        model.save(outdir / "best_model_finetuned")
        base_img_model.save(outdir / "best_model_img_base_finetuned")

    if args.eval is True:

        print_fn("\n{}".format(bold("Test set predictions.")))
        timer = Timer()
        # import ipdb; ipdb.set_trace()
        te_tile_preds = calc_tile_preds(test_data, model=model, outdir=outdir)
        te_tile_preds = te_tile_preds.sort_values(["image_id", "tile_id"],
                                                  ascending=True)
        te_tile_preds.to_csv(outdir / "te_tile_preds.csv", index=False)
        # print(te_tile_preds[["image_id", "tile_id", "y_true", "y_pred_label", "prob"]][:20])
        # print(te_tile_preds.iloc[:20, 1:])
        del test_data

        # Calc hits
        hits_fn = calc_hits(te_tile_preds, te_meta)
        hits_fn.to_csv(outdir / "hits_fn.csv", index=False)

        # import ipdb; ipdb.set_trace()
        roc_auc = {}
        import matplotlib.pyplot as plt
        from sklearn.metrics import roc_curve, auc
        fig, ax = plt.subplots(figsize=(8, 6))
        for true in range(0, n_classes):
            if true in te_tile_preds["y_true"].values:
                fpr, tpr, thresh = roc_curve(te_tile_preds["y_true"],
                                             te_tile_preds["prob"],
                                             pos_label=true)
                roc_auc[true] = auc(fpr, tpr)
                plt.plot(fpr,
                         tpr,
                         linestyle='--',
                         label=f"Class {true} vs Rest")
            else:
                roc_auc[true] = None

        # plt.plot([0,0], [1,1], '--', label="Random")
        plt.title("Multiclass ROC Curve")
        plt.xlabel("FPR")
        plt.ylabel("TPR")
        plt.legend(loc="best")
        plt.savefig(outdir / "Multiclass ROC", dpi=70)

        # Average precision score
        from sklearn.metrics import average_precision_score
        y_true_vec = te_tile_preds.y_true.values
        y_true_onehot = np.zeros((y_true_vec.size, n_classes))
        y_true_onehot[np.arange(y_true_vec.size), y_true_vec] = 1
        y_probs = te_tile_preds[[
            c for c in te_tile_preds.columns if "prob_" in c
        ]]
        print_fn("\nAvearge precision")
        print_fn("Micro    {}".format(
            average_precision_score(y_true_onehot, y_probs, average="micro")))
        print_fn("Macro    {}".format(
            average_precision_score(y_true_onehot, y_probs, average="macro")))
        print_fn("Wieghted {}".format(
            average_precision_score(y_true_onehot, y_probs,
                                    average="weighted")))
        print_fn("Samples  {}".format(
            average_precision_score(y_true_onehot, y_probs,
                                    average="samples")))

        # import ipdb; ipdb.set_trace()
        agg_method = "mean"
        # agg_by = "smp"
        agg_by = "image_id"
        smp_preds = agg_tile_preds(te_tile_preds,
                                   agg_by=agg_by,
                                   meta=te_meta,
                                   agg_method=agg_method)

        timer.display_timer(print_fn)

    lg.close_logger()
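
The run() function above boils down to a two-stage transfer-learning recipe: train a new head on a frozen Xception base (called with training=False so that the BatchNorm statistics stay fixed), then unfreeze the top of the base and recompile with a roughly 10x smaller learning rate. A condensed, self-contained sketch of that recipe on dummy data; the shapes, layer counts and learning rates below are illustrative stand-ins, not the author's settings:

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, losses, optimizers

# Dummy tiles standing in for the TFRecord pipeline above
x = np.random.random((8, 224, 224, 3)).astype("float32")
y = np.random.randint(0, 3, size=(8,))

base = tf.keras.applications.Xception(include_top=False,
                                      weights=None,  # the example above passes params.pretrain here
                                      input_shape=(224, 224, 3),
                                      pooling='avg')
base.trainable = False                    # stage 1: freeze the whole base

inputs = tf.keras.Input(shape=(224, 224, 3), name='tile_image')
h = base(inputs, training=False)          # keep BatchNorm in inference mode in both stages
h = layers.Dense(128, activation='relu')(h)
outputs = layers.Dense(3, activation='softmax')(h)
model = tf.keras.Model(inputs, outputs)

loss = losses.SparseCategoricalCrossentropy()
model.compile(loss=loss, optimizer=optimizers.Adam(learning_rate=1e-3), metrics=['accuracy'])
model.fit(x, y, batch_size=4, epochs=1)   # train the new head only

# Stage 2: re-enable the base, keep all but the top 50 layers frozen,
# and fine-tune with a smaller learning rate
base.trainable = True
for layer in base.layers[:-50]:
    layer.trainable = False
model.compile(loss=loss, optimizer=optimizers.Adam(learning_rate=1e-4), metrics=['accuracy'])
model.fit(x, y, batch_size=4, epochs=1)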
Beispiel #29
0
                                 frequency=100)
    }
    if isinstance(layer, tf.keras.layers.Dense):
        return sparsity.prune_low_magnitude(
            layer, **pruning_params
        )  # note: can pass a whole model instead of individual layers to prune_low_magnitude if desired
    return layer


pruned_model = tf.keras.models.clone_model(
    model,
    clone_function=apply_pruning_to_dense,
)
adam = optimizers.Adam(learning_rate=lr,
                       beta_1=0.9,
                       beta_2=0.99,
                       epsilon=None,
                       decay=1e-5,
                       amsgrad=False)
pruned_model.compile(optimizer=adam,
                     loss='categorical_crossentropy',
                     metrics=[
                         TopKCategoricalAccuracy(k=1, name='accuracy'),
                         TopKCategoricalAccuracy(k=3, name='top3_accuracy'),
                         TopKCategoricalAccuracy(k=5, name='top5_accuracy')
                     ])

# Define callbacks
model_folder = os.path.dirname(model_path)
model_filename = os.path.basename(model_path)
output_filename = model_filename[:model_filename.index(
    '.hdf5')] + '_prune' + str(sparsity_target) + '.hdf5'
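
The snippet is cut off before its callbacks are defined. With tensorflow-model-optimization, a model wrapped by prune_low_magnitude normally needs the UpdatePruningStep callback during fit, and strip_pruning before the weights are exported. A self-contained sketch of that pattern on a toy Dense model; the frequency=100 mirrors the schedule fragment visible above, everything else is illustrative:

import numpy as np
import tensorflow as tf
from tensorflow_model_optimization.sparsity import keras as sparsity

# Toy stand-ins for the model and data of the snippet above
x_train = np.random.random((64, 20)).astype("float32")
y_train = tf.keras.utils.to_categorical(np.random.randint(0, 4, 64), 4)
dense_model = tf.keras.Sequential([
    tf.keras.layers.Dense(32, activation='relu', input_shape=(20,)),
    tf.keras.layers.Dense(4, activation='softmax'),
])

pruning_params = {
    "pruning_schedule": sparsity.PolynomialDecay(initial_sparsity=0.0,
                                                 final_sparsity=0.5,
                                                 begin_step=0,
                                                 end_step=200,
                                                 frequency=100)
}
pruned = sparsity.prune_low_magnitude(dense_model, **pruning_params)
pruned.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# UpdatePruningStep is required: it advances the pruning schedule every batch
pruned.fit(x_train, y_train, epochs=2,
           callbacks=[sparsity.UpdatePruningStep(),
                      sparsity.PruningSummaries(log_dir='logs')])

# Strip the pruning wrappers before saving the final, sparse model
final_model = sparsity.strip_pruning(pruned)
final_model.save('pruned_dense.h5')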
Beispiel #30
0
ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
ds_val = ds_val.map(preprocess).batch(batchsz)

print('datasets_pre:', x.shape, y.shape, x.min(), x.max())

network = Sequential([
    layers.Dense(256, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(10)
])
network.build(input_shape=(None, 28 * 28))
network.summary()

optimizer = optimizers.Adam(lr=0.01)

acc_meter = metrics.Accuracy()
loss_meter = metrics.Mean()

for step, (x, y) in enumerate(db):

    with tf.GradientTape() as tape:
        # [b, 28, 28] => [b, 784]
        x = tf.reshape(x, (-1, 28 * 28))
        # [b, 784] => [b, 10]
        out = network(x)
        # [b] => [b, 10]
        y_onehot = tf.one_hot(y, depth=10)
        # [b]
        loss = tf.reduce_mean(