Example #1
def run(base_path, gpu_mon, num_GPUs, continue_training, force_GPU, just_one,
        no_val, no_images, debug, wait_for, logger, train_images_per_epoch,
        val_images_per_epoch, **kwargs):

    from MultiPlanarUNet.train import Trainer, YAMLHParams
    from MultiPlanarUNet.models import model_initializer
    from MultiPlanarUNet.preprocessing import get_preprocessing_func

    # Read in hyperparameters from YAML file
    hparams = YAMLHParams(base_path + "/train_hparams.yaml", logger=logger)
    validate_hparams(hparams)

    # Wait for PID?
    if wait_for:
        from MultiPlanarUNet.utils import await_PIDs
        await_PIDs(wait_for)

    # Prepare Sequence generators and potential model specific hparam changes
    f = get_preprocessing_func(hparams["build"].get("model_class_name"))
    train, val, hparams = f(hparams,
                            logger=logger,
                            just_one=just_one,
                            no_val=no_val,
                            continue_training=continue_training,
                            base_path=base_path)

    if gpu_mon:
        # Wait for free GPU
        if not force_GPU:
            gpu_mon.await_and_set_free_GPU(N=num_GPUs, sleep_seconds=120)
        else:
            gpu_mon.set_GPUs = force_GPU
            num_GPUs = len(force_GPU.split(","))
        gpu_mon.stop()

    # Build new model (or continue training an existing one)
    org_model = model_initializer(hparams, continue_training, base_path,
                                  logger)

    # Initialize weights in final layer?
    if not continue_training and hparams["build"].get("biased_output_layer"):
        from MultiPlanarUNet.utils.utils import set_bias_weights_on_all_outputs
        set_bias_weights_on_all_outputs(org_model, train, hparams, logger)

    # Multi-GPU?
    if num_GPUs > 1:
        from tensorflow.keras.utils import multi_gpu_model
        model = multi_gpu_model(org_model,
                                gpus=num_GPUs,
                                cpu_merge=False,
                                cpu_relocation=False)
        logger("Creating multi-GPU model: N=%i" % num_GPUs)
    else:
        model = org_model

    # Init trainer
    trainer = Trainer(model, logger=logger)
    trainer.org_model = org_model

    # Compile model
    trainer.compile_model(n_classes=hparams["build"].get("n_classes"),
                          **hparams["fit"])

    # Debug mode?
    if debug:
        from tensorflow.python import debug as tfdbg
        from tensorflow.keras import backend as k
        k.set_session(tfdbg.LocalCLIDebugWrapperSession(k.get_session()))

    # Fit the model
    _ = trainer.fit(train=train,
                    val=val,
                    train_im_per_epoch=train_images_per_epoch,
                    val_im_per_epoch=val_images_per_epoch,
                    hparams=hparams,
                    no_im=no_images,
                    **hparams["fit"])

    # Save final model weights (usually not used, but maybe....?)
    if not os.path.exists("%s/model" % base_path):
        os.mkdir("%s/model" % base_path)
    model_path = "%s/model/model_weights.h5" % base_path
    logger("Saving current model to: %s" % model_path)
    org_model.save_weights(model_path)

    # Plot learning curves
    from MultiPlanarUNet.utils.plotting import plot_training_curves
    try:
        plot_training_curves(os.path.join(base_path, "logs", "training.csv"),
                             os.path.join(base_path, "logs",
                                          "learning_curve.png"),
                             logy=True)
    except Exception as e:
        logger("Could not plot learning curves due to error:")
        logger(e)
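Note on the multi-GPU branch in Example #1: multi_gpu_model was deprecated and later removed from TensorFlow. A minimal sketch of the closest modern replacement, assuming the model can be built and compiled inside a distribution scope (the toy architecture below is an assumption, not part of the original project):

import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()
print("Replicas in sync:", strategy.num_replicas_in_sync)
with strategy.scope():
    # variables created here are mirrored across the visible GPUs
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation="relu", input_shape=(32,)),
        tf.keras.layers.Dense(1),
    ])
    model.compile(optimizer="adam", loss="mse")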
Example #2
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras.models import load_model
import tensorflow.keras.backend as K
import tensorflow as tf  # needed for tf.Session() below
import os

os.environ['CUDA_VISIBLE_DEVICES'] = '-1'


def weighted_mse(y_true, y_pred):
    # note: despite the name, no per-sample weights are applied here; this is plain MSE
    return K.mean(K.square(y_pred - y_true), axis=-1)


sess = tf.Session()
K.set_session(sess)

nonlin_idx_all = []
np.random.seed(11)

rg1 = np.array([0.02, 1.5, 0.2, 0.5, 1 / 3, 25, 2])
rg2 = np.array([0.03, 3, 0.5, 1, 2 / 3, 3000, 6])

rs = (rg2 - rg1)[np.newaxis, :]

#rg2[5] = 3000
num = 1000000
para_pre = np.random.rand(num, 7)
para_cand = rg1 + para_pre * (rg2 - rg1)

name = 'DNN_800_200_sigmoid_Adam_Tsample20000_Epoch20000_X7d_Y2d__20191129-032138_trn_ 0.0491_tst_ 0.0850'
Example #3
    def serialize_model(model):  # signature inferred from the deserialize_model counterpart below
        """Serialize model into byte array."""
        bio = io.BytesIO()
        with h5py.File(bio) as f:
            model.save(f)
        return bio.getvalue()


    def deserialize_model(model_bytes, load_model_fn):
        """Deserialize model from byte array."""
        bio = io.BytesIO(model_bytes)
        with h5py.File(bio) as f:
            return load_model_fn(f, custom_objects=CUSTOM_OBJECTS)


    # Do not use GPU for the session creation.
    config = tf.ConfigProto(device_count={'GPU': 0})
    K.set_session(tf.Session(config=config))

    # Build the model.
    inputs = {col: Input(shape=(1,), name=col) for col in all_cols}
    embeddings = [Embedding(len(vocab[col]), 10, input_length=1, name='emb_' + col)(inputs[col])
                  for col in categorical_cols]
    continuous_bn = Concatenate()([Reshape((1, 1), name='reshape_' + col)(inputs[col])
                                   for col in continuous_cols])
    continuous_bn = BatchNormalization()(continuous_bn)
    x = Concatenate()(embeddings + [continuous_bn])
    x = Flatten()(x)
    x = Dense(1000, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.00005))(x)
    x = Dense(1000, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.00005))(x)
    x = Dense(1000, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.00005))(x)
    x = Dense(500, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.00005))(x)
    x = Dropout(0.5)(x)
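The serialize_model / deserialize_model helpers at the top of Example #3 are the in-memory HDF5 round-trip pattern often used to ship a Keras model between processes. A hedged usage sketch, assuming both helpers are available at module level and that no custom objects are involved (CUSTOM_OBJECTS and the toy model below are assumptions):

import tensorflow as tf
from tensorflow.keras.models import load_model

CUSTOM_OBJECTS = {}  # assumption: no custom layers/losses in this toy round-trip

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
model.compile(optimizer="adam", loss="mse")

model_bytes = serialize_model(model)                  # bytes suitable for broadcasting/storing
restored = deserialize_model(model_bytes, load_model) # rebuilt, compiled Keras model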
Example #4
import tensorflow as tf  # needed for tf.ConfigProto / tf.Session below
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping
from tensorflow.keras import backend as K

from math import ceil

import argparse

from nn_utils.load_data import load_data, load_images_and_maps
from nn_utils.utils import listdir_fullpath, get_model_memory_usage
import cv2

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.Session(config=config)
K.set_session(session)

parser = argparse.ArgumentParser(fromfile_prefix_chars='@',
                                 description='train neural network')
parser.add_argument('--images', help='folder with input images', required=True)
parser.add_argument('--maps', help='folder with saliency maps', required=True)
parser.add_argument('--loss', help='loss function', required=True)
parser.add_argument('--optimizer', help='optimizer', required=True)
parser.add_argument('--conv_layers',
                    help='number of conv layers',
                    type=int,
                    default=3)
parser.add_argument('--batch_size', help='batch size', type=int, default=10)
parser.add_argument('--epochs', help='number of epochs', type=int, default=500)
parser.add_argument('--samples',
                    help='number of samples',
Example #5
 def mutate(self):
     with self.graph.as_default():
         K.set_session(self.sess)
         super().mutate()
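The mutate override in Example #5 assumes the object carries its own TF 1.x graph and session. A minimal sketch of how such attributes are usually set up (the class and attribute names are assumptions):

import tensorflow as tf
from tensorflow.keras import backend as K

class GraphOwner:
    """Toy base holding a per-instance graph/session for graph-mode Keras."""
    def __init__(self):
        self.graph = tf.Graph()
        with self.graph.as_default():
            self.sess = tf.Session(graph=self.graph)
            K.set_session(self.sess)  # all Keras work for this instance must run under self.graph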
Example #6
import argparse
import glob  # needed for glob.glob() below
import os.path
from data import DataSet
from processor import process_image
from tensorflow.keras.models import load_model

import tensorflow as tf
import tensorflow.keras.backend as KTF

config = tf.ConfigProto()
config.gpu_options.allow_growth = True  #dynamic alloc GPU resource
config.gpu_options.per_process_gpu_memory_fraction = 0.3  #GPU memory threshold 0.3
session = tf.Session(config=config)

# set session
KTF.set_session(session)


def validate_cnn_model(model_file):
    data = DataSet()
    model = load_model(model_file)

    # Get all our test images.
    images = glob.glob(os.path.join('data', 'test_full', '**', '*.jpg'))

    # Count the correct predict
    result_count = 0

    for image in images:
        print('-' * 80)
        # Get a random row.
Example #7
def initialize_vars(sess):
    sess.run(tf.local_variables_initializer())
    sess.run(tf.global_variables_initializer())
    sess.run(tf.tables_initializer())
    K.set_session(sess)
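A hedged usage sketch for initialize_vars above, assuming TF 1.x graph mode and that the Keras model has already been defined in the default graph:

import tensorflow as tf
from tensorflow.keras import backend as K

sess = tf.Session()
initialize_vars(sess)   # runs global/local/table initializers, then makes sess the Keras default
# model.fit(...) / model.predict(...) will now execute against this session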
Example #8
def _init_session():
    from tensorflow.python.keras import backend
    sess = backend.get_session()
    tf.get_default_graph()  # touch the default graph (the return value is discarded in the original)
    set_session(sess)       # register the backend session as the default Keras session
    return sess
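Examples #7 and #8 rely on TF 1.x graph-mode APIs. Under TensorFlow 2.x the same calls are only reachable through the compat.v1 namespace; a hedged sketch of the equivalent setup, assuming graph-mode behaviour is acceptable:

import tensorflow as tf

tf.compat.v1.disable_eager_execution()            # assumption: graph-mode semantics are wanted
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.compat.v1.Session(config=config)
tf.compat.v1.keras.backend.set_session(sess)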
Example #9
def run_ddqn(args):
    '''
    run a DDQN training session, or test its result, with the donkey simulator
    '''

    # only needed if TF==1.13.1
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    K.set_session(sess)

    # Construct gym environment. Starts the simulator if path is given.
    env = gym.make(args.env_name, exe_path=args.sim, port=args.port)

    # not working on windows...
    def signal_handler(signal, frame):
        print("catching ctrl+c")
        env.unwrapped.close()
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGABRT, signal_handler)

    # Get size of state and action from environment
    state_size = (img_rows, img_cols, img_channels)
    action_space = env.action_space  # Steering and Throttle

    try:
        agent = DQNAgent(state_size, action_space, train=not args.test)

        throttle = args.throttle  # Set throttle as constant value

        episodes = []

        if os.path.exists(args.model):
            print("load the saved model")
            agent.load_model(args.model)

        for e in range(EPISODES):

            print("Episode: ", e)

            done = False
            obs = env.reset()

            episode_len = 0

            x_t = agent.process_image(obs)

            s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)
            # In Keras, need to reshape
            s_t = s_t.reshape(1, s_t.shape[0], s_t.shape[1],
                              s_t.shape[2])  #1*80*80*4

            while not done:

                # Get action for the current state and go one step in environment
                steering = agent.get_action(s_t)
                action = [steering, throttle]
                next_obs, reward, done, info = env.step(action)

                x_t1 = agent.process_image(next_obs)

                x_t1 = x_t1.reshape(1, x_t1.shape[0], x_t1.shape[1],
                                    1)  #1x80x80x1
                s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3)  #1x80x80x4

                # Save the sample <s, a, r, s'> to the replay memory
                agent.replay_memory(s_t, np.argmax(linear_bin(steering)),
                                    reward, s_t1, done)
                agent.update_epsilon()

                if agent.train:
                    agent.train_replay()

                s_t = s_t1
                agent.t = agent.t + 1
                episode_len = episode_len + 1
                if agent.t % 30 == 0:
                    print("EPISODE", e, "TIMESTEP", agent.t, "/ ACTION",
                          action, "/ REWARD", reward, "/ EPISODE LENGTH",
                          episode_len, "/ Q_MAX ", agent.max_Q)

                if done:

                    # Every episode update the target model to be same with model
                    agent.update_target_model()

                    episodes.append(e)

                    # Save model for each episode
                    if agent.train:
                        agent.save_model(args.model)

                    print("episode:", e, "  memory length:", len(agent.memory),
                          "  epsilon:", agent.epsilon, " episode length:",
                          episode_len)

    except KeyboardInterrupt:
        print("stopping run...")
    finally:
        env.unwrapped.close()
Example #10
def predict(args):
    """Inference program."""
    logger_level = logging.getLogger(__package__).level
    if logger_level > logging.DEBUG:
        os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

    import tensorflow as tf
    from tensorflow.keras import backend as K

    args.regions = medaka.common.get_regions(args.bam,
                                             region_strs=args.regions)
    logger = medaka.common.get_named_logger('Predict')
    logger.info('Processing region(s): {}'.format(' '.join(
        str(r) for r in args.regions)))

    # create output and copy meta
    with medaka.datastore.DataStore(args.model) as ds:
        ds.copy_meta(args.output)
        feature_encoder = ds.get_meta('feature_encoder')

    feature_encoder.tag_name = args.tag_name
    feature_encoder.tag_value = args.tag_value
    feature_encoder.tag_keep_missing = args.tag_keep_missing

    logger.info("Setting tensorflow threads to {}.".format(args.threads))
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
    K.set_session(
        tf.Session(
            config=tf.ConfigProto(intra_op_parallelism_threads=args.threads,
                                  inter_op_parallelism_threads=args.threads)))
    if tf.test.is_gpu_available(cuda_only=True):
        logger.info("Found a GPU.")
        logger.info(
            "If cuDNN errors are observed, try setting the environment "
            "variable `TF_FORCE_GPU_ALLOW_GROWTH=true`. To explicitly "
            "disable use of cuDNN use the commandline option "
            "`--disable_cudnn`. If OOM (out of memory) errors are found "
            "please reduce batch size.")

    # Split overly long regions to maximum size so as to not create
    #   massive feature matrices
    MAX_REGION_SIZE = int(1e6)  # 1Mb
    regions = []
    for region in args.regions:
        if region.size > MAX_REGION_SIZE:
            # chunk_ovlp is mostly used in overlapping pileups (which generally
            # end up being expanded compared to the draft coordinate system)
            regs = region.split(MAX_REGION_SIZE,
                                overlap=args.chunk_ovlp,
                                fixed_size=False)
        else:
            regs = [region]
        regions.extend(regs)

    logger.info("Processing {} long region(s) with batching.".format(
        len(regions)))

    logger.info("Using model: {}.".format(args.model))

    model = medaka.models.load_model(args.model,
                                     time_steps=args.chunk_len,
                                     allow_cudnn=args.allow_cudnn)

    # the returned regions are those where the pileup width is smaller than
    # chunk_len
    remainder_regions = run_prediction(args.output,
                                       args.bam,
                                       regions,
                                       model,
                                       feature_encoder,
                                       args.chunk_len,
                                       args.chunk_ovlp,
                                       batch_size=args.batch_size,
                                       save_features=args.save_features)

    # short/remainder regions: just do things without chunking. We can do this
    # here because we now have the size of all pileups (and know they are
    # small).
    # TODO: can we avoid calculating pileups twice whilst controlling memory?
    if len(remainder_regions) > 0:
        logger.info("Processing {} short region(s).".format(
            len(remainder_regions)))
        model = medaka.models.load_model(args.model,
                                         time_steps=None,
                                         allow_cudnn=args.allow_cudnn)
        for region in remainder_regions:
            new_remainders = run_prediction(
                args.output,
                args.bam,
                [region[0]],
                model,
                feature_encoder,
                args.chunk_len,
                args.chunk_ovlp,  # these won't be used
                batch_size=args.batch_size,
                save_features=args.save_features,
                enable_chunking=False)
            if len(new_remainders) > 0:
                # shouldn't get here
                ignored = [x[0] for x in new_remainders]
                n_ignored = len(ignored)
                logger.warning("{} regions were not processed: {}.".format(
                    n_ignored, ignored))

    logger.info("Finished processing all regions.")

    if args.check_output:
        logger.info("Validating and finalising output data.")
        with medaka.datastore.DataStore(args.output, 'a') as ds:
            pass
Example #11
 def predict(self, sample):
     with self.lock:
         with graph.as_default():
             if sess is not None:
                 set_session(sess)
             return self.model.predict(sample).flatten()
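Example #11 only shows the locked predict call; the graph, sess and lock it references are normally captured when the model is loaded. A minimal setup sketch under TF 1.x (the file path and variable names are assumptions):

import threading
import tensorflow as tf
from tensorflow.keras.backend import set_session
from tensorflow.keras.models import load_model

sess = tf.Session()
graph = tf.get_default_graph()
set_session(sess)
model = load_model('model.h5')   # hypothetical path
lock = threading.Lock()          # guards predict() when called from multiple request threads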
Example #12
import sys  # needed for sys.path.append() below
import tensorflow as tf  # needed for tf.Session / tf.compat.v1 below
from keras.optimizers import Adam
from keras import backend as K

from keras.layers.merge import add
sys.path.append('..')

from utils.modifiedRNN import LSTM1, LSTM2, LSTM3, LSTM_forget
from tensorflow.core.protobuf import rewriter_config_pb2
from tensorflow.keras.backend import set_session
tf.keras.backend.clear_session()  # For easy reset of notebook state.

config_proto = tf.compat.v1.ConfigProto()
off = rewriter_config_pb2.RewriterConfig.OFF
config_proto.graph_options.rewrite_options.arithmetic_optimization = off
session = tf.Session(config=config_proto)
set_session(session)


def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)


class brnn_keras(object):
    def __init__(self, args, max_seq_length):
        self.max_seq_length = max_seq_length
        self.args = args
        self.model_init(args)
        self.ctc_init(args)
        self.opt_init(args)
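ctc_lambda_func above is normally wrapped in a Lambda layer so the CTC cost becomes a trainable model output. A hedged wiring sketch that reuses the function as defined above (the shapes, names and toy dimensions are assumptions):

from tensorflow.keras.layers import Input, Lambda
from tensorflow.keras.models import Model

time_steps, n_class, max_label_len = 32, 28, 10   # assumed toy dimensions

y_pred = Input(shape=(time_steps, n_class), name='y_pred')   # softmax output of the recurrent stack
labels = Input(shape=(max_label_len,), name='labels')
input_length = Input(shape=(1,), name='input_length', dtype='int64')
label_length = Input(shape=(1,), name='label_length', dtype='int64')

loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(
    [y_pred, labels, input_length, label_length])
ctc_model = Model([y_pred, labels, input_length, label_length], loss_out)
# the "loss" just passes the Lambda output through, since the CTC cost is already computed in-graph
ctc_model.compile(optimizer='adam', loss={'ctc': lambda y_true, y_pred: y_pred})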
Example #13
 def __initialize_vars(self, session):
     session.run(tf.local_variables_initializer())
     session.run(tf.global_variables_initializer())
     session.run(tf.tables_initializer())
     K.set_session(session)
Example #14
def ctc_lamba_func(args):
    y_pred, labels, input_length, label_length = args
    y_pred = y_pred[:, :, :]
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)


def get_session():
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = False
    return tf.Session(config=config)


if __name__ == '__main__':

    K.set_session(get_session())

    n_class = len(chars) + 1
    print(n_class)
    input_tensor = Input((96, 64, 3))
    x = input_tensor
    conv_shape = x.get_shape()

    x = Conv2D(64, (3, 3), strides=(1, 1), padding="same")(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x)

    x = Conv2D(128, (3, 3), strides=(1, 1), padding="same")(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x)

    x = Conv2D(256, (3, 3), strides=(1, 1), padding="same")(x)
    x = BatchNormalization()(x)
Example #15
#!/usr/bin/env python3
# if version.parse(tf.__version__).release[0] >= 2:
# THIS SEEMS TO WORK IN BOTH tf 2 and 1
from packaging import version
import tensorflow as tf
from tensorflow.keras.backend import set_session
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession, Session
config = ConfigProto()
config.gpu_options.allow_growth = True
config.log_device_placement = True  # to log device placement (on which device the operation ran)
# session = InteractiveSession(config=config)
session = Session(config=config)
set_session(session)  # set this TensorFlow session as the default session for Keras


import tensorflow.keras as keras
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import Flatten,  MaxPooling2D, Conv2D
from tensorflow.keras.callbacks import TensorBoard

(X_train,y_train), (X_test, y_test) = mnist.load_data()

X_train = X_train.reshape(60000,28,28,1).astype('float32')
X_test = X_test.reshape(10000,28,28,1).astype('float32')

X_train /= 255
X_test /= 255
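Example #15 stops after normalising the MNIST arrays. A hedged sketch of how the imported layers might be assembled and trained (the architecture and hyperparameters below are illustrative assumptions, not recovered from the original script):

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical

y_train_cat = to_categorical(y_train, 10)
y_test_cat = to_categorical(y_test, 10)

model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dropout(0.25),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train_cat, batch_size=128, epochs=1,
          validation_data=(X_test, y_test_cat))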
Example #16
        epochs=40,
        validation_data=val_generator,
        validation_steps=80,
        callbacks=[
            callbacks.EarlyStopping(monitor="val_loss",
                                    min_delta=1e-7,
                                    patience=5,
                                    restore_best_weights=True),
            callbacks.ModelCheckpoint(filepath=file_save,
                                      monitor="val_loss",
                                      verbose=1,
                                      save_best_only=True),
        ])
    model.save(file_save)


config = ConfigProto()
config.gpu_options.allow_growth = True
backend.set_session(Session(config=config))
base_dir = 'myDataset2'
original_dataset_dir = 'pedro-dataset'
# create_folders(original_dataset_dir,base_dir)
lr_rate = [0.00001, 0.0001, 0.0005, 0.0010, 0.005, 0.01, 0.05, 0.1, 0.5, 1]
train_gen = read_train_dataset(base_dir)
val_gen = read_val_dataset(base_dir)
test_gen = read_test_dataset(base_dir)
for i in lr_rate:
    print(i)
    model = create_conv_net()
    train_conv_net(model, train_gen, val_gen, i)
Example #17
        return self.model.evaluate(X, y, verbose=0)[1]


mem_softmaxes = []
mem_accuracies = []

with controllerGraph.as_default():
    controller = Controller()
with open("subpolicy_result", "w"):
    pass

controller_iter = tqdm(range(CONTROLLER_EPOCHS), desc='Controller Epoch: ', position=0, file=sys.stdout, leave=False)
for epoch in controller_iter:
    tf.Graph().as_default()  # note: without a `with` block this does not actually switch the default graph
    session = tf.compat.v1.Session(graph=tf.get_default_graph(), config=config)
    backend.set_session(session)

    child = Child(Xtr.shape[1:])
    attack_func_map = {
        'fgsm' : fgsm(child.model),
        'df' : df(child.model),
        'mim' : mim(child.model),
    }
    with controllerGraph.as_default():
        softmaxes, subpolicies = controller.predict(SUBPOLICIES, argmax=epoch % 10 == 9)


    # mem_softmaxes.append(softmaxes)

    child.fit(subpolicies, Xtr, ytr, log_file=f'runs/{epoch}.json', save_file=f'runs/{epoch}.h5')
    raw_accuracy = child.evaluate(Xts, yts)
Example #18
def train(dataset):
    flag_values_dict = FLAGS.flag_values_dict()
    pprint(flag_values_dict, indent=4)
    with open(os.path.join(FLAGS.model, FLAGS_FILENAME), 'w') as f:
        json.dump(flag_values_dict, f, indent=4)

    # FLAGS.weight_ad is parsed to [coverage_min, coverage_max], threshold to apply adaptive weight
    if FLAGS.weight_ad is not None:
        weight_adaptive = [float(x) for x in FLAGS.weight_ad]
    else:
        weight_adaptive = None

    with tf.device('/cpu:0'):
        iter_train, iter_valid = dataset.gen_train_valid(
            n_splits=N_SPLITS,
            idx_kfold=FLAGS.cv,
            batch_size=FLAGS.batch_size,
            adjust=FLAGS.adjust,
            weight_fg=FLAGS.weight_fg,
            weight_bg=FLAGS.weight_bg,
            weight_adaptive=weight_adaptive,
            filter_vert_hori=FLAGS.filter_vert_hori,
            ignore_tiny=FLAGS.ignore_tiny,
            augment_dict=augment_dict(),
            deep_supervised=FLAGS.deep_supervised,
            mask_padding=FLAGS.mask_padding,
            with_depth=FLAGS.with_depth)

    sess = tf.Session(config=tf.ConfigProto(
        allow_soft_placement=True,
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.9,
                                  allow_growth=True)))
    K.set_session(sess)

    if FLAGS.debug:
        debug_img_show(iter_train, iter_valid, sess)

    with tf.device('/gpu:0'):
        if FLAGS.restore is not None:
            path_restore = os.path.join(FLAGS.restore, NAME_MODEL)
            print("Restoring model from {}".format(path_restore))
            model = load_model(path_restore, compile=False)
        elif FLAGS.contrib is not None:
            model = build_model_contrib(IM_HEIGHT,
                                        IM_WIDTH,
                                        IM_CHAN,
                                        encoder=FLAGS.contrib,
                                        residual_unit=FLAGS.residual_unit,
                                        spatial_dropout=FLAGS.spatial_dropout,
                                        preprocess=FLAGS.preprocess,
                                        last_kernel=FLAGS.last_kernel,
                                        last_1x1=FLAGS.last_1x1)
        elif FLAGS.pretrained is not None:
            if not FLAGS.deep_supervised:
                model = build_model_pretrained(
                    IM_HEIGHT,
                    IM_WIDTH,
                    IM_CHAN,
                    encoder=FLAGS.pretrained,
                    spatial_dropout=FLAGS.spatial_dropout,
                    retrain=FLAGS.retrain,
                    preprocess=FLAGS.preprocess,
                    renorm=FLAGS.renorm,
                    last_kernel=FLAGS.last_kernel,
                    last_1x1=FLAGS.last_1x1)
            else:
                model = build_model_pretrained_deep_supervised(
                    IM_HEIGHT,
                    IM_WIDTH,
                    IM_CHAN,
                    encoder=FLAGS.pretrained,
                    spatial_dropout=FLAGS.spatial_dropout,
                    retrain=FLAGS.retrain,
                    preprocess=FLAGS.preprocess,
                    last_kernel=FLAGS.last_kernel,
                    last_1x1=FLAGS.last_1x1)
        elif FLAGS.use_ref2:
            model = build_model_ref2(IM_HEIGHT,
                                     IM_WIDTH,
                                     IM_CHAN,
                                     preprocess=FLAGS.preprocess)
        elif FLAGS.use_ref:
            model = build_model_ref(IM_HEIGHT,
                                    IM_WIDTH,
                                    IM_CHAN,
                                    batch_norm=FLAGS.batch_norm,
                                    drop_out=FLAGS.drop_out,
                                    depth=FLAGS.depth,
                                    start_ch=FLAGS.start_ch)
        else:
            model = build_model(IM_HEIGHT,
                                IM_WIDTH,
                                IM_CHAN,
                                batch_norm=FLAGS.batch_norm,
                                drop_out=FLAGS.drop_out)

        if FLAGS.restore_weight is not None:
            path_weight = os.path.join(FLAGS.restore_weight, NAME_MODEL)
            print("Restoring weights from {}".format(path_weight))
            model.load_weights(path_weight, by_name=True)

        model = compile_model(model,
                              optimizer=FLAGS.opt,
                              loss=FLAGS.loss,
                              weight_decay=FLAGS.weight_decay,
                              exclude_bn=FLAGS.exclude_bn,
                              deep_supervised=FLAGS.deep_supervised)
        write_summary(model, os.path.join(FLAGS.model, MODEL_SUMMARY_FILENAME))
        model.summary()

    path_model = os.path.join(FLAGS.model, NAME_MODEL)

    if not FLAGS.deep_supervised:
        monitor = 'val_weighted_mean_score'
    else:
        monitor = 'val_output_final_weighted_mean_score'
    checkpointer = ModelCheckpoint(path_model,
                                   monitor=monitor,
                                   verbose=1,
                                   save_best_only=FLAGS.save_best_only,
                                   mode='max')
    tensorboarder = MyTensorBoard(FLAGS.log, model=model)
    if not FLAGS.cyclic:
        lrscheduler = LearningRateScheduler(StepDecay(FLAGS.lr, FLAGS.lr_decay,
                                                      FLAGS.epochs_decay,
                                                      FLAGS.freeze_once),
                                            verbose=1)
    else:
        lrscheduler = LearningRateScheduler(CLRDecay(
            FLAGS.lr,
            max_lr=FLAGS.max_lr,
            epoch_size=FLAGS.epoch_size,
            mode=FLAGS.mode_clr,
            freeze_once=FLAGS.freeze_once),
                                            verbose=1)

    callbacks = [checkpointer, tensorboarder, lrscheduler]
    if FLAGS.early_stopping:
        callbacks.append(EarlyStopping(patience=5, verbose=1))
    if FLAGS.reduce_on_plateau:
        lrreducer = ReduceLROnPlateau(monitor='val_loss',
                                      factor=0.5,
                                      patience=8,
                                      verbose=1,
                                      mode='min',
                                      epsilon=0.0001,
                                      cooldown=4)
        callbacks.append(lrreducer)

    num_train, num_valid = dataset.len_train_valid(n_splits=N_SPLITS,
                                                   idx_kfold=FLAGS.cv)

    steps_per_epoch = int(num_train / FLAGS.batch_size)
    validation_steps = int(num_valid / FLAGS.batch_size)

    results = model.fit(x=iter_train,
                        validation_data=iter_valid,
                        epochs=FLAGS.epochs,
                        steps_per_epoch=steps_per_epoch,
                        validation_steps=validation_steps,
                        shuffle=True,
                        callbacks=callbacks)
Example #19
from flask import Flask, render_template  # needed for the Flask app below
import tensorflow as tf
from tensorflow.keras.backend import clear_session, set_session
from tensorflow.keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D, BatchNormalization
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import cv2
import numpy as np
import copy


app = Flask(__name__)
class_dict = ['Choroidal Neovascularization (CNV)', 'Diabetic Macular Edema (DME)', 'DRUSEN', 'NORMAL']
pred_datagen = ImageDataGenerator(rescale=1./255)
sess = tf.Session()
graph = tf.get_default_graph()
set_session(sess)
model = load_model('./model/model.hdf5')
# model._make_predict_function()
# print(model.predict(np.ones((1,256,256,1))))

def predict(img):
    return model.predict(img)

@app.route('/')
def index():
    return render_template('index.html')

@app.route('/predict', methods = ['POST'])
def get_result():    
    global model
    global graph
Example #20
    def __init__(self,
                 config,
                 filenames,
                 labels,
                 classes_semantics,
                 num_run=0,
                 resume_model_path=False,
                 resume=False):

        #############################################################################################
        # LIBRARIES
        #############################################################################################
        import os
        import numpy as np
        import tensorflow as tf
        from tensorflow.python import pywrap_tensorflow
        from tensorflow.keras import optimizers, losses, models, backend, layers, metrics
        from tensorflow.keras.utils import multi_gpu_model

        self.run_path = os.path.dirname(os.path.realpath(__file__))
        os.chdir(self.run_path)

        utils = local_module("utils")
        logger = local_module("logger")
        lossnet = local_module("lossnet")
        data_pipeline = local_module("data_pipeline")
        backbones = local_module("backbones")

        #############################################################################################
        # PARAMETERS RUN
        #############################################################################################
        self.config = config
        self.filenames = filenames
        self.labels = labels
        self.num_run = num_run
        self.group = "Stage_" + str(num_run)
        self.name_run = "Train_" + self.group

        self.run_dir = os.path.join(config["PROJECT"]["group_dir"], self.group)
        self.run_dir_check = os.path.join(self.run_dir, 'checkpoints')
        self.checkpoints_path = os.path.join(self.run_dir_check,
                                             'checkpoint.{epoch:03d}.hdf5')
        self.user = get_user()
        self.training_thread = None
        self.resume_training = resume
        self.list_classes = classes_semantics

        #self.num_data_train  = len(labeled_set)
        self.resume_model_path = resume_model_path
        self.transfer_weight_path = self.config['TRAIN'][
            "transfer_weight_path"]
        self.input_shape = [
            self.config["NETWORK"]["INPUT_SIZE"],
            self.config["NETWORK"]["INPUT_SIZE"], 3
        ]

        self.pre = '\033[1;36m' + self.name_run + '\033[0;0m'  #"____" #
        self.problem = '\033[1;31m' + self.name_run + '\033[0;0m'

        # Creating the train folder
        import shutil

        # create base dir and group dir
        if os.path.exists(config["PROJECT"]["project_dir"]) is False:
            os.mkdir(config["PROJECT"]["project_dir"])

        if os.path.exists(self.run_dir) and self.resume_model_path is False:
            shutil.rmtree(config["PROJECT"]["group_dir"])
            os.mkdir(config["PROJECT"]["group_dir"])

        if os.path.exists(config["PROJECT"]["group_dir"]) is False:
            os.mkdir(config["PROJECT"]["group_dir"])

        if os.path.exists(self.run_dir) is False:
            os.mkdir(self.run_dir)

        if os.path.exists(self.run_dir_check) is False:
            os.mkdir(self.run_dir_check)

        #############################################################################################
        # SETUP TENSORFLOW SESSION
        #############################################################################################
        # Create a MirroredStrategy.
        #self.strategy = tf.distribute.MirroredStrategy()
        #print(self.pre,'Number of devices: {}'.format(self.strategy.num_replicas_in_sync))

        #with self.strategy.scope():
        if True:
            self.graph = tf.Graph()
            with self.graph.as_default():
                config_tf = tf.ConfigProto(allow_soft_placement=True)
                config_tf.gpu_options.allow_growth = True
                self.sess = tf.Session(config=config_tf, graph=self.graph)
                backend.set_session(self.sess)
                with self.sess.as_default():

                    #############################################################################################
                    # SETUP WANDB
                    #############################################################################################
                    import wandb

                    self.wandb = wandb
                    self.wandb.init(project=config["PROJECT"]["project"],
                                    group=config["PROJECT"]["group"],
                                    name="Train_" + str(num_run),
                                    job_type=self.group,
                                    sync_tensorboard=True,
                                    config=config)

                    #############################################################################################
                    # LOAD DATA
                    #############################################################################################
                    self.DataGen = data_pipeline.ClassificationDataset_AL(
                        config["TRAIN"]["batch_size"],
                        self.filenames,
                        self.labels,
                        self.list_classes,
                        subset="train",
                        original_size=config["DATASET"]["original_size"],
                        data_augmentation=config["DATASET"]
                        ["Data_augementation"],
                        random_flip=config["DATASET"]["random_flip"],
                        pad=config["DATASET"]["pad"],
                        random_crop_pad=config["DATASET"]["random_crop_pad"],
                        random_hue=config["DATASET"]["random_hue"],
                        random_brightness=config["DATASET"]
                        ["random_brightness"],
                        random_saturation=config["DATASET"]
                        ["random_saturation"])

                    self.num_class = len(self.DataGen.list_classes)

                    #############################################################################################
                    # GLOBAL PROGRESS
                    #############################################################################################
                    self.steps_per_epoch = int(
                        np.ceil(self.DataGen.nb_elements /
                                config["TRAIN"]["batch_size"]))
                    self.split_epoch = self.config['TRAIN']["EPOCH_WHOLE"]
                    self.total_epochs = self.config['TRAIN'][
                        "EPOCH_WHOLE"] + self.config['TRAIN']["EPOCH_SLIT"]
                    self.total_steps = self.steps_per_epoch * self.total_epochs

                    #############################################################################################
                    # DEFINE CLASSIFIER
                    #############################################################################################
                    # set input
                    img_input = tf.keras.Input(
                        tensor=self.DataGen.images_tensor, name='input_image')
                    #img_input = tf.keras.Input(self.input_shape,name= 'input_image')

                    include_top = True

                    # Get the selected backbone
                    """
                    ResNet18
                    ResNet50
                    ResNet101
                    ResNet152
                    ResNet50V2
                    ResNet101V2
                    ResNet152V2
                    ResNeXt50
                    ResNeXt101
                    """
                    print(self.pre, "The backbone is: ",
                          self.config["NETWORK"]["Backbone"])
                    self.backbone = getattr(backbones,
                                            self.config["NETWORK"]["Backbone"])
                    #
                    c_pred_features = self.backbone(input_tensor=img_input,
                                                    classes=self.num_class,
                                                    include_top=include_top)
                    self.c_pred_features = c_pred_features
                    if include_top:  # include top classifier
                        # class predictions
                        c_pred = c_pred_features[0]
                    else:
                        x = layers.GlobalAveragePooling2D(name='pool1')(
                            c_pred_features[0])
                        x = layers.Dense(self.num_class, name='fc1')(x)
                        c_pred = layers.Activation('softmax', name='c_pred')(x)
                        c_pred_features[0] = c_pred

                    #self.classifier = models.Model(inputs=[img_input], outputs=c_pred_features,name='Classifier')

                    #############################################################################################
                    # DEFINE FULL MODEL
                    #############################################################################################
                    #c_pred_features_1 = self.classifier(img_input)
                    #c_pred_1 = c_pred_features[0]
                    loss_pred_embeddings = lossnet.Lossnet(
                        c_pred_features,
                        self.config["NETWORK"]["embedding_size"])

                    model_inputs = [img_input]
                    model_outputs = [c_pred] + loss_pred_embeddings

                    self.model = models.Model(
                        inputs=model_inputs,
                        outputs=model_outputs)  #, embedding_s] )

                    ########################################
                    # INIT GLOBAL VARIABLES
                    #######################################
                    self.sess.run(tf.global_variables_initializer())

                    #############################################################################################
                    # LOAD PREVIOUS WEIGHTS
                    #############################################################################################
                    if self.resume_model_path:
                        # check the epoch where is loaded
                        try:
                            loaded_epoch = int(
                                self.resume_model_path.split('.')[-2])
                            print(self.pre, "Loading weigths from: ",
                                  self.resume_model_path)
                            print(self.pre, "The detected epoch is: ",
                                  loaded_epoch)
                            # load weights
                            self.model.load_weights(self.resume_model_path)
                        except:
                            print(self.problem,
                                  "=> Problem loading the weights from ",
                                  self.resume_model_path)
                            print(self.problem, '=> It will train from scratch')
                    elif self.transfer_weight_path:
                        try:
                            print(
                                self.pre,
                                "(transfer learning) Loading weights by name from: ",
                                self.transfer_weight_path)
                            # load weights
                            self.model.load_weights(self.transfer_weight_path,
                                                    by_name=True)
                        except:
                            print(
                                self.problem,
                                "=>(transfer learning) Problem loading the weights from ",
                                self.transfer_weight_path)
                            print(self.problem, '=> It will train from scratch')

                    if self.resume_training:
                        self.current_epoch = loaded_epoch
                        self.current_step = loaded_epoch * self.steps_per_epoch

                        if self.current_epoch > self.total_epochs:
                            raise ValueError(
                                "The starting epoch is higher that the total epochs"
                            )
                        else:
                            print(self.pre,
                                  "Resuming the training from stage: ",
                                  self.num_run, " at epoch ",
                                  self.current_epoch)
                    else:
                        self.current_epoch = 0
                        self.current_step = 0

                    #############################################################################################
                    # DEFINE WEIGHT DECAY
                    #############################################################################################
                    if self.config['TRAIN']['apply_weight_decay']:
                        utils.add_weight_decay(
                            self.model, self.config['TRAIN']['weight_decay'])

                    #############################################################################################
                    # DEFINE LOSSES
                    #############################################################################################

                    # losses
                    self.loss_dict = {}
                    self.loss_dict[
                        'c_pred'] = losses.sparse_categorical_crossentropy
                    self.loss_dict['l_pred_w'] = lossnet.Loss_Lossnet
                    self.loss_dict['l_pred_s'] = lossnet.Loss_Lossnet
                    # weights
                    self.weight_w = backend.variable(
                        self.config['TRAIN']['weight_lossnet_loss'])
                    self.weight_s = backend.variable(0)

                    self.loss_w_dict = {}
                    self.loss_w_dict['c_pred'] = 1.0
                    self.loss_w_dict['l_pred_w'] = self.weight_w
                    self.loss_w_dict['l_pred_s'] = self.weight_s
                    #self.loss_w_dict['Embedding']  = 0

                    #############################################################################################
                    # DEFINE METRICS
                    #############################################################################################
                    # metrics
                    self.metrics_dict = {}
                    self.metrics_dict[
                        'c_pred'] = tf.keras.metrics.SparseCategoricalAccuracy(
                        )
                    #self.metrics_dict['l_pred_w']   = lossnet.MAE_Lossnet
                    #self.metrics_dict['l_pred_s']   = lossnet.MAE_Lossnet

                    #############################################################################################
                    # DEFINE OPTIMIZER
                    #############################################################################################
                    self.opt = optimizers.Adam(lr=self.config['TRAIN']['lr'])

                    #############################################################################################
                    # DEFINE CALLBACKS
                    #############################################################################################
                    # Checkpoint saver
                    self.callbacks = []
                    model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
                        filepath=self.checkpoints_path,
                        save_weights_only=True,
                        period=self.config["TRAIN"]["test_each"])

                    self.callbacks.append(model_checkpoint_callback)

                    # Callback to wandb
                    # self.callbacks.append(self.wandb.keras.WandbCallback())

                    # Callback Learning Rate
                    def scheduler(epoch):
                        lr = self.config['TRAIN']['lr']
                        for i in self.config['TRAIN']['MILESTONES']:
                            if epoch > i:
                                lr *= 0.1
                        return lr

                    self.callbacks.append(
                        tf.keras.callbacks.LearningRateScheduler(scheduler))

                    # callback to change the weigths for the split training:
                    self.callbacks.append(
                        lossnet.Change_loss_weights(
                            self.weight_w, self.weight_s, self.split_epoch,
                            self.config['TRAIN']['weight_lossnet_loss']))

                    ##################
                    # SETUP WATCHER
                    ##################
                    self.run_watcher = get_run_watcher()

                    self.run_watcher.add_run.remote(
                        name=self.name_run,
                        user=self.user,
                        progress=0,
                        wandb_url=self.wandb.run.get_url(),
                        status="Idle")

                    # Callback update progress
                    self.Update_progress = logger.Update_progress(
                        self.run_watcher, self.wandb, self.name_run,
                        self.steps_per_epoch, self.total_epochs,
                        self.total_steps, self.current_epoch,
                        self.current_step)

                    self.callbacks.append(self.Update_progress)

                    #############################################################################################
                    # COMPILE MODEL
                    #############################################################################################
                    self.model.compile(
                        loss=self.loss_dict,
                        loss_weights=self.loss_w_dict,
                        metrics=self.metrics_dict,
                        optimizer=self.opt,
                        target_tensors=self.DataGen.labels_tensor)

                    ########################################
                    # INIT LOCAL VARIABLES
                    #######################################
                    self.sess.run(tf.local_variables_initializer())

            print(self.pre, 'Init done')
Example #21
def main(mname, model_dir, batch_size, epochs, eval_steps, eps_log_steps):
    global model_dir_hdfs
    if model_dir.startswith('hdfs'):
        model_dir_hdfs = True

    tf.logging.set_verbosity(tf.logging.DEBUG)
    # get TF logger
    log.setLevel(logging.DEBUG)
    # create formatter and add it to the handlers
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    # create file handler which logs even debug messages
    if model_dir_hdfs is False:
        if os.path.exists(model_dir) is False:
            os.makedirs(model_dir)
        log_dir = model_dir
    else:
        model_dir = os.path.join(
            model_dir, "job_cifar10_" +
            datetime.datetime.now().strftime('%Y-%m-%d_%H-%M'))
        log_dir = '.'

    # clear old log files
    with open(log_dir + '/tensorflow.log', 'w'):
        pass
    with open(log_dir + '/gpu.csv', 'w'):
        pass
    with open(log_dir + '/cpu.csv', 'w'):
        pass

    fh = logging.FileHandler(log_dir + '/tensorflow.log')

    fh.setLevel(logging.DEBUG)
    fh.setFormatter(formatter)
    log.addHandler(fh)

    log.info("TF version: %s", tf.__version__)
    log.info("Model directory: %s", model_dir)
    log.info("Batch size: %s", batch_size)
    log.info("Prefetch data all to memory: %s", True)
    log.info("Train epochs: %s", epochs)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
    config.log_device_placement = True  # to log device placement (on which device the operation ran)
    sess = tf.Session(config=config)
    ktf.set_session(
        sess)  # set this TensorFlow session as the default session for Keras

    steps_per_epoch = cifar10_data.train_len() / batch_size
    log.info("Steps per epoch: %s", steps_per_epoch)
    if eval_steps is None:
        eval_steps = steps_per_epoch
    log.info("Evaluating each %i steps", eval_steps)

    if mname == "cnn":
        model = cifar10_model_cnn.cifar_model()
    else:
        model = cifar10_model_resnet.cifar_model()
        global input_name
        input_name = 'input_1'

    model.summary()

    def train_input_fn():
        dataset = tf.data.Dataset.from_generator(
            generator=cifar10_data.generator_train,
            output_types=(tf.float32, tf.float32),
            output_shapes=shapes)
        dataset = dataset.batch(batch_size)
        dataset = dataset.prefetch(buffer_size=batch_size)
        # dataset = dataset.repeat(20)
        iterator = dataset.make_one_shot_iterator()
        features_tensors, labels = iterator.get_next()
        features = {input_name: features_tensors}
        return features, labels

    def eval_input_fn():
        dataset = tf.data.Dataset.from_generator(
            generator=cifar10_data.generator_test,
            output_types=(tf.float32, tf.float32),
            output_shapes=shapes)
        dataset = dataset.batch(batch_size)
        dataset = dataset.prefetch(buffer_size=batch_size)
        iterator = dataset.make_one_shot_iterator()
        features_tensors, labels = iterator.get_next()
        features = {input_name: features_tensors}
        return features, labels

    my_config = RunConfig(
        save_checkpoints_steps=eval_steps  # Save checkpoints every n steps and run the evaluation.
        # keep_checkpoint_max = 5          # Retain the n most recent checkpoints (default 5).
    )
    estimator = tf.keras.estimator.model_to_estimator(model,
                                                      config=my_config,
                                                      model_dir=model_dir)

    examples_sec_hook = ExamplesPerSecondHook(batch_size,
                                              every_n_steps=eps_log_steps)
    # stopping_hook = early_stopping.stop_if_higher_hook(estimator, "accuracy", 0.5)

    train_hooks = [examples_sec_hook]

    train_spec = TrainSpec(input_fn=train_input_fn,
                           hooks=train_hooks,
                           max_steps=cifar10_data.train_len() / batch_size *
                           epochs)
    eval_spec = EvalSpec(input_fn=eval_input_fn,
                         steps=cifar10_data.val_len() / batch_size,
                         throttle_secs=5)  # default 100 steps

    global is_training
    is_training = True
    threading.Thread(target=lambda: collect_stats(log_dir)).start()
    start = time.time()

    train_and_evaluate(estimator, train_spec, eval_spec)

    elapsed = time.time() - start
    is_training = False
    log.info("total time taken (seconds): %s ", elapsed)
    if model_dir_hdfs:
        parse_res = parse.urlsplit(model_dir)
        netloc = parse_res[1]
        path = parse_res[2]
        webhdfs_model_dir = 'http://' + netloc + ':50070/webhdfs/v1' + path
        username = getpass.getuser()
        component_name = estimator.config.task_type + str(
            estimator.config.task_id)
        log.info("Uploading log files for %s as %s to HDFS path: %s",
                 component_name, username, webhdfs_model_dir)
        logging.shutdown()
        os.system('curl -L -i -T tensorflow.log "' + webhdfs_model_dir +
                  '/tensorflow-' + component_name +
                  '.log?op=CREATE&overwrite=false&user.name=' + username + '"')
        os.system('curl -L -i -T cpu.csv "' + webhdfs_model_dir + '/cpu-' +
                  component_name +
                  '.csv?op=CREATE&overwrite=false&user.name=' + username + '"')
        os.system('curl -L -i -T gpu.csv "' + webhdfs_model_dir + '/gpu-' +
                  component_name +
                  '.csv?op=CREATE&overwrite=false&user.name=' + username + '"')
    else:
        log.info("Creating zip archive of job results")
        logging.shutdown()
        shutil.make_archive(model_dir, 'zip', model_dir)
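One caveat about Example #21: the ConfigProto registered with ktf.set_session does not automatically apply to the estimator; tf.estimator picks up session options from RunConfig. A hedged, self-contained sketch of threading the same GPU options through (the checkpoint frequency is a placeholder assumption):

import tensorflow as tf
from tensorflow.estimator import RunConfig

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
my_config = RunConfig(
    save_checkpoints_steps=1000,   # assumption: checkpoint/eval frequency
    session_config=config,         # the estimator's sessions now share the same GPU options
)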
Example #22
class Config:

    #
    #   general config
    #
    epoch_display_periods = 10  # epoch display periods
    summaries_dir = "./summaries"  # tensorboard writer target directory
    model_dir = "checkpoints"  # save model in this directory
    save_periods = 100  # save periods
    sess_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    sess_config.gpu_options.allow_growth = True
    keras_sess = tf.Session(config=sess_config)
    K.set_session(keras_sess)


    #
    #   environment config
    #
    environment_combination_len = 3
    environment_combinations_num = 10

    #
    #   actor config
    #
    lr = 0.001  # learning rate
    gamma = 0.5  # the discount factor in G
    value_scale = 0.5  # the weight of value function approximation in total loss
    reinforce_batch_size = 100  # batch size used in Reinforce algorithm
    gradient_clip = 40  # gradient clip, avoid too large gradient

    #
    #   encoder config
    #
    encoder_dim = 64

    #
    #   reinforce config
    reinforce_logdir = "./summaries/reinforce_logdir"
    reinforce_learning_rate = 0.001


    #
    #   evaluator configs
    #
    evaluator_model_name = "lr"  #  'pin', 'lr'
    evaluator_optimizer_name = 'adam'
    evaluator_learning_rate = 0.03
    evaluator_epsilon = 1e-4
    evaluator_max_rounds = 2000
    evaluator_early_stop = 8
    evaluator_embedding_size = 20
    evaluator_log_step_frequency = 0
    evaluator_eval_round_frequency = 1
    evaluator_train_logdir = "./summaries/evaluator_train"
    evaluator_valid_logdir = "./summaries/evaluator_valid"
    evaluator_graph_logdir = "./summaries/evaluator_graph"


    #
    #   dataset
    #
    data_name = "Couple"
    dataset = as_dataset(data_name, True)
    dataset.load_data(gen_type='train')
    dataset.load_data(gen_type='test')
    dataset.summary()
    num_fields = dataset.num_fields
    feat_sizes = dataset.feat_sizes
    feat_min = dataset.feat_min
    target_combination_num = 30
    target_combination_len = 4
Example #23
 def save_model(self, filename):
     with self.graph.as_default():
         K.set_session(self.sess)
         self.model.save(filename)
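save_model assumes that self.graph, self.sess and self.model were captured together when the model was first loaded; a minimal sketch of that setup under TF 1.x Keras (the load_model method name and its placement are assumptions, and tf, K and keras are imported as in the surrounding examples):

    def load_model(self, filename):
        # Hypothetical counterpart to save_model: build a private graph and session,
        # register the session with Keras, and load the model inside that graph so
        # save_model/predict can re-enter it later from any thread.
        self.graph = tf.Graph()
        with self.graph.as_default():
            self.sess = tf.Session()
            K.set_session(self.sess)
            self.model = keras.models.load_model(filename)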
Example #24
0
def train_model():

    # to use the GPU on Colab
    warnings.filterwarnings('ignore')
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    config = tf.ConfigProto()
    # dynamically grow the memory used on the GPU
    config.gpu_options.allow_growth = True
    # log device placement (i.e. on which device each operation ran);
    # nothing gets printed in Jupyter, only when run standalone
    config.log_device_placement = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    sess = tf.Session(config=config)
    # set this TensorFlow session as the default session for Keras
    set_session(sess)

    batch_size = 32
    model = create_multi_task_model_keras(VOCAB_SIZE, EMBEDDING_SIZE, HIDDEN_SIZE, LOSS_LIST, METRICS)

    model.summary()

    tensorboard = K.callbacks.TensorBoard("logging/keras_model", histogram_freq=50)

    print("\nStarting training...")

    checkpoint = K.callbacks.ModelCheckpoint(resources_path_model, monitor='val_loss', verbose=1, save_best_only=True,
                                             mode='min')
    cbk = [checkpoint, tensorboard]

    history = model.fit(x_train, [np.expand_dims(y_train_babelnet, axis=2), np.expand_dims(y_train_domains, axis=2),
                                  np.expand_dims(y_train_lexnames, axis=2)],
                        epochs=EPOCHS, shuffle=True, batch_size=batch_size, validation_data=(x_dev, [
            np.expand_dims(y_dev_babelnet, axis=2), np.expand_dims(y_dev_domains, axis=2),
            np.expand_dims(y_dev_lexnames, axis=2)]), callbacks=cbk)

    # list all data in history
    print(history.history.keys())

    # summarize history for accuracy babelnet
    plt.plot(history.history['babelnet_acc'])
    plt.plot(history.history['val_babelnet_acc'])
    plt.title('model accuracy babelnet')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

    # summarize history for loss babelnet
    plt.plot(history.history['babelnet_loss'])
    plt.plot(history.history['val_babelnet_loss'])
    plt.title('model loss babelnet')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

    # summarize history for accuracy domains
    plt.plot(history.history['domains_acc'])
    plt.plot(history.history['val_domains_acc'])
    plt.title('model accuracy domains')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

    # summarize history for loss domains
    plt.plot(history.history['domains_loss'])
    plt.plot(history.history['val_domains_loss'])
    plt.title('model loss domains')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

    # summarize history for accuracy lexnames
    plt.plot(history.history['lexnames_acc'])
    plt.plot(history.history['val_lexnames_acc'])
    plt.title('model accuracy lexnames')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

    # summarize history for loss lexnames
    plt.plot(history.history['lexnames_loss'])
    plt.plot(history.history['val_lexnames_loss'])
    plt.title('model loss lexnames')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

    # save the model architecture to a JSON file (weights are saved separately below)
    model_json = model.to_json()
    with open(resources_path_model, "w") as json_file:
        json_file.write(model_json)

    print("Saved model to file (format json): " + resources_path_model)

    # serialize weights to HDF5
    model.save_weights(resources_path_weights)
    print("Saved weights of model to file : " + resources_path_weights)
Example #25
0
"""

import tensorflow as tf
from tensorflow.keras import backend as K

from futils import segmentor as v_seg
from futils.compute_distance_metrics_and_save import write_all_metrics
from futils.find_connect_parts import write_connected_lobes
from futils.generate_fissure_from_masks import gntFissure
from futils.mypath import Mypath
from futils.write_batch_preds import write_preds_to_disk

config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
sess = tf.Session(config=config)
K.set_session(sess)  # set this TensorFlow session as the default session for Keras
'''
'1585000573.7211952_0.00011a_o_0ds2dr1bn1fs16ptsz144ptzsz64',
             '1584924602.9965076_0.00010a_o_0ds2dr1bn1fs16ptsz144ptzsz64',
             '1584925363.1298258_0.00010a_o_0ds2dr1bn1fs16ptsz96ptzsz64'
             1587041504.5222292_0.00010a_o_0ds2dr1bn1fs16ptsz144ptzsz64
             1587846165.2829812_0.00010a_o_0ds2dr1bn1fs16ptsz144ptzsz64
             1587858645.924413_0.00010a_o_0ds2dr1bn1fs8ptsz144ptzsz96
             1587858294.826981_0.00010a_o_0ds2dr1bn1fs8ptsz144ptzsz96
             1587857822.602289_0.00010a_o_0ds2dr1bn1fs8ptsz144ptzsz96
             1587852304.1056986_0.00010a_o_0ds2dr1bn1fs4ptsz144ptzsz96
             1587852304.1056986_0.00010a_o_0ds2dr1bn1fs4ptsz144ptzsz96
             1587848974.2342067_0.00010a_o_0ds2dr1bn1fs8ptsz144ptzsz96
             1587848927.819794_0.00010a_o_0ds2dr1bn1fs16ptsz144ptzsz96
             1587846165.2829812_0.00010a_o_0ds2dr1bn1fs16ptsz144ptzsz64
'''
Example #26
0
def test_run(model_path, rnn_type="simple", abbr_test_mods="U", device="0"):
    ### Set tensorflow session
    tf.reset_default_graph()
    os.environ["CUDA_VISIBLE_DEVICES"] = device
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    K.set_session(sess)

    ### Save path to the prediction result
    model_info = get_model_info(model_path)
    model_root = os.path.split(model_path)[0]
    test_root = os.path.join(model_root, "test", std_mods(abbr_test_mods))
    if not os.path.exists(test_root):
        os.makedirs(test_root)
    pred_path = os.path.join(test_root, "predict.npy")

    ### Get the test data generator
    feature_root = os.path.join("data")
    split_root = os.path.join(feature_root, "split", str(model_info["split"]))
    target_root = os.path.join(feature_root,
                               "len_{}".format(model_info["length"]))

    ### Get the model for prediction
    if model_info["encodertype"] == "user":
        train_mods = ["user"]
        mod_pos_dict = {"user": 0}
        uemb_path = os.path.join(feature_root, "user_emb.npy")
        test_mods = train_mods
        train_shapes = [[1], [3]] + [[model_info["length"], 1]]
        test_model = build_test_model(model_path, train_shapes, test_mods,
                                      rnn_type, mod_pos_dict, train_mods)
        test_gen = get_testgen(feature_root,
                               target_root,
                               split_root,
                               test_mods,
                               phase="test")

        ### Evaluation
        preds, truth = user_predict(test_model, test_gen, pred_path)
        ### User embeddings output
        uemb_gen = get_testgen(feature_root,
                               target_root,
                               split_root,
                               test_mods,
                               phase="all")
        uemb_output(test_model, uemb_gen, uemb_path)

    else:
        train_mods = ["resnet50", "audiovgg", "fudannlp"]
        mod_pos_dict = {
            mod: train_mods.index(mod)
            for mod in mod_rep_dict.keys()
        }
        test_mods = rep2mods(ord_rep(abbr_test_mods))
        train_shapes = [[2, 8]] + [[mod_shape_dict[mod]] for mod in train_mods
                                   ] + [[model_info["length"], 1]]
        test_model = build_test_model(model_path, train_shapes, test_mods,
                                      rnn_type, mod_pos_dict, train_mods)
        test_gen = get_testgen(feature_root,
                               target_root,
                               split_root,
                               test_mods,
                               phase="test")
        preds, truth = predict(test_model, test_gen, pred_path)

    ### Evaluate model with numerous indexes
    eval_path = os.path.join(test_root, "eval.txt")
    nmse, corr, srcc = evaluate(preds, truth, eval_path)

    K.clear_session()
    return nmse, corr, srcc
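A hedged usage sketch: test_run returns the three evaluation scores, so it can be swept over several modality abbreviations and the results collected. The model path and the abbreviation list below are placeholders, not values from the original script.

if __name__ == "__main__":
    model_path = "checkpoints/example_model.h5"  # placeholder path
    results = {}
    for mods in ["U", "R", "A", "F"]:  # placeholder modality abbreviations
        nmse, corr, srcc = test_run(model_path, rnn_type="simple",
                                    abbr_test_mods=mods, device="0")
        results[mods] = {"nmse": nmse, "corr": corr, "srcc": srcc}
    for mods, scores in results.items():
        print(mods, scores)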
Example #27
0
def main(args=None):
    # parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # optionally choose specific GPU
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    K.set_session(get_session())

    # create the generators
    train_generator, validation_generator = create_generators(args)

    num_classes = train_generator.num_classes()
    model, prediction_model = efficientdet(args.phi,
                                           num_classes=num_classes,
                                           weighted_bifpn=args.weighted_bifpn)

    # load pretrained weights
    if args.snapshot:
        if args.snapshot == 'imagenet':
            model_name = 'efficientnet-b{}'.format(args.phi)
            file_name = '{}_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5'.format(
                model_name)
            file_hash = WEIGHTS_HASHES[model_name][1]
            weights_path = keras.utils.get_file(file_name,
                                                BASE_WEIGHTS_PATH + file_name,
                                                cache_subdir='models',
                                                file_hash=file_hash)
            model.load_weights(weights_path, by_name=True)
        else:
            print('Loading model, this may take a second...')
            model.load_weights(args.snapshot, by_name=True)

    # freeze backbone layers
    if args.freeze_backbone:
        # 227, 329, 329, 374, 464, 566, 656
        for i in range(1, [227, 329, 329, 374, 464, 566, 656][args.phi]):
            model.layers[i].trainable = False

    # compile model
    model.compile(
        optimizer=Adam(lr=1e-3),
        loss={
            'regression': smooth_l1(),
            'classification': focal()
        },
    )

    # print(model.summary())

    # create the callbacks
    callbacks = create_callbacks(
        model,
        prediction_model,
        validation_generator,
        args,
    )

    if not args.compute_val_loss:
        validation_generator = None

    # start training
    return model.fit_generator(generator=train_generator,
                               steps_per_epoch=args.steps,
                               initial_epoch=0,
                               epochs=args.epochs,
                               verbose=1,
                               callbacks=callbacks,
                               workers=args.workers,
                               use_multiprocessing=args.multiprocessing,
                               max_queue_size=args.max_queue_size,
                               validation_data=validation_generator)
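get_session() is not shown in this excerpt; in Keras detection codebases of this kind it is usually a small helper that builds a GPU-friendly session, roughly along these lines (a sketch, not the repository's exact code):

def get_session():
    # Build a tf.Session that only grabs GPU memory as it is needed,
    # so several processes can share one device.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    return tf.Session(config=config)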
Example #28
0
def main(args):
    # Horovod: initialize Horovod.
    hvd.init()

    if not args.use_only_cpu:
        # Horovod: pin GPU to be used to process local rank (one GPU per process)
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.gpu_options.visible_device_list = str(hvd.local_rank())
    else:
        config = None

    K.set_session(tf.Session(config=config))

    batch_size = 128
    num_classes = 10

    # Horovod: adjust number of epochs based on number of GPUs.
    epochs = int(math.ceil(args.num_epochs / hvd.size()))

    # Input image dimensions
    img_rows, img_cols = 28, 28

    # The data, shuffled and split between train and test sets
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    if K.image_data_format() == "channels_first":
        x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
        x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
        input_shape = (1, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
        input_shape = (img_rows, img_cols, 1)

    x_train = x_train.astype("float32")
    x_test = x_test.astype("float32")
    x_train /= 255
    x_test /= 255
    print("x_train shape:", x_train.shape)
    print(x_train.shape[0], "train samples")
    print(x_test.shape[0], "test samples")

    # Convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    model = Sequential()
    model.add(
        Conv2D(32,
               kernel_size=(3, 3),
               activation="relu",
               input_shape=input_shape))
    model.add(Conv2D(64, (3, 3), activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation="relu"))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation="softmax"))

    # Horovod: adjust learning rate based on number of GPUs.
    opt = keras.optimizers.Adadelta(1.0 * hvd.size())

    # Horovod: add Horovod Distributed Optimizer.
    opt = hvd.DistributedOptimizer(opt)

    ##### Enabling SageMaker Debugger ###########
    # Create hook from the configuration provided through sagemaker python sdk
    smd_hook = smd.KerasHook.create_from_json_file()

    ##### Enabling SageMaker Debugger ###########
    # wrap the optimizer so the hook can identify the gradients
    opt = smd_hook.wrap_optimizer(opt)

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=opt,
                  metrics=["accuracy"])

    callbacks = [
        # Horovod: broadcast initial variable states from rank 0 to all other processes.
        # This is necessary to ensure consistent initialization of all workers when
        # training is started with random weights or restored from a checkpoint.
        hvd.callbacks.BroadcastGlobalVariablesCallback(0),
        ##### Enabling SageMaker Debugger ###########
        # pass smd_hook as a callback
        smd_hook,
    ]

    # Horovod: save checkpoints only on worker 0 to prevent other workers from corrupting them.
    if hvd.rank() == 0:
        callbacks.append(
            keras.callbacks.ModelCheckpoint(
                os.path.join(args.model_dir, "checkpoint-{epoch}.h5")))

    model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        callbacks=callbacks,
        epochs=epochs,
        verbose=1 if hvd.rank() == 0 else 0,
        validation_data=(x_test, y_test),
    )
    score = model.evaluate(x_test, y_test, verbose=0)
    print("Test loss:", score[0])
    print("Test accuracy:", score[1])
Example #29
0
    def train_fn(model_bytes):
        # Make sure pyarrow is referenced before anything else to avoid segfault due to conflict
        # with TensorFlow libraries.  Use `pa` package reference to ensure it's loaded before
        # functions like `deserialize_model` which are implemented at the top level.
        # See https://jira.apache.org/jira/browse/ARROW-3346
        pa

        import atexit
        import horovod.tensorflow.keras as hvd
        from horovod.spark.task import get_available_devices
        import os
        from petastorm import make_batch_reader
        from petastorm.tf_utils import make_petastorm_dataset
        import tempfile
        import tensorflow as tf
        import tensorflow.keras.backend as K
        import shutil

        # Horovod: initialize Horovod inside the trainer.
        hvd.init()

        # Horovod: pin GPU to be used to process local rank (one GPU per process), if GPUs are available.
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.gpu_options.visible_device_list = get_available_devices()[0]
        K.set_session(tf.Session(config=config))

        # Horovod: restore from checkpoint, use hvd.load_model under the hood.
        model = deserialize_model(model_bytes, hvd.load_model)

        # Horovod: adjust learning rate based on number of processes.
        K.set_value(model.optimizer.lr, K.get_value(model.optimizer.lr) * hvd.size())

        # Horovod: print summary logs on the first worker.
        verbose = 2 if hvd.rank() == 0 else 0

        callbacks = [
            # Horovod: broadcast initial variable states from rank 0 to all other processes.
            # This is necessary to ensure consistent initialization of all workers when
            # training is started with random weights or restored from a checkpoint.
            hvd.callbacks.BroadcastGlobalVariablesCallback(root_rank=0),

            # Horovod: average metrics among workers at the end of every epoch.
            #
            # Note: This callback must be in the list before the ReduceLROnPlateau,
            # TensorBoard, or other metrics-based callbacks.
            hvd.callbacks.MetricAverageCallback(),

            # Horovod: using `lr = 1.0 * hvd.size()` from the very beginning leads to worse final
            # accuracy. Scale the learning rate `lr = 1.0` ---> `lr = 1.0 * hvd.size()` during
            # the first five epochs. See https://arxiv.org/abs/1706.02677 for details.
            hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=5, verbose=verbose),

            # Reduce LR if the metric is not improved for 10 epochs, and stop training
            # if it has not improved for 20 epochs.
            tf.keras.callbacks.ReduceLROnPlateau(monitor='val_exp_rmspe', patience=10, verbose=verbose),
            tf.keras.callbacks.EarlyStopping(monitor='val_exp_rmspe', mode='min', patience=20, verbose=verbose),
            tf.keras.callbacks.TerminateOnNaN()
        ]

        # Model checkpoint location.
        ckpt_dir = tempfile.mkdtemp()
        ckpt_file = os.path.join(ckpt_dir, 'checkpoint.h5')
        atexit.register(lambda: shutil.rmtree(ckpt_dir))

        # Horovod: save checkpoints only on the first worker to prevent other workers from corrupting them.
        if hvd.rank() == 0:
            callbacks.append(tf.keras.callbacks.ModelCheckpoint(ckpt_file, monitor='val_exp_rmspe', mode='min',
                                                                save_best_only=True))

        # Make Petastorm readers.
        with make_batch_reader('%s/train_df.parquet' % args.data_dir, num_epochs=None,
                               cur_shard=hvd.rank(), shard_count=hvd.size(),
                               hdfs_driver=PETASTORM_HDFS_DRIVER) as train_reader:
            with make_batch_reader('%s/val_df.parquet' % args.data_dir, num_epochs=None,
                                   cur_shard=hvd.rank(), shard_count=hvd.size(),
                                   hdfs_driver=PETASTORM_HDFS_DRIVER) as val_reader:
                # Convert readers to tf.data.Dataset.
                train_ds = make_petastorm_dataset(train_reader) \
                    .apply(tf.data.experimental.unbatch()) \
                    .shuffle(int(train_rows / hvd.size())) \
                    .batch(args.batch_size) \
                    .map(lambda x: (tuple(getattr(x, col) for col in all_cols), tf.log(x.Sales)))

                val_ds = make_petastorm_dataset(val_reader) \
                    .apply(tf.data.experimental.unbatch()) \
                    .batch(args.batch_size) \
                    .map(lambda x: (tuple(getattr(x, col) for col in all_cols), tf.log(x.Sales)))

                history = model.fit(train_ds,
                                    validation_data=val_ds,
                                    steps_per_epoch=int(train_rows / args.batch_size / hvd.size()),
                                    validation_steps=int(val_rows / args.batch_size / hvd.size()),
                                    callbacks=callbacks,
                                    verbose=verbose,
                                    epochs=args.epochs)

        # Dataset API usage currently displays a wall of errors upon termination.
        # This global model registration ensures clean termination.
        # Tracked in https://github.com/tensorflow/tensorflow/issues/24570
        globals()['_DATASET_FINALIZATION_HACK'] = model

        if hvd.rank() == 0:
            with open(ckpt_file, 'rb') as f:
                return history.history, f.read()
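deserialize_model (along with train_rows, val_rows, all_cols and PETASTORM_HDFS_DRIVER) is defined elsewhere in the surrounding script. A plausible, assumed version of deserialize_model simply writes the serialized HDF5 bytes to a temporary file and defers to the supplied loader, here hvd.load_model, which also restores the wrapped distributed optimizer:

def deserialize_model(model_bytes, load_model_fn):
    # Assumed helper: dump the serialized HDF5 bytes to disk and load the
    # Keras model with the provided loader function.
    import os
    import tempfile
    with tempfile.NamedTemporaryFile(suffix='.h5', delete=False) as tmp:
        tmp.write(model_bytes)
        tmp_path = tmp.name
    try:
        return load_model_fn(tmp_path)
    finally:
        os.remove(tmp_path)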
Example #30
0
    def _evaluate(
        self, config: dict, num_eval_batches: Optional[int], skip_benign: Optional[bool]
    ) -> dict:
        """
        Evaluate a config file for classification robustness against attack.

        Note: num_eval_batches shouldn't be set for poisoning scenario and will raise an
        error if it is
        """
        if config["sysconfig"].get("use_gpu"):
            os.environ["TF_CUDNN_DETERMINISM"] = "1"
        if num_eval_batches:
            raise ValueError("num_eval_batches shouldn't be set for poisoning scenario")
        if skip_benign:
            raise ValueError("skip_benign shouldn't be set for poisoning scenario")

        model_config = config["model"]
        # Scenario assumes preprocessing_fn makes images all same size
        classifier, preprocessing_fn = load_model(model_config)

        config_adhoc = config.get("adhoc") or {}
        train_epochs = config_adhoc["train_epochs"]
        src_class = config_adhoc["source_class"]
        tgt_class = config_adhoc["target_class"]
        fit_batch_size = config_adhoc.get(
            "fit_batch_size", config["dataset"]["batch_size"]
        )

        if not config["sysconfig"].get("use_gpu"):
            conf = ConfigProto(intra_op_parallelism_threads=1)
            set_session(Session(config=conf))

        # Set random seed due to large variance in attack and defense success
        np.random.seed(config_adhoc["split_id"])
        set_random_seed(config_adhoc["split_id"])
        random.seed(config_adhoc["split_id"])
        use_poison_filtering_defense = config_adhoc.get(
            "use_poison_filtering_defense", True
        )
        if self.check_run:
            # filtering defense requires more than a single batch to run properly
            use_poison_filtering_defense = False

        logger.info(f"Loading dataset {config['dataset']['name']}...")

        clean_data = load_dataset(
            config["dataset"],
            epochs=1,
            split_type="train",
            preprocessing_fn=preprocessing_fn,
            shuffle_files=False,
        )

        attack_config = config["attack"]
        attack_type = attack_config.get("type")

        fraction_poisoned = config["adhoc"]["fraction_poisoned"]
        # Flag for whether to poison dataset -- used to evaluate
        #     performance of defense on clean data
        poison_dataset_flag = config["adhoc"]["poison_dataset"]
        # detect_poison does not currently support data generators
        #     therefore, make in memory dataset
        x_train_all, y_train_all = [], []

        if attack_type == "preloaded":
            # Number of datapoints in train split of target class
            num_images_tgt_class = config_adhoc["num_images_target_class"]
            logger.info(
                f"Loading poison dataset {config_adhoc['poison_samples']['name']}..."
            )
            num_poisoned = int(config_adhoc["fraction_poisoned"] * num_images_tgt_class)
            if num_poisoned == 0:
                raise ValueError(
                    "For the preloaded attack, fraction_poisoned must be set so that at least on data point is poisoned."
                )
            # Set batch size to number of poisons -- read only one batch of preloaded poisons
            config_adhoc["poison_samples"]["batch_size"] = num_poisoned
            poison_data = load_dataset(
                config["adhoc"]["poison_samples"],
                epochs=1,
                split_type="poison",
                preprocessing_fn=None,
            )

            logger.info(
                "Building in-memory dataset for poisoning detection and training"
            )
            for x_clean, y_clean in clean_data:
                x_train_all.append(x_clean)
                y_train_all.append(y_clean)
            x_poison, y_poison = poison_data.get_batch()
            x_poison = np.array([xp for xp in x_poison], dtype=np.float32)
            x_train_all.append(x_poison)
            y_train_all.append(y_poison)
            x_train_all = np.concatenate(x_train_all, axis=0)
            y_train_all = np.concatenate(y_train_all, axis=0)
        else:
            attack = load(attack_config)
            logger.info(
                "Building in-memory dataset for poisoning detection and training"
            )
            for x_train, y_train in clean_data:
                x_train_all.append(x_train)
                y_train_all.append(y_train)
            x_train_all = np.concatenate(x_train_all, axis=0)
            y_train_all = np.concatenate(y_train_all, axis=0)
            if poison_dataset_flag:
                total_count = np.bincount(y_train_all)[src_class]
                poison_count = int(fraction_poisoned * total_count)
                if poison_count == 0:
                    logger.warning(
                        f"No poisons generated with fraction_poisoned {fraction_poisoned} for class {src_class}."
                    )
                src_indices = np.where(y_train_all == src_class)[0]
                poisoned_indices = np.sort(
                    np.random.choice(src_indices, size=poison_count, replace=False)
                )
                x_train_all, y_train_all = poison_dataset(
                    x_train_all,
                    y_train_all,
                    src_class,
                    tgt_class,
                    y_train_all.shape[0],
                    attack,
                    poisoned_indices,
                )

        y_train_all_categorical = to_categorical(y_train_all)

        # Flag to determine whether defense_classifier is trained directly
        #     (default API) or is trained as part of detect_poisons method
        fit_defense_classifier_outside_defense = config_adhoc.get(
            "fit_defense_classifier_outside_defense", True
        )
        # Flag to determine whether defense_classifier uses sparse
        #     or categorical labels
        defense_categorical_labels = config_adhoc.get(
            "defense_categorical_labels", True
        )
        if use_poison_filtering_defense:
            if defense_categorical_labels:
                y_train_defense = y_train_all_categorical
            else:
                y_train_defense = y_train_all

            defense_config = config["defense"]
            detection_kwargs = config_adhoc.get("detection_kwargs", dict())

            defense_model_config = config_adhoc.get("defense_model", model_config)
            defense_train_epochs = config_adhoc.get(
                "defense_train_epochs", train_epochs
            )

            # Assumes classifier_for_defense and classifier use same preprocessing function
            classifier_for_defense, _ = load_model(defense_model_config)
            logger.info(
                f"Fitting model {defense_model_config['module']}.{defense_model_config['name']} "
                f"for defense {defense_config['name']}..."
            )
            if fit_defense_classifier_outside_defense:
                classifier_for_defense.fit(
                    x_train_all,
                    y_train_defense,
                    batch_size=fit_batch_size,
                    nb_epochs=defense_train_epochs,
                    verbose=False,
                    shuffle=True,
                )
            defense_fn = load_fn(defense_config)
            defense = defense_fn(classifier_for_defense, x_train_all, y_train_defense)

            _, is_clean = defense.detect_poison(**detection_kwargs)
            is_clean = np.array(is_clean)
            logger.info(f"Total clean data points: {np.sum(is_clean)}")

            logger.info("Filtering out detected poisoned samples")
            indices_to_keep = is_clean == 1
            x_train_final = x_train_all[indices_to_keep]
            y_train_final = y_train_all_categorical[indices_to_keep]
        else:
            logger.info(
                "Defense does not require filtering. Model fitting will use all data."
            )
            x_train_final = x_train_all
            y_train_final = y_train_all_categorical
        if len(x_train_final):
            logger.info(
                f"Fitting model of {model_config['module']}.{model_config['name']}..."
            )
            classifier.fit(
                x_train_final,
                y_train_final,
                batch_size=fit_batch_size,
                nb_epochs=train_epochs,
                verbose=False,
                shuffle=True,
            )
        else:
            logger.warning("All data points filtered by defense. Skipping training")

        logger.info("Validating on clean test data")
        test_data = load_dataset(
            config["dataset"],
            epochs=1,
            split_type="test",
            preprocessing_fn=preprocessing_fn,
            shuffle_files=False,
        )
        benign_validation_metric = metrics.MetricList("categorical_accuracy")
        target_class_benign_metric = metrics.MetricList("categorical_accuracy")
        for x, y in tqdm(test_data, desc="Testing"):
            # Ensure that input sample isn't overwritten by classifier
            x.flags.writeable = False
            y_pred = classifier.predict(x)
            benign_validation_metric.append(y, y_pred)
            y_pred_tgt_class = y_pred[y == src_class]
            if len(y_pred_tgt_class):
                target_class_benign_metric.append(
                    [src_class] * len(y_pred_tgt_class), y_pred_tgt_class
                )
        logger.info(
            f"Unpoisoned validation accuracy: {benign_validation_metric.mean():.2%}"
        )
        logger.info(
            f"Unpoisoned validation accuracy on targeted class: {target_class_benign_metric.mean():.2%}"
        )
        results = {
            "benign_validation_accuracy": benign_validation_metric.mean(),
            "benign_validation_accuracy_targeted_class": target_class_benign_metric.mean(),
        }

        poisoned_test_metric = metrics.MetricList("categorical_accuracy")
        poisoned_targeted_test_metric = metrics.MetricList("categorical_accuracy")

        logger.info("Testing on poisoned test data")
        if attack_type == "preloaded":
            test_data_poison = load_dataset(
                config_adhoc["poison_samples"],
                epochs=1,
                split_type="poison_test",
                preprocessing_fn=None,
            )
            for x_poison_test, y_poison_test in tqdm(
                test_data_poison, desc="Testing poison"
            ):
                x_poison_test = np.array([xp for xp in x_poison_test], dtype=np.float32)
                y_pred = classifier.predict(x_poison_test)
                y_true = [src_class] * len(y_pred)
                poisoned_targeted_test_metric.append(y_poison_test, y_pred)
                poisoned_test_metric.append(y_true, y_pred)
            test_data_clean = load_dataset(
                config["dataset"],
                epochs=1,
                split_type="test",
                preprocessing_fn=preprocessing_fn,
                shuffle_files=False,
            )
            for x_clean_test, y_clean_test in tqdm(
                test_data_clean, desc="Testing clean"
            ):
                x_clean_test = np.array([xp for xp in x_clean_test], dtype=np.float32)
                y_pred = classifier.predict(x_clean_test)
                poisoned_test_metric.append(y_clean_test, y_pred)

        elif poison_dataset_flag:
            logger.info("Testing on poisoned test data")
            test_data = load_dataset(
                config["dataset"],
                epochs=1,
                split_type="test",
                preprocessing_fn=preprocessing_fn,
                shuffle_files=False,
            )
            for x_test, y_test in tqdm(test_data, desc="Testing"):
                src_indices = np.where(y_test == src_class)[0]
                poisoned_indices = src_indices  # Poison entire class
                x_test, _ = poison_dataset(
                    x_test,
                    y_test,
                    src_class,
                    tgt_class,
                    len(y_test),
                    attack,
                    poisoned_indices,
                )
                y_pred = classifier.predict(x_test)
                poisoned_test_metric.append(y_test, y_pred)

                y_pred_targeted = y_pred[y_test == src_class]
                if not len(y_pred_targeted):
                    continue
                poisoned_targeted_test_metric.append(
                    [tgt_class] * len(y_pred_targeted), y_pred_targeted
                )

        if poison_dataset_flag or attack_type == "preloaded":
            results["poisoned_test_accuracy"] = poisoned_test_metric.mean()
            results[
                "poisoned_targeted_misclassification_accuracy"
            ] = poisoned_targeted_test_metric.mean()
            logger.info(f"Test accuracy: {poisoned_test_metric.mean():.2%}")
            logger.info(
                f"Test targeted misclassification accuracy: {poisoned_targeted_test_metric.mean():.2%}"
            )

        return results
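The method above pulls a number of values out of config['adhoc']; an illustrative (not authoritative) fragment showing only the keys it actually reads, with placeholder values:

# Illustrative values only -- each key below is one _evaluate reads from config["adhoc"].
example_adhoc_config = {
    "train_epochs": 10,
    "source_class": 0,
    "target_class": 1,
    "fit_batch_size": 256,
    "split_id": 0,
    "fraction_poisoned": 0.1,
    "poison_dataset": True,
    "use_poison_filtering_defense": True,
    "defense_categorical_labels": True,
    "fit_defense_classifier_outside_defense": True,
    "detection_kwargs": {},
    # only needed when the attack type is "preloaded":
    # "num_images_target_class": ...,
    # "poison_samples": {...},
}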
Example #31
0
    # Data, model, and output directories. These are required.
    parser.add_argument('--output-dir', type=str, default=os.environ['SM_OUTPUT_DIR'])
    parser.add_argument('--model_dir', type=str)
    parser.add_argument('--train', type=str, default=os.environ['SM_CHANNEL_TRAIN'])
    parser.add_argument('--test', type=str, default=os.environ['SM_CHANNEL_TEST'])

    args, _ = parser.parse_known_args()

    # Horovod: initialize Horovod.
    hvd.init()

    # Horovod: pin GPU to be used to process local rank (one GPU per process)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(hvd.local_rank())
    K.set_session(tf.Session(config=config))

    batch_size = 128
    num_classes = 10

    # Horovod: adjust number of epochs based on number of GPUs.
    epochs = int(math.ceil(12.0 / hvd.size()))

    # Input image dimensions
    img_rows, img_cols = 28, 28

    # The data, shuffled and split between train and test sets

    x_train = np.load(os.path.join(args.train, 'train.npz'))['data']
    y_train = np.load(os.path.join(args.train, 'train.npz'))['labels']
    print("Train dataset loaded from: {}".format(os.path.join(args.train, 'train.npz')))
Example #32
0
    def _predict(self, x):
        with graph.as_default():
            set_session(sess)
            predict_result = self.model.predict(x)
            return predict_result