def run(base_path, gpu_mon, num_GPUs, continue_training, force_GPU, just_one,
        no_val, no_images, debug, wait_for, logger, train_images_per_epoch,
        val_images_per_epoch, **kwargs):
    """End-to-end training entry point for a MultiPlanarUNet project folder.

    Reads hyperparameters from ``<base_path>/train_hparams.yaml``, prepares
    data generators, builds (or resumes) a model, optionally wraps it for
    multi-GPU training, fits it, saves the final weights and plots the
    learning curves.

    Args:
        base_path: Project directory holding train_hparams.yaml and outputs.
        gpu_mon: GPU monitor object used to pick free GPUs (may be falsy).
        num_GPUs: Number of GPUs to train on (overridden when force_GPU set).
        continue_training: Resume from an existing model if True.
        force_GPU: Comma-separated GPU id string to pin, bypassing gpu_mon.
        just_one / no_val / no_images / debug: preprocessing & debug flags
            forwarded to the preprocessing function / TF debugger.
        wait_for: PID(s) to wait on before starting (queueing behind a job).
        logger: Callable logger object.
        train_images_per_epoch / val_images_per_epoch: sampling counts per epoch.
        **kwargs: Ignored; absorbs extra CLI arguments.
    """
    from MultiPlanarUNet.train import Trainer, YAMLHParams
    from MultiPlanarUNet.models import model_initializer
    from MultiPlanarUNet.preprocessing import get_preprocessing_func

    # Read in hyperparameters from YAML file
    hparams = YAMLHParams(base_path + "/train_hparams.yaml", logger=logger)
    # NOTE(review): validate_hparams and os are not defined/imported in this
    # chunk — presumably module-level; verify at file top.
    validate_hparams(hparams)

    # Wait for PID? (lets this job queue behind another running process)
    if wait_for:
        from MultiPlanarUNet.utils import await_PIDs
        await_PIDs(wait_for)

    # Prepare Sequence generators and potential model specific hparam changes.
    # The preprocessing function is selected by the model class name and may
    # mutate/return an updated hparams object.
    f = get_preprocessing_func(hparams["build"].get("model_class_name"))
    train, val, hparams = f(hparams, logger=logger, just_one=just_one,
                            no_val=no_val, continue_training=continue_training,
                            base_path=base_path)

    if gpu_mon:
        # Wait for free GPU
        if not force_GPU:
            gpu_mon.await_and_set_free_GPU(N=num_GPUs, sleep_seconds=120)
        else:
            # NOTE(review): attribute assignment, not a call — assumes
            # GPUMonitor exposes set_GPUs as a property setter; confirm.
            gpu_mon.set_GPUs = force_GPU
            num_GPUs = len(force_GPU.split(","))
        gpu_mon.stop()

    # Build new model (or continue training an existing one)
    org_model = model_initializer(hparams, continue_training, base_path, logger)

    # Initialize weights in final layer? Only for fresh models when the
    # hparams request a class-frequency-biased output layer.
    if not continue_training and hparams["build"].get("biased_output_layer"):
        from MultiPlanarUNet.utils.utils import set_bias_weights_on_all_outputs
        set_bias_weights_on_all_outputs(org_model, train, hparams, logger)

    # Multi-GPU? Keep a handle to the original model (org_model) — weights
    # are saved from it, not from the multi-GPU wrapper.
    if num_GPUs > 1:
        from tensorflow.keras.utils import multi_gpu_model
        model = multi_gpu_model(org_model, gpus=num_GPUs,
                                cpu_merge=False, cpu_relocation=False)
        logger("Creating multi-GPU model: N=%i" % num_GPUs)
    else:
        model = org_model

    # Init trainer
    trainer = Trainer(model, logger=logger)
    trainer.org_model = org_model

    # Compile model
    trainer.compile_model(n_classes=hparams["build"].get("n_classes"),
                          **hparams["fit"])

    # Debug mode? Wrap the backend session in the TF CLI debugger.
    if debug:
        from tensorflow.python import debug as tfdbg
        from tensorflow.keras import backend as k
        k.set_session(tfdbg.LocalCLIDebugWrapperSession(k.get_session()))

    # Fit the model
    _ = trainer.fit(train=train, val=val,
                    train_im_per_epoch=train_images_per_epoch,
                    val_im_per_epoch=val_images_per_epoch,
                    hparams=hparams, no_im=no_images, **hparams["fit"])

    # Save final model weights (usually not used, but maybe....?)
    if not os.path.exists("%s/model" % base_path):
        os.mkdir("%s/model" % base_path)
    model_path = "%s/model/model_weights.h5" % base_path
    logger("Saving current model to: %s" % model_path)
    org_model.save_weights(model_path)

    # Plot learning curves; failure here is non-fatal (best-effort plotting).
    from MultiPlanarUNet.utils.plotting import plot_training_curves
    try:
        plot_training_curves(os.path.join(base_path, "logs", "training.csv"),
                             os.path.join(base_path, "logs", "learning_curve.png"),
                             logy=True)
    except Exception as e:
        logger("Could not plot learning curves due to error:")
        logger(e)
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf  # FIX: was missing — tf.Session() below raised NameError
from tensorflow import keras
from tensorflow.keras.models import load_model
import tensorflow.keras.backend as K
import os

# Force CPU-only execution; must be set before TF initialises any device.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'


def weighted_mse(y_true, y_pred):
    """Mean squared error over the last axis.

    NOTE(review): despite the name, no weighting is applied — this is plain
    MSE. Kept unchanged (name and behaviour) so saved models that reference
    this custom loss by name still load.
    """
    return K.mean(K.square(y_pred - y_true), axis=-1)


# Install a TF1 session as the Keras backend session.
sess = tf.Session()
K.set_session(sess)

nonlin_idx_all = []
np.random.seed(11)  # fixed seed -> reproducible candidate parameters

# Per-dimension lower (rg1) and upper (rg2) bounds for the 7 sampled parameters.
rg1 = np.array([0.02, 1.5, 0.2, 0.5, 1 / 3, 25, 2])
rg2 = np.array([0.03, 3, 0.5, 1, 2 / 3, 3000, 6])
rs = (rg2 - rg1)[np.newaxis, :]
#rg2[5] = 3000
num = 1000000
para_pre = np.random.rand(num, 7)
# Scale uniform [0, 1) samples into [rg1, rg2) per dimension.
para_cand = rg1 + para_pre * (rg2 - rg1)
name = 'DNN_800_200_sigmoid_Adam_Tsample20000_Epoch20000_X7d_Y2d__20191129-032138_trn_ 0.0491_tst_ 0.0850'
bio = io.BytesIO() with h5py.File(bio) as f: model.save(f) return bio.getvalue() def deserialize_model(model_bytes, load_model_fn): """Deserialize model from byte array.""" bio = io.BytesIO(model_bytes) with h5py.File(bio) as f: return load_model_fn(f, custom_objects=CUSTOM_OBJECTS) # Do not use GPU for the session creation. config = tf.ConfigProto(device_count={'GPU': 0}) K.set_session(tf.Session(config=config)) # Build the model. inputs = {col: Input(shape=(1,), name=col) for col in all_cols} embeddings = [Embedding(len(vocab[col]), 10, input_length=1, name='emb_' + col)(inputs[col]) for col in categorical_cols] continuous_bn = Concatenate()([Reshape((1, 1), name='reshape_' + col)(inputs[col]) for col in continuous_cols]) continuous_bn = BatchNormalization()(continuous_bn) x = Concatenate()(embeddings + [continuous_bn]) x = Flatten()(x) x = Dense(1000, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.00005))(x) x = Dense(1000, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.00005))(x) x = Dense(1000, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.00005))(x) x = Dense(500, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.00005))(x) x = Dropout(0.5)(x)
from tensorflow.keras.utils import plot_model from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping from tensorflow.keras import backend as K from math import ceil import argparse from nn_utils.load_data import load_data, load_images_and_maps from nn_utils.utils import listdir_fullpath, get_model_memory_usage import cv2 config = tf.ConfigProto() config.gpu_options.allow_growth = True session = tf.Session(config=config) K.set_session(session) parser = argparse.ArgumentParser(fromfile_prefix_chars='@', description='train neural network') parser.add_argument('--images', help='folder with input images', required=True) parser.add_argument('--maps', help='folder with saliency maps', required=True) parser.add_argument('--loss', help='loss function', required=True) parser.add_argument('--optimizer', help='optimizer', required=True) parser.add_argument('--conv_layers', help='number of conv layers', type=int, default=3) parser.add_argument('--batch_size', help='batch size', type=int, default=10) parser.add_argument('--epochs', help='number of epochs', type=int, default=500) parser.add_argument('--samples', help='number of samples',
def mutate(self):
    """Run the parent-class mutation inside this instance's own TF graph.

    The Keras backend session is re-pointed at ``self.sess`` first so the
    inherited ``mutate`` operates on this instance's private graph/session.
    """
    # Enter our graph, bind our session to the backend, then delegate.
    graph_scope = self.graph.as_default()
    with graph_scope:
        K.set_session(self.sess)
        super().mutate()
import argparse import os.path from data import DataSet from processor import process_image from tensorflow.keras.models import load_model import tensorflow as tf import tensorflow.keras.backend as KTF config = tf.ConfigProto() config.gpu_options.allow_growth = True #dynamic alloc GPU resource config.gpu_options.per_process_gpu_memory_fraction = 0.3 #GPU memory threshold 0.3 session = tf.Session(config=config) # set session KTF.set_session(session) def validate_cnn_model(model_file): data = DataSet() model = load_model(model_file) # Get all our test images. images = glob.glob(os.path.join('data', 'test_full', '**', '*.jpg')) # Count the correct predict result_count = 0 for image in images: print('-' * 80) # Get a random row.
def initialize_vars(sess):
    """Run all TF1 initializer ops on *sess*, then adopt it as the Keras session.

    Initialisation order matches the original: local variables, global
    variables, then lookup tables.
    """
    initializer_factories = (
        tf.local_variables_initializer,
        tf.global_variables_initializer,
        tf.tables_initializer,
    )
    for make_init_op in initializer_factories:
        sess.run(make_init_op())
    # Make this session the default one used by the Keras backend.
    K.set_session(sess)
def _init_session():
    """Fetch the Keras backend's TF1 session, register it, and return it.

    Returns:
        The session object obtained from the Keras backend (created lazily
        by ``backend.get_session()`` on first use).
    """
    from tensorflow.python.keras import backend

    sess = backend.get_session()
    # FIX: removed a stray `tf.get_default_graph()` call whose return value
    # was discarded — it has no side effect in TF1 and was dead code.
    set_session(sess)
    return sess
def run_ddqn(args):
    '''
    run a DDQN training session, or test it's result, with the donkey simulator
    '''
    # only needed if TF==1.13.1 — allow GPU memory to grow on demand and
    # register the session with the Keras backend.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    K.set_session(sess)

    # Construct gym environment. Starts the simulator if path is given.
    env = gym.make(args.env_name, exe_path=args.sim, port=args.port)

    # not working on windows...
    # Graceful shutdown: close the simulator before exiting on SIGINT etc.
    def signal_handler(signal, frame):
        print("catching ctrl+c")
        env.unwrapped.close()
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGABRT, signal_handler)

    # Get size of state and action from environment.
    # NOTE(review): img_rows/img_cols/img_channels and EPISODES are module
    # globals not visible in this chunk — presumably 80x80 grayscale; confirm.
    state_size = (img_rows, img_cols, img_channels)
    action_space = env.action_space  # Steering and Throttle

    try:
        agent = DQNAgent(state_size, action_space, train=not args.test)
        throttle = args.throttle  # Set throttle as constant value
        episodes = []
        if os.path.exists(args.model):
            print("load the saved model")
            agent.load_model(args.model)

        for e in range(EPISODES):
            print("Episode: ", e)
            done = False
            obs = env.reset()
            episode_len = 0
            x_t = agent.process_image(obs)
            # Initial state: the first frame stacked 4 times along channels.
            s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)
            # In Keras, need to reshape
            s_t = s_t.reshape(1, s_t.shape[0], s_t.shape[1], s_t.shape[2])  #1*80*80*4

            while not done:
                # Get action for the current state and go one step in environment
                steering = agent.get_action(s_t)
                action = [steering, throttle]
                next_obs, reward, done, info = env.step(action)
                x_t1 = agent.process_image(next_obs)
                x_t1 = x_t1.reshape(1, x_t1.shape[0], x_t1.shape[1], 1)  #1x80x80x1
                # Slide the frame stack: newest frame + 3 most recent old ones.
                s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3)  #1x80x80x4

                # Save the sample <s, a, r, s'> to the replay memory
                agent.replay_memory(s_t, np.argmax(linear_bin(steering)), reward, s_t1, done)
                agent.update_epsilon()
                if agent.train:
                    agent.train_replay()

                s_t = s_t1
                agent.t = agent.t + 1
                episode_len = episode_len + 1
                # Periodic progress trace every 30 global timesteps.
                if agent.t % 30 == 0:
                    print("EPISODE", e, "TIMESTEP", agent.t, "/ ACTION", action, "/ REWARD", reward, "/ EPISODE LENGTH", episode_len, "/ Q_MAX ", agent.max_Q)

                if done:
                    # Every episode update the target model to be same with model
                    agent.update_target_model()
                    episodes.append(e)
                    # Save model for each episode
                    if agent.train:
                        agent.save_model(args.model)
                    print("episode:", e, " memory length:", len(agent.memory),
                          " epsilon:", agent.epsilon, " episode length:", episode_len)
    except KeyboardInterrupt:
        print("stopping run...")
    finally:
        # Always shut the simulator down, even after errors/interrupts.
        env.unwrapped.close()
def predict(args):
    """Inference program.

    Runs medaka consensus prediction over the requested regions of a BAM:
    long regions are split to a maximum size and processed with batching;
    any regions whose pileup was narrower than the chunk length are re-run
    without chunking afterwards.
    """
    # Silence TF C++ logging unless the package logger is at DEBUG.
    logger_level = logging.getLogger(__package__).level
    if logger_level > logging.DEBUG:
        os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
    # Import TF only after the env var above is set so it takes effect.
    import tensorflow as tf
    from tensorflow.keras import backend as K

    args.regions = medaka.common.get_regions(args.bam, region_strs=args.regions)
    logger = medaka.common.get_named_logger('Predict')
    logger.info('Processing region(s): {}'.format(' '.join(str(r) for r in args.regions)))

    # create output and copy meta
    with medaka.datastore.DataStore(args.model) as ds:
        ds.copy_meta(args.output)
        feature_encoder = ds.get_meta('feature_encoder')

    # Override the stored encoder's tag filtering with CLI values.
    feature_encoder.tag_name = args.tag_name
    feature_encoder.tag_value = args.tag_value
    feature_encoder.tag_keep_missing = args.tag_keep_missing

    logger.info("Setting tensorflow threads to {}.".format(args.threads))
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
    # Cap both intra- and inter-op parallelism at the requested thread count.
    K.set_session(
        tf.Session(
            config=tf.ConfigProto(intra_op_parallelism_threads=args.threads,
                                  inter_op_parallelism_threads=args.threads)))
    if tf.test.is_gpu_available(cuda_only=True):
        logger.info("Found a GPU.")
        logger.info(
            "If cuDNN errors are observed, try setting the environment "
            "variable `TF_FORCE_GPU_ALLOW_GROWTH=true`. To explicitely "
            "disable use of cuDNN use the commandline option "
            "`--disable_cudnn. If OOM (out of memory) errors are found "
            "please reduce batch size.")

    # Split overly long regions to maximum size so as to not create
    # massive feature matrices
    MAX_REGION_SIZE = int(1e6)  # 1Mb
    regions = []
    for region in args.regions:
        if region.size > MAX_REGION_SIZE:
            # chunk_ovlp is mostly used in overlapping pileups (which generally
            # end up being expanded compared to the draft coordinate system)
            regs = region.split(MAX_REGION_SIZE, overlap=args.chunk_ovlp,
                                fixed_size=False)
        else:
            regs = [region]
        regions.extend(regs)

    logger.info("Processing {} long region(s) with batching.".format(len(regions)))
    logger.info("Using model: {}.".format(args.model))
    model = medaka.models.load_model(args.model, time_steps=args.chunk_len,
                                     allow_cudnn=args.allow_cudnn)

    # the returned regions are those where the pileup width is smaller than
    # chunk_len
    remainder_regions = run_prediction(
        args.output, args.bam, regions, model, feature_encoder,
        args.chunk_len, args.chunk_ovlp,
        batch_size=args.batch_size, save_features=args.save_features)

    # short/remainder regions: just do things without chunking. We can do this
    # here because we now have the size of all pileups (and know they are
    # small).
    # TODO: can we avoid calculating pileups twice whilst controlling memory?
    if len(remainder_regions) > 0:
        logger.info("Processing {} short region(s).".format(len(remainder_regions)))
        # Reload the model without fixed time steps for variable-width input.
        model = medaka.models.load_model(args.model, time_steps=None,
                                         allow_cudnn=args.allow_cudnn)
        for region in remainder_regions:
            new_remainders = run_prediction(
                args.output, args.bam, [region[0]], model, feature_encoder,
                args.chunk_len, args.chunk_ovlp,  # these won't be used
                batch_size=args.batch_size, save_features=args.save_features,
                enable_chunking=False)
            if len(new_remainders) > 0:
                # shouldn't get here
                ignored = [x[0] for x in new_remainders]
                n_ignored = len(ignored)
                logger.warning("{} regions were not processed: {}.".format(
                    n_ignored, ignored))

    logger.info("Finished processing all regions.")

    if args.check_output:
        logger.info("Validating and finalising output data.")
        # Opening in append mode triggers the datastore's own validation /
        # finalisation on close; no explicit writes are needed here.
        with medaka.datastore.DataStore(args.output, 'a') as ds:
            pass
def predict(self, sample):
    """Thread-safe forward pass; returns the model's output flattened to 1-D.

    Serialises access via ``self.lock`` and pins the module-level graph so
    the shared Keras model can be called safely from worker threads.
    """
    # Single combined context: hold the lock and enter the shared graph.
    with self.lock, graph.as_default():
        # Re-install the module-level session on the backend when present.
        if sess is not None:
            set_session(sess)
        raw_output = self.model.predict(sample)
        return raw_output.flatten()
from keras.optimizers import Adam from keras import backend as K from keras.layers.merge import add sys.path.append('..') from utils.modifiedRNN import LSTM1, LSTM2, LSTM3, LSTM_forget from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.keras.backend import set_session tf.keras.backend.clear_session() # For easy reset of notebook state. config_proto = tf.compat.v1.ConfigProto() off = rewriter_config_pb2.RewriterConfig.OFF config_proto.graph_options.rewrite_options.arithmetic_optimization = off session = tf.Session(config=config_proto) set_session(session) def ctc_lambda_func(args): y_pred, labels, input_length, label_length = args return K.ctc_batch_cost(labels, y_pred, input_length, label_length) class brnn_keras(object): def __init__(self, args, max_seq_length): self.max_seq_length = max_seq_length self.args = args self.model_init(args) self.ctc_init(args) self.opt_init(args)
def __initialize_vars(self, session):
    """Run the TF1 initializer ops on *session* and adopt it for Keras.

    Order preserved from the original: local variables, global variables,
    then lookup tables.
    """
    pending_inits = [
        tf.local_variables_initializer(),
        tf.global_variables_initializer(),
        tf.tables_initializer(),
    ]
    for init_op in pending_inits:
        session.run(init_op)
    # Make this session the Keras backend's default.
    K.set_session(session)
def ctc_lamba_func(args): y_pred, labels, input_length, label_length = args y_pred = y_pred[:, :, :] return K.ctc_batch_cost(labels, y_pred, input_length, label_length) def get_session(): config = tf.ConfigProto() config.gpu_options.allow_growth = False return tf.Session(config=config) if __name__ == '__main__': K.set_session(get_session()) n_class = len(chars) + 1 print(n_class) input_tensor = Input((96, 64, 3)) x = input_tensor conv_shape = x.get_shape() x = Conv2D(64, (3, 3), strides=(1, 1), padding="same")(x) x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x) x = Conv2D(128, (3, 3), strides=(1, 1), padding="same")(x) x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x) x = Conv2D(256, (3, 3), strides=(1, 1), padding="same")(x) x = BatchNormalization()(x)
#!/usr/bin/env python3
# Works in both TF 1.x and 2.x via the tf.compat.v1 session API.
# (A version gate like `version.parse(tf.__version__).release[0] >= 2`
# could select code paths explicitly; the compat API makes it unnecessary.)
from packaging import version
import tensorflow as tf  # FIX: was imported twice; duplicate removed below
from tensorflow.keras.backend import set_session
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession, Session
import tensorflow.keras as keras
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import Flatten, MaxPooling2D, Conv2D
from tensorflow.keras.callbacks import TensorBoard

# Session configuration: grow GPU memory on demand and log op placement.
config = ConfigProto()
config.gpu_options.allow_growth = True
config.log_device_placement = True  # to log device placement (on which device the operation ran)
session = Session(config=config)
set_session(session)  # set this TensorFlow session as the default session for Keras

# Load MNIST and reshape to NHWC float32 scaled into [0, 1].
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# FIX: use -1 instead of hard-coded 60000/10000 so the reshape adapts to the
# actual number of samples returned.
X_train = X_train.reshape(-1, 28, 28, 1).astype('float32')
X_test = X_test.reshape(-1, 28, 28, 1).astype('float32')
X_train /= 255
X_test /= 255
epochs=40, validation_data=val_generator, validation_steps=80, callbacks=[ callbacks.EarlyStopping(monitor="val_loss", min_delta=1e-7, patience=5, restore_best_weights=True), callbacks.ModelCheckpoint(filepath=file_save, monitor="val_loss", verbose=1, save_best_only=True), ]) model.save(file_save) config = ConfigProto() config.gpu_options.allow_growth = True backend.set_session(Session(config=config)) base_dir = 'myDataset2' original_dataset_dir = 'pedro-dataset' # create_folders(original_dataset_dir,base_dir) lr_rate = [0.00001, 0.0001, 0.0005, 0.0010, 0.005, 0.01, 0.05, 0.1, 0.5, 1] train_gen = read_train_dataset(base_dir) val_gen = read_val_dataset(base_dir) test_gen = read_test_dataset(base_dir) for i in lr_rate: print(i) model = create_conv_net() train_conv_net(model, train_gen, val_gen, i)
return self.model.evaluate(X, y, verbose=0)[1] mem_softmaxes = [] mem_accuracies = [] with controllerGraph.as_default(): controller = Controller() with open("subpolicy_result", "w"): pass controller_iter = tqdm(range(CONTROLLER_EPOCHS), desc='Controller Epoch: ', position=0, file=sys.stdout, leave=False) for epoch in controller_iter: tf.Graph().as_default() session = tf.compat.v1.Session(graph=tf.get_default_graph(), config=config) backend.set_session(session) child = Child(Xtr.shape[1:]) attack_func_map = { 'fgsm' : fgsm(child.model), 'df' : df(child.model), 'mim' : mim(child.model), } with controllerGraph.as_default(): softmaxes, subpolicies = controller.predict(SUBPOLICIES, argmax=epoch % 10 == 9) # mem_softmaxes.append(softmaxes) child.fit(subpolicies, Xtr, ytr, log_file=f'runs/{epoch}.json', save_file=f'runs/{epoch}.h5') raw_accuracy = child.evaluate(Xts, yts)
def train(dataset): flag_values_dict = FLAGS.flag_values_dict() pprint(flag_values_dict, indent=4) with open(os.path.join(FLAGS.model, FLAGS_FILENAME), 'w') as f: json.dump(flag_values_dict, f, indent=4) # FLAGS.weight_ad is parsed to [coverage_min, coverage_max], threshold to apply adaptive weight if FLAGS.weight_ad is not None: weight_adaptive = [float(x) for x in FLAGS.weight_ad] else: weight_adaptive = None with tf.device('/cpu:0'): iter_train, iter_valid = dataset.gen_train_valid( n_splits=N_SPLITS, idx_kfold=FLAGS.cv, batch_size=FLAGS.batch_size, adjust=FLAGS.adjust, weight_fg=FLAGS.weight_fg, weight_bg=FLAGS.weight_bg, weight_adaptive=weight_adaptive, filter_vert_hori=FLAGS.filter_vert_hori, ignore_tiny=FLAGS.ignore_tiny, augment_dict=augment_dict(), deep_supervised=FLAGS.deep_supervised, mask_padding=FLAGS.mask_padding, with_depth=FLAGS.with_depth) sess = tf.Session(config=tf.ConfigProto( allow_soft_placement=True, gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.9, allow_growth=True))) K.set_session(sess) if FLAGS.debug: debug_img_show(iter_train, iter_valid, sess) with tf.device('/gpu:0'): if FLAGS.restore is not None: path_restore = os.path.join(FLAGS.restore, NAME_MODEL) print("Restoring model from {}".format(path_restore)) model = load_model(path_restore, compile=False) elif FLAGS.contrib is not None: model = build_model_contrib(IM_HEIGHT, IM_WIDTH, IM_CHAN, encoder=FLAGS.contrib, residual_unit=FLAGS.residual_unit, spatial_dropout=FLAGS.spatial_dropout, preprocess=FLAGS.preprocess, last_kernel=FLAGS.last_kernel, last_1x1=FLAGS.last_1x1) elif FLAGS.pretrained is not None: if not FLAGS.deep_supervised: model = build_model_pretrained( IM_HEIGHT, IM_WIDTH, IM_CHAN, encoder=FLAGS.pretrained, spatial_dropout=FLAGS.spatial_dropout, retrain=FLAGS.retrain, preprocess=FLAGS.preprocess, renorm=FLAGS.renorm, last_kernel=FLAGS.last_kernel, last_1x1=FLAGS.last_1x1) else: model = build_model_pretrained_deep_supervised( IM_HEIGHT, IM_WIDTH, IM_CHAN, 
encoder=FLAGS.pretrained, spatial_dropout=FLAGS.spatial_dropout, retrain=FLAGS.retrain, preprocess=FLAGS.preprocess, last_kernel=FLAGS.last_kernel, last_1x1=FLAGS.last_1x1) elif FLAGS.use_ref2: model = build_model_ref2(IM_HEIGHT, IM_WIDTH, IM_CHAN, preprocess=FLAGS.preprocess) elif FLAGS.use_ref: model = build_model_ref(IM_HEIGHT, IM_WIDTH, IM_CHAN, batch_norm=FLAGS.batch_norm, drop_out=FLAGS.drop_out, depth=FLAGS.depth, start_ch=FLAGS.start_ch) else: model = build_model(IM_HEIGHT, IM_WIDTH, IM_CHAN, batch_norm=FLAGS.batch_norm, drop_out=FLAGS.drop_out) if FLAGS.restore_weight is not None: path_weight = os.path.join(FLAGS.restore_weight, NAME_MODEL) print("Restoring weights from {}".format(path_weight)) model.load_weights(path_weight, by_name=True) model = compile_model(model, optimizer=FLAGS.opt, loss=FLAGS.loss, weight_decay=FLAGS.weight_decay, exclude_bn=FLAGS.exclude_bn, deep_supervised=FLAGS.deep_supervised) write_summary(model, os.path.join(FLAGS.model, MODEL_SUMMARY_FILENAME)) model.summary() path_model = os.path.join(FLAGS.model, NAME_MODEL) if not FLAGS.deep_supervised: monitor = 'val_weighted_mean_score' else: monitor = 'val_output_final_weighted_mean_score' checkpointer = ModelCheckpoint(path_model, monitor=monitor, verbose=1, save_best_only=FLAGS.save_best_only, mode='max') tensorboarder = MyTensorBoard(FLAGS.log, model=model) if not FLAGS.cyclic: lrscheduler = LearningRateScheduler(StepDecay(FLAGS.lr, FLAGS.lr_decay, FLAGS.epochs_decay, FLAGS.freeze_once), verbose=1) else: lrscheduler = LearningRateScheduler(CLRDecay( FLAGS.lr, max_lr=FLAGS.max_lr, epoch_size=FLAGS.epoch_size, mode=FLAGS.mode_clr, freeze_once=FLAGS.freeze_once), verbose=1) callbacks = [checkpointer, tensorboarder, lrscheduler] if FLAGS.early_stopping: callbacks.append(EarlyStopping(patience=5, verbose=1)) if FLAGS.reduce_on_plateau: lrreducer = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=8, verbose=1, mode='min', epsilon=0.0001, cooldown=4) callbacks.append(lrreducer) 
num_train, num_valid = dataset.len_train_valid(n_splits=N_SPLITS, idx_kfold=FLAGS.cv) steps_per_epoch = int(num_train / FLAGS.batch_size) validation_steps = int(num_valid / FLAGS.batch_size) results = model.fit(x=iter_train, validation_data=iter_valid, epochs=FLAGS.epochs, steps_per_epoch=steps_per_epoch, validation_steps=validation_steps, shuffle=True, callbacks=callbacks)
import tensorflow as tf from tensorflow.keras.backend import clear_session, set_session from tensorflow.keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D, BatchNormalization from tensorflow.keras.models import load_model from tensorflow.keras.preprocessing.image import ImageDataGenerator import cv2 import numpy as np import copy app = Flask(__name__) class_dict = ['Choroidal Neovascularization (CNV)', 'Diabetic Macular Edema (DME)', 'DRUSEN', 'NORMAL'] pred_datagen = ImageDataGenerator(rescale=1./255) sess = tf.Session() graph = tf.get_default_graph() set_session(sess) model = load_model('./model/model.hdf5') # model._make_predict_function() # print(model.predict(np.ones((1,256,256,1)))) def predict(img): return model.predict(img) @app.route('/') def index(): return render_template('index.html') @app.route('/predict', methods = ['POST']) def get_result(): global model global graph
def __init__(self, config, filenames, labels, classes_semantics, num_run=0, resume_model_path=False, resume=False): ############################################################################################# # LIBRARIES ############################################################################################# import os import numpy as np import tensorflow as tf from tensorflow.python import pywrap_tensorflow from tensorflow.keras import optimizers, losses, models, backend, layers, metrics from tensorflow.keras.utils import multi_gpu_model self.run_path = os.path.dirname(os.path.realpath(__file__)) os.chdir(self.run_path) utils = local_module("utils") logger = local_module("logger") lossnet = local_module("lossnet") data_pipeline = local_module("data_pipeline") backbones = local_module("backbones") ############################################################################################# # PARAMETERS RUN ############################################################################################# self.config = config self.filenames = filenames self.labels = labels self.num_run = num_run self.group = "Stage_" + str(num_run) self.name_run = "Train_" + self.group self.run_dir = os.path.join(config["PROJECT"]["group_dir"], self.group) self.run_dir_check = os.path.join(self.run_dir, 'checkpoints') self.checkpoints_path = os.path.join(self.run_dir_check, 'checkpoint.{epoch:03d}.hdf5') self.user = get_user() self.training_thread = None self.resume_training = resume self.list_classes = classes_semantics #self.num_data_train = len(labeled_set) self.resume_model_path = resume_model_path self.transfer_weight_path = self.config['TRAIN'][ "transfer_weight_path"] self.input_shape = [ self.config["NETWORK"]["INPUT_SIZE"], self.config["NETWORK"]["INPUT_SIZE"], 3 ] self.pre = '\033[1;36m' + self.name_run + '\033[0;0m' #"____" # self.problem = '\033[1;31m' + self.name_run + '\033[0;0m' # Creating the train folde import shutil # create base dir and gr if 
os.path.exists(config["PROJECT"]["project_dir"]) is False: os.mkdir(config["PROJECT"]["project_dir"]) if os.path.exists(self.run_dir) and self.resume_model_path is False: shutil.rmtree(config["PROJECT"]["group_dir"]) os.mkdir(config["PROJECT"]["group_dir"]) if os.path.exists(config["PROJECT"]["group_dir"]) is False: os.mkdir(config["PROJECT"]["group_dir"]) if os.path.exists(self.run_dir) is False: os.mkdir(self.run_dir) if os.path.exists(self.run_dir_check) is False: os.mkdir(self.run_dir_check) ############################################################################################# # SETUP TENSORFLOW SESSION ############################################################################################# # Create a MirroredStrategy. #self.strategy = tf.distribute.MirroredStrategy() #print(self.pre,'Number of devices: {}'.format(self.strategy.num_replicas_in_sync)) #with self.strategy.scope(): if True: self.graph = tf.Graph() with self.graph.as_default(): config_tf = tf.ConfigProto(allow_soft_placement=True) config_tf.gpu_options.allow_growth = True self.sess = tf.Session(config=config_tf, graph=self.graph) backend.set_session(self.sess) with self.sess.as_default(): ############################################################################################# # SETUP WANDB ############################################################################################# import wandb self.wandb = wandb self.wandb.init(project=config["PROJECT"]["project"], group=config["PROJECT"]["group"], name="Train_" + str(num_run), job_type=self.group, sync_tensorboard=True, config=config) ############################################################################################# # LOAD DATA ############################################################################################# self.DataGen = data_pipeline.ClassificationDataset_AL( config["TRAIN"]["batch_size"], self.filenames, self.labels, self.list_classes, subset="train", original_size=config["DATASET"]["original_size"], 
data_augmentation=config["DATASET"] ["Data_augementation"], random_flip=config["DATASET"]["random_flip"], pad=config["DATASET"]["pad"], random_crop_pad=config["DATASET"]["random_crop_pad"], random_hue=config["DATASET"]["random_hue"], random_brightness=config["DATASET"] ["random_brightness"], random_saturation=config["DATASET"] ["random_saturation"]) self.num_class = len(self.DataGen.list_classes) ############################################################################################# # GLOBAL PROGRESS ############################################################################################# self.steps_per_epoch = int( np.ceil(self.DataGen.nb_elements / config["TRAIN"]["batch_size"])) self.split_epoch = self.config['TRAIN']["EPOCH_WHOLE"] self.total_epochs = self.config['TRAIN'][ "EPOCH_WHOLE"] + self.config['TRAIN']["EPOCH_SLIT"] self.total_steps = self.steps_per_epoch * self.total_epochs ############################################################################################# # DEFINE CLASSIFIER ############################################################################################# # set input img_input = tf.keras.Input( tensor=self.DataGen.images_tensor, name='input_image') #img_input = tf.keras.Input(self.input_shape,name= 'input_image') include_top = True # Get the selected backbone """ ResNet18 ResNet50 ResNet101 ResNet152 ResNet50V2 ResNet101V2 ResNet152V2 ResNeXt50 ResNeXt101 """ print(self.pre, "The backbone is: ", self.config["NETWORK"]["Backbone"]) self.backbone = getattr(backbones, self.config["NETWORK"]["Backbone"]) # c_pred_features = self.backbone(input_tensor=img_input, classes=self.num_class, include_top=include_top) self.c_pred_features = c_pred_features if include_top: # include top classifier # class predictions c_pred = c_pred_features[0] else: x = layers.GlobalAveragePooling2D(name='pool1')( c_pred_features[0]) x = layers.Dense(self.num_class, name='fc1')(x) c_pred = layers.Activation('softmax', name='c_pred')(x) 
c_pred_features[0] = c_pred #self.classifier = models.Model(inputs=[img_input], outputs=c_pred_features,name='Classifier') ############################################################################################# # DEFINE FULL MODEL ############################################################################################# #c_pred_features_1 = self.classifier(img_input) #c_pred_1 = c_pred_features[0] loss_pred_embeddings = lossnet.Lossnet( c_pred_features, self.config["NETWORK"]["embedding_size"]) model_inputs = [img_input] model_outputs = [c_pred] + loss_pred_embeddings self.model = models.Model( inputs=model_inputs, outputs=model_outputs) #, embedding_s] ) ######################################## # INIT GLOBAL VARIABLES ####################################### self.sess.run(tf.global_variables_initializer()) ############################################################################################# # LOAD PREVIUS WEIGTHS ############################################################################################# if self.resume_model_path: # check the epoch where is loaded try: loaded_epoch = int( self.resume_model_path.split('.')[-2]) print(self.pre, "Loading weigths from: ", self.resume_model_path) print(self.pre, "The detected epoch is: ", loaded_epoch) # load weigths self.model.load_weights(self.resume_model_path) except: print(self.problem, "=> Problem loading the weights from ", self.resume_model_path) print(self.problem, '=> It will rain from scratch') elif self.transfer_weight_path: try: print( self.pre, "(transfer learning) Loading weigths by name from: ", self.transfer_weight_path) # load weigths self.model.load_weights(self.transfer_weight_path, by_name=True) except: print( self.problem, "=>(transfer learning) Problem loading the weights from ", self.transfer_weight_path) print(self.problem, '=> It will rain from scratch') if self.resume_training: self.current_epoch = loaded_epoch self.current_step = loaded_epoch * self.steps_per_epoch if 
self.current_epoch > self.total_epochs: raise ValueError( "The starting epoch is higher that the total epochs" ) else: print(self.pre, "Resuming the training from stage: ", self.num_run, " at epoch ", self.current_epoch) else: self.current_epoch = 0 self.current_step = 0 ############################################################################################# # DEFINE WEIGHT DECAY ############################################################################################# if self.config['TRAIN']['apply_weight_decay']: utils.add_weight_decay( self.model, self.config['TRAIN']['weight_decay']) ############################################################################################# # DEFINE LOSSES ############################################################################################# # losses self.loss_dict = {} self.loss_dict[ 'c_pred'] = losses.sparse_categorical_crossentropy self.loss_dict['l_pred_w'] = lossnet.Loss_Lossnet self.loss_dict['l_pred_s'] = lossnet.Loss_Lossnet # weights self.weight_w = backend.variable( self.config['TRAIN']['weight_lossnet_loss']) self.weight_s = backend.variable(0) self.loss_w_dict = {} self.loss_w_dict['c_pred'] = 1.0 self.loss_w_dict['l_pred_w'] = self.weight_w self.loss_w_dict['l_pred_s'] = self.weight_s #self.loss_w_dict['Embedding'] = 0 ############################################################################################# # DEFINE METRICS ############################################################################################# # metrics self.metrics_dict = {} self.metrics_dict[ 'c_pred'] = tf.keras.metrics.SparseCategoricalAccuracy( ) #self.metrics_dict['l_pred_w'] = lossnet.MAE_Lossnet #self.metrics_dict['l_pred_s'] = lossnet.MAE_Lossnet ############################################################################################# # DEFINE OPTIMIZER ############################################################################################# self.opt = optimizers.Adam(lr=self.config['TRAIN']['lr']) 
############################################################################################# # DEFINE CALLBACKS ############################################################################################# # Checkpoint saver self.callbacks = [] model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint( filepath=self.checkpoints_path, save_weights_only=True, period=self.config["TRAIN"]["test_each"]) self.callbacks.append(model_checkpoint_callback) # Callback to wandb # self.callbacks.append(self.wandb.keras.WandbCallback()) # Callback Learning Rate def scheduler(epoch): lr = self.config['TRAIN']['lr'] for i in self.config['TRAIN']['MILESTONES']: if epoch > i: lr *= 0.1 return lr self.callbacks.append( tf.keras.callbacks.LearningRateScheduler(scheduler)) # callback to change the weigths for the split training: self.callbacks.append( lossnet.Change_loss_weights( self.weight_w, self.weight_s, self.split_epoch, self.config['TRAIN']['weight_lossnet_loss'])) ################## # SETUP WATCHER ################## self.run_watcher = get_run_watcher() self.run_watcher.add_run.remote( name=self.name_run, user=self.user, progress=0, wandb_url=self.wandb.run.get_url(), status="Idle") # Callback update progress self.Update_progress = logger.Update_progress( self.run_watcher, self.wandb, self.name_run, self.steps_per_epoch, self.total_epochs, self.total_steps, self.current_epoch, self.current_step) self.callbacks.append(self.Update_progress) ############################################################################################# # COMPILE MODEL ############################################################################################# self.model.compile( loss=self.loss_dict, loss_weights=self.loss_w_dict, metrics=self.metrics_dict, optimizer=self.opt, target_tensors=self.DataGen.labels_tensor) ######################################## # INIT LOCAL VARIABLES ####################################### self.sess.run(tf.local_variables_initializer()) print(self.pre, 'Init 
done')
def main(mname, model_dir, batch_size, epochs, eval_steps, eps_log_steps):
    """Train and evaluate a CIFAR-10 Keras model via the tf.estimator API.

    Sets up file logging, builds train/eval tf.data input functions, converts
    the Keras model to an Estimator, runs train_and_evaluate while a stats
    collector thread samples CPU/GPU usage, then uploads (HDFS) or zips
    (local) the resulting logs.

    Args:
        mname: model selector; "cnn" builds the CNN model, anything else the ResNet model.
        model_dir: local path or hdfs:// URL for checkpoints and logs.
        batch_size: per-step batch size for both training and evaluation.
        epochs: number of passes over the training data (used to derive max_steps).
        eval_steps: checkpoint/evaluation interval in steps; defaults to one epoch when None.
        eps_log_steps: interval (steps) for the examples-per-second hook.

    Side effects: mutates module-level `model_dir_hdfs`, `input_name` and
    `is_training`; creates/overwrites log files; spawns a daemon-less stats
    thread; shells out to curl for HDFS upload.
    """
    global model_dir_hdfs
    if model_dir.startswith('hdfs'):
        model_dir_hdfs = True
    tf.logging.set_verbosity(tf.logging.DEBUG)
    # get TF logger
    log.setLevel(logging.DEBUG)
    # create formatter and add it to the handlers
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    # create file handler which logs even debug messages
    if model_dir_hdfs is False:
        if os.path.exists(model_dir) is False:
            os.makedirs(model_dir)
        log_dir = model_dir
    else:
        # HDFS target: checkpoints go to a timestamped job dir, logs are
        # written locally and uploaded at the end of the run.
        model_dir = os.path.join(
            model_dir,
            "job_cifar10_" + datetime.datetime.now().strftime('%Y-%m-%d_%H-%M'))
        log_dir = '.'
    # clear old log files
    with open(log_dir + '/tensorflow.log', 'w'):
        pass
    with open(log_dir + '/gpu.csv', 'w'):
        pass
    with open(log_dir + '/cpu.csv', 'w'):
        pass
    fh = logging.FileHandler(log_dir + '/tensorflow.log')
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(formatter)
    log.addHandler(fh)
    log.info("TF version: %s", tf.__version__)
    log.info("Model directory: %s", model_dir)
    log.info("Batch size: %s", batch_size)
    log.info("Prefetch data all to memory: %s", True)
    log.info("Train epochs: %s", epochs)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
    config.log_device_placement = True  # to log device placement (on which device the operation ran)
    sess = tf.Session(config=config)
    ktf.set_session(
        sess)  # set this TensorFlow session as the default session for Keras
    # NOTE(review): float division here — steps_per_epoch/eval_steps stay
    # floats if train_len() isn't a multiple of batch_size; confirm intended.
    steps_per_epoch = cifar10_data.train_len() / batch_size
    log.info("Steps per epoch: %s", steps_per_epoch)
    if eval_steps is None:
        eval_steps = steps_per_epoch
    log.info("Evaluating each %i steps", eval_steps)
    if mname == "cnn":
        model = cifar10_model_cnn.cifar_model()
    else:
        model = cifar10_model_resnet.cifar_model()
    global input_name
    input_name = 'input_1'
    model.summary()

    def train_input_fn():
        # Training pipeline: generator -> batch -> prefetch; `shapes` is
        # assumed to be a module-level (features, labels) shape pair.
        dataset = tf.data.Dataset.from_generator(
            generator=cifar10_data.generator_train,
            output_types=(tf.float32, tf.float32),
            output_shapes=shapes)
        dataset = dataset.batch(batch_size)
        dataset = dataset.prefetch(buffer_size=batch_size)
        # dataset = dataset.repeat(20)
        iterator = dataset.make_one_shot_iterator()
        features_tensors, labels = iterator.get_next()
        # Estimator expects features keyed by the Keras input layer name.
        features = {input_name: features_tensors}
        return features, labels

    def eval_input_fn():
        # Same pipeline as training but fed from the test generator.
        dataset = tf.data.Dataset.from_generator(
            generator=cifar10_data.generator_test,
            output_types=(tf.float32, tf.float32),
            output_shapes=shapes)
        dataset = dataset.batch(batch_size)
        dataset = dataset.prefetch(buffer_size=batch_size)
        iterator = dataset.make_one_shot_iterator()
        features_tensors, labels = iterator.get_next()
        features = {input_name: features_tensors}
        return features, labels

    my_config = RunConfig(
        save_checkpoints_steps=eval_steps  # Save checkpoints every n steps and run the evaluation.
        # keep_checkpoint_max = 5 # Retain the n most recent checkpoints (default 5).
    )
    estimator = tf.keras.estimator.model_to_estimator(model,
                                                      config=my_config,
                                                      model_dir=model_dir)
    examples_sec_hook = ExamplesPerSecondHook(batch_size,
                                              every_n_steps=eps_log_steps)
    # stopping_hook = early_stopping.stop_if_higher_hook(estimator, "accuracy", 0.5)
    train_hooks = [examples_sec_hook]
    train_spec = TrainSpec(input_fn=train_input_fn,
                           hooks=train_hooks,
                           max_steps=cifar10_data.train_len() / batch_size * epochs)
    eval_spec = EvalSpec(input_fn=eval_input_fn,
                         steps=cifar10_data.val_len() / batch_size,
                         throttle_secs=5)  # default 100 steps
    global is_training
    is_training = True
    # Background thread samples CPU/GPU stats into csv files until
    # is_training is flipped back to False below.
    threading.Thread(target=lambda: collect_stats(log_dir)).start()
    start = time.time()
    train_and_evaluate(estimator, train_spec, eval_spec)
    elapsed = time.time() - start
    is_training = False
    log.info("total time taken (seconds): %s ", elapsed)
    if model_dir_hdfs:
        # Upload local log files to HDFS via the WebHDFS REST endpoint.
        parse_res = parse.urlsplit(model_dir)
        netloc = parse_res[1]
        path = parse_res[2]
        webhdfs_model_dir = 'http://' + netloc + ':50070/webhdfs/v1' + path
        username = getpass.getuser()
        component_name = estimator.config.task_type + str(
            estimator.config.task_id)
        log.info("Uploading log files for %s as %s to HDFS path: %s",
                 component_name, username, webhdfs_model_dir)
        # Shut logging down first so tensorflow.log is flushed before upload.
        logging.shutdown()
        os.system('curl -L -i -T tensorflow.log "' + webhdfs_model_dir +
                  '/tensorflow-' + component_name +
                  '.log?op=CREATE&overwrite=false&user.name=' + username + '"')
        os.system('curl -L -i -T cpu.csv "' + webhdfs_model_dir + '/cpu-' +
                  component_name + '.csv?op=CREATE&overwrite=false&user.name=' +
                  username + '"')
        os.system('curl -L -i -T gpu.csv "' + webhdfs_model_dir + '/gpu-' +
                  component_name + '.csv?op=CREATE&overwrite=false&user.name=' +
                  username + '"')
    else:
        log.info("Creating zip archive of job results")
        logging.shutdown()
        shutil.make_archive(model_dir, 'zip', model_dir)
class Config:
    """Static configuration holder: every setting is a class attribute.

    NOTE(review): evaluating this class body has side effects — it creates a
    TF session, registers it with Keras, and loads the dataset at import
    time. All attributes are read directly as ``Config.<name>`` elsewhere.
    """
    #
    # general config
    #
    epoch_display_periods = 10  # epoch display periods
    summaries_dir = "./summaries"  # tensorboard writer target directory
    model_dir = "checkpoints"  # save model in this directory
    save_periods = 100  # save periods
    # Shared TF session config: soft placement on, growable GPU memory.
    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 log_device_placement=False)
    sess_config.gpu_options.allow_growth = True
    keras_sess = tf.Session(config=sess_config)
    K.set_session(keras_sess)  # make this session the Keras default
    #
    # environment config
    #
    environment_combination_len = 3
    environment_combinations_num = 10
    #
    # actor config
    #
    lr = 0.001  # learning rate
    gamma = 0.5  # the discount factor in G
    value_scale = 0.5  # the weight of value function approximation in total loss
    reinforce_batch_size = 100  # batch size used in Reinforce algorithm
    gradient_clip = 40  # graient clip, avoid too large gradient
    #
    # encoder config
    #
    encoder_dim = 64
    #
    # reinforce config
    #
    reinforce_logdir = "./summaries/reinforce_logdir"
    reinforce_learning_rate = 0.001
    #
    # evaluator configs
    #
    evaluator_model_name = "lr"  # 'pin', 'lr'
    evaluator_optimizer_name = 'adam'
    evaluator_learning_rate = 0.03
    evaluator_epsilon = 1e-4
    evaluator_max_rounds = 2000
    evaluator_early_stop = 8
    evaluator_embedding_size = 20
    evaluator_log_step_frequency = 0
    evaluator_eval_round_frequency = 1
    evaluator_train_logdir = "./summaries/evaluator_train"
    evaluator_valid_logdir = "./summaries/evaluator_valid"
    evaluator_graph_logdir = "./summaries/evaluator_graph"
    #
    # dataset
    #
    data_name = "Couple"
    # Dataset is loaded eagerly here (train + test splits) and its metadata
    # (field/feature counts) is exposed as class attributes below.
    dataset = as_dataset(data_name, True)
    dataset.load_data(gen_type='train')
    dataset.load_data(gen_type='test')
    dataset.summary()
    num_fields = dataset.num_fields
    feat_sizes = dataset.feat_sizes
    feat_min = dataset.feat_min
    target_combination_num = 30
    target_combination_len = 4
def save_model(self, filename):
    """Persist the full Keras model (architecture + weights) to `filename`.

    The save runs inside this wrapper's own graph, with its own session
    re-bound as the Keras default, so it is safe to call even when another
    graph/session is currently active.
    """
    target_graph = self.graph
    with target_graph.as_default():
        # Re-bind this instance's session before saving so variable values
        # are read from the right session.
        K.set_session(self.sess)
        self.model.save(filename)
def _plot_history_curve(history, train_key, val_key, title, ylabel):
    """Plot one train-vs-validation curve from a Keras History object.

    Renders both series on a single figure with the file's standard
    labels/legend and shows it. Extracted because the original repeated
    this seven-line block six times (acc/loss x three tasks).
    """
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()


def train_model():
    """Train the multi-task WSD model and plot/serialize the results.

    Builds the Keras model via `create_multi_task_model_keras`, fits it on
    the module-level train/dev arrays with checkpoint + TensorBoard
    callbacks, plots accuracy and loss curves for each of the three tasks
    (babelnet, domains, lexnames), then writes the architecture as JSON and
    the weights as HDF5 to the module-level resource paths.
    """
    # to use GPU on colab
    warnings.filterwarnings('ignore')
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    config = tf.ConfigProto()
    # dynamically grow the memory used on the GPU
    config.gpu_options.allow_growth = True
    # to log device placement (on which device the operation ran)
    config.log_device_placement = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    # (nothing gets printed in Jupyter, only if you run it standalone)
    sess = tf.Session(config=config)
    # set this TensorFlow session as the default session for Keras
    set_session(sess)
    batch_size = 32
    model = create_multi_task_model_keras(VOCAB_SIZE, EMBEDDING_SIZE,
                                          HIDDEN_SIZE, LOSS_LIST, METRICS)
    model.summary()
    tensorboard = K.callbacks.TensorBoard("logging/keras_model",
                                          histogram_freq=50)
    print("\nStarting training...")
    # Keep only the best model (lowest val_loss) on disk.
    checkpoint = K.callbacks.ModelCheckpoint(resources_path_model,
                                             monitor='val_loss',
                                             verbose=1,
                                             save_best_only=True,
                                             mode='min')
    cbk = [checkpoint, tensorboard]
    # Labels are expanded to (batch, seq, 1) as sparse targets per task.
    history = model.fit(x_train,
                        [np.expand_dims(y_train_babelnet, axis=2),
                         np.expand_dims(y_train_domains, axis=2),
                         np.expand_dims(y_train_lexnames, axis=2)],
                        epochs=EPOCHS,
                        shuffle=True,
                        batch_size=batch_size,
                        validation_data=(x_dev, [
                            np.expand_dims(y_dev_babelnet, axis=2),
                            np.expand_dims(y_dev_domains, axis=2),
                            np.expand_dims(y_dev_lexnames, axis=2)]),
                        callbacks=cbk)
    # list all data in history
    print(history.history.keys())
    # One accuracy plot and one loss plot per task, in the original order.
    for task in ('babelnet', 'domains', 'lexnames'):
        _plot_history_curve(history, task + '_acc', 'val_' + task + '_acc',
                            'model accuracy ' + task, 'accuracy')
        _plot_history_curve(history, task + '_loss', 'val_' + task + '_loss',
                            'model loss ' + task, 'loss')
    # save model and wheights of model (json)
    model_json = model.to_json()
    with open(resources_path_model, "w") as json_file:
        json_file.write(model_json)
    print("Saved model to file (format json): " + resources_path_model)
    # serialize weights to HDF5
    model.save_weights(resources_path_weights)
    print("Saved weights of model to file : " + resources_path_weights)
""" import tensorflow as tf from tensorflow.keras import backend as K from futils import segmentor as v_seg from futils.compute_distance_metrics_and_save import write_all_metrics from futils.find_connect_parts import write_connected_lobes from futils.generate_fissure_from_masks import gntFissure from futils.mypath import Mypath from futils.write_batch_preds import write_preds_to_disk config = tf.ConfigProto() config.gpu_options.allow_growth = True # dynamically grow the memory used on the GPU sess = tf.Session(config=config) K.set_session( sess) # set this TensorFlow session as the default session for Keras ''' '1585000573.7211952_0.00011a_o_0ds2dr1bn1fs16ptsz144ptzsz64', '1584924602.9965076_0.00010a_o_0ds2dr1bn1fs16ptsz144ptzsz64', '1584925363.1298258_0.00010a_o_0ds2dr1bn1fs16ptsz96ptzsz64' 1587041504.5222292_0.00010a_o_0ds2dr1bn1fs16ptsz144ptzsz64 1587846165.2829812_0.00010a_o_0ds2dr1bn1fs16ptsz144ptzsz64 1587858645.924413_0.00010a_o_0ds2dr1bn1fs8ptsz144ptzsz96 1587858294.826981_0.00010a_o_0ds2dr1bn1fs8ptsz144ptzsz96 1587857822.602289_0.00010a_o_0ds2dr1bn1fs8ptsz144ptzsz96 1587852304.1056986_0.00010a_o_0ds2dr1bn1fs4ptsz144ptzsz96 1587852304.1056986_0.00010a_o_0ds2dr1bn1fs4ptsz144ptzsz96 1587848974.2342067_0.00010a_o_0ds2dr1bn1fs8ptsz144ptzsz96 1587848927.819794_0.00010a_o_0ds2dr1bn1fs16ptsz144ptzsz96 1587846165.2829812_0.00010a_o_0ds2dr1bn1fs16ptsz144ptzsz64
def test_run(model_path, rnn_type="simple", abbr_test_mods="U", device="0"):
    """Evaluate a trained model on the test split and report metrics.

    Builds a fresh TF session pinned to `device`, reconstructs a test-time
    model from the checkpoint at `model_path`, runs prediction over the test
    generator, writes predictions/evaluation files under
    <model_dir>/test/<modalities>/, and returns the evaluation scores.

    Args:
        model_path: path to the trained model file; its directory name encodes
            split/length/encodertype metadata parsed by `get_model_info`.
        rnn_type: RNN variant passed through to `build_test_model`.
        abbr_test_mods: abbreviated modality string (e.g. "U" for user-only).
        device: CUDA device index string for CUDA_VISIBLE_DEVICES.

    Returns:
        (nmse, corr, srcc) as produced by `evaluate`.
    """
    ### Set tensorflow session
    tf.reset_default_graph()
    os.environ["CUDA_VISIBLE_DEVICES"] = device
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    K.set_session(sess)
    ### Save path to the prediction result
    model_info = get_model_info(model_path)
    model_root = os.path.split(model_path)[0]
    test_root = os.path.join(model_root, "test", std_mods(abbr_test_mods))
    if not os.path.exists(test_root):
        os.makedirs(test_root)
    pred_path = os.path.join(test_root, "predict.npy")
    ### Get the test data generator
    feature_root = os.path.join("data")
    split_root = os.path.join(feature_root, "split", str(model_info["split"]))
    target_root = os.path.join(feature_root,
                               "len_{}".format(model_info["length"]))
    ### Get the model for prediction
    if model_info["encodertype"] == "user":
        # User-only model: single "user" modality plus user embeddings dump.
        train_mods = ["user"]
        mod_pos_dict = {"user": 0}
        uemb_path = os.path.join(feature_root, "user_emb.npy")
        test_mods = train_mods
        train_shapes = [[1], [3]] + [[model_info["length"], 1]]
        test_model = build_test_model(model_path, train_shapes, test_mods,
                                      rnn_type, mod_pos_dict, train_mods)
        test_gen = get_testgen(feature_root, target_root, split_root,
                               test_mods, phase="test")
        ### Evaluation
        preds, truth = user_predict(test_model, test_gen, pred_path)
        ### User embeddings output
        uemb_gen = get_testgen(feature_root, target_root, split_root,
                               test_mods, phase="all")
        uemb_output(test_model, uemb_gen, uemb_path)
    else:
        # Content model: visual/audio/text modalities; the tested subset is
        # derived from the abbreviated modality string.
        train_mods = ["resnet50", "audiovgg", "fudannlp"]
        mod_pos_dict = {
            mod: train_mods.index(mod)
            for mod in mod_rep_dict.keys()
        }
        test_mods = rep2mods(ord_rep(abbr_test_mods))
        train_shapes = [[2, 8]] + [[mod_shape_dict[mod]]
                                   for mod in train_mods
                                   ] + [[model_info["length"], 1]]
        test_model = build_test_model(model_path, train_shapes, test_mods,
                                      rnn_type, mod_pos_dict, train_mods)
        test_gen = get_testgen(feature_root, target_root, split_root,
                               test_mods, phase="test")
        preds, truth = predict(test_model, test_gen, pred_path)
    ### Evaluate model with numerous indexes
    eval_path = os.path.join(test_root, "eval.txt")
    nmse, corr, srcc = evaluate(preds, truth, eval_path)
    K.clear_session()
    return nmse, corr, srcc
def main(args=None):
    """Train an EfficientDet detector from command-line arguments.

    Parses args (from `sys.argv` when None), builds train/validation
    generators and the EfficientDet model for the chosen `phi`, optionally
    loads pretrained weights and freezes the backbone, compiles with
    smooth-L1 + focal losses, and runs `fit_generator`.

    Returns:
        The Keras History object from `fit_generator`.
    """
    # parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # optionally choose specific GPU
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    K.set_session(get_session())

    # create the generators
    train_generator, validation_generator = create_generators(args)

    num_classes = train_generator.num_classes()
    model, prediction_model = efficientdet(args.phi,
                                           num_classes=num_classes,
                                           weighted_bifpn=args.weighted_bifpn)

    # load pretrained weights
    if args.snapshot:
        if args.snapshot == 'imagenet':
            # Download ImageNet-pretrained EfficientNet backbone weights.
            model_name = 'efficientnet-b{}'.format(args.phi)
            file_name = '{}_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5'.format(
                model_name)
            file_hash = WEIGHTS_HASHES[model_name][1]
            weights_path = keras.utils.get_file(file_name,
                                                BASE_WEIGHTS_PATH + file_name,
                                                cache_subdir='models',
                                                file_hash=file_hash)
            model.load_weights(weights_path, by_name=True)
        else:
            print('Loading model, this may take a second...')
            model.load_weights(args.snapshot, by_name=True)

    # freeze backbone layers
    if args.freeze_backbone:
        # Per-phi backbone layer counts: 227, 329, 329, 374, 464, 566, 656
        for i in range(1, [227, 329, 329, 374, 464, 566, 656][args.phi]):
            model.layers[i].trainable = False

    # compile model
    model.compile(
        optimizer=Adam(lr=1e-3),
        loss={
            'regression': smooth_l1(),
            'classification': focal()
        },
    )

    # print(model.summary())

    # create the callbacks
    callbacks = create_callbacks(
        model,
        prediction_model,
        validation_generator,
        args,
    )

    # Skip validation entirely unless val loss was requested.
    if not args.compute_val_loss:
        validation_generator = None

    # start training
    return model.fit_generator(generator=train_generator,
                               steps_per_epoch=args.steps,
                               initial_epoch=0,
                               epochs=args.epochs,
                               verbose=1,
                               callbacks=callbacks,
                               workers=args.workers,
                               use_multiprocessing=args.multiprocessing,
                               max_queue_size=args.max_queue_size,
                               validation_data=validation_generator)
def main(args):
    """Train a small CNN on MNIST with Horovod and SageMaker Debugger.

    Initializes Horovod, pins one GPU per local rank (unless
    `args.use_only_cpu`), scales epochs down and the learning rate up by the
    number of workers, wraps the optimizer with both Horovod's distributed
    optimizer and the SageMaker Debugger hook, trains, and prints test
    loss/accuracy. Only rank 0 writes checkpoints to `args.model_dir`.
    """
    # Horovod: initialize Horovod.
    hvd.init()

    if not args.use_only_cpu:
        # Horovod: pin GPU to be used to process local rank (one GPU per process)
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.gpu_options.visible_device_list = str(hvd.local_rank())
    else:
        config = None

    K.set_session(tf.Session(config=config))

    batch_size = 128
    num_classes = 10

    # Horovod: adjust number of epochs based on number of GPUs.
    epochs = int(math.ceil(args.num_epochs / hvd.size()))

    # Input image dimensions
    img_rows, img_cols = 28, 28

    # The data, shuffled and split between train and test sets
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Reshape to NCHW or NHWC depending on the backend's data format.
    if K.image_data_format() == "channels_first":
        x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
        x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
        input_shape = (1, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
        input_shape = (img_rows, img_cols, 1)

    # Normalize pixel values to [0, 1].
    x_train = x_train.astype("float32")
    x_test = x_test.astype("float32")
    x_train /= 255
    x_test /= 255
    print("x_train shape:", x_train.shape)
    print(x_train.shape[0], "train samples")
    print(x_test.shape[0], "test samples")

    # Convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    model = Sequential()
    model.add(
        Conv2D(32,
               kernel_size=(3, 3),
               activation="relu",
               input_shape=input_shape))
    model.add(Conv2D(64, (3, 3), activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation="relu"))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation="softmax"))

    # Horovod: adjust learning rate based on number of GPUs.
    opt = keras.optimizers.Adadelta(1.0 * hvd.size())

    # Horovod: add Horovod Distributed Optimizer.
    opt = hvd.DistributedOptimizer(opt)

    ##### Enabling SageMaker Debugger ###########
    # Create hook from the configuration provided through sagemaker python sdk
    smd_hook = smd.KerasHook.create_from_json_file()

    ##### Enabling SageMaker Debugger ###########
    # wrap the optimizer so the hook can identify the gradients
    opt = smd_hook.wrap_optimizer(opt)

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=opt,
                  metrics=["accuracy"])

    callbacks = [
        # Horovod: broadcast initial variable states from rank 0 to all other processes.
        # This is necessary to ensure consistent initialization of all workers when
        # training is started with random weights or restored from a checkpoint.
        hvd.callbacks.BroadcastGlobalVariablesCallback(0),
        ##### Enabling SageMaker Debugger ###########
        # pass smd_hook as a callback
        smd_hook,
    ]

    # Horovod: save checkpoints only on worker 0 to prevent other workers from corrupting them.
    if hvd.rank() == 0:
        callbacks.append(
            keras.callbacks.ModelCheckpoint(
                os.path.join(args.model_dir, "checkpoint-{epoch}.h5")))

    model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        callbacks=callbacks,
        epochs=epochs,
        verbose=1 if hvd.rank() == 0 else 0,  # only rank 0 prints progress
        validation_data=(x_test, y_test),
    )
    score = model.evaluate(x_test, y_test, verbose=0)
    print("Test loss:", score[0])
    print("Test accuracy:", score[1])
def train_fn(model_bytes):
    """Horovod-on-Spark worker function: train a serialized Keras model.

    Runs inside a Spark task: deserializes the model from `model_bytes`,
    scales its learning rate by the number of workers, streams train/val
    data from Petastorm parquet readers, and fits with Horovod callbacks.

    Args:
        model_bytes: serialized model consumed by `deserialize_model`.

    Returns:
        On rank 0: (history.history, best-checkpoint bytes). Other ranks
        implicitly return None.

    NOTE(review): relies on outer-scope names (`args`, `train_rows`,
    `val_rows`, `all_cols`, `PETASTORM_HDFS_DRIVER`, `pa`,
    `deserialize_model`) being serialized with this closure.
    """
    # Make sure pyarrow is referenced before anything else to avoid segfault due to conflict
    # with TensorFlow libraries. Use `pa` package reference to ensure it's loaded before
    # functions like `deserialize_model` which are implemented at the top level.
    # See https://jira.apache.org/jira/browse/ARROW-3346
    pa

    # Imports are local: this function's body is shipped to Spark executors.
    import atexit
    import horovod.tensorflow.keras as hvd
    from horovod.spark.task import get_available_devices
    import os
    from petastorm import make_batch_reader
    from petastorm.tf_utils import make_petastorm_dataset
    import tempfile
    import tensorflow as tf
    import tensorflow.keras.backend as K
    import shutil

    # Horovod: initialize Horovod inside the trainer.
    hvd.init()

    # Horovod: pin GPU to be used to process local rank (one GPU per process), if GPUs are available.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = get_available_devices()[0]
    K.set_session(tf.Session(config=config))

    # Horovod: restore from checkpoint, use hvd.load_model under the hood.
    model = deserialize_model(model_bytes, hvd.load_model)

    # Horovod: adjust learning rate based on number of processes.
    K.set_value(model.optimizer.lr,
                K.get_value(model.optimizer.lr) * hvd.size())

    # Horovod: print summary logs on the first worker.
    verbose = 2 if hvd.rank() == 0 else 0

    callbacks = [
        # Horovod: broadcast initial variable states from rank 0 to all other processes.
        # This is necessary to ensure consistent initialization of all workers when
        # training is started with random weights or restored from a checkpoint.
        hvd.callbacks.BroadcastGlobalVariablesCallback(root_rank=0),

        # Horovod: average metrics among workers at the end of every epoch.
        #
        # Note: This callback must be in the list before the ReduceLROnPlateau,
        # TensorBoard, or other metrics-based callbacks.
        hvd.callbacks.MetricAverageCallback(),

        # Horovod: using `lr = 1.0 * hvd.size()` from the very beginning leads to worse final
        # accuracy. Scale the learning rate `lr = 1.0` ---> `lr = 1.0 * hvd.size()` during
        # the first five epochs. See https://arxiv.org/abs/1706.02677 for details.
        hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=5,
                                                 verbose=verbose),

        # Reduce LR if the metric is not improved for 10 epochs, and stop training
        # if it has not improved for 20 epochs.
        tf.keras.callbacks.ReduceLROnPlateau(monitor='val_exp_rmspe',
                                             patience=10,
                                             verbose=verbose),
        tf.keras.callbacks.EarlyStopping(monitor='val_exp_rmspe',
                                         mode='min',
                                         patience=20,
                                         verbose=verbose),
        tf.keras.callbacks.TerminateOnNaN()
    ]

    # Model checkpoint location.
    ckpt_dir = tempfile.mkdtemp()
    ckpt_file = os.path.join(ckpt_dir, 'checkpoint.h5')
    # Clean up the temp checkpoint dir when the worker process exits.
    atexit.register(lambda: shutil.rmtree(ckpt_dir))

    # Horovod: save checkpoints only on the first worker to prevent other workers from corrupting them.
    if hvd.rank() == 0:
        callbacks.append(
            tf.keras.callbacks.ModelCheckpoint(ckpt_file,
                                               monitor='val_exp_rmspe',
                                               mode='min',
                                               save_best_only=True))

    # Make Petastorm readers: each worker reads its own shard of the data.
    with make_batch_reader('%s/train_df.parquet' % args.data_dir,
                           num_epochs=None,
                           cur_shard=hvd.rank(),
                           shard_count=hvd.size(),
                           hdfs_driver=PETASTORM_HDFS_DRIVER) as train_reader:
        with make_batch_reader('%s/val_df.parquet' % args.data_dir,
                               num_epochs=None,
                               cur_shard=hvd.rank(),
                               shard_count=hvd.size(),
                               hdfs_driver=PETASTORM_HDFS_DRIVER) as val_reader:
            # Convert readers to tf.data.Dataset.
            # Targets are log(Sales); features are the `all_cols` tuple.
            train_ds = make_petastorm_dataset(train_reader) \
                .apply(tf.data.experimental.unbatch()) \
                .shuffle(int(train_rows / hvd.size())) \
                .batch(args.batch_size) \
                .map(lambda x: (tuple(getattr(x, col) for col in all_cols), tf.log(x.Sales)))

            val_ds = make_petastorm_dataset(val_reader) \
                .apply(tf.data.experimental.unbatch()) \
                .batch(args.batch_size) \
                .map(lambda x: (tuple(getattr(x, col) for col in all_cols), tf.log(x.Sales)))

            history = model.fit(train_ds,
                                validation_data=val_ds,
                                steps_per_epoch=int(train_rows / args.batch_size / hvd.size()),
                                validation_steps=int(val_rows / args.batch_size / hvd.size()),
                                callbacks=callbacks,
                                verbose=verbose,
                                epochs=args.epochs)

    # Dataset API usage currently displays a wall of errors upon termination.
    # This global model registration ensures clean termination.
    # Tracked in https://github.com/tensorflow/tensorflow/issues/24570
    globals()['_DATASET_FINALIZATION_HACK'] = model

    if hvd.rank() == 0:
        with open(ckpt_file, 'rb') as f:
            return history.history, f.read()
def _evaluate(
    self, config: dict, num_eval_batches: Optional[int], skip_benign: Optional[bool]
) -> dict:
    """
    Evaluate a config file for classification robustness against attack.

    Note: num_eval_batches shouldn't be set for poisoning scenario and will raise an
    error if it is

    Pipeline: (1) load clean training data and, depending on attack type,
    either inject preloaded poison samples or generate poisons for a
    fraction of the source class; (2) optionally run a poison-filtering
    defense and drop flagged samples; (3) train the target classifier on the
    surviving data; (4) report benign accuracy and (when poisoned) poisoned
    test accuracy plus targeted-misclassification accuracy.

    Returns:
        dict of accuracy metrics keyed by metric name.
    """
    if config["sysconfig"].get("use_gpu"):
        os.environ["TF_CUDNN_DETERMINISM"] = "1"
    if num_eval_batches:
        raise ValueError("num_eval_batches shouldn't be set for poisoning scenario")
    if skip_benign:
        raise ValueError("skip_benign shouldn't be set for poisoning scenario")
    model_config = config["model"]
    # Scenario assumes preprocessing_fn makes images all same size
    classifier, preprocessing_fn = load_model(model_config)

    config_adhoc = config.get("adhoc") or {}
    train_epochs = config_adhoc["train_epochs"]
    src_class = config_adhoc["source_class"]
    tgt_class = config_adhoc["target_class"]
    fit_batch_size = config_adhoc.get(
        "fit_batch_size", config["dataset"]["batch_size"]
    )

    if not config["sysconfig"].get("use_gpu"):
        # Single-threaded ops on CPU for determinism.
        conf = ConfigProto(intra_op_parallelism_threads=1)
        set_session(Session(config=conf))

    # Set random seed due to large variance in attack and defense success
    np.random.seed(config_adhoc["split_id"])
    set_random_seed(config_adhoc["split_id"])
    random.seed(config_adhoc["split_id"])
    use_poison_filtering_defense = config_adhoc.get(
        "use_poison_filtering_defense", True
    )
    if self.check_run:
        # filtering defense requires more than a single batch to run properly
        use_poison_filtering_defense = False

    logger.info(f"Loading dataset {config['dataset']['name']}...")
    clean_data = load_dataset(
        config["dataset"],
        epochs=1,
        split_type="train",
        preprocessing_fn=preprocessing_fn,
        shuffle_files=False,
    )
    attack_config = config["attack"]
    attack_type = attack_config.get("type")

    fraction_poisoned = config["adhoc"]["fraction_poisoned"]
    # Flag for whether to poison dataset -- used to evaluate
    # performance of defense on clean data
    poison_dataset_flag = config["adhoc"]["poison_dataset"]
    # detect_poison does not currently support data generators
    # therefore, make in memory dataset
    x_train_all, y_train_all = [], []

    if attack_type == "preloaded":
        # Number of datapoints in train split of target class
        num_images_tgt_class = config_adhoc["num_images_target_class"]
        logger.info(
            f"Loading poison dataset {config_adhoc['poison_samples']['name']}..."
        )
        num_poisoned = int(config_adhoc["fraction_poisoned"] * num_images_tgt_class)
        if num_poisoned == 0:
            raise ValueError(
                "For the preloaded attack, fraction_poisoned must be set so that at least on data point is poisoned."
            )
        # Set batch size to number of poisons -- read only one batch of preloaded poisons
        config_adhoc["poison_samples"]["batch_size"] = num_poisoned
        poison_data = load_dataset(
            config["adhoc"]["poison_samples"],
            epochs=1,
            split_type="poison",
            preprocessing_fn=None,
        )
        logger.info(
            "Building in-memory dataset for poisoning detection and training"
        )
        for x_clean, y_clean in clean_data:
            x_train_all.append(x_clean)
            y_train_all.append(y_clean)
        x_poison, y_poison = poison_data.get_batch()
        x_poison = np.array([xp for xp in x_poison], dtype=np.float32)
        x_train_all.append(x_poison)
        y_train_all.append(y_poison)
        x_train_all = np.concatenate(x_train_all, axis=0)
        y_train_all = np.concatenate(y_train_all, axis=0)
    else:
        # Generate poisons on the fly with the configured attack.
        attack = load(attack_config)
        logger.info(
            "Building in-memory dataset for poisoning detection and training"
        )
        for x_train, y_train in clean_data:
            x_train_all.append(x_train)
            y_train_all.append(y_train)
        x_train_all = np.concatenate(x_train_all, axis=0)
        y_train_all = np.concatenate(y_train_all, axis=0)
        if poison_dataset_flag:
            # Poison a random fraction of the source class.
            total_count = np.bincount(y_train_all)[src_class]
            poison_count = int(fraction_poisoned * total_count)
            if poison_count == 0:
                logger.warning(
                    f"No poisons generated with fraction_poisoned {fraction_poisoned} for class {src_class}."
                )
            src_indices = np.where(y_train_all == src_class)[0]
            poisoned_indices = np.sort(
                np.random.choice(src_indices, size=poison_count, replace=False)
            )
            x_train_all, y_train_all = poison_dataset(
                x_train_all,
                y_train_all,
                src_class,
                tgt_class,
                y_train_all.shape[0],
                attack,
                poisoned_indices,
            )

    y_train_all_categorical = to_categorical(y_train_all)

    # Flag to determine whether defense_classifier is trained directly
    # (default API) or is trained as part of detect_poisons method
    fit_defense_classifier_outside_defense = config_adhoc.get(
        "fit_defense_classifier_outside_defense", True
    )
    # Flag to determine whether defense_classifier uses sparse
    # or categorical labels
    defense_categorical_labels = config_adhoc.get(
        "defense_categorical_labels", True
    )
    if use_poison_filtering_defense:
        if defense_categorical_labels:
            y_train_defense = y_train_all_categorical
        else:
            y_train_defense = y_train_all

        defense_config = config["defense"]
        detection_kwargs = config_adhoc.get("detection_kwargs", dict())

        defense_model_config = config_adhoc.get("defense_model", model_config)
        defense_train_epochs = config_adhoc.get(
            "defense_train_epochs", train_epochs
        )

        # Assumes classifier_for_defense and classifier use same preprocessing function
        classifier_for_defense, _ = load_model(defense_model_config)
        logger.info(
            f"Fitting model {defense_model_config['module']}.{defense_model_config['name']} "
            f"for defense {defense_config['name']}..."
        )
        if fit_defense_classifier_outside_defense:
            classifier_for_defense.fit(
                x_train_all,
                y_train_defense,
                batch_size=fit_batch_size,
                nb_epochs=defense_train_epochs,
                verbose=False,
                shuffle=True,
            )
        defense_fn = load_fn(defense_config)
        defense = defense_fn(classifier_for_defense, x_train_all, y_train_defense)

        # detect_poison returns (report, per-sample clean flags); 1 == clean.
        _, is_clean = defense.detect_poison(**detection_kwargs)
        is_clean = np.array(is_clean)
        logger.info(f"Total clean data points: {np.sum(is_clean)}")

        logger.info("Filtering out detected poisoned samples")
        indices_to_keep = is_clean == 1
        x_train_final = x_train_all[indices_to_keep]
        y_train_final = y_train_all_categorical[indices_to_keep]
    else:
        logger.info(
            "Defense does not require filtering. Model fitting will use all data."
        )
        x_train_final = x_train_all
        y_train_final = y_train_all_categorical
    if len(x_train_final):
        logger.info(
            f"Fitting model of {model_config['module']}.{model_config['name']}..."
        )
        classifier.fit(
            x_train_final,
            y_train_final,
            batch_size=fit_batch_size,
            nb_epochs=train_epochs,
            verbose=False,
            shuffle=True,
        )
    else:
        logger.warning("All data points filtered by defense. Skipping training")

    logger.info("Validating on clean test data")
    test_data = load_dataset(
        config["dataset"],
        epochs=1,
        split_type="test",
        preprocessing_fn=preprocessing_fn,
        shuffle_files=False,
    )
    benign_validation_metric = metrics.MetricList("categorical_accuracy")
    target_class_benign_metric = metrics.MetricList("categorical_accuracy")
    for x, y in tqdm(test_data, desc="Testing"):
        # Ensure that input sample isn't overwritten by classifier
        x.flags.writeable = False
        y_pred = classifier.predict(x)
        benign_validation_metric.append(y, y_pred)
        y_pred_tgt_class = y_pred[y == src_class]
        if len(y_pred_tgt_class):
            target_class_benign_metric.append(
                [src_class] * len(y_pred_tgt_class), y_pred_tgt_class
            )
    logger.info(
        f"Unpoisoned validation accuracy: {benign_validation_metric.mean():.2%}"
    )
    logger.info(
        f"Unpoisoned validation accuracy on targeted class: {target_class_benign_metric.mean():.2%}"
    )
    results = {
        "benign_validation_accuracy": benign_validation_metric.mean(),
        "benign_validation_accuracy_targeted_class": target_class_benign_metric.mean(),
    }

    poisoned_test_metric = metrics.MetricList("categorical_accuracy")
    poisoned_targeted_test_metric = metrics.MetricList("categorical_accuracy")

    logger.info("Testing on poisoned test data")
    if attack_type == "preloaded":
        # Preloaded attack: poisoned accuracy combines the poison_test split
        # (scored against src_class truth) and the clean test split.
        test_data_poison = load_dataset(
            config_adhoc["poison_samples"],
            epochs=1,
            split_type="poison_test",
            preprocessing_fn=None,
        )
        for x_poison_test, y_poison_test in tqdm(
            test_data_poison, desc="Testing poison"
        ):
            x_poison_test = np.array([xp for xp in x_poison_test], dtype=np.float32)
            y_pred = classifier.predict(x_poison_test)
            y_true = [src_class] * len(y_pred)
            poisoned_targeted_test_metric.append(y_poison_test, y_pred)
            poisoned_test_metric.append(y_true, y_pred)
        test_data_clean = load_dataset(
            config["dataset"],
            epochs=1,
            split_type="test",
            preprocessing_fn=preprocessing_fn,
            shuffle_files=False,
        )
        for x_clean_test, y_clean_test in tqdm(
            test_data_clean, desc="Testing clean"
        ):
            x_clean_test = np.array([xp for xp in x_clean_test], dtype=np.float32)
            y_pred = classifier.predict(x_clean_test)
            poisoned_test_metric.append(y_clean_test, y_pred)
    elif poison_dataset_flag:
        logger.info("Testing on poisoned test data")
        test_data = load_dataset(
            config["dataset"],
            epochs=1,
            split_type="test",
            preprocessing_fn=preprocessing_fn,
            shuffle_files=False,
        )
        for x_test, y_test in tqdm(test_data, desc="Testing"):
            src_indices = np.where(y_test == src_class)[0]
            poisoned_indices = src_indices  # Poison entire class
            x_test, _ = poison_dataset(
                x_test,
                y_test,
                src_class,
                tgt_class,
                len(y_test),
                attack,
                poisoned_indices,
            )
            y_pred = classifier.predict(x_test)
            poisoned_test_metric.append(y_test, y_pred)

            y_pred_targeted = y_pred[y_test == src_class]
            if not len(y_pred_targeted):
                continue
            # Targeted success: source-class samples predicted as tgt_class.
            poisoned_targeted_test_metric.append(
                [tgt_class] * len(y_pred_targeted), y_pred_targeted
            )
    if poison_dataset_flag or attack_type == "preloaded":
        results["poisoned_test_accuracy"] = poisoned_test_metric.mean()
        results[
            "poisoned_targeted_misclassification_accuracy"
        ] = poisoned_targeted_test_metric.mean()
        logger.info(f"Test accuracy: {poisoned_test_metric.mean():.2%}")
        logger.info(
            f"Test targeted misclassification accuracy: {poisoned_targeted_test_metric.mean():.2%}"
        )
    return results
# Data, model, and output directories. These are required. parser.add_argument('--output-dir', type=str, default=os.environ['SM_OUTPUT_DIR']) parser.add_argument('--model_dir', type=str) parser.add_argument('--train', type=str, default=os.environ['SM_CHANNEL_TRAIN']) parser.add_argument('--test', type=str, default=os.environ['SM_CHANNEL_TEST']) args, _ = parser.parse_known_args() # Horovod: initialize Horovod. hvd.init() # Horovod: pin GPU to be used to process local rank (one GPU per process) config = tf.ConfigProto() config.gpu_options.allow_growth = True config.gpu_options.visible_device_list = str(hvd.local_rank()) K.set_session(tf.Session(config=config)) batch_size = 128 num_classes = 10 # Horovod: adjust number of epochs based on number of GPUs. epochs = int(math.ceil(12.0 / hvd.size())) # Input image dimensions img_rows, img_cols = 28, 28 # The data, shuffled and split between train and test sets x_train = np.load(os.path.join(args.train, 'train.npz'))['data'] y_train = np.load(os.path.join(args.train, 'train.npz'))['labels'] print("Train dataset loaded from: {}".format(os.path.join(args.train, 'train.npz')))
def _predict(self, x):
    """Run inference on `x` under the module-level TF graph and session.

    Re-binds the global `sess` as the Keras default before predicting so
    the call is safe from worker threads where another session may be
    active.
    """
    with graph.as_default():
        # Restore the global session as Keras' default for this call.
        set_session(sess)
        outputs = self.model.predict(x)
    return outputs