def hyperparameter_search(hp_domain, n_iter, n_replicas, save_dir, model):
    """Random hyperparameter search on DNN model.

    Starts ``n_replicas`` threads that each run ``_hp_search_replica``, waits
    for all of them to finish, then walks ``save_dir`` looking for
    ``model_<digits>_<digits>`` directories and picks the ``results.npy``
    with the highest ``'acc'`` entry.

    Args:
        hp_domain: forwarded unchanged to every replica.
        n_iter: forwarded unchanged to every replica.
        n_replicas: number of replica threads to start.
        save_dir: forwarded to every replica and scanned for results afterwards.
        model: forwarded unchanged to every replica.

    Returns:
        The best results dict, with a ``'model_name'`` key set to the name of
        the directory it was loaded from. If no results were found, the
        placeholder ``{'acc': -inf, 'model_name': None, 'hp': None}``.
    """
    dataset = utils.load_cifar(USE_CIFAR100)
    start_time = time.time()
    gpu_devices = utils.get_available_gpus()

    # Everything except rep_id is identical for all replicas.
    shared_kwargs = {
        'start_time': start_time,
        'gpu_devices': gpu_devices,
        'hp_domain': hp_domain,
        'save_dir': save_dir,
        'dataset': dataset,
        'n_iter': n_iter,
        'model': model,
    }
    threads = []
    for rep_id in range(n_replicas):
        thread = threading.Thread(target=_hp_search_replica,
                                  kwargs=dict(shared_kwargs, rep_id=rep_id))
        thread.start()
        threads.append(thread)
    for thread in threads:
        thread.join()

    # Look for best hyperparameter set we found
    best_results = {'acc': float('-inf'), 'model_name': None, 'hp': None}
    model_dir_pattern = re.compile('model_([0-9]+)_([0-9]+)')
    for root, dirs, _files in os.walk(save_dir):
        for d in dirs:
            if model_dir_pattern.match(d) is None:
                continue
            results_path = os.path.join(root, d, 'results.npy')
            if not os.path.isfile(results_path):
                # A model directory without results (e.g. an interrupted run)
                # must not abort the summary of the finished ones.
                continue
            # results.npy holds a pickled dict; NumPy >= 1.16.3 refuses to
            # load object arrays unless allow_pickle is set. The file is
            # expected to have been written by this program's own replicas.
            results = np.load(results_path, allow_pickle=True).tolist()
            if best_results['acc'] < results['acc']:
                best_results = results
                best_results['model_name'] = d

    print('\n\nHyperparameter search done!\n\tbest_acc=%4f\n\tbest_model_name=%s'
          % (best_results['acc'], best_results['model_name'])
          + '\n' + '#' * 100)
    return best_results
def __init__(self, in_steps, out_steps, img_shape, devices, filters=None,
             num_blocks=1, mode="train", starter_learning_rate=0.001,
             decay_step=500, decay_rate=1.0, verbose_step=1):
    """Store the model configuration; no graph is built here.

    Args:
        in_steps: stored as ``self.in_steps``.
        out_steps: stored as ``self.out_steps``.
        img_shape: two-element sequence unpacked into
            ``(img_height, img_width)``.
        devices: sized collection; only its length is used here, to decide
            how many GPU names to keep.
        filters: stored as ``self.filters``. Defaults to ``[32, 1]``; a fresh
            list is created per instance so instances never share (and
            cannot accidentally mutate) one default list.
        num_blocks: stored and passed to ``get_block_idx_per_gpu``.
        mode: stored as ``self.mode``.
        starter_learning_rate: stored as ``self.starter_learning_rate``.
        decay_step: stored as ``self.decay_step``.
        decay_rate: stored as ``self.decay_rate``.
        verbose_step: stored as ``self.verbose_step``.
    """
    self.in_steps = in_steps
    self.out_steps = out_steps
    self.img_height, self.img_width = img_shape
    self.num_blocks = num_blocks
    # Trailing 1 is a fixed third dimension appended to (height, width).
    self.input_shape = [self.img_height, self.img_width, 1]
    self.filters = [32, 1] if filters is None else filters
    self.starter_learning_rate = starter_learning_rate
    self.devices = devices
    self.gpus = len(devices)
    self.decay_step = decay_step
    self.decay_rate = decay_rate
    self.global_step = None
    self.learning_rate = None
    self.keep_rate = None
    self.verbose_step = verbose_step
    self.mode = mode

    # Initialised to None here; presumably filled in when the graph is
    # built elsewhere in the class.
    self.x = None
    self.pw = None
    self.y = None
    self.y_pw = None
    self.y_hat = None
    self.is_training = None
    self.loss = None
    self.loss_1 = None

    # Keep as many detected GPU names as there are requested devices.
    self.gpus_names = get_available_gpus()[:self.gpus]
    self.block_idx_per_gpu = get_block_idx_per_gpu(self.num_blocks, self.gpus)
def arg_parse():
    """Build the command-line parser and return the parsed arguments.

    Example invocation::

        python train.py --mode train --data_dir sequence_data --gpus 4 \
            --num_blocks 4 --batch_size 1 --train_epochs 20 --lr 0.001 \
            --verbose_step 10
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--mode", default="train")
    parser.add_argument("--data_dir", default=gen_data.sequence_data_dir,
                        help="data dir")

    # (flag, default, type, help) for every typed numeric option.
    typed_options = (
        ("--gpus", len(get_available_gpus()), int, "number of gpus"),
        ("--num_blocks", 1, int, "depth of network"),
        ("--batch_size", 1, int, "batch size"),
        ("--train_epochs", 50, int, "train epochs"),
        ("--lr", 0.00002, float, "starter learning rate"),
        ("--verbose_step", 10, int, "verbose step"),
    )
    for flag, default_value, value_type, help_text in typed_options:
        parser.add_argument(flag, default=default_value, type=value_type,
                            help=help_text)

    return parser.parse_args()
def __init__(self, stage, croplen, nclasses, optimization=None,
             momentum=None, reuse_variable=False):
    """Record the stage configuration and reset all lazily-built state.

    Args:
        stage: stored as ``self.stage``.
        croplen: stored as ``self.croplen``.
        nclasses: stored as ``self.nclasses``.
        optimization: stored as ``self._optimization``.
        momentum: stored as ``self._momentum``.
        reuse_variable: stored as ``self._reuse``.
    """
    # Constructor arguments.
    self.stage, self.croplen, self.nclasses = stage, croplen, nclasses
    self._optimization, self._momentum = optimization, momentum
    self._reuse = reuse_variable

    # Input pipeline handles, not created yet.
    self.dataloader = self.queue_coord = self.queue_threads = None

    # Graph bookkeeping.
    self.summaries = []
    self.towers = []
    self._train = self._accum = self._init = None

    # Training options.
    self.small_chunk = 1
    self.nccl = False
    self.replica = False
    self._nesterov = False

    # Detected GPUs; per the original author's note a CPU device is
    # appended later if this list is empty.
    self.devices = digits.get_available_gpus()
    self.gpus = len(self.devices)
def train(self, xs1, xs2, scores):
    """Build the training graph: loss, optimizer step and summaries.

    When ``get_available_gpus()`` returns devices, the inputs are split
    along axis 0 into one shard per GPU, a tower is built on each
    ``/gpu:<i>`` and the tower gradients are combined with
    ``average_gradients``. Otherwise a single graph is built on the
    default device.

    Args:
        xs1: first input tensor, passed to ``self._get_prediction``.
        xs2: second input tensor, passed to ``self._get_prediction``.
        scores: target tensor compared to the predictions with a squared
            difference.

    Returns:
        Tuple ``(loss, train_op, global_step, summaries)``.
    """
    global_step = tf.train.get_or_create_global_step()
    lr = noam_scheme(self.context.lr, global_step, self.context.warmup_steps)
    optimizer = tf.train.AdamOptimizer(lr)
    gpus = get_available_gpus()

    if gpus:
        num_gpu = len(gpus)
        # tf.split below needs the batch to divide evenly across towers.
        assert self.context.hparams.batch_size % num_gpu == 0
        xs1s, xs2s = tf.split(xs1, num_gpu, axis=0), tf.split(xs2, num_gpu, axis=0)
        scoress = tf.split(scores, num_gpu, axis=0)

        tower_grads = []
        losses = []
        with tf.variable_scope(tf.get_variable_scope()):
            list_predictions = []
            for i in range(num_gpu):
                with tf.device("/gpu:%d" % i):
                    with tf.name_scope("tower_%d" % i):
                        predictions = self._get_prediction(xs1s[i], xs2s[i])
                        list_predictions.append(predictions)
                        # square loss
                        partial_loss = tf.reduce_sum(
                            tf.squared_difference(predictions, scoress[i]),
                            name="loss")
                        losses.append(partial_loss)
                        # Later towers share the variables created by the first.
                        tf.get_variable_scope().reuse_variables()
                        grad = get_gradients_by_loss_and_optimizer(
                            partial_loss, optimizer)
                        tower_grads.append(grad)
            predictions = tf.concat(list_predictions, axis=0)
            loss = tf.reduce_mean(losses)
            grads_and_vars = average_gradients(tower_grads)
    else:
        predictions = self._get_prediction(xs1, xs2)
        loss = tf.reduce_sum(tf.squared_difference(predictions, scores),
                             name="loss")
        grads_and_vars = get_gradients_by_loss_and_optimizer(loss, optimizer)

    train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

    for g, v in grads_and_vars:
        if g is None:
            # A variable the loss does not depend on has no gradient;
            # tf.summary.histogram cannot take None, so skip it (same guard
            # as the sibling multi-input train()).
            continue
        tf.summary.histogram(v.name, v)
        tf.summary.histogram(v.name + '_grad', g)
    tf.summary.scalar("pred_avg", tf.reduce_mean(predictions))
    tf.summary.scalar("label_avg", tf.reduce_mean(scores))
    tf.summary.scalar('lr', lr)
    tf.summary.scalar("loss", loss)
    tf.summary.scalar("global_step", global_step)
    summaries = tf.summary.merge_all()

    return loss, train_op, global_step, summaries
def train(self, inputs, targets):
    """Assemble the training graph for an arbitrary list of input tensors.

    The loss function is looked up in ``self._loss_func_dict`` by
    ``self._context.loss_func`` and falls back to ``self._get_loss``. With
    GPUs available every input and the targets are split along axis 0 into
    one shard per GPU and one tower is built per ``/gpu:<i>``; otherwise a
    single loss is built on the default device.

    Args:
        inputs: iterable of input tensors.
        targets: target tensor.

    Returns:
        Tuple ``(loss, train_op, global_step, summaries)``.
    """
    global_step = tf.train.get_or_create_global_step()
    lr = noam_scheme(self._context.lr, global_step,
                     self._context.warmup_steps)
    optimizer = tf.train.AdamOptimizer(lr)
    gpus = get_available_gpus()
    loss_func = self._loss_func_dict.get(self._context.loss_func,
                                         self._get_loss)

    if not gpus:
        loss = tf.reduce_mean(loss_func(inputs, targets))
        grads_and_vars = get_gradients_by_loss_and_optimizer(loss, optimizer)
    else:
        num_gpu = len(gpus)
        assert self._context.hparams.batch_size % num_gpu == 0

        # shards_per_input[k][i] is the i-th shard of the k-th input;
        # transpose so that partial_inputs[i] lists every input for tower i.
        shards_per_input = [tf.split(tensor, num_gpu, axis=0)
                            for tensor in inputs]
        partial_inputs = [[shards[i] for shards in shards_per_input]
                          for i in range(num_gpu)]
        targetses = tf.split(targets, num_gpu, axis=0)

        tower_grads, losses = [], []
        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(num_gpu):
                with tf.device("/gpu:%d" % i), tf.name_scope("tower_%d" % i):
                    partial_loss = loss_func(partial_inputs[i], targetses[i])
                    losses.append(partial_loss)
                    # Share variables with the towers that follow.
                    tf.get_variable_scope().reuse_variables()
                    tower_grads.append(
                        get_gradients_by_loss_and_optimizer(partial_loss,
                                                            optimizer))
        loss = tf.reduce_mean(losses)
        grads_and_vars = average_gradients(tower_grads)

    train_op = optimizer.apply_gradients(grads_and_vars,
                                         global_step=global_step)

    for g, v in grads_and_vars:
        if g is not None:  # variables without a gradient get no histograms
            tf.summary.histogram(v.name, v)
            tf.summary.histogram(v.name + '_grad', g)

    tf.summary.scalar("pred_avg", tf.reduce_mean(self._outputs))
    tf.summary.scalar("infr_avg", tf.reduce_mean(self._inferences))
    tf.summary.scalar("label_avg", tf.reduce_mean(targets))
    tf.summary.scalar('lr', lr)
    tf.summary.scalar("loss", loss)
    tf.summary.scalar("global_step", global_step)
    summaries = tf.summary.merge_all()

    return loss, train_op, global_step, summaries
def __init__(self, target_bos_id, target_eos_id, params):
    """Store the parameters and token ids and record the detected GPUs.

    Args:
        target_bos_id: stored as ``self.target_bos_id``.
        target_eos_id: stored as ``self.target_eos_id``.
        params: stored as ``self.params``; its ``learning_rate`` attribute
            is printed.
    """
    self.params = params

    detected_gpus = get_available_gpus()
    self.available_gpus = detected_gpus
    self.current_gpu_index = 0
    self.total_gpu_num = len(detected_gpus)

    self.target_bos_id, self.target_eos_id = target_bos_id, target_eos_id

    print("learning_rate:", self.params.learning_rate)
def get_session():
    """Reset the default graph and return a new single-threaded session.

    Both the inter-op and intra-op thread pools are limited to one thread.
    The list reported by ``utils.get_available_gpus()`` is printed.
    """
    tf.reset_default_graph()

    single_thread_config = tf.ConfigProto(
        intra_op_parallelism_threads=1,
        inter_op_parallelism_threads=1,
    )
    sess = tf.Session(config=single_thread_config)

    print('AVAILABLE GPUS:', utils.get_available_gpus())
    return sess
def get_model(pretrained):
    """Build a model suited to the machine configuration.

    With more than one GPU the base model is built, optionally initialised
    from ``pretrained``, and then wrapped with ``multi_gpu_model``.
    Otherwise the plain model is built and optionally initialised.

    :param pretrained: path of a weights file to load, or a falsy value to
        skip loading.
    :return: the (possibly multi-GPU) model.
    """
    # NOTE(review): the comparison below only works if get_available_gpus()
    # returns a number here - confirm against its definition.
    gpus = get_available_gpus()

    if gpus > 1:
        model = build_model()
        print(pretrained)
        if pretrained:
            # Load into the template model *before* wrapping it. The original
            # referenced an undefined name (`my_model`) here.
            model.load_weights(pretrained)
        model = multi_gpu_model(model, gpus)
    else:
        model = build_model()
        if pretrained:
            model.load_weights(pretrained)
    return model
def _load_model(self):
    """
    Create the TF session, restore the saved model into it (under the first
    GPU device if one is available, otherwise with default placement) and
    look up the input/output tensors by name.

    :return: None
    """
    log = self._logger
    log.info('Loading model...')

    log.debug('creating TF session')
    session_config = tf.ConfigProto(allow_soft_placement=True)
    self._tf_sess = tf.Session(graph=self._graph, config=session_config)

    def restore_checkpoint():
        # Import the meta graph, restore its weights into the session and
        # adopt the session's graph as self._graph.
        saver = tf.train.import_meta_graph(self._model_file + '.meta',
                                           clear_devices=True)
        saver.restore(self._tf_sess, self._model_file)
        self._graph = self._tf_sess.graph

    gpus = get_available_gpus()
    if not gpus:
        log.debug('loading graph on CPU')
        restore_checkpoint()
    else:
        log.debug('loading graph on GPU')
        with tf.device(gpus[0]):
            restore_checkpoint()

    log.debug('getting placeholders')
    tensor_by_name = self._graph.get_tensor_by_name
    # graph input
    self._input_placeholder = tensor_by_name('image_tensor:0')
    # graph outputs
    self._output_nb_detections = tensor_by_name('num_detections:0')
    self._output_classes = tensor_by_name('detection_classes:0')
    self._output_boxes = tensor_by_name('detection_boxes:0')
    self._output_scores = tensor_by_name('detection_scores:0')

    log.info('... model loaded')
def __init__(self, model_dir, data_directory, data_format="NHWC", lr=0.001,
             n_gpus=2, n_fold=5, seed=42, save_best=5, **kwargs):
    """
    High level class to perform multi GPU training with
    tf.contrib.distribute.MirroredStrategy and tf.Estimator.

    The base models are built using tf.slim. All the data is processed
    using tf.data.Dataset and tf.image. The preprocessing currently runs on
    the CPU (optimal?)

    Additionally built ResNet with NCHW format support for (potentially)
    faster GPU and MKL optimised CPU operations. Currently NCHW format
    support is experimental and the speed up is minor (about 10%)

    :param model_dir: model directory; its last ``/``-separated component
        becomes ``self.model_name``.
    :param data_directory: stored as ``self.data_dir``.
    :param data_format: ``"NCHW"`` or ``"NHWC"``.
    :param lr: stored as ``self.lr``.
    :param n_gpus: number of detected GPUs handed to the MirroredStrategy.
    :param n_fold: number of folds for the StratifiedKFold splitter.
    :param seed: random state of the StratifiedKFold splitter.
    :param save_best: stored as ``self.save_best``.
    :param kwargs: optional overrides: ``weight_decay``,
        ``batch_norm_decay``, ``batch_norm_epsilon``, ``batch_norm_scale``,
        ``output_stride``, ``base_depth``, ``input_shape``, ``n_blocks``,
        ``block_type``. Missing keys fall back to the module defaults.
    :raises ValueError: if ``data_format`` is not ``"NCHW"`` or ``"NHWC"``.
    """
    if data_format not in ("NCHW", "NHWC"):
        raise ValueError(
            f"Unknown data format {data_format}. Has to be either NCHW or NHWC"
        )
    self.data_format = data_format

    # Optional hyper-parameter overrides.
    self.weight_decay = kwargs.get("weight_decay", WEIGHT_DECAY)
    # The original read kwargs["weight_decay"] here, which raised KeyError
    # (or silently used the wrong value) when batch_norm_decay was passed.
    self.batch_norm_decay = kwargs.get("batch_norm_decay", BATCH_NORM_DECAY)
    self.batch_norm_epsilon = kwargs.get("batch_norm_epsilon",
                                         BATCH_NORM_EPSILON)
    self.batch_norm_scale = kwargs.get("batch_norm_scale", BATCH_NORM_SCALE)
    self.output_stride = kwargs.get("output_stride", OUTPUT_STRIDE)
    self.base_depth = kwargs.get("base_depth", BASE_DEPTH)
    self.input_shape = kwargs.get("input_shape", INPUT_SHAPE)
    self.n_blocks = kwargs.get("n_blocks", (3, 4, 6))
    self.block_type = kwargs.get("block_type", "bottleneck")

    # Pathing stuff
    self.model_name = model_dir.split('/')[-1]
    self.model_dir = model_dir
    self.data_dir = data_directory

    # Estimator stuff
    available_gpus = get_available_gpus()
    distribution = tf.contrib.distribute.MirroredStrategy(
        devices=available_gpus[:n_gpus])
    self.config = tf.estimator.RunConfig(
        save_checkpoints_steps=500,
        train_distribute=distribution,
        save_summary_steps=0  # running summary manually
    )

    # Additional args to fine tune training at high level
    self.n_gpus = n_gpus
    self.n_folds = n_fold
    self.seed = seed
    self.lr = lr
    self.save_best = save_best

    _prepare_directory(self.model_dir, self.n_folds)
    self.skf = StratifiedKFold(n_splits=self.n_folds, shuffle=True,
                               random_state=self.seed)
def __init__(self, config, cc_config, model_config=None):
    """Build the CausalController (and optionally the image model), open a
    supervised session and fetch a fixed batch of latent values.

    Args:
        config: trainer configuration. Attributes read here: ``model_dir``,
            ``load_path``, ``use_gpu``, ``batch_size``, ``num_devices``,
            ``num_gpu``, ``build_pretrain``, ``build_train`` and ``Model``.
        cc_config: CausalController configuration; its ``model_dir`` is
            overwritten with ``config.model_dir``.
        model_config: image-model configuration, or ``None`` to build only
            the CausalController.
    """
    self.config = config
    self.cc_config = cc_config
    self.model_dir = config.model_dir
    self.cc_config.model_dir = config.model_dir

    self.model_config = model_config
    if self.model_config:
        self.model_config.model_dir = config.model_dir

    self.save_model_dir = os.path.join(self.model_dir, 'checkpoints')
    if not os.path.exists(self.save_model_dir):
        os.mkdir(self.save_model_dir)

    self.summary_dir = os.path.join(self.model_dir, 'summaries')
    if not os.path.exists(self.summary_dir):
        os.mkdir(self.summary_dir)

    self.load_path = config.load_path
    self.use_gpu = config.use_gpu

    # This tensor controls batch_size for all models.
    # Not expected to change during training, but during testing it can be
    # helpful to change it.
    self.batch_size = tf.placeholder_with_default(self.config.batch_size, [],
                                                  name='batch_size')

    loader_batch_size = config.num_devices * config.batch_size

    # Always need to build CC
    print('setting up CausalController')
    cc_batch_size = config.num_devices * self.batch_size  # Tensor/placeholder
    self.cc = CausalController(cc_batch_size, cc_config)
    self.step = self.cc.step

    # Data
    print('setting up data')
    self.data = DataLoader(self.cc.label_names, config)

    if self.cc_config.is_pretrain or self.config.build_pretrain:
        print('setup pretrain')
        # Queue system to feed labels quickly. This does not queue images.
        label_queue = self.data.get_label_queue(loader_batch_size)
        self.cc.build_pretrain(label_queue)

    # Build Model
    if self.model_config:
        # Will build both gen and discrim
        self.model = self.config.Model(self.batch_size, self.model_config)

        # Trainer step is defined as cc.step+model.step
        # e.g. 10k iter pretrain and 100k iter image model
        # will have image summaries at 100k but trainer model saved at Model-110k
        self.step += self.model.step

        # This queue holds (image,label) pairs, and is used for training
        # conditional GANs
        data_queue = self.data.get_data_queue(loader_batch_size)

        self.real_data_by_gpu = distribute_input_data(
            data_queue, config.num_gpu)
        self.fake_data_by_gpu = distribute_input_data(
            self.cc.label_dict, config.num_gpu)

        with tf.variable_scope('tower'):
            for gpu in get_available_gpus():
                print('using device:', gpu)

                real_data = self.real_data_by_gpu[gpu]
                fake_data = self.fake_data_by_gpu[gpu]
                # Device string turned into a valid name-scope name.
                tower = gpu.replace('/', '').replace(':', '_')

                with tf.device(gpu), tf.name_scope(tower):
                    # Build num_gpu copies of graph: inputs->gradient
                    # Updates self.tower_dict
                    self.model(real_data, fake_data)

                # allow future gpu to use same variables
                tf.get_variable_scope().reuse_variables()

        if self.model_config.is_train or self.config.build_train:
            self.model.build_train_op()
            self.model.build_summary_op()
    else:
        print('Image model not built')

    self.saver = tf.train.Saver(keep_checkpoint_every_n_hours=2)
    self.summary_writer = tf.summary.FileWriter(self.summary_dir)

    print('trainer.model_dir:', self.model_dir)
    gpu_options = tf.GPUOptions(allow_growth=True,
                                per_process_gpu_memory_fraction=0.333)
    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 gpu_options=gpu_options)

    sv = tf.train.Supervisor(logdir=self.save_model_dir,
                             is_chief=True,
                             saver=self.saver,
                             summary_op=None,
                             summary_writer=self.summary_writer,
                             save_model_secs=300,
                             global_step=self.step,
                             ready_for_local_init_op=None)
    self.sess = sv.prepare_or_wait_for_session(config=sess_config)

    if cc_config.pt_load_path:
        print('Attempting to load pretrain model:', cc_config.pt_load_path)
        self.cc.load(self.sess, cc_config.pt_load_path)

        print('Check tvd after restore')
        info = crosstab(self, report_tvd=True)
        print('tvd after load:', info['tvd'])

        # save copy of cc model in new dir
        cc_step = self.sess.run(self.cc.step)
        self.cc.saver.save(self.sess, self.cc.save_model_name, cc_step)

    if config.load_path:  # Declare loading point
        pnt_str = 'Loaded variables at ccStep:{}'
        cc_step = self.sess.run(self.cc.step)
        pnt_str = pnt_str.format(cc_step)
        print('pntstr', pnt_str)
        if self.model_config:
            pnt_str += ' imagemodelStep:{}'
            # Evaluate the image model's step counter. The original bound
            # `self.sess.run` itself, so the message showed a method repr.
            model_step = self.sess.run(self.model.step)
            pnt_str = pnt_str.format(model_step)
        print(pnt_str)

    # PREPARE training:
    # TODO save as Variables so they are restored to same values when load model
    fixed_batch_size = 256  # get this many fixed z values

    self.fetch_fixed_z = {n.z: n.z for n in self.cc.nodes}
    if model_config:
        self.fetch_fixed_z[self.model.z_gen] = self.model.z_gen

    # feed_dict that ensures constant inputs
    # add feed_fixed_z[self.cc.Male.label]=1*ones() to intervene
    self.feed_fixed_z = self.sess.run(self.fetch_fixed_z,
                                      {self.batch_size: fixed_batch_size})
parser.add_argument('--print_freq', type=int, default=50)
parser.add_argument('--eval_freq', type=int, default=5)
parser.add_argument('--lr', type=float, default=1e-1)
parser.add_argument('--decay', type=float, default=5e-4)
parser.add_argument('--savedir', type=str, default=None)
parser.add_argument('--mode', type=str, default='train')
parser.add_argument('--n_gpus', type=int, default=None)
args = parser.parse_args()

os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

# Results go to ./results/<depth>_<K> unless --savedir is given.
savedir = ('./results/%d_%d' % (args.depth, args.K)) \
        if args.savedir is None else args.savedir
if not os.path.isdir(savedir):
    os.makedirs(savedir)

# Use every detected GPU unless --n_gpus limits it. n_gpus is taken from the
# list that is actually kept, so the batch is never split into more shards
# than there are devices when --n_gpus exceeds what is available.
available_gpus = get_available_gpus()
if args.n_gpus is not None:
    available_gpus = available_gpus[:args.n_gpus]
n_gpus = len(available_gpus)
print ('GPUs to be used: ' + str(available_gpus))

batch_size = args.batch_size
# NOTE(review): under Python 3 these are floats ("/" is true division) -
# confirm how they are consumed further down.
n_train_batches = NUM_TRAIN / batch_size
n_test_batches = NUM_TEST / args.batch_size

# Input pipelines live on the CPU; each batch is split into one shard per GPU.
with tf.device('/cpu:0'):
    # training inputs
    x, y = cifar10_input(batch_size, True)
    xs = tf.split(x, n_gpus, axis=0)
    ys = tf.split(y, n_gpus, axis=0)

    # test inputs
    x, y = cifar10_input(batch_size, False)
    txs = tf.split(x, n_gpus, axis=0)
    tys = tf.split(y, n_gpus, axis=0)
import subprocess
from multiprocessing import Process

import tensorflow as tf

from utils import get_available_gpus

# Use at most 4 GPUs, and never more than are detected.
GPUS = 4
GPUS = min(GPUS, len(get_available_gpus()))

python_cmd = "python"


def run_server(idx):
    """Run ``helper_server.py <GPUS> <idx>`` and block until it exits."""
    subprocess.call([python_cmd, "helper_server.py", str(GPUS), str(idx)])


# One helper server per index 1 .. GPUS-1 (index 0 is not launched here).
# `Process` was used without being imported in the original script.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so on
# platforms that spawn (rather than fork) worker processes this module is
# re-imported in each child - confirm the target platform.
processes = []
for i in range(1, GPUS):
    p = Process(target=run_server, args=(i, ))
    processes.append(p)

for p in processes:
    p.start()

for p in processes:
    p.join()
class MyCbk(keras.callbacks.Callback):
    """Callback that saves a given model (not the one being fitted) after
    every epoch, naming the file after the epoch and its ``val_loss``."""

    def __init__(self, model):
        keras.callbacks.Callback.__init__(self)
        self.model_to_save = model

    def on_epoch_end(self, epoch, logs=None):
        path_template = checkpoint_models_path + 'model.%02d-%.4f.hdf5'
        target_path = path_template % (epoch, logs['val_loss'])
        self.model_to_save.save(target_path)


# Callbacks driven by the validation loss.
model_checkpoint = ModelCheckpoint(model_names,
                                   monitor='val_loss',
                                   verbose=1,
                                   save_best_only=True)
early_stop = EarlyStopping('val_loss', patience=patience)
reduce_lr = ReduceLROnPlateau('val_loss',
                              factor=0.1,
                              patience=int(patience / 4),
                              verbose=1)

num_gpu = len(get_available_gpus())
if num_gpu < 2:
    # Single device: the built model is trained directly.
    new_model = build_model()
    if pretrained_path is not None:
        new_model.load_weights(pretrained_path)
else:
    # Multi-GPU: build the template model under the CPU device, then
    # replicate it across the GPUs.
    with tf.device("/cpu:0"):
        model = build_model()
        if pretrained_path is not None:
            model.load_weights(pretrained_path)

    new_model = multi_gpu_model(model, gpus=num_gpu)
    # Checkpoint through the original model, not the multi-GPU wrapper.
    model_checkpoint = MyCbk(model)
from keras import backend as K
import tensorflow as tf
import h5py
import cv2
import numpy as np
# Select the non-interactive Agg backend before pyplot is imported.
import matplotlib; matplotlib.use('Agg')
import matplotlib.pyplot as plt


# In[5]:

from keras.backend.tensorflow_backend import set_session
from utils import limited_gpu_memory_session, get_available_gpus
# Install the session returned by limited_gpu_memory_session as the Keras
# session. NOTE(review): 0.95 is presumably the GPU memory fraction -
# confirm in utils.
set_session(limited_gpu_memory_session(0.95))
print(get_available_gpus())


# In[6]:

# Weight files are kept under DATA_DIR.
# NOTE(review): `os` is not imported in this part of the file - confirm it
# is imported in an earlier cell.
DATA_DIR = '/home/Drive2/rishabh/'
INIT_WEIGHTS = os.path.join(DATA_DIR, 'init_weights_omniglot.hdf5')
CHECKPOINTED_WEIGHTS = os.path.join(DATA_DIR, 'checkpointed_weights_omniglot.hdf5')


# In[7]:

### Load the test and train datasets
import tensorflow as tf from collections import namedtuple from utils import get_available_gpus ALL_DEVICES = get_available_gpus() NUM_DEVICES = 3 if len(ALL_DEVICES) > NUM_DEVICES: ALL_DEVICES = ALL_DEVICES[-NUM_DEVICES:] CHECKPOINT_DIR = './checkpoint/{}_{}' DICTIONARY_DIR = './data/{}/dictionary.pkl' TRAIN_PARAMS = { 'batch_size': 128 * max(1, len(ALL_DEVICES)), 'epochs': 1000, 'emb_size': 50, 'learning_rate': 0.01, 'ng_sample': 25, 'buffer_size': 128, 'min_count': 5, 'max_count': 10000, 'decay_steps': -1, 'decay_rate': 0.01, 'lower_gradient': -10, 'upper_gradient': 10, } RUN_CONFIG = { 'devices': ALL_DEVICES, 'summary_steps': 50,
def create_model(self, obj_UserModel, stage_scope, batch_x=None):
    """Build one tower of the user model per device and, in the training
    stage, the gradient-averaging and apply ops.

    Side effects visible here: appends one loss summary per tower to
    ``self.summaries``; in the training stage sets ``self._train`` (list of
    apply-gradient ops), ``self._accum`` (group of all tower gradients) and
    ``self._init``.

    Args:
        obj_UserModel: passed to ``self.add_tower`` as ``obj_tower``.
        stage_scope: not referenced in this version of the method.
        batch_x: input batch. When ``None`` the data loader is initialised
            and supplies ``batch_x`` (and ``batch_y`` outside the inference
            stage); when given, the stage must be inference.
    """
    if batch_x is None:
        self.init_dataloader()
        batch_x = self.dataloader.batch_x
        if self.stage != digits.STAGE_INF:
            batch_y = self.dataloader.batch_y
    else:
        # An explicit batch is only supported for inference (no labels).
        assert self.stage == digits.STAGE_INF
        batch_x = batch_x

    available_devices = digits.get_available_gpus()
    if not available_devices:
        # No GPU detected: fall back to a single CPU tower.
        available_devices.append('/cpu:0')

    # available_devices = ['/gpu:0', '/gpu:1'] # DEVELOPMENT : virtual multi-gpu

    # Split the batch over the batch dimension over the number of available gpu's
    if len(available_devices) == 1:
        batch_x_split = [batch_x]
        if self.stage != digits.STAGE_INF:  # Has no labels
            batch_y_split = [batch_y]
    else:
        with tf.name_scope('parallelize'):
            # Split them up
            batch_x_split = tf.split(batch_x, len(available_devices), 0, name='split_batch')
            if self.stage != digits.STAGE_INF:  # Has no labels
                batch_y_split = tf.split(batch_y, len(available_devices), 0, name='split_batch')

    # Get the global regularization loss collection reference as a list named
    # r_loss_global. The collection can now be edited by operating on that list.
    r_loss_global = tf.get_collection_ref(
        ops.GraphKeys.REGULARIZATION_LOSSES)

    # Note:
    # (In training stage)
    # r_loss_train_bak = [] (a backup to store every tower's regularization loss)
    # r_loss_global = (global regularization loss)'s reference
    # For each Tower:
    #     empty r_loss_global
    #     Tower.inference (may add regularization loss globally)
    #     r_loss_train_bak += r_loss_global
    #     ...
    #
    # (restore all towers' reg. loss so the validation stage can use it)
    # r_loss_global[:] = r_loss_train_bak[:]
    #
    # (In validation stage)
    # r_loss_global = (global regularization loss)'s reference
    # r_loss_val_bak = list(r_loss_global) <= copy
    # For each Tower:
    #     empty r_loss_global
    #     pick the elements of r_loss_val_bak whose name starts with
    #     'tower_%d' % dev_i ... and save them to r_loss_global
    #
    #     Tower.inference (will not add any regularization loss because reuse=True)
    #     (Some operations only catch regularization losses belonging to the current tower)
    #     ...
    #
    if self.replica:
        # Save the regularization losses of all towers built in the training stage
        if self.stage != digits.STAGE_TRAIN:
            r_loss_val_bak = list(r_loss_global)

        # Create a list to store regularization losses
        if self.stage == digits.STAGE_TRAIN:
            r_loss_train_bak = list()

    # Run the user model through the build_model function that should be filled in
    grad_towers = []
    for dev_i, dev_name in enumerate(available_devices):
        with tf.device(dev_name):
            if self.replica:
                r_loss_global[:] = []
                if self.stage != digits.STAGE_TRAIN:
                    # NOTE(review): this rebinds the local name instead of
                    # slice-assigning, so the collection emptied on the line
                    # above is not refilled - confirm this is intended.
                    r_loss_global = [
                        loss for loss in r_loss_val_bak
                        if loss.name.startswith('train/tower_%d' % dev_i)
                    ]

            with tf.name_scope('tower_%d' % dev_i) as scope_tower:

                if self.stage != digits.STAGE_INF:
                    tower_model = self.add_tower(obj_tower=obj_UserModel,
                                                 x=batch_x_split[dev_i],
                                                 y=batch_y_split[dev_i])
                else:
                    tower_model = self.add_tower(obj_tower=obj_UserModel,
                                                 x=batch_x_split[dev_i],
                                                 y=None)

                # Replica mode gives every tower its own variable scope;
                # otherwise all towers share 'tower_0' and reuse its variables
                # from the second tower on.
                with tf.variable_scope(
                        'tower_0' if not self.replica else 'tower_%d' % dev_i,
                        reuse=(False if self.replica else dev_i > 0)
                        or self._reuse):

                    tower_model.inference  # touch to initialize

                    # Reuse the variables in this scope for the next tower/device
                    tf.get_variable_scope().reuse_variables()

                    if self.stage == digits.STAGE_INF:
                        # For inferencing we will only use the inference part of the graph
                        continue

                    with tf.name_scope(digits.GraphKeys.LOSS):
                        for loss in self.get_tower_losses(
                                tower_model, dev_i):
                            tf.add_to_collection(digits.GraphKeys.LOSSES,
                                                 loss['loss'])

                        # Assemble all made within this scope so far. The user can add custom
                        # losses to the digits.GraphKeys.LOSSES collection
                        losses = tf.get_collection(digits.GraphKeys.LOSSES,
                                                   scope=scope_tower)

                        if (self.replica) and self.stage == digits.STAGE_TRAIN:
                            r_loss_train_bak += r_loss_global

                        losses += ops.get_collection(
                            ops.GraphKeys.REGULARIZATION_LOSSES, scope=None)
                        tower_loss = tf.add_n(losses, name='loss')

                        self.summaries.append(
                            tf.summary.scalar(tower_loss.op.name, tower_loss))

                    if self.stage == digits.STAGE_TRAIN:
                        grad_tower_losses = []
                        for loss in self.get_tower_losses(
                                tower_model, dev_i):
                            # use loss + regularization loss instead of loss only
                            grad_tower_loss = self.optimizer.compute_gradients(
                                tower_loss, loss['vars'])
                            grad_tower_loss = tower_model.gradientUpdate(
                                grad_tower_loss)
                            grad_tower_losses.append(grad_tower_loss)
                        grad_towers.append(grad_tower_losses)

    # Assemble and average the gradients from all towers
    if self.stage == digits.STAGE_TRAIN:

        if self.replica:
            # Put every tower's regularization losses back into the collection.
            r_loss_global[:] = r_loss_train_bak[:]

        grad_accum = []
        grad_averages = []
        n_gpus = len(available_devices)
        if n_gpus == 1:
            n_losses = len(grad_towers[0])
            for loss in xrange(n_losses):
                if (self.replica):
                    # Replica mode expects a list of per-tower lists.
                    grad_averages.append([grad_towers[0][loss]])
                else:
                    grad_averages.append(grad_towers[0][loss])
                for g, _ in grad_towers[0][loss]:
                    grad_accum.append(g)
        else:
            n_losses = len(grad_towers[0])
            for loss in xrange(n_losses):
                # Four combinations: (plain averaging | NCCL all-reduce) x
                # (shared variables | per-tower replicas).
                if not self.nccl:
                    if (self.replica):
                        grad_averages.append(
                            average_grads([
                                grad_towers[gpu][loss]
                                for gpu in xrange(n_gpus)
                            ]))
                    else:
                        grad_averages.append(
                            average_gradients([
                                grad_towers[gpu][loss]
                                for gpu in xrange(n_gpus)
                            ], 0))
                else:
                    if (self.replica):
                        grad_averages.append(
                            allreduce_gradients_bak([
                                grad_towers[gpu][loss]
                                for gpu in xrange(n_gpus)
                            ]))
                    else:
                        grad_averages.append(
                            allreduce_gradients([
                                grad_towers[gpu][loss]
                                for gpu in xrange(n_gpus)
                            ], 0))
                for gpu in xrange(n_gpus):
                    for g, _ in grad_towers[gpu][loss]:
                        grad_accum.append(g)

        apply_gradient_ops = []
        for grad_avg in grad_averages:
            if (self.replica):
                # Flatten the per-tower lists into one (grad, var) list.
                tmp = []
                for grad_and_vars in grad_avg:
                    for (g, v) in grad_and_vars:
                        tmp.append((g, v))
            else:
                tmp = grad_avg
            apply_gradient_ops.append(
                self.optimizer.apply_gradients(
                    tmp, global_step=self.global_step))

        self._train = apply_gradient_ops
        self._accum = tf.group(*grad_accum)
        if (self.replica):
            self._init = self.get_post_init_ops()
        else:
            self._init = []
def create_model(self, obj_UserModel, stage_scope, batch_x=None):
    """Build one tower of the user model per device and, in the training
    stage, the averaged apply-gradient ops.

    Side effects visible here: appends one loss summary per tower to
    ``self.summaries``; in the training stage sets ``self._train`` to the
    list of apply-gradient ops (one per tower loss).

    Args:
        obj_UserModel: passed to ``self.add_tower`` as ``obj_tower``.
        stage_scope: name scope used for the tower when there is exactly
            one device; with several devices ``tower_<i>`` is used instead.
        batch_x: input batch. When ``None`` the data loader is initialised
            and supplies ``batch_x`` (and ``batch_y`` outside the inference
            stage); when given, the stage must be inference.
    """
    if batch_x is None:
        self.init_dataloader()
        batch_x = self.dataloader.batch_x
        if self.stage != digits.STAGE_INF:
            batch_y = self.dataloader.batch_y
    else:
        # An explicit batch is only supported for inference (no labels).
        assert self.stage == digits.STAGE_INF
        batch_x = batch_x

    available_devices = digits.get_available_gpus()
    if not available_devices:
        # No GPU detected: fall back to a single CPU tower.
        available_devices.append('/cpu:0')

    # available_devices = ['/gpu:0', '/gpu:1'] # DEVELOPMENT : virtual multi-gpu

    # Split the batch over the batch dimension over the number of available gpu's
    if len(available_devices) == 1:
        batch_x_split = [batch_x]
        if self.stage != digits.STAGE_INF:  # Has no labels
            batch_y_split = [batch_y]
    else:
        with tf.name_scope('parallelize'):
            # Split them up
            # NOTE(review): argument order is (axis, num_split, value), which
            # looks like the pre-1.0 tf.split signature - confirm the
            # TensorFlow version this file targets.
            batch_x_split = tf.split(0, len(available_devices), batch_x, name='split_batch')
            if self.stage != digits.STAGE_INF:  # Has no labels
                batch_y_split = tf.split(0, len(available_devices), batch_y, name='split_batch')

    # Run the user model through the build_model function that should be filled in
    grad_towers = []
    for dev_i, dev_name in enumerate(available_devices):
        with tf.device(dev_name):
            current_scope = stage_scope if len(available_devices) == 1 else ('tower_%d' % dev_i)
            with tf.name_scope(current_scope) as scope_tower:

                if self.stage != digits.STAGE_INF:
                    tower_model = self.add_tower(obj_tower=obj_UserModel,
                                                 x=batch_x_split[dev_i],
                                                 y=batch_y_split[dev_i])
                else:
                    tower_model = self.add_tower(obj_tower=obj_UserModel,
                                                 x=batch_x_split[dev_i],
                                                 y=None)

                # Every tower after the first reuses the first tower's variables.
                with tf.variable_scope(digits.GraphKeys.MODEL, reuse=dev_i > 0):
                    tower_model.inference  # touch to initialize

                if self.stage == digits.STAGE_INF:
                    # For inferencing we will only use the inference part of the graph
                    continue

                with tf.name_scope(digits.GraphKeys.LOSS):
                    for loss in self.get_tower_losses(tower_model):
                        tf.add_to_collection(digits.GraphKeys.LOSSES, loss['loss'])

                    # Assemble all made within this scope so far. The user can add custom
                    # losses to the digits.GraphKeys.LOSSES collection
                    losses = tf.get_collection(digits.GraphKeys.LOSSES, scope=scope_tower)
                    losses += ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES, scope=None)
                    tower_loss = tf.add_n(losses, name='loss')

                    self.summaries.append(tf.scalar_summary(tower_loss.op.name, tower_loss))

                # Reuse the variables in this scope for the next tower/device
                tf.get_variable_scope().reuse_variables()

                if self.stage == digits.STAGE_TRAIN:
                    # One gradient list per loss reported by the tower.
                    grad_tower_losses = []
                    for loss in self.get_tower_losses(tower_model):
                        grad_tower_loss = self.optimizer.compute_gradients(loss['loss'], loss['vars'])
                        grad_tower_loss = tower_model.gradientUpdate(grad_tower_loss)
                        grad_tower_losses.append(grad_tower_loss)
                    grad_towers.append(grad_tower_losses)

    # Assemble and average the gradients from all towers
    if self.stage == digits.STAGE_TRAIN:
        n_gpus = len(available_devices)
        if n_gpus == 1:
            grad_averages = grad_towers[0]
        else:
            # The averaging ops are placed on the first device.
            with tf.device(available_devices[0]):
                n_losses = len(grad_towers[0])
                grad_averages = []
                for loss in xrange(n_losses):
                    grad_averages.append(average_gradients([grad_towers[gpu][loss] for gpu in xrange(n_gpus)]))
        apply_gradient_ops = []
        for grad_avg in grad_averages:
            apply_gradient_ops.append(self.optimizer.apply_gradients(grad_avg, global_step=self.global_step))
        self._train = apply_gradient_ops
def create_model(self, obj_UserModel, stage_scope, batch_x=None):
    """Build one tower of the user model per device and, in the training
    stage, the averaged apply-gradient ops.

    Side effects visible here: appends one loss summary per tower to
    ``self.summaries``; in the training stage sets ``self._train`` to the
    list of apply-gradient ops (one per tower loss).

    Args:
        obj_UserModel: passed to ``self.add_tower`` as ``obj_tower``.
        stage_scope: name scope used for the tower when there is exactly
            one device; with several devices ``tower_<i>`` is used instead.
        batch_x: input batch. When ``None`` the data loader is initialised
            and supplies ``batch_x`` (and ``batch_y`` outside the inference
            stage); when given, the stage must be inference.
    """
    logging.debug('Stage: {}'.format(stage_scope))

    # get batch data
    if batch_x is None:
        self.init_dataloader()
        batch_x = self.dataloader.batch_x
        if self.stage != utils.STAGE_INF:
            batch_y = self.dataloader.batch_y
    else:
        # An explicit batch is only supported for inference (no labels).
        assert self.stage == utils.STAGE_INF
        batch_x = batch_x

    logging.debug('batch_x shape={}'.format(batch_x.get_shape().as_list()))
    if self.stage != utils.STAGE_INF:
        logging.debug('batch_y shape={}'.format(
            batch_y.get_shape().as_list()))

    # get available gpu list
    available_devices = utils.get_available_gpus()
    logging.debug('GPUs {}'.format(available_devices))
    if not available_devices:
        # No GPU detected: fall back to a single CPU tower.
        available_devices.append('/cpu:0')

    # available_devices = ['/gpu:0', '/gpu:1'] # DEVELOPMENT: virtual multi-gpu

    # Split the batch over the batch dimension over the number of available gpu's
    if len(available_devices) == 1:
        batch_x_split = [batch_x]
        if self.stage != utils.STAGE_INF:  # Has no labels
            batch_y_split = [batch_y]
    else:
        with tf.name_scope('parallelize'):
            # Split them up
            batch_x_split = tf.split(batch_x,
                                     len(available_devices),
                                     0,
                                     name='split_batch_x')
            if self.stage != utils.STAGE_INF:  # Has no labels
                batch_y_split = tf.split(batch_y,
                                         len(available_devices),
                                         0,
                                         name='split_batch_y')

    # Run the user model through the build_model function that should be filled in
    # collect all types of losses for all gpus
    grad_towers = []
    #with tf.variable_scope(tf.get_variable_scope()):
    for dev_i, dev_name in enumerate(available_devices):
        with tf.device(dev_name):
            current_scope = stage_scope if len(
                available_devices) == 1 else ('tower_{}'.format(dev_i))
            with tf.name_scope(current_scope) as scope_tower:

                # Towers after the first (or any tower when self._reuse is
                # set) reuse the existing model variables.
                with tf.variable_scope(utils.GraphKeys.MODEL,
                                       reuse=dev_i > 0 or self._reuse):
                    if self.stage != utils.STAGE_INF:
                        tower_model = self.add_tower(
                            obj_tower=obj_UserModel,
                            x=batch_x_split[dev_i],
                            y=batch_y_split[dev_i])
                    else:
                        tower_model = self.add_tower(
                            obj_tower=obj_UserModel,
                            x=batch_x_split[dev_i],
                            y=None)

                    tower_model.inference  # touch to initialize

                    # Reuse the variables in this scope for the next tower/device
                    tf.get_variable_scope().reuse_variables()

                    if self.stage == utils.STAGE_INF:
                        # For inferencing we will only use the inference part of the graph
                        continue

                    with tf.name_scope(utils.GraphKeys.LOSS):
                        for loss in self.get_tower_losses(tower_model):
                            tf.add_to_collection(utils.GraphKeys.LOSSES,
                                                 loss['loss'])

                        # Assemble all made within this scope so far. The user can add custom
                        # losses to the utils.GraphKeys.LOSSES collection
                        losses = tf.get_collection(utils.GraphKeys.LOSSES,
                                                   scope=scope_tower)
                        #logging.debug('get_collection: graykeys.LOSSES : {}'.format(losses))
                        #logging.debug('get_collection: graykeys.REGULARIZATION_LOSSES : {}'.format(ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES, scope=None)))
                        losses += ops.get_collection(
                            ops.GraphKeys.REGULARIZATION_LOSSES, scope=None)
                        tower_loss = tf.add_n(losses, name='loss')

                        self.summaries.append(
                            scalar_summary(tower_loss.op.name, tower_loss))

                    if self.stage == utils.STAGE_TRAIN:
                        # collect all types of losses on the gpu
                        grad_tower_losses = []
                        # for each type of loss
                        for loss in self.get_tower_losses(tower_model):
                            # compute gradients of this gpu
                            grad_tower_loss = self.optimizer.compute_gradients(
                                loss['loss'], loss['vars'])
                            grad_tower_loss = tower_model.gradientUpdate(
                                grad_tower_loss)
                            grad_tower_losses.append(grad_tower_loss)
                        grad_towers.append(grad_tower_losses)

    # Assemble and average the gradients from all towers
    if self.stage == utils.STAGE_TRAIN:
        n_gpus = len(available_devices)
        if n_gpus == 1:
            grad_averages = grad_towers[0]
        else:
            # The averaging ops are placed on the first device.
            with tf.device(available_devices[0]):
                n_losses = len(grad_towers[0])
                grad_averages = []
                # for each loss, average its gradients over all gpus
                for loss in xrange(n_losses):
                    grad_averages.append(
                        average_gradients([
                            grad_towers[gpu][loss]
                            for gpu in xrange(n_gpus)
                        ]))
        apply_gradient_ops = []
        for grad_avg in grad_averages:
            # apply average gradients
            apply_gradient_ops.append(
                self.optimizer.apply_gradients(
                    grad_avg, global_step=self.global_step))
        # train op, list
        self._train = apply_gradient_ops
def train(log_dir, args, hp):
    """Run the training loop with a validation pass after every epoch.

    Creates the output directory tree under ``log_dir``, builds the input
    pipeline and the train/eval models, optionally restores a checkpoint and
    then, for each epoch, iterates the training dataset followed by the
    validation dataset until each one raises ``tf.errors.OutOfRangeError``.

    Args:
        log_dir: Root directory for checkpoints, event files and the
            train/eval wav and plot dumps.
        args: Run options. Reads ``train_file``, ``val_file``,
            ``restore_step``, ``epochs``, ``checkpoint_interval``,
            ``summary_interval``, ``eval_interval`` and
            ``summary_val_interval``.
        hp: Hyperparameters. Reads ``batch_size`` and ``ema_decay`` here and
            is forwarded to the dataset, model and logging helpers.
    """
    # create dir
    checkpoint_dir = os.path.join(log_dir, 'checkpoints')
    event_dir = os.path.join(log_dir, 'events')
    checkpoint_path = os.path.join(checkpoint_dir, 'model_ckpt')
    audio_dir = os.path.join(log_dir, 'train_stats', 'wavs')
    plot_dir = os.path.join(log_dir, 'train_stats', 'plots')
    eval_audio_dir = os.path.join(log_dir, 'eval_stats', 'wavs')
    eval_plot_dir = os.path.join(log_dir, 'eval_stats', 'plots')
    for directory in (log_dir, event_dir, checkpoint_dir, audio_dir,
                      plot_dir, eval_audio_dir, eval_plot_dir):
        os.makedirs(directory, exist_ok=True)

    # sess config
    config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(force_gpu_compatible=True,
                                  allow_growth=True),
        allow_soft_placement=True,
        log_device_placement=False,
    )

    # how many gpus will be used
    num_gpus = len(utils.get_available_gpus(config))
    # The batch is scaled by the GPU count; clamp to 1 so that a CPU-only
    # host does not end up asking the dataset for a batch size of 0.
    batch_size = hp.batch_size * max(num_gpus, 1)

    # create dataset and iterator
    train_dataset = get_dataset(args.train_file, True, hp,
                                batch_size=batch_size)
    val_dataset = get_dataset(args.val_file, False, hp,
                              batch_size=batch_size)
    iterator = tf.data.Iterator.from_structure(train_dataset.output_types,
                                               train_dataset.output_shapes)

    # feeder: inputs, targets, input_lengths, local_condition, global_condition
    next_inputs = iterator.get_next()

    # To Do: multi gpu training
    feeder = get_inputs(next_inputs, 1)

    # The same iterator is re-initialised to switch between the two datasets.
    train_init = iterator.make_initializer(train_dataset)
    val_init = iterator.make_initializer(val_dataset)

    # global step
    global_step = tf.Variable(name='global_step', initial_value=-1,
                              trainable=False, dtype=tf.int64)
    global_val_step = tf.Variable(name='global_val_step', initial_value=-1,
                                  trainable=False, dtype=tf.int64)
    global_val_step_op = tf.assign_add(global_val_step, 1,
                                       name='global_val_step_add')

    # apply ema to variable
    ema = tf.train.ExponentialMovingAverage(decay=hp.ema_decay)

    # create model
    # use multi gpu to train
    train_model = create_train_model(feeder, ema, hp, global_step)
    eval_model = create_eval_model(feeder, hp)

    # save info
    saver = tf.train.Saver(max_to_keep=5)
    train_stats = add_stats(train_model)
    train_loss_window = ValueWindow(100)
    val_loss_window = ValueWindow(100)

    with tf.Session(config=config) as sess:
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())

        summary_writer = tf.summary.FileWriter(event_dir, sess.graph)
        try:
            # restore from checkpoint
            if args.restore_step is not None:
                restore_path = '{}-{}'.format(checkpoint_path,
                                              args.restore_step)
                # we don't load the ema to continue training, that is just
                # for evaluating
                saver.restore(sess, restore_path)
                print('Resuming from checkpoint: {}...'.format(
                    args.restore_step))
            else:
                print('Start new training....')

            for epoch in range(args.epochs):
                # Training pass: runs until the dataset is exhausted.
                sess.run(train_init)
                while True:
                    try:
                        start_time = time.time()
                        step, loss, _ = sess.run(
                            [global_step, train_model.loss,
                             train_model.optimize])
                        train_loss_window.append(loss)

                        if step % 10 == 0:
                            message = 'Epoch {:4d} Train Step {:7d} [{:.3f} sec/step step_loss={:.5f} avg_loss={:.5f}]'.format(
                                epoch, step, time.time() - start_time, loss,
                                train_loss_window.average)
                            print(message)

                        if step % args.checkpoint_interval == 0:
                            saver.save(sess, checkpoint_path, step)
                            save_log(sess, step, train_model, plot_dir,
                                     audio_dir, hp)

                        if step % args.summary_interval == 0:
                            print('Writing summary at step {}'.format(step))
                            summary_writer.add_summary(
                                sess.run(train_stats), step)

                        sys.stdout.flush()
                    except tf.errors.OutOfRangeError:
                        break

                # Validation pass: only the loss is evaluated (no optimize
                # op), on the iterator now bound to the validation dataset.
                sess.run(val_init)
                while True:
                    try:
                        start_time = time.time()
                        loss = sess.run(train_model.loss)
                        val_loss_window.append(loss)
                        step = sess.run(global_val_step_op)

                        message = 'Epoch {:4d} Val Step {:7d} [{:.3f} sec/step step_loss={:.5f} avg_loss={:.5f}]'.format(
                            epoch, step, time.time() - start_time, loss,
                            val_loss_window.average)
                        print(message)

                        if step % args.eval_interval == 0:
                            eval_step(eval_model, sess, step, eval_plot_dir,
                                      eval_audio_dir)

                        if step % args.summary_val_interval == 0:
                            add_test_stats(summary_writer, step, loss)

                        sys.stdout.flush()
                    except tf.errors.OutOfRangeError:
                        break
        finally:
            # Flush and close the event file even if training aborts, so
            # buffered summaries are not lost.
            summary_writer.close()
def get_model_fn(features, labels, mode, params):
    """Build the TargetedLearning (bilevel) model graph.

    A base model is trained on class-weighted loss over the training batch,
    while the class weights themselves are updated from the gradient of the
    loss that the perturbed ("bilevel") model obtains on the validation batch.

    Args:
        features: a list of tensors. Index 0 is the training batch; in TRAIN
            mode index 1 is used as the validation batch, otherwise index 0
            is used for both.
        labels: a list of tensors, indexed the same way as `features`.
        mode: ModeKeys.TRAIN or EVAL.
        params: Hyperparameters suitable for tuning.

    Returns:
        A EstimatorSpec object.
    """
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    l2_coeff = params.weight_decay
    outer_lr = 0.1  # params.learning_rate

    train_features, train_labels = features[0], labels[0]
    # Outside of training there is no separate validation batch.
    val_idx = 1 if is_training else 0
    val_features, val_labels = features[val_idx], labels[val_idx]

    # channels first (NCHW) is normally optimal on GPU and channels last
    # (NHWC) on CPU. The exception is Intel MKL on CPU which is optimal with
    # channels_last.
    num_gpus = len(utils.get_available_gpus())
    data_format = params.data_format
    if not data_format:
        data_format = 'channels_last' if num_gpus == 0 else 'channels_first'

    train_ops = []

    # Building the base model
    with tf.compat.v1.variable_scope('base_model') as base_scope:
        if params.dataset == 'mnist':
            base_model = model.BilevelLenet(num_class=params.num_class)
        else:
            base_model = model.BilevelResNet(
                resnet_size=params.num_layers,
                num_classes=params.num_class,
                resnet_version=params.version)
        base_model_logits = base_model(train_features, is_training)
        update_ops = tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.UPDATE_OPS, base_scope.name)
        update_ops.extend(base_model.get_updates_for(train_features))

    # Get the params of the model
    base_model_params = tf.compat.v1.trainable_variables(
        scope=base_scope.name)

    # Set initial weights: one uniform weight per class, as a column vector.
    class_init = np.array(
        [[1.0 / params.num_class] for _ in range(params.num_class)]
    ).astype(np.float32)
    class_weights = tf.compat.v1.get_variable('class_weight',
                                              initializer=class_init)
    # Select each example's class weight via its one-hot label.
    train_one_hot = tf.one_hot(train_labels, len(class_init),
                               on_value=1, off_value=0)
    weight = tf.matmul(tf.cast(train_one_hot, tf.float32), class_weights)

    # Get the loss of the main model
    base_targets = tf.one_hot(train_labels, params.num_class,
                              on_value=1, off_value=0)
    base_model_loss, base_model_preds = _loss_fn(base_model_logits,
                                                 base_targets)
    weighted_loss = tf.reduce_mean(tf.squeeze(weight) * base_model_loss)
    base_l2 = tf.add_n([tf.nn.l2_loss(var) for var in base_model_params])
    base_model_loss_reduced = weighted_loss + l2_coeff * base_l2

    # Define the outer model's logits, which is the bilevel model
    with tf.compat.v1.variable_scope(
            'bilevel_model',
            reuse=tf.compat.v1.AUTO_REUSE) as bilevel_scope:
        base_model.perturb_model_weights(base_model_loss_reduced,
                                         params.learning_rate,
                                         base_scope.name)
        target_logits = base_model(val_features, False)

    target_params = tf.compat.v1.trainable_variables(
        scope=bilevel_scope.name)
    target_targets = tf.one_hot(val_labels, params.num_class,
                                on_value=1, off_value=0)
    target_loss, target_preds = _loss_fn(target_logits, target_targets)
    target_mean = tf.reduce_mean(target_loss)
    target_l2 = tf.add_n([tf.nn.l2_loss(var) for var in target_params])
    target_loss = target_mean + l2_coeff * target_l2

    # Calculate the gradients with respect to the class weights and
    # normalize it
    class_weight_gradient = tf.gradients(target_loss, class_weights)
    update_class_weights = tf.clip_by_value(
        class_weights - outer_lr * class_weight_gradient[0],
        clip_value_min=0.0,
        clip_value_max=100.0)
    sum_class_weights = tf.reduce_sum(update_class_weights) + 2e-12
    update_class_weights = update_class_weights / sum_class_weights

    # Update the weight every n steps.
    weight_update_hook = utils.WeightUpdateHook1(
        class_weights,
        update_class_weights,
        every_n_steps=10,
        log_every_n_step=params.log_freq)

    # Calculate the base model grads
    base_model_grads = tf.gradients(base_model_loss_reduced,
                                    base_model_params)
    base_model_gradvars = zip(base_model_grads, base_model_params)

    # Piecewise-constant schedule: decay by 10x at each epoch boundary.
    boundaries = [
        params.num_batches_per_epoch * epoch_mark
        for epoch_mark in np.array([91, 136, 182], dtype=np.int64)
    ]
    staged_lr = [
        params.learning_rate * factor for factor in [1, 0.1, 0.01, 0.001]
    ]
    learning_rate = tf.compat.v1.train.piecewise_constant(
        tf.compat.v1.train.get_global_step(), boundaries, staged_lr)

    # Define optimizer
    optimizer = tf.compat.v1.train.MomentumOptimizer(
        learning_rate=learning_rate, momentum=params.momentum)
    # optimizer = tf.train.AdamOptimizer(learning_rate=params.learning_rate)
    apply_op = optimizer.apply_gradients(
        base_model_gradvars,
        global_step=tf.compat.v1.train.get_global_step())
    train_ops.append(apply_op)

    # Calculate metrics
    target_accuracy = tf.compat.v1.metrics.accuracy(
        val_labels, target_preds['classes'])
    accuracy = tf.compat.v1.metrics.accuracy(
        train_labels, base_model_preds['classes'])

    metrics = {'obj/accuracy': accuracy}
    # The following metrics are for the binary classification scenario.
    # They should be adopted for multiclass classification tasks.
    if params.num_class == 2:
        train_labels_mask = tf.cast(train_labels, tf.bool)
        inverse_train_labels_mask = tf.cast(
            tf.math.logical_not(train_labels_mask), tf.float32)
        predicted_mask = tf.cast(base_model_preds['classes'], tf.bool)
        inverse_prediction_mask = tf.cast(
            tf.math.logical_not(predicted_mask), tf.float32)
        recall_minor = tf.compat.v1.metrics.recall(
            inverse_train_labels_mask, inverse_prediction_mask)
        recall_major = tf.compat.v1.metrics.recall(
            train_labels, base_model_preds['classes'])
        precision_minor = tf.compat.v1.metrics.precision(
            inverse_train_labels_mask, inverse_prediction_mask)
        metrics.update({
            'metrics/recall_minor': recall_minor,
            'metrics/recall_major': recall_major,
            'metrics/precision_minor': precision_minor,
        })

    examples_sec_hook = utils.ExamplesPerSecondHook(
        params.train_batch_size, every_n_steps=params.log_freq)

    tensors_to_log = {
        'Target loss': target_loss,
        'Main loss': base_model_loss_reduced,
        'Target accuracy': target_accuracy[1],
        'Main accuracy': accuracy[1],
        'learning_rates': learning_rate,
        'step': tf.compat.v1.train.get_global_step(),
    }
    logging_hook = tf.estimator.LoggingTensorHook(
        tensors=tensors_to_log, every_n_iter=params.log_freq)

    train_hooks = [weight_update_hook, logging_hook, examples_sec_hook]

    # Run the collected update ops together with the gradient step.
    train_ops.extend(update_ops)
    train_op = tf.group(*train_ops)

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=target_preds,
        loss=base_model_loss_reduced,
        train_op=train_op,
        training_hooks=train_hooks,
        eval_metric_ops=metrics)
'eurosat': ['resnet56'] } attack_names = ['fgsm', 'pgd', 'deepfool', 'bim', 'apgd','cw'] ### add combinations into queues manager = multiprocessing.Manager() q = manager.Queue() for attack_name in attack_names: for dataset in datasets: for model_name in model_dict[dataset]: q.put((attack_name, dataset, model_name)) p_list = [] for i in range(len(get_available_gpus())): gpu_id = i p = multiprocessing.Process(target=train, args=(str(gpu_id), )) p_list.append(p) p.start() for i in p_list: i.join() print("All processed finished.")
patience=int(patience / 4), verbose=1) class MyCbk(keras.callbacks.Callback): def __init__(self, model): keras.callbacks.Callback.__init__(self) self.model_to_save = model def on_epoch_end(self, epoch, logs=None): fmt = checkpoint_models_path + 'model.%02d-%.4f.hdf5' highest_acc = get_highest_acc() if float(logs['val_acc']) > highest_acc: self.model_to_save.save(fmt % (epoch, logs['val_acc'])) # Load our model, added support for Multi-GPUs num_gpu = len(get_available_gpus()) if num_gpu >= 2: with tf.device("/cpu:0"): model = build_model() if pretrained_path is not None: model.load_weights(pretrained_path) new_model = multi_gpu_model(model, gpus=num_gpu) # rewrite the callback: saving through the original model and not the multi-gpu model. model_checkpoint = MyCbk(model) else: new_model = build_model() if pretrained_path is not None: new_model.load_weights(pretrained_path) adam = keras.optimizers.Adam(lr=1e-4,
def train(data_dict, emebedding_path, language):
    """Train a 1-D CNN on review text for one language and plot the results.

    Tokenizes the review contents, loads pre-trained word vectors, fits a
    frozen-embedding Conv1D model on the first 80% of the data (with a
    further 10% validation split), checkpoints the best model under
    ``models/`` and finally shows a loss plot and a prediction plot.

    Args:
        data_dict: Iterable of records; each one provides ``'content'``
            (the review text) and ``'steam_weight'`` (the label).
        emebedding_path: Currently ignored: the path is always recomputed
            from ``EMBEDDING_DIR`` and ``embedding_model_path(language)``
            (see the TODO below). Kept for interface compatibility.
        language: Language tag; selects the tokenizer and embedding file and
            is used in checkpoint file names and plot titles.
    """
    import os  # local import: only needed to create the checkpoint directory

    # pre-process train data
    # data_dict = to_normal(data_dict)
    MAX_NB_WORDS = 100000
    max_seq_len = 1000

    # load train data
    raw_docs_train = [data['content'] for data in data_dict]
    labels = [data['steam_weight'] for data in data_dict]

    word_seq, word_index = kt_tokenizer(raw_docs_train, language,
                                        MAX_NB_WORDS, max_seq_len)
    print('{0} reviews in {1}'.format(len(word_seq), language))

    print('loading word embeddings...')
    emebedding_path = EMBEDDING_DIR + embedding_model_path(language)  # TODO
    embeddings_index = {}
    # Context manager so the file is closed even if a line fails to parse.
    with codecs.open(emebedding_path, encoding='utf-8') as f:
        for line in f:
            values = line.rstrip().rsplit(' ')
            embeddings_index[values[0]] = np.asarray(values[1:],
                                                     dtype='float32')
    print('found %s word vectors' % len(embeddings_index))

    y_all = np.array(labels)

    # training params
    batch_size = 256
    num_epochs = 100
    # NOTE(review): forwarded as-is to multi_gpu_model(gpus=...); confirm
    # that get_available_gpus() returns a count or a list of GPU ids here.
    num_gpus = get_available_gpus()

    # model parameters
    num_filters = 32
    embed_dim = 300
    weight_decay = 1e-4
    learning_rate = 0.001

    # split data
    split_fraction = 0.8
    split_index = int(len(word_seq) * split_fraction)
    word_seq_train = word_seq[:split_index]
    word_seq_test = word_seq[split_index:]
    y_train = y_all[:split_index]
    y_test = y_all[split_index:]

    # embedding matrix
    print('preparing embedding matrix...')
    words_not_found = []
    nb_words = min(MAX_NB_WORDS, len(word_index))
    embedding_matrix = np.zeros((nb_words, embed_dim))
    for word, i in word_index.items():
        if i >= nb_words:
            continue
        embedding_vector = embeddings_index.get(word)
        if (embedding_vector is not None) and len(embedding_vector) > 0:
            embedding_matrix[i] = embedding_vector
        else:
            # words not found in embedding index will be all-zeros.
            words_not_found.append(word)
    print('number of null word embeddings: %d' %
          np.sum(np.sum(embedding_matrix, axis=1) == 0))
    # np.random.choice raises ValueError on an empty sequence, so only
    # sample when at least one word was actually missing.
    if words_not_found:
        print("sample words not found: ",
              np.random.choice(words_not_found, 20))

    # CNN architecture
    print("Defining CNN ...")
    model = Sequential()
    model.add(
        Embedding(nb_words,
                  embed_dim,
                  weights=[embedding_matrix],
                  input_length=max_seq_len,
                  trainable=False))
    model.add(Conv1D(num_filters, 7, activation='relu', padding='same'))
    model.add(MaxPooling1D(2))
    model.add(Conv1D(num_filters, 7, activation='relu', padding='same'))
    model.add(GlobalMaxPooling1D())
    # model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
    # model.add(Dense(32, activation='relu', kernel_regularizer=regularizers.l2(weight_decay)))
    # model.add(Dropout(0.5))
    model.add(
        Dense(16,
              activation='relu',
              kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    adam = optimizers.Adam(lr=learning_rate,
                           beta_1=0.9,
                           beta_2=0.999,
                           epsilon=1e-08,
                           decay=0.0)

    # Best effort: fall back to the plain model when multi-GPU replication
    # is not possible. `except Exception` keeps the fallback while letting
    # KeyboardInterrupt / SystemExit propagate.
    try:
        model = multi_gpu_model(model, gpus=num_gpus)
    except Exception:
        print("Training using single GPU or CPU..")
    else:
        print("Training using {0} GPUs..".format(num_gpus))

    model.compile(loss='binary_crossentropy',
                  optimizer=adam,
                  metrics=['accuracy'])
    # summary() prints by itself and returns None; wrapping it in print()
    # would emit a stray "None".
    model.summary()

    # save model
    # Make sure the checkpoint directory exists before the first save.
    os.makedirs('models', exist_ok=True)
    filepath = ('models/' + language + '.' +
                'weights.ep{epoch:03d}.loss{loss:.3f}.val_loss{val_loss:.3f}.h5')
    checkpoint = ModelCheckpoint(filepath,
                                 monitor='val_loss',
                                 verbose=0,
                                 save_best_only=True,
                                 save_weights_only=False,
                                 mode='auto',
                                 period=1)
    callbacks_list = [checkpoint]

    # model training
    hist = model.fit(word_seq_train,
                     y_train,
                     batch_size=batch_size,
                     epochs=num_epochs,
                     callbacks=callbacks_list,
                     validation_split=0.1,
                     shuffle=True,
                     verbose=2)

    # plot loss
    plt.title(language + ' ' + 'model')
    plt.plot(hist.history['loss'])
    plt.plot(hist.history['val_loss'])
    plt.legend(['loss', 'val_loss'])
    plt.show()

    # plot y_predict
    plt.title(language + ' ' + 'predict')
    plt.plot(y_test)
    plt.plot(model.predict(word_seq_test))
    plt.legend(['y_text', 'y_predict'])
    plt.show()