def run_eval():
    """Evaluate on test or validation.

    Builds the evaluation graph once and then polls the checkpoint state in
    ``FLAGS.summary_dir + '/train/'``.  Whenever the newest checkpoint carries
    a step number that has not been evaluated yet, it is restored into a
    fresh session, ``FLAGS.eval_size // 5`` evaluation runs are executed and
    the aggregated counts are written as a summary to
    ``FLAGS.summary_dir + '/test'``.

    Every two-minute wait advances a pause counter by 2; the function stops
    once that counter reaches its limit of 360 without a new checkpoint.
    """
    with tf.Graph().as_default():
        # Input pipeline, model and the result tensors that get evaluated.
        features = get_features(False, 5)
        build_model = f_model.multi_gpu_model
        outputs = build_model(features)
        merged = outputs['summary']
        correct_prediction_sum = outputs['correct']
        almost_correct_sum = outputs['almost']

        saver = tf.train.Saver()
        test_writer = tf.summary.FileWriter(FLAGS.summary_dir + '/test')

        def newest_step(state):
            # The step is the text after the last '-' of the checkpoint file
            # name; it is kept as a string, exactly as found in the path.
            file_name = state.model_checkpoint_path.split('/')[-1]
            return file_name.split('-')[-1]

        seen_step = -1
        # Initial wait before the first look at the checkpoint directory.
        time.sleep(3 * 60)
        paused = 0
        while paused < 360:
            ckpt = tf.train.get_checkpoint_state(FLAGS.summary_dir + '/train/')
            if not (ckpt and ckpt.model_checkpoint_path):
                # Nothing to restore yet: wait and look again.
                time.sleep(2 * 60)
                paused += 2
                continue
            global_step = newest_step(ckpt)

            # Wait until a checkpoint with a new step number appears.
            while seen_step == int(global_step):
                time.sleep(2 * 60)
                ckpt = tf.train.get_checkpoint_state(
                    FLAGS.summary_dir + '/train/')
                global_step = newest_step(ckpt)
                paused += 2
                if paused > 360:
                    test_writer.close()
                    return
            paused = 0

            seen_step = int(global_step)
            print(seen_step)

            sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
            saver.restore(sess, ckpt.model_checkpoint_path)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            try:
                total_tp = 0
                total_almost = 0
                for i in range(FLAGS.eval_size // 5):
                    summary_j, tp, almost = sess.run(
                        [merged, correct_prediction_sum, almost_correct_sum])
                    total_tp += tp
                    total_almost += almost

                total_false = FLAGS.eval_size - total_tp
                total_almost_false = FLAGS.eval_size - total_almost
                # Extend the last merged summary with the aggregated counts.
                summary_tp = tf.Summary.FromString(summary_j)
                for tag, count in (
                        ('correct_prediction', total_tp),
                        ('wrong_prediction', total_false),
                        ('almost_wrong_prediction', total_almost_false)):
                    summary_tp.value.add(tag=tag, simple_value=count)
                test_writer.add_summary(summary_tp, global_step)
                print('write done')
            except tf.errors.OutOfRangeError:
                print('Done eval for %d steps.' % i)
            finally:
                # When done, ask the threads to stop.
                coord.request_stop()
            # Wait for threads to finish.
            coord.join(threads)
            sess.close()
        test_writer.close()
def _build_session(self):
    """Return a new ``tf.Session``; XLA JIT is switched on if ``self.use_xla``."""
    config = tf.ConfigProto()
    if not self.use_xla:
        return tf.Session(config=config)

    # ON_2 is the global JIT level the XLA path asks for.
    optimizer_options = config.graph_options.optimizer_options
    optimizer_options.global_jit_level = tf.OptimizerOptions.ON_2
    return tf.Session(config=config)
os.environ['CUDA_VISIBLE_DEVICES'] = '0' random.seed(1111) np.random.seed(1111) tf.set_random_seed(1111) train_batch_size = 128 test_batch_size = 128 predict_batch_size = 1 predict_users_num = 100 predict_ads_num = 99 info = pkl.load(open('ali_test_info_4days.pkl', 'rb')) tf.reset_default_graph() config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) model = Model(info[0], info[1], info[2], info[3], predict_batch_size, predict_ads_num) sess.run(tf.global_variables_initializer()) model.restore_(sess, './save_path_alibaba_new/ckpt') knn_key = pkl.load(open('knn_table/ali_knn_key.pkl', 'rb')) mypath = './test_data' files = listdir(mypath) csv_list = [] for f in files: fullpath = join(mypath, f) if isfile(fullpath):
import facenet
import detect_face
import os
import time
import pickle
import sys

# File-system locations used by the script.
img_path = 'abc.jpg'
modeldir = './model/20170511-185253.pb'
classifier_filename = './class/classifier.pkl'
npy = './npy'
train_img = "./train_img"

with tf.Graph().as_default():
    # Cap this process at 60% of the GPU memory.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
    session_config = tf.ConfigProto(gpu_options=gpu_options,
                                    log_device_placement=False)
    sess = tf.Session(config=session_config)
    with sess.as_default():
        # The three MTCNN stage networks, built from the files under `npy`.
        pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)

        minsize = 20  # minimum size of face
        threshold = [0.6, 0.7, 0.7]  # three steps's threshold
        factor = 0.709  # scale factor
        margin = 44
        frame_interval = 3
        batch_size = 1000
        image_size = 182
        input_image_size = 160

        # One entry per item found in the training image directory, sorted.
        HumanNames = sorted(os.listdir(train_img))
def main(_):
    """Build the training ops and run ``FLAGS.train_steps`` training steps.

    Per step, the mini-batch size, loss and error are logged; with the
    'kfac' optimizer the learning rate, momentum, damping, rho and
    qmodel_change values are logged as well.
    """
    # Resource variables must be enabled when update_damping_immediately is
    # used (enabling them is recommended in any case).
    if FLAGS.update_damping_immediately:
        tf.enable_resource_variables()

    if FLAGS.use_control_flow_v2:
        tf.enable_control_flow_v2()

    if FLAGS.use_keras_model and not FLAGS.auto_register_layers:
        raise ValueError('Require auto_register_layers=True when using Keras '
                         'model.')

    tf.set_random_seed(FLAGS.seed)
    (train_op, opt, batch_loss, batch_error, batch_size_schedule,
     batch_size) = construct_train_quants()

    global_step = tf.train.get_or_create_global_step()

    using_kfac = FLAGS.optimizer == 'kfac'
    if using_kfac:
        # Reading these quantities under a control dependency on train_op
        # guarantees their up-to-date values.  Without it there is a race
        # and old values may be returned nondeterministically.  Fetching
        # them in a separate sess.run call would also work, but can cause
        # problems with training frameworks that use hooks (see below).
        with tf.control_dependencies([train_op]):
            learning_rate = opt.learning_rate
            momentum = opt.momentum
            damping = opt.damping
            rho = opt.rho
            qmodel_change = opt.qmodel_change

    # allow_soft_placement=True is needed because the optimizer may try to
    # place ops such as "mod" on the GPU, which isn't supported.
    config = tf.ConfigProto(allow_soft_placement=True)

    # It is good practice to put everything into a single sess.run call:
    # some training frameworks run hooks on every sess.run call and expect
    # exactly one call per optimizer iteration.  A hook that e.g. prints the
    # loss would advance the mini-batch and break the "cached batch"
    # mechanism the damping adaptation may rely on (and the loss would be
    # evaluated twice).  This is not done completely here because it is
    # inconvenient.

    # Train model.
    with tf.train.MonitoredTrainingSession(save_checkpoint_secs=30,
                                           config=config) as sess:
        for _ in range(FLAGS.train_steps):
            step = sess.run(global_step)

            if FLAGS.use_batch_size_schedule:
                # Past the end of the schedule its last entry is reused.
                schedule_index = min(step, len(batch_size_schedule) - 1)
                batch_size_ = batch_size_schedule[schedule_index]
            else:
                batch_size_ = FLAGS.batch_size
            feed = {batch_size: batch_size_}

            if using_kfac:
                fetches = [
                    train_op, batch_loss, batch_error, learning_rate,
                    momentum, damping, rho, qmodel_change
                ]
                (_, batch_loss_, batch_error_, learning_rate_, momentum_,
                 damping_, rho_, qmodel_change_) = sess.run(
                     fetches, feed_dict=feed)
            else:
                _, batch_loss_, batch_error_ = sess.run(
                    [train_op, batch_loss, batch_error], feed_dict=feed)

            # Print training stats.
            tf.logging.info('iteration: %d', step)
            tf.logging.info(
                'mini-batch size: %d | mini-batch loss = %f | mini-batch error = %f ',
                batch_size_, batch_loss_, batch_error_)

            if using_kfac:
                tf.logging.info('learning_rate = %f | momentum = %f',
                                learning_rate_, momentum_)
                tf.logging.info('damping = %f | rho = %f | qmodel_change = %f',
                                damping_, rho_, qmodel_change_)
            tf.logging.info('----')
def main(_):
    """Compare a single-device LPT field with its Mesh TensorFlow version.

    Sets up a 2x2 placement mesh over four ``/job:mesh`` tasks, generates
    initial conditions, computes the painted field once with plain
    TensorFlow and once through ``lpt_prototype`` on the mesh, then saves
    the three arrays ('init', 'reference_final', 'mesh_pyramid') with
    ``np.save`` and a four-panel figure to 'comparison.png'.  The process is
    terminated with ``exit(0)`` at the end.
    """
    mesh_shape = [("row", 2), ("col", 2)]
    layout_rules = [("nx_lr", "row"), ("ny_lr", "col"),
                    ("nx", "row"), ("ny", "col"),
                    ("ty_lr", "row"), ("tz_lr", "col"),
                    ("nx_block", "row"), ("ny_block", "col")]

    mesh_hosts = ["localhost:%d" % (8222 + j) for j in range(4)]

    # Create a cluster from the mesh hosts.
    cluster = tf.train.ClusterSpec({"mesh": mesh_hosts,
                                    "master": ["localhost:8488"]})

    # Create a server for local mesh members
    server = tf.train.Server(cluster, job_name="master", task_index=0)

    mesh_devices = ['/job:mesh/task:%d' % i
                    for i in range(cluster.num_tasks("mesh"))]
    print("List of devices", mesh_devices)
    mesh_impl = mtf.placement_mesh_impl.PlacementMeshImpl(
        mesh_shape, layout_rules, mesh_devices)

    # Build the model
    # Create computational graphs and some initializations
    graph = mtf.Graph()
    mesh = mtf.Mesh(graph, "nbody_mesh")

    # Compute a few things first, using simple tensorflow
    a0 = FLAGS.a0
    a_final = FLAGS.af
    nsteps = FLAGS.nsteps
    bs, nc = FLAGS.box_size, FLAGS.nc

    # Parse the power-spectrum table once and take its first two columns
    # (previously the same file was loaded twice, once per column).
    power_table = np.loadtxt('../flowpm/data/Planck15_a1p00.txt').T
    klin, plin = power_table[0], power_table[1]
    ipklin = iuspline(klin, plin)

    # Only needed by the currently disabled nbody() evolution below.
    stages = np.linspace(a0, a_final, nsteps, endpoint=True)

    # Generate a batch of 3D initial conditions
    initial_conditions = flowpm.linear_field(
        FLAGS.nc,         # size of the cube
        FLAGS.box_size,   # Physical size of the cube
        ipklin,           # Initial power spectrum
        batch_size=FLAGS.batch_size)

    state = lpt_init(initial_conditions, a0=a0, order=1)
    final_state = state  # nbody(state, stages, nc)
    tfinal_field = cic_paint(tf.zeros_like(initial_conditions),
                             final_state[0])

    # Compute necessary Fourier kernels
    kvec = flowpm.kernels.fftk((nc, nc, nc), symmetric=False)
    from flowpm.kernels import laplace_kernel, gradient_kernel
    lap = tf.cast(laplace_kernel(kvec), tf.complex64)
    grad_x = gradient_kernel(kvec, 0)
    grad_y = gradient_kernel(kvec, 1)
    grad_z = gradient_kernel(kvec, 2)
    derivs = [lap, grad_x, grad_y, grad_z]

    mesh_final_field = lpt_prototype(mesh, initial_conditions, derivs,
                                     bs=FLAGS.box_size, nc=FLAGS.nc,
                                     batch_size=FLAGS.batch_size)

    # Lower mesh computation
    lowering = mtf.Lowering(graph, {mesh: mesh_impl})

    # Retrieve output of computation
    result = lowering.export_to_tf_tensor(mesh_final_field)

    session_config = tf.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=False)
    with tf.Session(server.target, config=session_config) as sess:
        # Distinct names: the old code reused `a`, clobbering the final
        # scale factor read from the flags.
        init_field, reference_field, mesh_field = sess.run(
            [initial_conditions, tfinal_field, result])

    np.save('init', init_field)
    np.save('reference_final', reference_field)
    np.save('mesh_pyramid', mesh_field)

    # Each panel shows the first batch element summed along axis 2.
    plt.figure(figsize=(15, 3))
    plt.subplot(141)
    plt.imshow(init_field[0].sum(axis=2))
    plt.title('Initial Conditions')

    plt.subplot(142)
    plt.imshow(reference_field[0].sum(axis=2))
    plt.title('TensorFlow (single GPU)')
    plt.colorbar()

    plt.subplot(143)
    plt.imshow(mesh_field[0].sum(axis=2))
    plt.title('Mesh TensorFlow')
    plt.colorbar()

    plt.subplot(144)
    plt.imshow((reference_field[0] - mesh_field[0]).sum(axis=2))
    plt.title('Residuals')
    plt.colorbar()

    plt.savefig("comparison.png")

    exit(0)
def train(flags):
    """Model training.

    Builds the model selected by ``flags.model_name``, trains it batch by
    batch for the total number of steps given in
    ``flags.how_many_training_steps``, evaluates on the validation split
    every ``flags.eval_step_interval`` steps (and on the last step), and
    finally measures accuracy on the testing split.  Summaries, the graph,
    labels, flags and model weights are written under
    ``flags.summaries_dir`` / ``flags.train_dir``.

    Args:
      flags: training configuration; ``flags.training`` is set to True here.

    Raises:
      Exception: if the step list and the learning-rate list differ in
        length.
      ValueError: on an unsupported ``flags.optimizer`` or
        ``flags.lr_schedule``.
    """
    flags.training = True

    # Set the verbosity based on flags (default is INFO, so we see all
    # messages)
    logging.set_verbosity(flags.verbosity)

    # Start a new TensorFlow session.
    tf.reset_default_graph()
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    sess = tf.Session(config=session_config)
    tf.keras.backend.set_session(sess)

    audio_processor = input_data.AudioProcessor(flags)

    time_shift_samples = int((flags.time_shift_ms * flags.sample_rate) / 1000)

    # Step counts and learning rates are comma-separated lists defining the
    # rate of each training stage.  For example
    # --how_many_training_steps=10000,3000 --learning_rate=0.001,0.0001
    # runs 13,000 training loops in total, with a rate of 0.001 for the
    # first 10,000 and 0.0001 for the final 3,000.
    training_steps_list = [
        int(steps) for steps in flags.how_many_training_steps.split(',')
    ]
    learning_rates_list = [
        float(rate) for rate in flags.learning_rate.split(',')
    ]
    if len(training_steps_list) != len(learning_rates_list):
        raise Exception(
            '--how_many_training_steps and --learning_rate must be equal length '
            'lists, but are %d and %d long instead' %
            (len(training_steps_list), len(learning_rates_list)))
    logging.info(flags)
    model = models.MODELS[flags.model_name](flags)
    logging.info(model.summary())

    # save model summary
    utils.save_model_summary(model, flags.train_dir)

    # save model and data flags
    with open(os.path.join(flags.train_dir, 'flags.txt'), 'wt') as f:
        pprint.pprint(flags, stream=f)

    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    # An Adam optimizer is constructed up front and then replaced by the
    # branch matching flags.optimizer.
    optimizer = tf.keras.optimizers.Adam(epsilon=flags.optimizer_epsilon)

    if flags.optimizer == 'adam':
        optimizer = tf.keras.optimizers.Adam(epsilon=flags.optimizer_epsilon)
    elif flags.optimizer == 'momentum':
        optimizer = tf.keras.optimizers.SGD(momentum=0.9)
    elif flags.optimizer == 'novograd':
        optimizer = tfa.optimizers.NovoGrad(
            lr=0.05,
            beta_1=flags.novograd_beta_1,
            beta_2=flags.novograd_beta_2,
            weight_decay=flags.novograd_weight_decay,
            grad_averaging=bool(flags.novograd_grad_averaging))
    else:
        raise ValueError('Unsupported optimizer:%s' % flags.optimizer)

    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

    train_writer = tf.summary.FileWriter(flags.summaries_dir + '/train',
                                         sess.graph)
    validation_writer = tf.summary.FileWriter(flags.summaries_dir +
                                              '/validation')

    sess.run(tf.global_variables_initializer())

    start_step = 1

    logging.info('Training from step: %d ', start_step)

    # Save graph.pbtxt.
    tf.train.write_graph(sess.graph_def, flags.train_dir, 'graph.pbtxt')

    # Save list of words.
    labels_path = os.path.join(flags.train_dir, 'labels.txt')
    with tf.io.gfile.GFile(labels_path, 'w') as f:
        f.write('\n'.join(audio_processor.words_list))

    best_accuracy = 0.0

    # prepare parameters for exp learning rate decay
    training_steps_max = np.sum(training_steps_list)
    lr_init = learning_rates_list[0]
    exp_rate = -np.log(learning_rates_list[-1] / lr_init) / training_steps_max

    def accuracy_summary(accuracy):
        # Single-value summary holding one accuracy measurement.
        return tf.Summary(value=[
            tf.Summary.Value(tag='accuracy', simple_value=accuracy),
        ])

    # Training loop.
    for training_step in range(start_step, training_steps_max + 1):
        # Pull the audio samples we'll use for training.
        train_fingerprints, train_ground_truth = audio_processor.get_data(
            flags.batch_size, 0, flags, flags.background_frequency,
            flags.background_volume, time_shift_samples, 'training',
            flags.resample, flags.volume_resample, sess)

        if flags.lr_schedule == 'exp':
            learning_rate_value = lr_init * np.exp(-exp_rate * training_step)
        elif flags.lr_schedule == 'linear':
            # Pick the rate of the stage the current step falls into.
            training_steps_sum = 0
            for stage_steps, stage_rate in zip(training_steps_list,
                                               learning_rates_list):
                training_steps_sum += stage_steps
                if training_step <= training_steps_sum:
                    learning_rate_value = stage_rate
                    break
        else:
            raise ValueError('Wrong lr_schedule: %s' % flags.lr_schedule)

        tf.keras.backend.set_value(model.optimizer.lr, learning_rate_value)
        result = model.train_on_batch(train_fingerprints, train_ground_truth)

        train_writer.add_summary(accuracy_summary(result[1]), training_step)

        logging.info(
            'Step #%d: rate %f, accuracy %.2f%%, cross entropy %f',
            training_step, learning_rate_value, result[1] * 100, result[0])

        is_last_step = (training_step == training_steps_max)
        if (training_step % flags.eval_step_interval) == 0 or is_last_step:
            # Only whole batches of the validation split are evaluated.
            set_size = audio_processor.set_size('validation')
            set_size = int(set_size / flags.batch_size) * flags.batch_size
            total_accuracy = 0.0
            count = 0.0
            for offset in range(0, set_size, flags.batch_size):
                validation_fingerprints, validation_ground_truth = (
                    audio_processor.get_data(flags.batch_size, offset, flags,
                                             0.0, 0.0, 0, 'validation', 0.0,
                                             0.0, sess))

                # Run a validation step and record its accuracy for
                # TensorBoard.
                result = model.test_on_batch(validation_fingerprints,
                                             validation_ground_truth)

                validation_writer.add_summary(accuracy_summary(result[1]),
                                              training_step)

                total_accuracy += result[1]
                count += 1.0

            total_accuracy = total_accuracy / count
            logging.info('Step %d: Validation accuracy = %.2f%% (N=%d)',
                         training_step, total_accuracy * 100, set_size)

            model.save_weights(flags.train_dir + 'train/' +
                               str(int(best_accuracy * 10000)) + 'weights_' +
                               str(training_step))

            # Save the model checkpoint when validation accuracy improves
            if total_accuracy >= best_accuracy:
                best_accuracy = total_accuracy
                # overwrite the best model weights
                model.save_weights(flags.train_dir + 'best_weights')
            logging.info('So far the best validation accuracy is %.2f%%',
                         (best_accuracy * 100))

    tf.keras.backend.set_learning_phase(0)
    set_size = audio_processor.set_size('testing')
    set_size = int(set_size / flags.batch_size) * flags.batch_size
    logging.info('set_size=%d', set_size)
    total_accuracy = 0.0
    count = 0.0

    for offset in range(0, set_size, flags.batch_size):
        test_fingerprints, test_ground_truth = audio_processor.get_data(
            flags.batch_size, offset, flags, 0.0, 0.0, 0, 'testing', 0.0, 0.0,
            sess)

        result = model.test_on_batch(test_fingerprints, test_ground_truth)

        total_accuracy += result[1]
        count += 1.0
    total_accuracy = total_accuracy / count

    logging.info('Final test accuracy = %.2f%% (N=%d)',
                 total_accuracy * 100, set_size)
    with open(os.path.join(flags.train_dir, 'accuracy_last.txt'), 'wt') as fd:
        fd.write(str(total_accuracy * 100))
    model.save_weights(flags.train_dir + 'last_weights')
def main(args):
    """Train and evaluate a CSRM model for every experiment row in 'exp.csv'.

    For each row the experiment settings are copied into ``args``, the
    training and validation sequences are loaded, a fresh graph and session
    are created, and the results returned by ``model.train`` are printed and
    appended to ``NEW_LOGGER_<expname>.txt``.

    Args:
      args: dict of settings; the per-experiment keys ('expname',
        'sessionid', 'itemid', 'data_folder', 'valid_data', 'train_data',
        'freq', 'n_items') are overwritten on every iteration.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    def first_five(values):
        # Comma-joined text of the elements at indices 0..4.
        return ','.join(str(values[k]) for k in range(5))

    exps = pd.read_csv('exp.csv')
    for _, row in exps.iterrows():
        gc.collect()

        # Copy this experiment's settings from the CSV row into args.
        args['expname'] = row['name']
        args['sessionid'] = row['sessionid']
        args['itemid'] = row['itemid']
        args['data_folder'] = row['path']
        args['valid_data'] = row['test']
        args['train_data'] = row['train']
        args['freq'] = row['freq']

        print('Train:', args['train_data'], ' -- Test:', args['valid_data'],
              ' -- Freq:', args['freq'])
        with open("LOGGER_" + args['expname'] + ".txt", "a") as myfile:
            myfile.write(row['train'] + ", " + row['test'] + "\n")

        # split patterns to train_patterns and test_patterns
        print('Start Data Preprocessing: Training Set')
        train_path = args['data_folder'] + '/' + args['train_data']
        train, itemsIDs, freqs, old_new = load_sequence(
            train_path, args['itemid'], args['sessionid'], itemsIDs=[])
        args['n_items'] = len(itemsIDs) + 1

        print('Start Data Preprocessing: Testing Set')
        valid_path = args['data_folder'] + '/' + args['valid_data']
        valid, _, _, _ = load_sequence(valid_path,
                                       args['itemid'],
                                       args['sessionid'],
                                       Train=False,
                                       itemsIDs=itemsIDs,
                                       freq=args['freq'],
                                       old_new=old_new)
        print("%d train examples." % len(train[0]))
        print("%d valid examples." % len(valid[0]))

        keep_probability = np.array(args['keep_probability'])
        no_dropout = np.array(args['no_dropout'])
        result_path = "./save/" + args['dataset']

        # Build model
        tf.reset_default_graph()
        with tf.Session(config=config) as sess:
            model = CSRM(
                sess=sess,
                n_items=args['n_items'],
                dim_proj=int(args['dim_proj']),
                hidden_units=int(args['hidden_units']),
                memory_size=args['memory_size'],
                memory_dim=args['memory_dim'],
                shift_range=args['shift_range'],
                lr=args['lr'],
                controller_layer_numbers=args['controller_layer_numbers'],
                batch_size=args['batch_size'],
                epoch=args['epoch'],
                keep_probability=keep_probability,
                no_dropout=no_dropout,
                display_frequency=args['display_frequency'],
                item_freqs=freqs,
                expname=args['expname'])
            # The validation set is passed for both evaluation arguments.
            hit, MRR, cov, pop, train_time, test_time = model.train(
                train, valid, valid, result_path)

            # Build every report line once; the same text goes to stdout
            # and to the log file.
            scores_line = first_five(hit) + ',' + first_five(MRR)
            cov_line = "\nCOV:" + first_five(cov)
            pop_line = "\nPOP:" + first_five(pop)
            train_time_line = "\nTrainTime:" + str(train_time)
            test_time_line = "\nTestTime:" + str(test_time)

            print("#########################################################")
            print("NEW_LOGGER_ " + args['expname'])
            print(scores_line)
            print(cov_line)
            print(pop_line)
            print(train_time_line)
            print(test_time_line)

            with open("NEW_LOGGER_" + args['expname'] + ".txt",
                      "a") as myfile:
                myfile.write(scores_line)
                myfile.write(cov_line)
                myfile.write(pop_line)
                myfile.write(train_time_line)
                myfile.write(test_time_line)
                myfile.write(
                    "\n############################################\n")
def __init__(self,
             model_fn,
             params,
             tpu_cluster_resolver=None,
             keep_checkpoint_max=5):
    """Create the estimator that this object wraps.

    A ``TPUEstimator`` is built when ``params.use_tpu`` is set or a cluster
    resolver is supplied; otherwise a plain ``Estimator`` using a
    ``MirroredStrategy`` over all visible GPUs is built.

    Args:
      model_fn: model function handed to the estimator.
      params: configuration object; read through attributes and
        ``as_dict()``.
      tpu_cluster_resolver: optional cluster resolver; when missing and
        ``params.use_tpu`` is set, one is created from ``params.platform``.
      keep_checkpoint_max: forwarded to the TPU ``RunConfig``.
    """
    self._model_dir = params.model_dir
    self._params = params
    self._tpu_job_name = params.tpu_job_name
    self._evaluator = None
    self._tpu_cluster_resolver = tpu_cluster_resolver
    self._keep_checkpoint_max = keep_checkpoint_max

    input_partition_dims = None
    num_cores_per_replica = None

    if params.use_tpu or self._tpu_cluster_resolver:
        if not self._tpu_cluster_resolver:
            resolver_cls = tf.distribute.cluster_resolver.TPUClusterResolver
            self._tpu_cluster_resolver = resolver_cls(
                params.platform.tpu,
                zone=params.platform.tpu_zone,
                project=params.platform.gcp_project)
        tpu_grpc_url = self._tpu_cluster_resolver.get_master()
        tf.Session.reset(tpu_grpc_url)

        # If the input image is transposed (from NHWC to HWCN), the partition
        # dimensions also need to be transposed the same way.
        def _maybe_transpose(input_partition_dims):
            if input_partition_dims and params.train.transpose_input:
                return [input_partition_dims[i] for i in [1, 2, 3, 0]]
            else:
                return input_partition_dims

        if params.train.input_partition_dims is not None:
            num_cores_per_replica = params.train.num_cores_per_replica
            # Parse 'None' into None; every other entry is transposed when
            # required.
            input_partition_dims = [
                None if dims == 'None' else _maybe_transpose(dims)
                for dims in params.train.input_partition_dims
            ]

        # Sets up config for TPUEstimator.
        per_host_v2 = tf.estimator.tpu.InputPipelineConfig.PER_HOST_V2
        tpu_config = tf.estimator.tpu.TPUConfig(
            params.train.iterations_per_loop,
            num_cores_per_replica=num_cores_per_replica,
            input_partition_dims=input_partition_dims,
            tpu_job_name=self._tpu_job_name,
            per_host_input_for_training=per_host_v2)

        session_config = tf.ConfigProto(
            isolate_session_state=params.isolate_session_state)
        run_config = tf.estimator.tpu.RunConfig(
            session_config=session_config,
            cluster=self._tpu_cluster_resolver,
            evaluation_master=params.platform.eval_master,
            model_dir=params.model_dir,
            log_step_count_steps=params.train.iterations_per_loop,
            tpu_config=tpu_config,
            keep_checkpoint_max=self._keep_checkpoint_max,
        )
        self._estimator = tf.estimator.tpu.TPUEstimator(
            model_fn=model_fn,
            use_tpu=params.use_tpu,
            train_batch_size=params.train.train_batch_size,
            eval_batch_size=params.eval.eval_batch_size,
            predict_batch_size=params.predict.predict_batch_size,
            config=run_config,
            params=params.as_dict())
    else:
        model_params = params.as_dict()

        # Uses `train_batch_size` as the `batch_size` for GPU train.
        model_params.update({'batch_size': params.train.train_batch_size})

        gpu_devices = tf.config.experimental.list_physical_devices('GPU')
        tf.logging.info('gpu devices: %s', gpu_devices)
        # One device string per physical GPU that was found.
        devices = [
            'device:GPU:{}'.format(index)
            for index in range(len(gpu_devices))
        ]
        strategy = tf.distribute.MirroredStrategy(devices=devices)
        tf.logging.info('Number of devices: %s',
                        strategy.num_replicas_in_sync)
        run_config = tf.estimator.RunConfig(train_distribute=strategy,
                                            model_dir=params.model_dir)
        self._estimator = tf.estimator.Estimator(model_fn=model_fn,
                                                 config=run_config,
                                                 params=model_params)
def main(args):
    """Train the network of one fold by minimizing the constraint on ``mu``.

    Steps: load and split the fold's dataset, fit and pickle a
    ``StandardScaler``, build a binned Poisson negative log-likelihood over
    ``cfg.analysis_binning``, derive the loss from the inverse Hessian of
    that NLL, and run Adam with early stopping.  The best model is saved to
    ``<workdir>/model_fold<fold>/model.ckpt``.

    Args:
      args: object providing ``workdir`` and ``fold``.
    """
    # Build nominal dataset
    same_sign = [n + '_ss' for n in cfg.ml_classes if n not in ['ggh', 'qqh']]
    classes = cfg.ml_classes + same_sign + ['data_ss']
    fold_file = os.path.join(args.workdir, 'fold{}.root'.format(args.fold))
    x, y, w = build_dataset(fold_file,
                            classes,
                            args.fold,
                            use_class_weights=False,
                            make_categorical=False)
    x_train, x_val, y_train, y_val, w_train, w_val = train_test_split(
        x, y, w, test_size=0.25, random_state=1234)
    logger.info(
        'Number of train/val events in nominal dataset: {} / {}'.format(
            x_train.shape[0], x_val.shape[0]))

    # Scale to expectation in the full dataset
    scale_train = 4.0 / 3.0 * 2.0  # train/test split + two fold
    scale_val = 4.0 * 2.0
    w_train = w_train * scale_train
    w_val = w_val * scale_val
    for i, name in enumerate(classes):
        s_train = np.sum(w_train[y_train == i])
        s_val = np.sum(w_val[y_val == i])
        logger.debug('Class / train / val: {} / {} / {}'.format(
            name, s_train, s_val))

    # Build dataset for systematic shifts (currently disabled; kept for
    # reference):
    # x_sys, y_sys, w_sys = build_dataset(
    #     os.path.join(args.workdir, 'fold{}.root'.format(args.fold)),
    #     ['htt', 'htt_jecUncRelativeSampleYearUp',
    #      'htt_jecUncRelativeSampleYearDown'],
    #     args.fold, make_categorical=False, use_class_weights=True)
    # x_sys_train, x_sys_val, w_sys_train, w_sys_val = train_test_split(
    #     x_sys, w_sys, test_size=0.25, random_state=1234)
    # logger.info(
    #     'Number of train/val events in varied datasets: {} / {}'.format(
    #         x_sys_train.shape[0], x_sys_val.shape[0]))
    # logger.debug('Sum of weights for nominal/up/down: {} / {} / {}'.format(
    #     np.sum(w_sys[y_sys == 0]), np.sum(w_sys[y_sys == 1]),
    #     np.sum(w_sys[y_sys == 2])))

    # Preprocessing
    preproc = StandardScaler()
    preproc.fit(x_train)
    preproc_path = os.path.join(args.workdir,
                                'preproc_fold{}.pickle'.format(args.fold))
    # Context manager so the pickle file is flushed and closed right away
    # (the handle used to be left open).
    with open(preproc_path, 'wb') as preproc_file:
        pickle.dump(preproc, preproc_file)
    x_train_preproc = preproc.transform(x_train)
    x_val_preproc = preproc.transform(x_val)
    for i, (var, mean, std) in enumerate(
            zip(cfg.ml_variables, preproc.mean_, preproc.scale_)):
        logger.info('Variable: %s', var)
        logger.info('Preprocessing parameter (mean, std): %s, %s', mean, std)
        logger.info('Preprocessed data (mean, std): %s, %s',
                    np.mean(x_train_preproc[:, i]),
                    np.std(x_train_preproc[:, i]))

    # Create model
    x_ph = tf.placeholder(tf.float64, shape=(None, len(cfg.ml_variables)))
    logits, f, w_vars = model(x_ph, len(cfg.ml_variables), 1, args.fold)

    # Build NLL loss
    y_ph = tf.placeholder(tf.float64, shape=(None, ))
    w_ph = tf.placeholder(tf.float64, shape=(None, ))

    nll = 0.0
    bins = np.array(cfg.analysis_binning)
    mu = tf.constant(1.0, tf.float64)
    nuisances = {}
    epsilon = tf.constant(1e-9, tf.float64)
    for i, (up, down) in enumerate(zip(bins[1:], bins[:-1])):
        logger.debug('Add NLL for bin {} with boundaries [{}, {}]'.format(
            i, down, up))
        up = tf.constant(up, tf.float64)
        down = tf.constant(down, tf.float64)

        # Processes: weighted yield of every class inside this bin.
        mask = count_masking(f, up, down)
        procs = {}
        for j, name in enumerate(classes):
            is_class = tf.cast(tf.equal(y_ph, tf.constant(j, tf.float64)),
                               tf.float64)
            proc_w = mask * is_class * w_ph
            procs[name] = tf.reduce_sum(proc_w)

        # QCD estimation: same-sign data minus the same-sign yields of the
        # other processes, clipped at zero.
        procs['qcd'] = procs['data_ss']
        for p in [n for n in cfg.ml_classes if n not in ['ggh', 'qqh']]:
            procs['qcd'] -= procs[p + '_ss']
        procs['qcd'] = tf.maximum(procs['qcd'], 0)

        # Nominal signal and background
        sig = 0
        for p in ['ggh', 'qqh']:
            sig += procs[p]

        bkg = 0
        for p in ['ztt', 'zl', 'w', 'tt', 'vv', 'qcd']:
            bkg += procs[p]

        # Normalization uncertainties: none are implemented, so the shift
        # stays zero.  (Named `sys_shift` to avoid shadowing the `sys`
        # module name.)
        sys_shift = 0.0

        # Expectations
        obs = sig + bkg
        exp = mu * sig + bkg + sys_shift

        # Likelihood
        nll -= tfp.distributions.Poisson(tf.maximum(exp, epsilon)).log_prob(
            tf.maximum(obs, epsilon))

    # Nuisance constraints (`nuisances` is empty in this function, so
    # nothing is added).
    for n in nuisances:
        nll -= tfp.distributions.Normal(
            loc=tf.constant(0.0, dtype=tf.float64),
            scale=tf.constant(1.0, dtype=tf.float64)).log_prob(nuisances[n])

    # Compute constraint of mu
    def get_constraint(nll, params):
        # Square root of the [0][0] element of the inverse Hessian of the
        # NLL with respect to `params`.
        hessian = [
            tf.gradients(g, params)
            for g in tf.unstack(tf.gradients(nll, params))
        ]
        inverse = tf.matrix_inverse(hessian)
        covariance_poi = inverse[0][0]
        constraint = tf.sqrt(covariance_poi)
        return constraint

    loss_fullnll = get_constraint(nll,
                                  [mu] + [nuisances[n] for n in nuisances])
    loss_statsonly = get_constraint(nll, [mu])

    # Add minimization ops
    def get_minimize_op(loss):
        optimizer = tf.train.AdamOptimizer()
        return optimizer.minimize(loss, var_list=w_vars)

    minimize_fullnll = get_minimize_op(loss_fullnll)
    minimize_statsonly = get_minimize_op(loss_statsonly)

    # Train
    config = tf.ConfigProto(intra_op_parallelism_threads=12,
                            inter_op_parallelism_threads=12)
    session = tf.Session(config=config)
    session.run([tf.global_variables_initializer()])
    saver = tf.train.Saver(max_to_keep=1)

    patience = 10
    patience_count = patience
    min_loss = 1e9
    tolerance = 0.001
    step = 0
    validation_steps = 20
    warmup_steps = 100
    while True:
        # The stats-only loss is used during warmup, the full NLL after.
        if step < warmup_steps:
            loss = loss_statsonly
            minimize = minimize_statsonly
            is_warmup = True
        else:
            loss = loss_fullnll
            minimize = minimize_fullnll
            is_warmup = False
        loss_train, _ = session.run([loss, minimize],
                                    feed_dict={
                                        x_ph: x_train_preproc,
                                        y_ph: y_train,
                                        w_ph: w_train
                                    })

        if step % validation_steps == 0:
            logger.info('Step / patience: {} / {}'.format(
                step, patience_count))
            logger.info('Train loss: {:.5f}'.format(loss_train))
            loss_val = session.run(loss,
                                   feed_dict={
                                       x_ph: x_val_preproc,
                                       y_ph: y_val,
                                       w_ph: w_val
                                   })
            logger.info('Validation loss: {:.5f}'.format(loss_val))

            if is_warmup:
                logger.info('Warmup: {} / {}'.format(step, warmup_steps))
            else:
                # An improvement must beat the best loss by more than the
                # relative tolerance; otherwise patience is used up.
                improved = (min_loss > loss_val and
                            np.abs(min_loss - loss_val) / min_loss > tolerance)
                if improved:
                    min_loss = loss_val
                    patience_count = patience
                    path = saver.save(session,
                                      os.path.join(
                                          args.workdir,
                                          'model_fold{}/model.ckpt'.format(
                                              args.fold)),
                                      global_step=step)
                    logger.info('Save model to {}'.format(path))
                else:
                    patience_count -= 1

                if patience_count == 0:
                    logger.info('Stop training')
                    break

        step += 1
def main(unused_argv):
    """Train a detection model, or evaluate checkpoints when asked to.

    With ``FLAGS.checkpoint_dir`` set, the estimator is evaluated (once or
    continuously) on the training or validation input.  Otherwise the
    estimator is trained, with saving listeners built from the eval specs
    passed to ``estimator.train``.
    """
    flags.mark_flag_as_required('model_dir')
    flags.mark_flag_as_required('pipeline_config_path')

    if FLAGS.gpu_device is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(FLAGS.gpu_device)

    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True

    config = tf.estimator.RunConfig(
        model_dir=FLAGS.model_dir,
        session_config=session_config,
        save_checkpoints_secs=FLAGS.save_checkpoints_secs)

    train_and_eval_dict = model_lib.create_estimator_and_inputs(
        run_config=config,
        pipeline_config_path=FLAGS.pipeline_config_path,
        train_steps=FLAGS.num_train_steps,
        sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
        sample_1_of_n_eval_on_train_examples=(
            FLAGS.sample_1_of_n_eval_on_train_examples))
    estimator = train_and_eval_dict['estimator']
    train_input_fn = train_and_eval_dict['train_input_fn']
    eval_input_fns = train_and_eval_dict['eval_input_fns']
    eval_on_train_input_fn = train_and_eval_dict['eval_on_train_input_fn']
    predict_input_fn = train_and_eval_dict['predict_input_fn']
    train_steps = train_and_eval_dict['train_steps']

    if not FLAGS.checkpoint_dir:
        # Training mode.
        train_spec, eval_specs = model_lib.create_train_and_eval_specs(
            train_input_fn,
            eval_input_fns,
            eval_on_train_input_fn,
            predict_input_fn,
            train_steps,
            eval_on_train_data=False)

        # Multiple Eval Specs allowed.
        # TODO: Fix name of saving_listeners
        saving_listeners = [
            EvalCheckpointSaverListener(estimator, eval_specs[0].input_fn,
                                        'validation')
        ]
        if len(eval_specs) > 1:
            saving_listeners.append(
                EvalCheckpointSaverListener(estimator,
                                            eval_specs[1].input_fn,
                                            'training'))

        estimator.train(input_fn=train_spec.input_fn,
                        max_steps=train_spec.max_steps,
                        saving_listeners=saving_listeners)
        return

    # Evaluation mode.
    if FLAGS.eval_training_data:
        name = 'training_data'
        input_fn = eval_on_train_input_fn
    else:
        name = 'validation_data'
        # The first eval input will be evaluated.
        input_fn = eval_input_fns[0]

    if FLAGS.run_once:
        latest = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
        estimator.evaluate(input_fn, steps=None, checkpoint_path=latest)
    else:
        model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir, input_fn,
                                  train_steps, name, FLAGS.max_eval_retries)
def benchmark_model(self,
                    warmup_runs,
                    bm_runs,
                    num_threads,
                    trace_filename=None):
    """Benchmark model.

    The model is built, frozen into constants and re-imported into a fresh
    graph, which is then run on one random input.

    Args:
      warmup_runs: number of individually timed warm-up runs executed before
        the measurement.
      bm_runs: number of runs in the measured loop; the reported per-batch
        time is the loop time divided by this number.
      num_threads: if > 0, used as ``intra_op_parallelism_threads`` (with
        ``inter_op_parallelism_threads=1``); otherwise the default session
        configuration is used.
      trace_filename: if set, one extra run is executed with full tracing
        and the Chrome trace is written to this file.
    """
    if self.tensorrt:
        print('Using tensorrt ', self.tensorrt)
        self.build_and_save_model()
        graphdef = self.freeze_model()

    if num_threads > 0:
        print('num_threads for benchmarking: {}'.format(num_threads))
        sess_config = tf.ConfigProto(
            intra_op_parallelism_threads=num_threads,
            inter_op_parallelism_threads=1)
    else:
        sess_config = tf.ConfigProto()

    # rewriter_config_pb2.RewriterConfig.OFF
    sess_config.graph_options.rewrite_options.dependency_optimization = 2
    if self.use_xla:
        sess_config.graph_options.optimizer_options.global_jit_level = (
            tf.OptimizerOptions.ON_2)

    # First graph: build the model and freeze its variables.
    with tf.Graph().as_default(), tf.Session(config=sess_config) as sess:
        inputs = tf.placeholder(tf.float32,
                                name='input',
                                shape=self.inputs_shape)
        output = self.build_model(inputs, is_training=False)

        img = np.random.uniform(size=self.inputs_shape)

        sess.run(tf.global_variables_initializer())
        if self.tensorrt:
            fetches = [inputs.name] + [i.name for i in output]
            goutput = self.convert_tr(graphdef, fetches)
            inputs, output = goutput[0], goutput[1:]

        if not self.use_xla:
            # Don't use tf.group because XLA removes the whole graph for
            # tf.group.
            output = tf.group(*output)
        # NOTE(review): on the XLA path `output` is whatever build_model /
        # convert_tr returned and must itself expose `.name` - confirm.
        output_name = [output.name]
        input_name = inputs.name
        graphdef = tf.graph_util.convert_variables_to_constants(
            sess, sess.graph_def, output_name)

    # Second graph: import the frozen graph and time it.
    with tf.Graph().as_default(), tf.Session(config=sess_config) as sess:
        tf.import_graph_def(graphdef, name='')

        for i in range(warmup_runs):
            start_time = time.time()
            sess.run(output_name, feed_dict={input_name: img})
            print('Warm up: {} {:.4f}s'.format(i, time.time() - start_time))

        print('Start benchmark runs total={}'.format(bm_runs))
        start = time.perf_counter()
        for i in range(bm_runs):
            sess.run(output_name, feed_dict={input_name: img})
        end = time.perf_counter()
        # Average over the runs actually executed (this used to divide by a
        # hard-coded 10); max() avoids a division by zero for bm_runs == 0.
        inference_time = (end - start) / max(bm_runs, 1)
        print('Per batch inference time: ', inference_time)
        print('FPS: ', self.batch_size / inference_time)

        if trace_filename:
            run_options = tf.RunOptions()
            run_options.trace_level = tf.RunOptions.FULL_TRACE
            run_metadata = tf.RunMetadata()
            sess.run(output_name,
                     feed_dict={input_name: img},
                     options=run_options,
                     run_metadata=run_metadata)
            logging.info('Dumping trace to %s', trace_filename)
            trace_dir = os.path.dirname(trace_filename)
            if not tf.io.gfile.exists(trace_dir):
                tf.io.gfile.makedirs(trace_dir)
            with tf.io.gfile.GFile(trace_filename, 'w') as trace_file:
                from tensorflow.python.client import timeline  # pylint: disable=g-direct-tensorflow-import,g-import-not-at-top
                trace = timeline.Timeline(
                    step_stats=run_metadata.step_stats)
                trace_file.write(
                    trace.generate_chrome_trace_format(show_memory=True))
def eval_once(ckpnt):
    """Run one evaluation pass over a single checkpoint and print the tally.

    Args:
      ckpnt: Checkpoint path passed to `saver.restore`.
    """
    # One 32x32 binary mask per cell of a 14x14 grid: a 5x5 window whose
    # top-left corner sits at (2 * row, 2 * col) is set to 1.
    ptches = np.zeros((14, 14, 32, 32))
    for row in range(14):
        top = row * 2
        for col in range(14):
            left = col * 2
            ptches[row, col, top:top + 5, left:left + 5] = 1
    ptches = np.reshape(ptches, (14 * 14, 32, 32))

    with tf.Graph().as_default():
        features = get_features(False, 1)[0]
        if FLAGS.patching:
            # Evaluate on the 'cc_*' variants of the inputs instead.
            for dst_key, src_key in (('images', 'cc_images'),
                                     ('recons_label', 'cc_recons_label'),
                                     ('labels', 'cc_labels')):
                features[dst_key] = features[src_key]

        outputs = f_model.multi_gpu_model([features])
        correct_prediction_sum = outputs['correct']
        logits = outputs['logits']

        saver = tf.train.Saver()
        test_writer = tf.summary.FileWriter(FLAGS.summary_dir + '/test_once')
        session_config = tf.ConfigProto(allow_soft_placement=True)
        session_config.gpu_options.per_process_gpu_memory_fraction = 0.3
        sess = tf.Session(config=session_config)
        saver.restore(sess, ckpnt)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        i = 0
        try:
            total_tp = 0
            for i in range(FLAGS.eval_size):
                lb, tp, lg = sess.run([
                    features['recons_label'],
                    correct_prediction_sum,
                    logits,
                ])
                if FLAGS.patching:
                    # Normalise each row of logits, sum over rows, and score
                    # the arg-max against the first label.
                    row_normalised = lg / np.sum(lg, axis=1, keepdims=True)
                    batch_pred = np.argmax(np.sum(row_normalised, axis=0))
                    tp = np.equal(batch_pred, lb[0])
                total_tp += tp
            total_false = FLAGS.eval_size - total_tp
            print('false:{}, true:{}'.format(total_false, total_tp))
        except tf.errors.OutOfRangeError:
            print('Done eval for %d steps.' % i)
        finally:
            # When done, ask the threads to stop.
            coord.request_stop()
        # Wait for threads to finish.
        coord.join(threads)
        sess.close()
        test_writer.close()
def eval_ensemble(ckpnts):
    """Sum logits over several checkpoints and print the number of errors.

    The evaluation batches are first read once from the input pipeline and
    cached in numpy arrays; every checkpoint is then restored in turn and
    fed the same cached batches through placeholders.

    Args:
      ckpnts: Iterable of checkpoint paths passed to `saver.restore`.
    """
    batch = 100
    num_classes = 10
    with tf.Graph().as_default():
        first_features = get_features(False, batch)[0]
        h = first_features['height']
        d = first_features['depth']
        image_shape = (batch, d, h, h)
        features = {
            'images': tf.placeholder(tf.float32, shape=image_shape),
            'labels': tf.placeholder(tf.float32, shape=(batch, num_classes)),
            'recons_image': tf.placeholder(tf.float32, shape=image_shape),
            'recons_label': tf.placeholder(tf.int32, shape=batch),
            'height': first_features['height'],
            'depth': first_features['depth'],
        }

        logits = f_model.multi_gpu_model([features])['logits']
        config = tf.ConfigProto(allow_soft_placement=True)

        num_batches = FLAGS.eval_size // batch
        batch_logits = np.zeros((num_batches, batch, num_classes),
                                dtype=np.float32)
        batch_recons_label = np.zeros((num_batches, batch), dtype=np.float32)
        batch_labels = np.zeros((num_batches, batch, num_classes),
                                dtype=np.float32)
        batch_images = np.zeros((num_batches,) + image_shape,
                                dtype=np.float32)
        batch_recons_image = np.zeros((num_batches,) + image_shape,
                                      dtype=np.float32)

        saver = tf.train.Saver()
        sess = tf.Session(config=config)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            # Pass 1: pull every batch from the queue-backed pipeline once.
            for i in range(num_batches):
                recons_label_i, labels_i, images_i, recons_image_i = sess.run([
                    first_features['recons_label'],
                    first_features['labels'],
                    first_features['images'],
                    first_features['recons_image'],
                ])
                batch_recons_label[i, ...] = recons_label_i
                batch_labels[i, ...] = labels_i
                batch_images[i, ...] = images_i
                batch_recons_image[i, ...] = recons_image_i

            # Pass 2: accumulate raw logits of each checkpoint on the cache.
            for ckpnt in ckpnts:
                saver.restore(sess, ckpnt)
                for i in range(num_batches):
                    feed = {
                        features['recons_label']: batch_recons_label[i, ...],
                        features['labels']: batch_labels[i, ...],
                        features['images']: batch_images[i, ...],
                        features['recons_image']: batch_recons_image[i, ...],
                    }
                    batch_logits[i, ...] += sess.run(logits, feed_dict=feed)
        except tf.errors.OutOfRangeError:
            print('Done eval for %d steps.' % i)
        finally:
            # When done, ask the threads to stop.
            coord.request_stop()
        # Wait for threads to finish.
        coord.join(threads)
        sess.close()

        batch_pred = np.argmax(batch_logits, axis=2)
        total_wrong = np.sum(np.not_equal(batch_pred, batch_recons_label))
        print(total_wrong)
def _read_json_file(path):
    """Load JSON from `path` via tf.gfile, closing the handle afterwards."""
    with tf.gfile.Open(path) as f:
        return json.load(f)


def main(_):
    """Entry point: build the estimator and run train / predict / test.

    Which phases run is controlled by FLAGS.do_train, FLAGS.do_predict and
    FLAGS.do_test. The dataset class, evaluation function and model builder
    are selected from FLAGS.data_type and FLAGS.model_type.

    Raises:
      ValueError: If FLAGS.data_type or FLAGS.model_type is not recognised.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    if FLAGS.data_type == "onehop":
        dataset_class = input_fns.OneHopDataset
        eval_fn = evaluate.multihop_eval_fn
    elif FLAGS.data_type == "twohop":
        dataset_class = input_fns.TwoHopDataset
        eval_fn = evaluate.multihop_eval_fn
    elif FLAGS.data_type == "threehop":
        dataset_class = input_fns.ThreeHopDataset
        eval_fn = evaluate.multihop_eval_fn
    elif FLAGS.data_type in ("wikimovie", "wikimovie-2hop", "wikimovie-3hop"):
        dataset_class = input_fns.WikiMovieDataset
        eval_fn = evaluate.wikimovie_eval_fn
    elif FLAGS.data_type == "hotpotqa":
        dataset_class = input_fns.HotpotQADataset
        eval_fn = evaluate.hotpot_eval_fn
    else:
        # Fail here with a clear message rather than with an unbound-name
        # error after the metadata has been loaded.
        raise ValueError("Unknown data_type: %r" % FLAGS.data_type)

    if FLAGS.model_type == "onehop":
        create_model_fn = model_fns.create_onehop_model
    elif FLAGS.model_type == "twohop":
        create_model_fn = model_fns.create_twohop_model
    elif FLAGS.model_type == "twohop-cascaded":
        create_model_fn = model_fns.create_twohopcascade_model
    elif FLAGS.model_type == "threehop":
        create_model_fn = functools.partial(model_fns.create_twohop_model,
                                            num_hops=3)
    elif FLAGS.model_type == "threehop-cascaded":
        create_model_fn = functools.partial(
            model_fns.create_twohopcascade_model, num_hops=3)
    elif FLAGS.model_type == "wikimovie":
        create_model_fn = model_fns.create_wikimovie_model
    elif FLAGS.model_type == "wikimovie-2hop":
        create_model_fn = functools.partial(model_fns.create_wikimovie_model,
                                            num_hops=2)
    elif FLAGS.model_type == "wikimovie-3hop":
        create_model_fn = functools.partial(model_fns.create_wikimovie_model,
                                            num_hops=3)
    elif FLAGS.model_type == "hotpotqa":
        create_model_fn = functools.partial(model_fns.create_hotpotqa_model,
                                            num_hops=FLAGS.num_hops)
    else:
        raise ValueError("Unknown model_type: %r" % FLAGS.model_type)

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    # Load mention and entity files.
    mention2text = _read_json_file(
        os.path.join(FLAGS.train_data_dir, "mention2text.json"))
    tf.logging.info("Loading metadata about entities and mentions...")
    entity2id, entity2name = _read_json_file(
        os.path.join(FLAGS.train_data_dir, "entities.json"))
    entityid2name = {str(i): entity2name[e] for e, i in entity2id.items()}
    # Paragraphs and mentions are not loaded for train / predict; they are
    # only read (from test_data_dir) in the do_test branch below.
    all_paragraphs = None
    all_mentions = None

    qa_config = QAConfig(
        qry_layers_to_use=FLAGS.qry_layers_to_use,
        qry_aggregation_fn=FLAGS.qry_aggregation_fn,
        dropout=FLAGS.question_dropout,
        qry_num_layers=FLAGS.question_num_layers,
        projection_dim=FLAGS.projection_dim,
        load_only_bert=FLAGS.load_only_bert,
        num_entities=len(entity2id),
        max_entity_len=FLAGS.max_entity_len,
        ensure_answer_sparse=FLAGS.ensure_answer_sparse,
        ensure_answer_dense=FLAGS.ensure_answer_dense,
        train_with_sparse=FLAGS.train_with_sparse,
        predict_with_sparse=FLAGS.predict_with_sparse,
        fix_sparse_to_one=FLAGS.fix_sparse_to_one,
        supervision=FLAGS.supervision,
        l2_normalize_db=FLAGS.l2_normalize_db,
        entity_score_aggregation_fn=FLAGS.entity_score_aggregation_fn,
        entity_score_threshold=FLAGS.entity_score_threshold,
        softmax_temperature=FLAGS.softmax_temperature,
        sparse_reduce_fn=FLAGS.sparse_reduce_fn,
        intermediate_loss=FLAGS.intermediate_loss,
        light=FLAGS.light,
        sparse_strategy=FLAGS.sparse_strategy,
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    mips_config = MIPSConfig(
        ckpt_path=os.path.join(FLAGS.train_data_dir, "mention_feats"),
        ckpt_var_name="db_emb",
        num_mentions=len(mention2text),
        emb_size=FLAGS.projection_dim * 2,
        num_neighbors=FLAGS.num_mips_neighbors)

    validate_flags_or_throw()

    tf.gfile.MakeDirs(FLAGS.output_dir)

    if FLAGS.do_train:
        # Record the flag values of this run; the `with` block guarantees
        # the file is flushed and closed.
        flags_path = os.path.join(FLAGS.output_dir, "flags.json")
        with tf.gfile.Open(flags_path, "w") as f:
            json.dump(tf.app.flags.FLAGS.flag_values_dict(), f)

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = tf.estimator.tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tf.estimator.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        keep_checkpoint_max=8,
        tpu_config=tf.estimator.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host),
        session_config=tf.ConfigProto(log_device_placement=False))

    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        train_dataset = dataset_class(
            in_file=FLAGS.train_file,
            tokenizer=tokenizer,
            subject_mention_probability=FLAGS.subject_mention_probability,
            max_qry_length=FLAGS.max_query_length,
            is_training=True,
            entity2id=entity2id,
            tfrecord_filename=os.path.join(FLAGS.output_dir,
                                           "train.tf_record"))
        num_train_steps = int(train_dataset.num_examples /
                              FLAGS.train_batch_size * FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    summary_obj = None
    model_fn = model_fn_builder(
        bert_config=bert_config,
        qa_config=qa_config,
        mips_config=mips_config,
        init_checkpoint=FLAGS.init_checkpoint,
        e2m_checkpoint=os.path.join(FLAGS.train_data_dir, "ent2ment.npz"),
        m2e_checkpoint=os.path.join(FLAGS.train_data_dir, "coref.npz"),
        entity_id_checkpoint=os.path.join(FLAGS.train_data_dir, "entity_ids"),
        entity_mask_checkpoint=os.path.join(FLAGS.train_data_dir,
                                            "entity_mask"),
        learning_rate=FLAGS.learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu,
        create_model_fn=create_model_fn,
        summary_obj=summary_obj)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.estimator.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        tf.logging.info("***** Running training *****")
        tf.logging.info(" Num orig examples = %d", train_dataset.num_examples)
        tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info(" Num steps = %d", num_train_steps)
        train(train_dataset, estimator, num_train_steps)

    if FLAGS.do_predict:
        eval_dataset = dataset_class(
            in_file=FLAGS.predict_file,
            tokenizer=tokenizer,
            subject_mention_probability=0.0,
            max_qry_length=FLAGS.max_query_length,
            is_training=False,
            entity2id=entity2id,
            tfrecord_filename=os.path.join(FLAGS.output_dir,
                                           "eval.tf_record"))
        continuous_eval(eval_dataset,
                        estimator,
                        mention2text,
                        entityid2name,
                        qa_config.supervision,
                        eval_fn,
                        paragraphs=all_paragraphs,
                        mentions=all_mentions)

    if FLAGS.do_test:
        # Load mention and entity files (test-time versions replace the
        # training-time metadata loaded above).
        mention2text = _read_json_file(
            os.path.join(FLAGS.test_data_dir, "mention2text.json"))
        entity2id, entity2name = _read_json_file(
            os.path.join(FLAGS.test_data_dir, "entities.json"))
        entityid2name = {str(i): entity2name[e] for e, i in entity2id.items()}
        all_paragraphs = _read_json_file(
            os.path.join(FLAGS.test_data_dir, "subparas.json"))
        # .npy is a binary format, so the handle must be opened in "rb".
        mentions_path = os.path.join(FLAGS.test_data_dir, "mentions.npy")
        with tf.gfile.Open(mentions_path, "rb") as f:
            all_mentions = np.load(f)

        qa_config.num_entities = len(entity2id)
        mips_config = MIPSConfig(
            ckpt_path=os.path.join(FLAGS.test_data_dir, "mention_feats"),
            ckpt_var_name="db_emb",
            num_mentions=len(mention2text),
            emb_size=FLAGS.projection_dim * 2,
            num_neighbors=FLAGS.num_mips_neighbors)
        model_fn = model_fn_builder(
            bert_config=bert_config,
            qa_config=qa_config,
            mips_config=mips_config,
            init_checkpoint=FLAGS.init_checkpoint,
            e2m_checkpoint=os.path.join(FLAGS.test_data_dir, "ent2ment.npz"),
            m2e_checkpoint=os.path.join(FLAGS.test_data_dir, "coref.npz"),
            entity_id_checkpoint=os.path.join(FLAGS.test_data_dir,
                                              "entity_ids"),
            entity_mask_checkpoint=os.path.join(FLAGS.test_data_dir,
                                                "entity_mask"),
            learning_rate=FLAGS.learning_rate,
            num_train_steps=num_train_steps,
            num_warmup_steps=num_warmup_steps,
            use_tpu=FLAGS.use_tpu,
            use_one_hot_embeddings=FLAGS.use_tpu,
            create_model_fn=create_model_fn,
            summary_obj=summary_obj)
        estimator = tf.estimator.tpu.TPUEstimator(
            use_tpu=FLAGS.use_tpu,
            model_fn=model_fn,
            config=run_config,
            train_batch_size=FLAGS.train_batch_size,
            predict_batch_size=FLAGS.predict_batch_size)

        eval_dataset = dataset_class(
            in_file=FLAGS.test_file,
            tokenizer=tokenizer,
            subject_mention_probability=0.0,
            max_qry_length=FLAGS.max_query_length,
            is_training=False,
            entity2id=entity2id,
            tfrecord_filename=os.path.join(FLAGS.output_dir,
                                           "test.tf_record"))
        # Use the "best_model" checkpoint when its .meta file exists in the
        # output directory; otherwise pass None to single_eval.
        if tf.gfile.Exists(os.path.join(FLAGS.output_dir, "best_model.meta")):
            ckpt_path = os.path.join(FLAGS.output_dir, "best_model")
        else:
            ckpt_path = None
        output_prediction_file = os.path.join(FLAGS.output_dir,
                                              "test_predictions.json")
        metrics = single_eval(eval_dataset,
                              estimator,
                              ckpt_path,
                              mention2text,
                              entityid2name,
                              qa_config.supervision,
                              output_prediction_file,
                              eval_fn,
                              paragraphs=all_paragraphs,
                              mentions=all_mentions)
        with tf.gfile.Open(os.path.join(FLAGS.output_dir, "test_metrics.txt"),
                           "w") as fo:
            for metric, value in metrics.items():
                tf.logging.info("%s: %.4f", metric, value)
                fo.write("%s %.4f\n" % (metric, value))
def __init__(self, **kwargs):
    """Build the session, placeholders, losses, training ops and gradients.

    Keyword Args:
      batch_size, data_sets, model_name, num_classes, initial_learning_rate,
        decay_epochs, avextol: Required; popped from `kwargs` and stored on
        the instance under the same names.
      train_dir: Output directory, default 'output'; created if missing.
      log_dir: Popped with default 'log'; not used further in this method.
      keep_probs: Optional, default None. When not None, a keep-probability
        placeholder is created and passed to `inference`.
      mini_batch: Optional, default True.
      damping: Optional, default 0.0.

    Raises:
      KeyError: If one of the required keyword arguments is missing.
    """
    # Fix both numpy and TensorFlow seeds.
    np.random.seed(0)
    tf.set_random_seed(0)

    self.batch_size = kwargs.pop('batch_size')
    self.data_sets = kwargs.pop('data_sets')
    self.train_dir = kwargs.pop('train_dir', 'output')
    log_dir = kwargs.pop('log_dir', 'log')
    self.model_name = kwargs.pop('model_name')
    self.num_classes = kwargs.pop('num_classes')
    self.initial_learning_rate = kwargs.pop('initial_learning_rate')
    self.decay_epochs = kwargs.pop('decay_epochs')
    self.avextol = kwargs.pop('avextol')

    # Optional settings with their defaults.
    if 'keep_probs' in kwargs:
        self.keep_probs = kwargs.pop('keep_probs')
    else:
        self.keep_probs = None

    if 'mini_batch' in kwargs:
        self.mini_batch = kwargs.pop('mini_batch')
    else:
        self.mini_batch = True

    if 'damping' in kwargs:
        self.damping = kwargs.pop('damping')
    else:
        self.damping = 0.0

    if not os.path.exists(self.train_dir):
        os.makedirs(self.train_dir)

    # Initialize session
    # NOTE(review): this sets CUDA_VISIBLE_DEVICES to "1" for the whole
    # process, which is a side effect beyond this object — confirm intended.
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    gpu_options = tf.GPUOptions(allow_growth=True)
    self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    # config = tf.ConfigProto()
    # self.sess = tf.Session(config=config)
    # K.set_session(self.sess)

    # Setup input
    self.input_placeholder, self.labels_placeholder = self.placeholder_inputs()
    self.num_train_examples = self.data_sets.train.labels.shape[0]
    self.num_test_examples = self.data_sets.test.labels.shape[0]

    # Setup inference and training
    # `inference` is called with an extra argument depending on the
    # configuration: the keep-probability placeholder, the labels (when the
    # instance has an `inference_needs_labels` attribute), or nothing.
    if self.keep_probs is not None:
        self.keep_probs_placeholder = tf.placeholder(tf.float32, shape=(2))
        self.logits = self.inference(self.input_placeholder,
                                     self.keep_probs_placeholder)
    elif hasattr(self, 'inference_needs_labels'):
        self.logits = self.inference(self.input_placeholder,
                                     self.labels_placeholder)
    else:
        self.logits = self.inference(self.input_placeholder)

    self.total_loss, self.loss_no_reg, self.indiv_loss_no_reg = self.loss(
        self.logits, self.labels_placeholder)

    self.global_step = tf.Variable(0, name='global_step', trainable=False)
    # The learning rate is a non-trainable variable that is updated by
    # running `update_learning_rate_op` with a value fed to the placeholder.
    self.learning_rate = tf.Variable(self.initial_learning_rate,
                                     name='learning_rate',
                                     trainable=False)
    self.learning_rate_placeholder = tf.placeholder(tf.float32)
    self.update_learning_rate_op = tf.assign(self.learning_rate,
                                             self.learning_rate_placeholder)

    # self.optimizer = tf.train.AdamOptimizer(self.learning_rate)
    # self.train_op = self.optimizer.minimize(self.total_loss, global_step=self.global_step)
    self.train_op, self.reset_optimizer_op = self.get_train_op(
        self.total_loss, self.global_step, self.learning_rate)
    # The SGD variant is built with ten times the learning rate.
    self.train_sgd_op = self.get_train_sgd_op(self.total_loss,
                                              self.global_step,
                                              self.learning_rate * 10)
    # self.train_op=self.train_sgd_op
    self.accuracy_op = self.get_accuracy_op(self.logits,
                                            self.labels_placeholder)
    self.preds = self.predictions(self.logits)

    # Setup misc
    self.saver = tf.train.Saver()

    # Setup gradients and Hessians
    self.params = self.get_all_params()
    self.grad_total_loss_op = tf.gradients(self.total_loss, self.params)
    self.grad_loss_no_reg_op = tf.gradients(self.loss_no_reg, self.params)
    self.grad_loss_r = tf.gradients(tf.squeeze(self.logits), self.params)
    # One placeholder per parameter tensor, with matching static shape.
    self.v_placeholder = [
        tf.placeholder(tf.float32, shape=a.get_shape()) for a in self.params
    ]
    self.u_placeholder = [
        tf.placeholder(tf.float32, shape=a.get_shape()) for a in self.params
    ]

    self.hessian_vector = hessian_vector_product(self.total_loss, self.params,
                                                 self.v_placeholder)

    self.grad_loss_wrt_input_op = tf.gradients(self.total_loss,
                                               self.input_placeholder)

    # Because tf.gradients auto accumulates, we probably don't need the add_n (or even reduce_sum)
    self.influence_op = tf.add_n([
        tf.reduce_sum(tf.multiply(a, array_ops.stop_gradient(b)))
        for a, b in zip(self.grad_total_loss_op, self.v_placeholder)
    ])

    self.grad_influence_wrt_input_op = tf.gradients(self.influence_op,
                                                    self.input_placeholder)

    self.checkpoint_file = os.path.join(self.train_dir,
                                        "%s-checkpoint" % self.model_name)

    # Feed dicts covering the full train and test splits.
    self.all_train_feed_dict = self.fill_feed_dict_with_all_ex(
        self.data_sets.train)
    self.all_test_feed_dict = self.fill_feed_dict_with_all_ex(
        self.data_sets.test)

    init = tf.global_variables_initializer()
    self.sess.run(init)

    self.vec_to_list = self.get_vec_to_list_fn()
    # NOTE: this rebinds the instance attribute `adversarial_loss` from the
    # method to the first element of its return value.
    self.adversarial_loss, self.indiv_adversarial_loss = self.adversarial_loss(
        self.logits, self.labels_placeholder)
    if self.adversarial_loss is not None:
        self.grad_adversarial_loss_op = tf.gradients(self.adversarial_loss,
                                                     self.params)
def main(args):
    """Train the selected experiment with a TF Estimator and time each epoch.

    Args:
      args: Parsed command-line namespace. `args.microbatches` must be None
        on entry and is overwritten with `args.batch_size`.
    """
    print(args)
    tf.disable_eager_execution()

    if args.memory_limit:
        # Cap the first visible GPU at the requested memory limit.
        gpus = tf.config.list_physical_devices('GPU')
        first_gpu = gpus[0]
        tf.config.experimental.set_memory_growth(first_gpu, True)
        virtual_device = tf.config.experimental.VirtualDeviceConfiguration(
            memory_limit=args.memory_limit)
        tf.config.experimental.set_virtual_device_configuration(
            first_gpu, [virtual_device])

    assert args.microbatches is None
    args.microbatches = args.batch_size

    # data_fn_dict[experiment] is indexed by 0 (real data) or 1 (dummy data).
    data_fn = data.data_fn_dict[args.experiment][int(args.dummy_data)]
    kwargs = {
        'max_features': args.max_features,
        'max_len': args.max_len,
        'format': 'NHWC',
    }
    if args.dummy_data:
        kwargs['num_examples'] = args.batch_size * 2
    (train_data, train_labels), _ = data_fn(**kwargs)
    num_train_eg = train_data.shape[0]

    if args.experiment == 'logreg':

        def loss_fn(labels, logits):
            return tf.nn.sigmoid_cross_entropy_with_logits(
                labels=labels, logits=tf.squeeze(logits))

        train_labels = train_labels.astype('float32')
    else:
        loss_fn = tf.nn.sparse_softmax_cross_entropy_with_logits

    model = partial(model_dict[args.experiment],
                    features=train_data,
                    max_features=args.max_features,
                    args=args)

    if not args.use_xla:
        run_config = None
        print('NOT using XLA!')
    else:
        # Not sure which one of these two works, so I'll just use both
        assert os.environ['TF_XLA_FLAGS'] == '--tf_xla_auto_jit=2'
        session_config = tf.ConfigProto()
        optimizer_options = session_config.graph_options.optimizer_options
        optimizer_options.global_jit_level = tf.OptimizerOptions.ON_2
        run_config = tf.estimator.RunConfig(session_config=session_config)
        print('Using XLA!')

    estimator_model_fn = partial(nn_model_fn, model, loss_fn, args)
    model_obj = tf.estimator.Estimator(model_fn=estimator_model_fn,
                                       config=run_config)
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'x': train_data},
        y=train_labels,
        batch_size=args.batch_size,
        num_epochs=args.epochs,
        shuffle=True)

    steps_per_epoch = num_train_eg // args.batch_size
    optimizer_message = ('Trained with DPSGD optimizer' if args.dpsgd else
                         'Trained with vanilla non-private SGD optimizer')
    timings = []
    for _ in range(args.epochs):
        tic = time.perf_counter()
        model_obj.train(input_fn=train_input_fn, steps=steps_per_epoch)
        duration = time.perf_counter() - tic
        print("Time Taken: ", duration)
        timings.append(duration)
        print(optimizer_message)

    if args.no_save:
        print('Not saving!')
    else:
        utils.save_runtimes(__file__.split('.')[0], args, timings)
    print('Done!')
def evaluate(test_list, checkpoint_dir):
    """Run the trained PRLNet over `test_list` and write quality statistics.

    Predicted images are saved under ./output/results; the mean and standard
    deviation of PSNR and SSIM over all batches are written to
    ./output/accuracy.txt.

    Args:
      test_list: Test data list handed to `input_producer`.
      checkpoint_dir: Directory searched for the latest checkpoint.

    Returns:
      None. Returns early (after printing an error) if no checkpoint is
      found in `checkpoint_dir`.
    """
    print('Running PRLNet -Evaluation!')
    save_dir_test = os.path.join("./output/results")
    exists_or_mkdir(save_dir_test)

    # --------------------------------- set model ---------------------------------
    # data fetched within range: [-1,1]
    input_imgs, target_imgs, num = input_producer(test_list,
                                                  in_channels,
                                                  batch_size,
                                                  need_shuffle=False)
    contents, details, pred_imgs = gen_PRLNet(input_imgs,
                                              out_channels,
                                              is_train=False,
                                              reuse=False)

    # --------------------------------- evaluation ---------------------------------
    # set GPU resources
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # config.gpu_options.per_process_gpu_memory_fraction = 0.45

    saver = tf.train.Saver()
    psnr_list = []
    ssim_list = []
    with tf.Session(config=config) as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            # Restore model weights from previously saved model
            check_pt = tf.train.get_checkpoint_state(checkpoint_dir)
            if not (check_pt and check_pt.model_checkpoint_path):
                print('# error: loading checkpoint failed.')
                return None
            saver.restore(sess, check_pt.model_checkpoint_path)
            print('model is loaded successfully.')

            cnt = 0
            start_time = time.time()
            while not coord.should_stop():
                tm = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')
                print('%s evaluating: [%d - %d]' % (tm, cnt, cnt + batch_size))
                pd_images, gt_images = sess.run([pred_imgs, target_imgs])
                save_images_from_batch(pd_images, save_dir_test, cnt)
                psnr, ssim = measure_quality(pd_images, gt_images)
                psnr_list.append(psnr)
                ssim_list.append(ssim)
                cnt += batch_size
                if cnt >= num:
                    coord.request_stop()
        finally:
            # Always stop and join the queue-runner threads, including on the
            # early return above and on errors; the session itself is closed
            # by the enclosing `with`.
            coord.request_stop()
            coord.join(threads)
        print("Testing finished! consumes %f sec" %
              (time.time() - start_time))

    print("Numerical accuracy computing ...")
    # numerical evaluation
    psnr_values = np.array(psnr_list)
    ssim_values = np.array(ssim_list)
    mean_psnr = np.mean(psnr_values)
    stde_psnr = np.std(psnr_values)
    mean_ssim = np.mean(ssim_values)
    stde_ssim = np.std(ssim_values)
    save_path = os.path.join("./output/", "accuracy.txt")
    with open(save_path, 'w') as f:
        f.write('mean psnr:' + str(mean_psnr) + '\n')
        f.write('stde psnr:' + str(stde_psnr) + '\n\n')
        f.write('mean ssim:' + str(mean_ssim) + '\n')
        # This line used to be mislabelled 'stde psnr:'.
        f.write('stde ssim:' + str(stde_ssim) + '\n')
    print("Done!")
def train(replication_factor, batch_size, batch_per_step, profile, num_iter,
          time_steps):
    """Launch training on the IPU and report throughput.

    Args:
      replication_factor: Replication factor for the infeed queue and the
        number of IPUs passed to `auto_select_ipus`.
      batch_size: Batch size handed to the data generator.
      batch_per_step: Number of repeats of the train op per `sess.run`.
      profile: When truthy, a single iteration is run, a profiling report is
        written and the loop stops.
      num_iter: Number of `sess.run` iterations.
      time_steps: Time steps handed to the data generator.
    """
    # Set up in-feeds for the data
    with tf.device('cpu'):
        data_generator = EnvGenerator(batch_size, time_steps)
        # Draw one sample to discover dtypes, shapes and size of a batch.
        sample = next(data_generator)
        output_types = tuple(tf.dtypes.as_dtype(arr.dtype) for arr in sample)
        output_shapes = tuple(tf.TensorShape(arr.shape) for arr in sample)
        total_bytes = sum(arr.nbytes for arr in sample)
        print(f'Input data size = {total_bytes/1000000} MB/batch')
        dataset = tf.data.Dataset.from_generator(data_generator,
                                                 output_types=output_types,
                                                 output_shapes=output_shapes)
        infeed_queue = ipu_infeed_queue.IPUInfeedQueue(
            dataset, "InfeedQueue", replication_factor=replication_factor)
        data_init = infeed_queue.initializer

    # Compile loss op
    def _repeated_train_step():
        return loops.repeat(batch_per_step,
                            build_train_op,
                            infeed_queue=infeed_queue,
                            inputs=[tf.constant(0.0, dtype=DTYPE)])

    with ipu_scope("/device:IPU:0"):
        total_loss = ipu_compiler.compile(_repeated_train_step)

    # Set up report op optionally.
    if profile:
        with tf.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

    # Set up session on IPU
    opts = utils.create_ipu_config(
        profiling=profile,
        use_poplar_text_report=use_poplar_text_report,
        profile_execution=profile,
        merge_infeed_io_copies=True)
    opts = utils.set_optimization_options(
        opts, max_cross_replica_sum_buffer_size=10000000)
    opts = utils.auto_select_ipus(opts, [replication_factor])
    utils.configure_ipu_system(opts)
    session_config = tf.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=True)
    sess = tf.Session(config=session_config)

    # Initialize variables
    utils.move_variable_initialization_to_cpu()
    sess.run([tf.global_variables_initializer(), data_init])

    # Run training and time
    total_time = 0.0
    total_samples = 0
    # Initially the infeed may buffer extra input data and first run for IPU
    # includes XLA compile, so skipping these iterations for calculating
    # items/sec.
    skip_iterations = 5
    samples_per_run = batch_size * batch_per_step * replication_factor
    for iters in range(num_iter):
        data_generator.reset_counter()
        tic = time.perf_counter()
        sess.run(total_loss)
        toc = time.perf_counter()

        if profile:
            raw_reports = sess.run(report)
            if use_poplar_text_report:
                # extract the report
                rep = utils.extract_all_strings_from_event_trace(raw_reports)
                print("Writing profiling report to %s" % report_dest)
                with open(report_dest, "w") as f:
                    f.write(rep)
            else:
                os.makedirs('profile_rl', exist_ok=True)
                save_tf_report(raw_reports, log_dir='profile_rl')
                print("Writing profiling report to profile_rl")
            break

        if iters <= skip_iterations:
            continue
        total_time += (toc - tic)
        total_samples += samples_per_run
        print("Average %.1f items/sec" % (total_samples / total_time))
def train(train_list, val_list, debug_mode=True):
    """Train PRLNet, periodically validating and checkpointing.

    Args:
      train_list: Training data list handed to `input_producer`.
      val_list: Validation data list; only used when `debug_mode` is set.
      debug_mode: When True, a validation graph is built and PSNR is
        measured every `save_epochs` epochs before the model is saved.

    Returns:
      None.
    """
    print('Running PRLNet -Training!')
    # create folders to save trained model and results
    graph_dir = './graph'
    checkpt_dir = './model'
    ouput_dir = './output'
    exists_or_mkdir(graph_dir, need_remove=True)
    exists_or_mkdir(ouput_dir)
    exists_or_mkdir(checkpt_dir)
    loss_log_path = os.path.join(ouput_dir, "loss")
    psnr_log_path = os.path.join(ouput_dir, "psnr")

    # --------------------------------- load data ---------------------------------
    # data fetched at range: [-1,1]
    input_imgs, target_imgs, num = input_producer(train_list,
                                                  in_channels,
                                                  batch_size,
                                                  need_shuffle=True)
    if debug_mode:
        input_val, target_val, num_val = input_producer(val_list,
                                                        in_channels,
                                                        batch_size,
                                                        need_shuffle=False)

    pred_content, pred_detail, pred_imgs = gen_PRLNet(input_imgs,
                                                      out_channels,
                                                      is_train=True,
                                                      reuse=False)
    if debug_mode:
        # Validation network shares the training weights (reuse=True).
        _, _, pred_val = gen_PRLNet(input_val,
                                    out_channels,
                                    is_train=False,
                                    reuse=True)

    # --------------------------------- loss terms ---------------------------------
    with tf.name_scope('Loss'):
        target_224 = tf.image.resize_images(target_imgs,
                                            size=[224, 224],
                                            method=0,
                                            align_corners=False)
        predict_224 = tf.image.resize_images(pred_imgs,
                                             size=[224, 224],
                                             method=0,
                                             align_corners=False)
        vgg19_api = VGG19("../vgg19.npy")
        use_rgb = (in_channels == 3)
        # Images are shifted from [-1, 1] to [0, 1] before entering VGG.
        vgg_map_targets = vgg19_api.build((target_224 + 1) / 2, is_rgb=use_rgb)
        vgg_map_predict = vgg19_api.build((predict_224 + 1) / 2,
                                          is_rgb=use_rgb)

        content_loss = tf.losses.mean_squared_error(target_imgs, pred_content)
        vgg_loss = 2e-6 * tf.losses.mean_squared_error(vgg_map_targets,
                                                       vgg_map_predict)
        l1_loss = tf.reduce_mean(tf.abs(target_imgs - pred_imgs))
        # Built but not part of the optimised objective below.
        mse_loss = tf.losses.mean_squared_error(target_imgs, pred_imgs)
        loss_op = content_loss + 2 * vgg_loss + l1_loss

    # --------------------------------- solver definition ---------------------------------
    global_step = tf.Variable(0, name='global_step', trainable=False)
    iters_per_epoch = np.floor_divide(num, batch_size)
    lr_decay = tf.train.polynomial_decay(
        learning_rate=learning_rate,
        global_step=global_step,
        decay_steps=iters_per_epoch * n_epochs,
        end_learning_rate=learning_rate / 100.0,
        power=0.9)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.name_scope('optimizer'), tf.control_dependencies(update_ops):
        gen_vars = [
            var for var in tf.trainable_variables()
            if var.name.startswith("PRLNet")
        ]
        gen_optim = tf.train.AdamOptimizer(lr_decay, beta1)
        gen_grads_and_vars = gen_optim.compute_gradients(loss_op,
                                                         var_list=gen_vars)
        train_op = gen_optim.apply_gradients(gen_grads_and_vars,
                                             global_step=global_step)

    # --------------------------------- model training ---------------------------------
    # (disabled) summary ops for debug_mode:
    #   with tf.name_scope('summarise') as sum_scope:
    #       tf.summary.scalar('loss', loss_op)
    #       tf.summary.scalar('learning rate', lr_decay)
    #       tf.summary.image('predicts', pred_imgs, max_outputs=9)
    #       summary_op = tf.summary.merge_all()
    with tf.name_scope("parameter_count"):
        num_parameters = tf.reduce_sum(
            [tf.reduce_prod(tf.shape(v)) for v in tf.trainable_variables()])

    # set GPU resources
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # config.gpu_options.per_process_gpu_memory_fraction = 0.45

    saver = tf.train.Saver(max_to_keep=1)
    loss_list = []
    psnr_list = []
    with tf.Session(config=config) as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        sess.run(tf.global_variables_initializer())
        print(">>------------>>> [Training_Num] =%d" % num)
        print(">>------------>>> [Parameter_Num] =%d" %
              sess.run(num_parameters))
        # (disabled) summary writer for debug_mode:
        #   summary_writer = tf.summary.FileWriter(graph_dir, graph=sess.graph)

        for epoch in range(n_epochs):
            start_time = time.time()
            epoch_loss = 0
            n_iters = 0
            for _ in range(0, num, batch_size):
                _, loss = sess.run([train_op, loss_op])
                epoch_loss += loss
                n_iters += 1
                # iteration information
                if n_iters % display_steps == 0:
                    tm = datetime.datetime.now().strftime(
                        '%Y-%m-%d %H:%M:%S.%f')
                    print("%s >> [%d/%d] iter: %d loss: %4.4f" %
                          (tm, epoch, n_epochs, n_iters, loss))
                    # (disabled) per-step summary writing for debug_mode.

            # epoch information
            epoch_loss = epoch_loss / n_iters
            loss_list.append(epoch_loss)
            elapsed = time.time() - start_time
            print(
                "[*] ----- Epoch: %d/%d | Loss: %4.4f | Time-consumed: %4.3f -----"
                % (epoch, n_epochs, epoch_loss, elapsed))

            if (epoch + 1) % save_epochs != 0:
                continue

            if debug_mode:
                print("----- validating model ...")
                batch_psnrs = []
                for idx in range(0, num_val, batch_size):
                    predicts, groundtruths = sess.run([pred_val, target_val])
                    save_images_from_batch(predicts, ouput_dir, idx)
                    batch_psnrs.append(measure_psnr(predicts, groundtruths))
                mean_psnr, nn = sum(batch_psnrs), len(batch_psnrs)
                psnr_list.append(mean_psnr / nn)
                print("----- psnr:%4.4f" % (mean_psnr / nn))

            print("----- saving model ...")
            saver.save(sess,
                       os.path.join(checkpt_dir, "model.cpkt"),
                       global_step=global_step)
            save_list(loss_log_path, loss_list)
            save_list(psnr_log_path, psnr_list)

        # stop data queue
        coord.request_stop()
        coord.join(threads)
        # write out the loss list
        save_list(loss_log_path, loss_list)
        save_list(psnr_log_path, psnr_list)
        print("Training finished!")
    return None
def export(self,
           output_dir: Text,
           tflite_path: Text = None,
           tensorrt: Text = None):
    """Export a saved model, frozen graph, and potential tflite/tensorrt model.

    The saved model is always written to ``output_dir`` together with a
    frozen ``<model_name>_frozen.pb`` graph. The TFLite and TensorRT exports
    are produced only when the corresponding argument is given.

    Args:
      output_dir: the output folder for saved model.
      tflite_path: the path for saved tflite file.
      tensorrt: If not None, must be {'FP32', 'FP16', 'INT8'}.
    """
    signitures = self.signitures
    image_arrays = signitures['image_arrays']
    prediction = signitures['prediction']
    signature_def_map = {
        'serving_default':
            tf.saved_model.predict_signature_def(
                {image_arrays.name: image_arrays},
                {prediction.name: prediction}),
    }
    b = tf.saved_model.Builder(output_dir)
    b.add_meta_graph_and_variables(
        self.sess,
        tags=['serve'],
        signature_def_map=signature_def_map,
        assets_collection=tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS),
        clear_devices=True)
    b.save()
    logging.info('Model saved at %s', output_dir)

    # also save freeze pb file.
    graphdef = self.freeze()
    pb_path = os.path.join(output_dir, self.model_name + '_frozen.pb')
    # Write through a context manager so the handle is flushed and closed
    # deterministically instead of whenever the object is garbage collected.
    with tf.io.gfile.GFile(pb_path, 'wb') as pb_file:
        pb_file.write(graphdef.SerializeToString())
    logging.info('Frozen graph saved at %s', pb_path)

    if tflite_path:
        height, width = utils.parse_image_size(self.params['image_size'])
        input_name = image_arrays.op.name
        input_shapes = {input_name: [None, height, width, 3]}
        converter = tf.lite.TFLiteConverter.from_saved_model(
            output_dir,
            input_arrays=[input_name],
            input_shapes=input_shapes,
            output_arrays=[prediction.op.name])
        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
        tflite_model = converter.convert()

        with tf.io.gfile.GFile(tflite_path, 'wb') as tflite_file:
            tflite_file.write(tflite_model)
        logging.info('TFLite is saved at %s', tflite_path)

    if tensorrt:
        from tensorflow.python.compiler.tensorrt import trt  # pylint: disable=g-direct-tensorflow-import,g-import-not-at-top
        sess_config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
        trt_path = os.path.join(output_dir, 'tensorrt_' + tensorrt.lower())
        trt.create_inference_graph(
            None,
            None,
            precision_mode=tensorrt,
            input_saved_model_dir=output_dir,
            output_saved_model_dir=trt_path,
            session_config=sess_config)
        logging.info('TensorRT model is saved at %s', trt_path)
def main():
    """Run a live webcam demo showing a 4x4 mosaic of stylised frames.

    Without ``--inter`` each of the 16 mosaic cells uses one of the 16 styles
    (one-hot control vector). With ``--inter`` the four given style indices
    are blended across the grid. Press ``q`` in a window to quit.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('checkpoint_path', help='Path to checkpoint to load')
    parser.add_argument('--input-size',
                        type=int,
                        default=256,
                        help='Shape of input to use (depends on checkpoint)')
    parser.add_argument('--inter',
                        nargs='+',
                        type=int,
                        help='Interpolate between the 4 style given')
    arguments = parser.parse_args()

    style_inter = arguments.inter
    if style_inter and len(style_inter) < 4:
        # Fail with a usage message rather than an IndexError further down.
        parser.error('--inter expects 4 style indices, got %d' %
                     len(style_inter))

    style_control = []
    if not style_inter:
        for style_index in range(16):
            style_control.append([0.0] * 16)
            style_control[-1][style_index] = 1
    else:
        for col in range(4):
            for row in range(4):
                weights = [0.0] * 16
                # Assignment order is kept: if the same style index is given
                # more than once, the later corner overrides the earlier one.
                # top left style
                weights[style_inter[0]] = ((3 - row) / 3) * ((3 - col) / 3)
                # top right style
                weights[style_inter[1]] = (row / 3) * ((3 - col) / 3)
                # bottom left style
                weights[style_inter[2]] = ((3 - row) / 3) * (col / 3)
                # bottom right style
                weights[style_inter[3]] = (row / 3) * (col / 3)
                style_control.append(weights)
    # `np.float` was only an alias of the builtin `float` and has been removed
    # from recent NumPy releases; the builtin gives the same float64 dtype.
    style_control = np.asarray(style_control, dtype=float)

    capture = cv2.VideoCapture(-1)
    try:
        # Silence TensorFlow's C++ logging; must be set before the import.
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
        import tensorflow as tf
        if tf.__version__.split('.')[0] == '2':
            import tensorflow.compat.v1 as tf
            tf.disable_v2_behavior()
        import tensorflow.compat.v1 as tf1
        from engine_multi import EngineMultiStyle

        gpu_options = tf1.GPUOptions(allow_growth=True)
        session_config = tf1.ConfigProto(gpu_options=gpu_options)
        with tf1.Session(config=session_config).as_default() as session:
            input_size = arguments.input_size
            engine = EngineMultiStyle(session, input_size,
                                      arguments.checkpoint_path)
            mosaic = np.zeros((4 * input_size, 4 * input_size, 3),
                              dtype=np.uint8)
            while True:
                # Capture frame-by-frame
                grabbed, frame = capture.read()
                if not grabbed or frame is None:
                    # No camera or stream ended: stop instead of crashing in
                    # cv2.resize on a missing frame.
                    print('Could not read a frame from the camera; stopping.')
                    break
                frame = cv2.resize(frame, (input_size, input_size))
                input_image = np.asarray(frame)
                outputs = engine.predict([input_image] * 16, style_control)
                for row in range(4):
                    for col in range(4):
                        mosaic[col * input_size:(col + 1) * input_size,
                               row * input_size:(row + 1) *
                               input_size] = outputs[(4 * col) + row]
                # Display the resulting frame
                cv2.imshow('original', input_image)
                cv2.imshow('style', mosaic)
                key_pressed = cv2.waitKey(1) & 0xFF
                if key_pressed == ord('q'):
                    break
    finally:
        # When everything done (or on error), release the capture
        capture.release()
        cv2.destroyAllWindows()
def train(config):
    """Train the handwriting VRNN-GMM model described by ``config``.

    Builds the input queues, the training and sampling graphs (plus a
    validation graph when ``config['validate_model']`` is set), the Adam
    optimizer with optional gradient clipping, and then runs the training
    loop while writing summaries and checkpoints to ``config['model_dir']``.

    Args:
        config: experiment configuration mapping. It is modified in place:
            ``config['model_dir']`` is filled in for a fresh run, and
            ``config['loss_weights']['kld_loss']`` is temporarily replaced by
            a tensor while the graph is built (restored before the
            configuration is dumped to disk).

    Raises:
        Exception: if ``config['learning_rate_type']`` is neither
            'exponential' nor 'fixed'.
        ValueError: if training should resume from ``config['model_dir']``
            but no checkpoint can be found there.
    """
    Model_cls = HandwritingVRNNGmmModel
    Dataset_cls = HandWritingDatasetConditionalTF

    # Read the optional switches once, with the same defaults everywhere.
    use_staging_area = config.get('use_staging_area', False)
    validate_model = config.get('validate_model', False)
    create_timeline = config.get('create_timeline', False)

    # Dataset
    training_dataset = Dataset_cls(config['training_data'],
                                   use_bow_labels=config['use_bow_labels'])
    num_training_iterations = int(training_dataset.num_samples /
                                  config['batch_size'])
    print("# training steps per epoch: " + str(num_training_iterations))

    # Create a tensorflow sub-graph that loads batches of samples.
    data_feeder = DataFeederTF(training_dataset,
                               config['num_epochs'],
                               config['batch_size'],
                               queue_capacity=1024)
    if config.get('use_bucket_feeder', True) and training_dataset.is_dynamic:
        bucket_edges = training_dataset.get_seq_len_histogram(
            num_bins=15, collapse_first_and_last_bins=[2, -2])
        sequence_length, inputs, targets = data_feeder.batch_queue_bucket(
            bucket_edges,
            dynamic_pad=training_dataset.is_dynamic,
            queue_capacity=300,
            queue_threads=4)
    else:
        sequence_length, inputs, targets = data_feeder.batch_queue(
            dynamic_pad=training_dataset.is_dynamic,
            queue_capacity=512,
            queue_threads=4)

    if use_staging_area:
        staging_area = TFStagingArea([sequence_length, inputs, targets],
                                     device_name="/gpu:0")
        sequence_length, inputs, targets = staging_area.tensors

    # Create step counter (used by optimization routine and learning rate function.)
    global_step = tf.compat.v1.get_variable(name='global_step',
                                            trainable=False,
                                            initializer=1)

    # Annealing KL-divergence loss.
    kld_loss_weight_backup = config['loss_weights']['kld_loss']
    if isinstance(kld_loss_weight_backup, np.ndarray):
        # Create a piecewise increasing kld loss weight.
        num_steps = len(kld_loss_weight_backup)
        values = np.linspace(0, 1, num_steps + 1).tolist()
        boundaries = (kld_loss_weight_backup *
                      num_training_iterations).tolist()
        config['loss_weights']['kld_loss'] = tf.train.piecewise_constant(
            global_step, boundaries=boundaries, values=values)
        tf.summary.scalar('training/kld_loss_weight',
                          config['loss_weights']['kld_loss'],
                          collections=["training_status"])

    # Create training graph.
    with tf.name_scope("training"):
        model = Model_cls(config,
                          reuse=False,
                          input_op=inputs,
                          target_op=targets,
                          input_seq_length_op=sequence_length,
                          input_dims=training_dataset.input_dims,
                          target_dims=training_dataset.target_dims,
                          mode="training",
                          data_processor=training_dataset)
        model.build_graph()
        model.create_image_summary(training_dataset.prepare_for_visualization)

    # Create sampling graph.
    with tf.name_scope("sampling"):
        sampling_input_op = tf.compat.v1.placeholder(
            tf.float32,
            shape=[
                1, training_dataset.sequence_length,
                sum(training_dataset.input_dims)
            ])
        sampling_sequence_length_op = tf.compat.v1.placeholder(tf.int32,
                                                               shape=[1])
        sampling_model = Model_cls(
            config,
            reuse=True,
            input_op=sampling_input_op,
            target_op=None,
            input_seq_length_op=sampling_sequence_length_op,
            input_dims=training_dataset.input_dims,
            target_dims=training_dataset.target_dims,
            batch_size=1,
            mode="sampling",
            data_processor=training_dataset)
        sampling_model.build_graph()
        sampling_model.create_image_summary(
            training_dataset.prepare_for_visualization)

    # Validation model.
    if validate_model:
        validation_dataset = Dataset_cls(
            config['validation_data'],
            use_bow_labels=config['use_bow_labels'])
        num_validation_iterations = int(validation_dataset.num_samples /
                                        config['batch_size'])
        print("# validation steps per epoch: " +
              str(num_validation_iterations))

        valid_data_feeder = DataFeederTF(validation_dataset,
                                         config['num_epochs'],
                                         config['batch_size'],
                                         queue_capacity=1024,
                                         shuffle=False)
        valid_sequence_length, valid_inputs, valid_targets = (
            valid_data_feeder.batch_queue(
                dynamic_pad=validation_dataset.is_dynamic,
                queue_capacity=512,
                queue_threads=4))

        if use_staging_area:
            valid_staging_area = TFStagingArea(
                [valid_sequence_length, valid_inputs, valid_targets],
                device_name="/gpu:0")
            valid_sequence_length, valid_inputs, valid_targets = (
                valid_staging_area.tensors)

        with tf.name_scope("validation"):
            valid_model = Model_cls(
                config,
                reuse=True,
                input_op=valid_inputs,
                target_op=valid_targets,
                input_seq_length_op=valid_sequence_length,
                input_dims=validation_dataset.input_dims,
                target_dims=validation_dataset.target_dims,
                mode="training",
                data_processor=validation_dataset)
            valid_model.build_graph()

    # Create a session object and initialize parameters.
    gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                            allow_soft_placement=True))

    if config['learning_rate_type'] == 'exponential':
        learning_rate = tf.train.exponential_decay(
            config['learning_rate'],
            global_step=global_step,
            decay_steps=config['learning_rate_decay_steps'],
            decay_rate=config['learning_rate_decay_rate'],
            staircase=False)
        tf.summary.scalar('training/learning_rate',
                          learning_rate,
                          collections=["training_status"])
    elif config['learning_rate_type'] == 'fixed':
        learning_rate = config['learning_rate']
    else:
        raise Exception("Invalid learning rate type")

    optimizer = tf.train.AdamOptimizer(learning_rate)

    # Gradient clipping and a sanity check.
    trainable_vars = tf.trainable_variables()
    grads = list(zip(tf.gradients(model.loss, trainable_vars),
                     trainable_vars))
    grads_clipped = []
    with tf.name_scope("grad_clipping"):
        for grad, var in grads:
            if grad is None:
                # Variable does not contribute to the loss; nothing to apply.
                continue
            if config['grad_clip_by_norm'] > 0:
                grad = tf.clip_by_norm(grad, config['grad_clip_by_norm'])
            elif config['grad_clip_by_value'] > 0:
                # Clip symmetrically to [-v, +v]. The upper bound used to be
                # -v as well, which forced every gradient entry to -v.
                grad = tf.clip_by_value(grad, -config['grad_clip_by_value'],
                                        config['grad_clip_by_value'])
            grads_clipped.append((grad, var))
    train_op = optimizer.apply_gradients(grads_and_vars=grads_clipped,
                                         global_step=global_step)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    sess.run(init_op)

    run_opts = None
    run_opts_metadata = None
    if create_timeline:
        run_opts = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE,
                                 timeout_in_ms=100000)
        run_opts_metadata = tf.RunMetadata()

    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver(max_to_keep=1, save_relative_paths=True)
    if config['model_dir']:
        # If model directory already exists, continue training by restoring computation graph.
        # Restore variables.
        if config['checkpoint_id'] is None:
            checkpoint_path = tf.train.latest_checkpoint(config['model_dir'])
        else:
            checkpoint_path = os.path.join(config['model_dir'],
                                           config['checkpoint_id'])
        if checkpoint_path is None:
            raise ValueError("No checkpoint found in " +
                             str(config['model_dir']))

        print("Continue training with model " + checkpoint_path)
        saver.restore(sess, checkpoint_path)

        step = tf.train.global_step(sess, global_step)
        start_epoch = round(
            step / (training_dataset.num_samples / config['batch_size']))
    else:
        # Fresh start
        # Create a unique output directory for this experiment.
        config['model_dir'] = get_model_dir_timestamp(
            base_path=config['model_save_dir'],
            prefix="tf",
            suffix=config['experiment_name'],
            connector="-")
        print("Saving to {}\n".format(config['model_dir']))
        start_epoch = 1
        step = 1

    coord = tf.train.Coordinator()
    # Enqueue threads must be initialized after definition of train_op.
    data_feeder.init(sess, coord)
    if validate_model:
        valid_data_feeder.init(sess, coord)
    queue_threads = tf.train.start_queue_runners(coord=coord, sess=sess)
    # NOTE(review): this appends `enqueue_threads` as a single element; if it
    # is a list of threads, `extend` may be what is intended - confirm.
    queue_threads.append(data_feeder.enqueue_threads)

    # Register and create summary ops.
    summary_dir = os.path.join(config['model_dir'], "summary")
    summary_writer = tf.summary.FileWriter(summary_dir, sess.graph)

    # Create summaries to visualize weights and gradients.
    if config['tensorboard_verbose'] > 1:
        for grad, var in grads:
            tf.summary.histogram(var.name,
                                 var,
                                 collections=["training_status"])
            if grad is not None:
                # Variables without a gradient have nothing to plot.
                tf.summary.histogram(var.name + '/gradient',
                                     grad,
                                     collections=["training_status"])

        tf.summary.scalar(
            "training/queue",
            math_ops.cast(data_feeder.input_queue.size(), dtypes.float32) *
            (1. / data_feeder.queue_capacity),
            collections=["training_status"])

    # Save configuration
    # Put the original kld weight back so the dump does not contain a tensor.
    config['loss_weights']['kld_loss'] = kld_loss_weight_backup
    try:
        # Pickle and json dump (best effort: the config may hold values that
        # cannot be serialized).
        with open(os.path.join(config['model_dir'], 'config.pkl'),
                  'wb') as pkl_file:
            pickle.dump(config, pkl_file)
        with open(os.path.join(config['model_dir'], 'config.json'),
                  'w') as json_file:
            json.dump(config, json_file, indent=4, sort_keys=True)
    except Exception as err:
        print("Warning: could not save configuration: %s" % err)

    training_summary = tf.compat.v1.summary.merge_all('training_status')
    training_run_ops = [
        model.loss_summary, training_summary, model.ops_loss, train_op
    ]
    training_run_ops_with_img_summary = [
        model.loss_summary, training_summary, model.ops_loss,
        model.ops_img_summary, train_op
    ]
    if validate_model:
        validation_run_ops = [valid_model.ops_loss]

    if use_staging_area:
        training_run_ops.append(staging_area.preload_op)
        training_run_ops_with_img_summary.append(staging_area.preload_op)
        # Fill staging area first.
        for _ in range(256):
            sess.run(staging_area.preload_op,
                     feed_dict={},
                     options=run_opts,
                     run_metadata=run_opts_metadata)

        if validate_model:
            validation_run_ops.append(valid_staging_area.preload_op)
            # Fill staging area first.
            for _ in range(256):
                sess.run(valid_staging_area.preload_op,
                         feed_dict={},
                         options=run_opts,
                         run_metadata=run_opts_metadata)

    for epoch in range(start_epoch, config['num_epochs'] + 1):
        for epoch_step in range(num_training_iterations):
            start_time = time.perf_counter()
            step = tf.train.global_step(sess, global_step)

            if (step % config['checkpoint_every_step']) == 0:
                ckpt_save_path = saver.save(
                    sess, os.path.join(config['model_dir'], 'model'),
                    global_step)
                print("Model saved in file: %s" % ckpt_save_path)

            write_img_summary = (
                config['img_summary_every_step'] > 0 and
                step % config['img_summary_every_step'] == 0)

            if write_img_summary:
                run_training_output = sess.run(
                    training_run_ops_with_img_summary,
                    feed_dict={},
                    options=run_opts,
                    run_metadata=run_opts_metadata)

                img_summary = model.get_image_summary(
                    sess,
                    ops_img_summary_evaluated=run_training_output[3],
                    seq_len=500)
                summary_writer.add_summary(img_summary, step)
            else:
                run_training_output = sess.run(
                    training_run_ops,
                    feed_dict={},
                    options=run_opts,
                    run_metadata=run_opts_metadata)

            # Loss summary
            summary_writer.add_summary(run_training_output[0], step)
            # Training status summary.
            summary_writer.add_summary(run_training_output[1], step)

            if step % config['print_every_step'] == 0:
                time_elapsed = ((time.perf_counter() - start_time) /
                                config['print_every_step'])
                model.log_loss(run_training_output[2], step, epoch,
                               time_elapsed)

            if write_img_summary:
                sampling_img_summary = sampling_model.get_image_summary(
                    sess, ops_img_summary_evaluated=None, seq_len=500)
                summary_writer.add_summary(sampling_img_summary, step)

            if validate_model and step % config['validate_every_step'] == 0:
                start_time = time.perf_counter()
                for _ in range(num_validation_iterations):
                    run_validation_output = sess.run(
                        validation_run_ops,
                        feed_dict={},
                        options=run_opts,
                        run_metadata=run_opts_metadata)
                    valid_model.update_validation_loss(
                        run_validation_output[0])

                valid_summary, valid_eval_loss = (
                    valid_model.get_validation_summary(session=sess))
                # Validation loss summary
                summary_writer.add_summary(valid_summary, step)

                time_elapsed = ((time.perf_counter() - start_time) /
                                num_validation_iterations)
                valid_model.log_loss(valid_eval_loss,
                                     step,
                                     epoch,
                                     time_elapsed,
                                     prefix="VALID: ")
                valid_model.reset_validation_loss()

    if create_timeline:
        create_tf_timeline(config['model_dir'], run_opts_metadata)

    print("End-of-Training.")
    ckpt_save_path = saver.save(sess,
                                os.path.join(config['model_dir'], 'model'),
                                global_step)
    print("Model saved in file: %s" % ckpt_save_path)
    print('Model is trained for %d epochs, %d steps.' %
          (config['num_epochs'], step))

    try:
        sess.run(data_feeder.input_queue.close(cancel_pending_enqueues=True))
        coord.request_stop()
        coord.join(queue_threads, stop_grace_period_secs=5)
    except Exception as err:
        # Shutdown is best effort, but say why it was not clean.
        print("Warning: input queue shutdown failed: %s" % err)

    sess.close()
def __init__(self):
    """Build a 5-layer fully connected classifier, train it and log results.

    All hyper-parameters and data are read from the module-level
    ``R_variable`` mapping. Per-epoch loss/accuracy are appended to
    ``R_variable`` and written (pickle, text, csv, figures) under
    ``self.FolderName`` after every epoch. Training stops early once the
    test accuracy reaches ``R_variable['breakstandard']``.
    """

    # Loss figure.
    def plotloss():
        """Plot train/test loss per epoch (log y-axis) and save the figure."""
        plt.figure()
        ax = plt.gca()
        y1 = R_variable['loss_test']
        y2 = R_variable['loss_train']
        plt.plot(y1, 'ro', label='Test')
        plt.plot(y2, 'g*', label='Train')
        ax.set_yscale('log')
        plt.legend(fontsize=18)
        plt.xlabel('Epoch', fontsize=15)
        plt.title('loss', fontsize=15)
        fntmp = '%sloss' % (self.FolderName)
        mySaveFig(plt, fntmp, ax=ax, isax=1, iseps=0)

    def plotacc():
        """Plot train/test accuracy per epoch and save the figure."""
        plt.figure()
        ax = plt.gca()
        y1 = R_variable['acc_test']
        y2 = R_variable['acc_train']
        plt.plot(y1, 'ro', label='Test')
        plt.plot(y2, 'g*', label='Train')
        plt.legend(fontsize=18)
        plt.xlabel('Epoch', fontsize=15)
        plt.title('accuracy', fontsize=15)
        fntmp = '%saccuracy' % (self.FolderName)
        mySaveFig(plt, fntmp, ax=ax, isax=1, iseps=0)

    # Save files.
    def savefile():
        """Dump R_variable as pickle/text and the metric histories as csv."""
        # Serialize R_variable so that it can be loaded back later if needed.
        with open('%s/objs.pkl' % (self.FolderName), 'wb') as f:
            pickle.dump(R_variable, f, protocol=4)

        # Write only the entries of R_variable whose size is at most 20.
        with open("%s/Output.txt" % (self.FolderName), "w") as text_file:
            for para in R_variable:
                if np.size(R_variable[para]) > 20:
                    continue
                text_file.write('%s: %s\n' % (para, R_variable[para]))

        # Save the loss/accuracy histories to csv.
        for key in ('loss_train', 'loss_test', 'acc_train', 'acc_test'):
            pd.DataFrame(R_variable[key]).to_csv(self.FolderName + key +
                                                 ".csv",
                                                 header=False,
                                                 columns=None)

    # Record the latest metric values; refreshed before every plot so that
    # they are available even if the program is stopped midway.
    def gapReocord():
        """Copy the most recent loss/accuracy values into ``final_*`` keys."""
        R_variable['final_train_loss'] = R_variable['loss_train'][-1]
        R_variable['final_test_loss'] = R_variable['loss_test'][-1]
        R_variable['final_train_acc'] = R_variable['acc_train'][-1]
        R_variable['final_test_acc'] = R_variable['acc_test'][-1]

    # Metric histories.
    R_variable['loss_test'] = []
    R_variable['loss_train'] = []
    R_variable['acc_test'] = []
    R_variable['acc_train'] = []

    # Start the timer and create a new output directory.
    self.t0 = time.time()
    self.FolderName = mk_newfolder()

    # `list.extend` returns None, so `[None].extend(shape)` silently produced
    # a placeholder of unknown shape; build the shape list explicitly.
    self.x = tf.placeholder(tf.float32,
                            [None] + list(R_variable['graph_shape']),
                            name='x')
    self.y0 = tf.placeholder(tf.float32,
                             shape=[None] + list(R_variable['label_shape']),
                             name='y0')
    dataset = tf.data.Dataset.from_tensor_slices((self.x, self.y0))
    dataset = dataset.shuffle(20).batch(R_variable['batch_size']).repeat()
    itetator = dataset.make_initializable_iterator()
    data_element = itetator.get_next()

    def weight_variable(shape, name=None):
        """Return a variable initialised from a truncated normal (std 0.1)."""
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial, name=name)

    def bias_variable(shape, name=None):
        """Return a variable initialised to the constant 0.1."""
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial, name=name)

    def activation_fun(x, name=None):
        """Apply the activation selected by ``R_variable['ActFuc']``."""
        act = R_variable['ActFuc']
        if act == 'relu':
            return tf.nn.relu(x, name=name)
        if act == 'tanh':
            return tf.nn.tanh(x, name=name)
        if act == 'srelu':
            return tf.nn.relu(-(x - 1)) * tf.nn.relu(x)
        # Previously an unknown name surfaced as an UnboundLocalError.
        raise ValueError("Unsupported activation function: %r" % (act,))

    input_dim = R_variable['graph_shape'][0] * R_variable['graph_shape'][1]

    # Fully connected layer 1.
    x_flat = tf.reshape(self.x, [-1, input_dim], name='x_flat')
    W_fc1 = weight_variable([input_dim, 800], name='W_fc1')
    b_fc1 = bias_variable([800], name='b_fc1')
    h_fc1 = activation_fun(tf.matmul(x_flat, W_fc1) + b_fc1, name='h_fc1')

    # Fully connected layer 2.
    W_fc2 = weight_variable([800, 800], name='W_fc2')
    b_fc2 = bias_variable([800], name='b_fc2')
    h_fc2 = activation_fun(tf.matmul(h_fc1, W_fc2) + b_fc2, name='h_fc2')

    # Fully connected layer 3.
    W_fc3 = weight_variable([800, 512], name='W_fc3')
    b_fc3 = bias_variable([512], name='b_fc3')
    h_fc3 = activation_fun(tf.matmul(h_fc2, W_fc3) + b_fc3, name='h_fc3')

    # Fully connected layer 4.
    W_fc4 = weight_variable([512, 64], name='W_fc4')
    b_fc4 = bias_variable([64], name='b_fc4')
    h_fc4 = activation_fun(tf.matmul(h_fc3, W_fc4) + b_fc4, name='h_fc4')

    # softmax
    W_fc5 = weight_variable([64, 10], name='W_fc5')
    # Was named 'b_fc3', colliding with the third layer's bias.
    b_fc5 = bias_variable([10], name='b_fc5')
    y_pre = tf.add(tf.matmul(h_fc4, W_fc5), b_fc5, name='y_pre')
    self.y = tf.nn.softmax(y_pre, name='y')

    # loss func
    # The op applies softmax internally, so it must be fed the raw logits
    # (y_pre); feeding self.y applied softmax twice.
    self.cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=y_pre,
                                                labels=self.y0))
    # train aim
    self.train = tf.train.AdamOptimizer(
        learning_rate=R_variable['learning_rate']).minimize(
            self.cross_entropy)
    # accuracy
    self.result = tf.equal(tf.argmax(self.y, 1), tf.argmax(self.y0, 1))
    self.accuracy = tf.reduce_mean(tf.cast(self.result, tf.float32))

    config = tf.ConfigProto()
    # Only relevant when running on a shared GPU server; can be ignored.
    config.gpu_options.allow_growth = True
    self.sess = tf.Session(config=config)
    self.sess.run(tf.global_variables_initializer())
    self.sess.run(itetator.initializer,
                  feed_dict={
                      self.x: R_variable['train_inputs'],
                      self.y0: R_variable['y_true_train']
                  })

    metrics = [self.accuracy, self.cross_entropy]
    train_feed = {
        self.x: R_variable['train_inputs'],
        self.y0: R_variable['y_true_train']
    }
    test_feed = {
        self.x: R_variable['test_inputs'],
        self.y0: R_variable['y_true_test']
    }
    total_batches = R_variable['epoch'] * R_variable['batch_num']

    for e in range(R_variable['epoch']):
        for s in range(R_variable['batch_num']):
            finished_batch = e * R_variable['batch_num'] + s + 1
            # training
            x_batch, y_batch = self.sess.run(data_element)
            batch_feed = {self.x: x_batch, self.y0: y_batch}
            self.sess.run(self.train, feed_dict=batch_feed)
            acc_Test, loss_Test = self.sess.run(metrics, feed_dict=test_feed)
            if acc_Test >= R_variable['breakstandard']:
                R_variable['uesd batch'] = finished_batch
                R_variable['uesd time'] = time.time() - self.t0
                R_variable['flag'] = 1
                break
            if s % 1000 == 0:
                acc_Train, loss_Train = self.sess.run(metrics,
                                                      feed_dict=batch_feed)
                # The test metrics computed just above are still current (no
                # update has run since), so they are not evaluated again.
                batch_needed = total_batches - finished_batch
                round_time = time.time()
                R_variable['use_time'] = round_time - self.t0
                time_needed = ((round_time - self.t0) / finished_batch *
                               batch_needed)
                print(
                    "In epoch: %d, step: %d, Train accuracy is: %3.3f, Train loss is: %3.3f"
                    % (e + 1, s, acc_Train, loss_Train))
                print("Test accuracy is: %3.3f, Test loss is: %3.3f" %
                      (acc_Test, loss_Test))
                print(
                    "The program have been running for %ds, still need %ds"
                    % (round_time - self.t0, time_needed))

        # End of epoch: evaluate on the full train/test sets and persist.
        acc_Train, loss_Train = self.sess.run(metrics, feed_dict=train_feed)
        acc_Test, loss_Test = self.sess.run(metrics, feed_dict=test_feed)
        R_variable['loss_train'].append(loss_Train)
        R_variable['loss_test'].append(loss_Test)
        R_variable['acc_train'].append(acc_Train)
        R_variable['acc_test'].append(acc_Test)
        savefile()
        gapReocord()
        plotloss()
        plotacc()
        if (R_variable['flag'] == 1):
            break

    print("Program ends. ")
    print("Train accuracy is: %3.3f, Train loss is: %3.3f" %
          (acc_Train, loss_Train))
    print("Test accuracy is: %3.3f, Test loss is: %3.3f" %
          (acc_Test, loss_Test))
    # Measure the elapsed time now: `round_time` is only set inside the
    # periodic report and is undefined if training stops on the first batch.
    print("The program have been running for %ds." %
          (time.time() - self.t0))
from get_coordinates import get_coordinates
from PIL import Image
import pandas as pd
from show_image import show_image_objects
import os
# `pathlib` is used below to list the annotation files but was never imported.
import pathlib
from kerasretinanet.keras_retinanet import models
from kerasretinanet.keras_retinanet.utils.image import read_image_bgr, preprocess_image, resize_image
from kerasretinanet.keras_retinanet.utils.visualization import draw_box, draw_caption
from kerasretinanet.keras_retinanet.utils.colors import label_color
import cv2
import matplotlib.pyplot as plt
import numpy as np

# Let TensorFlow allocate GPU memory on demand instead of reserving it all
# up front, so the model can run alongside other processes.
import tensorflow.compat.v1 as tf1
config = tf1.ConfigProto()
config.gpu_options.allow_growth = True
session = tf1.InteractiveSession(config=config)

# Prepare test pictures and annotations: every regular file in the
# annotation directory is one test picture.
pic_list = [
    p for p in pathlib.Path('AWEForSegmentation/testannot_rect').iterdir()
    if p.is_file()
]

# Column-oriented container for the annotations: one list per field.
dataset = {
    'img_name': [],
    'x_min': [],
    'y_min': [],
    'x_max': [],
    'y_max': [],
    'class_name': [],
}
def train(params):
    """Entry point for training.

    Loads the pickled data splits from ``params.data_path``, builds the ops
    with ``get_ops`` and runs the training loop until the global step reaches
    ``params.num_train_steps``. An ``InvalidArgumentError`` raised during a
    step rolls the session back to the latest checkpoint in
    ``params.output_dir``.

    Args:
        params: settings object; the attributes read here are ``data_path``,
            ``output_dir``, ``num_train_batches``, ``log_every``,
            ``best_valid_ppl_threshold`` and ``num_train_steps``.
    """
    with gfile.GFile(params.data_path, 'rb') as data_file:
        x_train, x_valid, x_test, _, _ = pickle.load(data_file)
        print('-' * 80)
        print('train_size: {0}'.format(np.size(x_train)))
        print('valid_size: {0}'.format(np.size(x_valid)))
        print(' test_size: {0}'.format(np.size(x_test)))

    graph = tf.Graph()
    with graph.as_default():
        tf.random.set_random_seed(2126)
        ops = get_ops(params, x_train, x_valid, x_test)
        # Fetched together on every training step; the last entry is the
        # update op itself and its result is discarded.
        step_fetches = [
            ops[key] for key in ('train_loss', 'grad_norm', 'learning_rate',
                                 'should_reset', 'moving_avg_started',
                                 'train_op')
        ]

        saver = tf.train.Saver(max_to_keep=2)
        session_hooks = [
            tf.train.CheckpointSaverHook(params.output_dir,
                                         save_steps=params.num_train_batches,
                                         saver=saver)
        ]
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.train.SingularMonitoredSession(
            config=config,
            hooks=session_hooks,
            checkpoint_dir=params.output_dir)

        accum_loss, accum_step = 0., 0
        best_valid_ppl = []
        start_time = time.time()
        finished = False
        while not finished:
            try:
                (loss, gn, lr, should_reset, moving_avg_started,
                 _) = sess.run(step_fetches)
                accum_loss += loss
                accum_step += 1
                step = sess.run(ops['global_step'])

                if step % params.log_every == 0:
                    epoch = step // params.num_train_batches
                    train_ppl = np.exp(accum_loss / accum_step)
                    mins_so_far = (time.time() - start_time) / 60.
                    log_fields = (
                        'epoch={0:<5d}'.format(epoch),
                        ' step={0:<7d}'.format(step),
                        ' ppl={0:<10.2f}'.format(train_ppl),
                        ' lr={0:<6.3f}'.format(lr),
                        ' |g|={0:<6.3f}'.format(gn),
                        ' avg={0:<2d}'.format(moving_avg_started),
                        ' mins={0:<.2f}'.format(mins_so_far),
                    )
                    print(''.join(log_fields))

                if moving_avg_started:
                    sess.run(ops['update_moving_avg'])

                if should_reset:
                    # Reset the running statistics, then validate.
                    sess.run(ops['reset_batch_states'])
                    accum_loss = 0
                    accum_step = 0
                    valid_ppl = ops['eval_valid'](
                        sess, use_moving_avg=moving_avg_started)
                    sess.run(
                        [ops['reset_batch_states'], ops['reset_start_idx']])

                    threshold = params.best_valid_ppl_threshold
                    # Start averaging once validation stops improving on the
                    # best value seen before the last `threshold` evaluations.
                    if (not moving_avg_started and
                            len(best_valid_ppl) > threshold and
                            valid_ppl > min(best_valid_ppl[:-threshold])):
                        print('Starting moving_avg')
                        sess.run(ops['start_moving_avg'])
                    best_valid_ppl.append(valid_ppl)

                if step >= params.num_train_steps:
                    ops['eval_test'](sess, use_moving_avg=moving_avg_started)
                    finished = True
            except tf.errors.InvalidArgumentError:
                last_checkpoint = tf.train.latest_checkpoint(
                    params.output_dir)
                print('rolling back to previous checkpoint {0}'.format(
                    last_checkpoint))
                saver.restore(sess, last_checkpoint)
                accum_loss, accum_step = 0., 0
        sess.close()
def build_graph(bert_config,
                opts,
                iterations_per_step=1,
                is_training=True,
                feed_name=None):
    """Build the graph for training.

    Args:
        bert_config: configuration for the BERT model.
        opts: a dictionary containing all global options.
        iterations_per_step: number of iterations per step
        is_training (bool): if true return a graph with trainable variables.
        feed_name: name of the IPU infeed; "_in" / "_out" are appended for
            the infeed and outfeed queues respectively.

    Returns:
        a GraphOps containing a BERT graph and session prepared for inference or training.
    """
    graph = tf.Graph()
    with graph.as_default():
        lr_placeholder = tf.placeholder(bert_config.dtype, shape=[])
        placeholders = {'learning_rate': lr_placeholder}

        infeed_dataset = dataset.data(opts, is_training=is_training)
        infeed_queue = ipu.ipu_infeed_queue.IPUInfeedQueue(
            infeed_dataset,
            feed_name=feed_name + "_in",
            replication_factor=opts['replicas'])
        outfeed_queue = ipu.ipu_outfeed_queue.IPUOutfeedQueue(
            feed_name=feed_name + "_out", replication_factor=opts['replicas'])

        with ipu.scopes.ipu_scope('/device:IPU:0'):
            train_step = training_step_with_infeeds_and_outfeeds(
                bert_config,
                infeed_queue,
                outfeed_queue,
                opts,
                lr_placeholder,
                iterations_per_step,
                is_training=is_training)

        outfeed = outfeed_queue.dequeue()

        bert_logging.print_trainable_variables(opts['logs_path'])

        model_variables = tf.trainable_variables() + tf.get_collection(
            tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)
        model_and_optimiser_variables = tf.global_variables()

        if opts['restore_optimiser_from_ckpt']:
            restore_vars = model_and_optimiser_variables
        else:
            restore_vars = model_variables
        restore_saver = tf.train.Saver(var_list=restore_vars)

        # We store two savers: one for the standard training and another one
        # for the best checkpoint. Both cover the same set of variables.
        if opts['ckpt_model_only']:
            checkpoint_vars = model_variables
        else:
            checkpoint_vars = model_and_optimiser_variables
        latest_saver = tf.train.Saver(var_list=checkpoint_vars,
                                      name='latest',
                                      max_to_keep=5)
        best_saver = tf.train.Saver(var_list=checkpoint_vars,
                                    name='best',
                                    max_to_keep=1)
        savers = {"train_saver": latest_saver, "best_saver": best_saver}

        ipu.utils.move_variable_initialization_to_cpu()
        init_op = tf.global_variables_initializer()
        trainable_vars = tf.trainable_variables()

    # Calculate number of IPUs required for pretraining pipeline.
    embedding_ipus_by_placement = {
        'two_ipus': 2,
        'same_ipu': 1,
        'same_as_hidden_layers': 0
    }
    num_embedding_ipu = embedding_ipus_by_placement[
        opts['embeddings_placement']]
    num_pipeline_stages = (len(bert_config.hidden_layers_per_stage) +
                           num_embedding_ipu)
    num_ipus_required = opts['replicas'] * next_power_of_two(
        num_pipeline_stages)

    # Configure the IPU options.
    ipu_options = get_ipu_config(
        fp_exceptions=opts["fp_exceptions"],
        stochastic_rounding=opts['stochastic_rounding'],
        xla_recompute=opts["xla_recompute"],
        available_memory_proportion=opts['available_memory_proportion'],
        disable_graph_outlining=opts["disable_graph_outlining"],
        num_ipus_required=num_ipus_required,
        max_cross_replica_sum_buffer_size=opts[
            'max_cross_replica_sum_buffer_size'],
        scheduler_selection=opts['scheduler'],
        compile_only=opts['compile_only'],
        partials_type=opts['partials_type'])
    ipu.utils.configure_ipu_system(ipu_options)

    session = tf.Session(graph=graph, config=tf.ConfigProto())

    return GraphOps(graph, session, init_op, [train_step], placeholders,
                    infeed_queue, outfeed, savers, restore_saver,
                    trainable_vars)
def main(_):
    """Run a restored model over the testing set and dump its outputs.

    Builds the model described by FLAGS, loads its variables from
    ``FLAGS.start_checkpoint``, then feeds the 'testing' partition through it
    in batches of ``FLAGS.batch_size``.  Depending on the flags, the hidden
    activations plus logits are written to ``activations.npz`` and/or the
    input fingerprints to ``fingerprints.npy`` inside ``FLAGS.data_dir``.

    Args:
        _: unused positional argument.
    """
    # We want to see all the logging messages for this tutorial.
    tf.logging.set_verbosity(tf.logging.INFO)
    np.set_printoptions(threshold=np.inf, linewidth=10000)

    flags = vars(FLAGS)
    for key in sorted(flags.keys()):
        tf.logging.info('%s = %s', key, flags[key])

    # Start a new TensorFlow session.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    sess = tf.InteractiveSession(config=config)

    # The label list lives next to the checkpoint, one label per line.
    label_file = os.path.join(os.path.dirname(FLAGS.start_checkpoint),
                              "vgg_labels.txt")
    with open(label_file) as fid:
        labels = [line.rstrip() for line in fid]
    label_count = len(labels)

    model_settings = models.prepare_model_settings(
        label_count, FLAGS.sample_rate, FLAGS.nchannels,
        FLAGS.clip_duration_ms, FLAGS.representation, FLAGS.window_size_ms,
        FLAGS.window_stride_ms, 1, FLAGS.dct_coefficient_count,
        FLAGS.filterbank_channel_count,
        [int(x) for x in FLAGS.filter_counts.split(',')],
        [int(x) for x in FLAGS.filter_sizes.split(',')],
        FLAGS.final_filter_len, FLAGS.dropout_prob, FLAGS.batch_size,
        FLAGS.dilate_after_layer, FLAGS.stride_after_layer,
        FLAGS.connection_type)

    fingerprint_size = model_settings['fingerprint_size']
    time_shift_samples = int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000)

    fingerprint_input = tf.placeholder(tf.float32, [None, fingerprint_size],
                                       name='fingerprint_input')

    hidden, logits = models.create_model(fingerprint_input,
                                         model_settings,
                                         FLAGS.model_architecture,
                                         is_training=False)

    tf.global_variables_initializer().run()

    models.load_variables_from_checkpoint(sess, FLAGS.start_checkpoint)

    # Report the model size.
    total_parameters = 0
    for variable in tf.trainable_variables():
        variable_parameters = 1
        for dim in variable.get_shape():
            variable_parameters *= int(dim)
        total_parameters += variable_parameters
    tf.logging.info('number of trainable parameters: %d', total_parameters)

    audio_processor = input_data.AudioProcessor(
        FLAGS.data_url, FLAGS.data_dir, FLAGS.silence_percentage,
        FLAGS.unknown_percentage, FLAGS.wanted_words.split(','),
        FLAGS.labels_touse.split(','), FLAGS.validation_percentage,
        FLAGS.validation_offset_percentage, FLAGS.validation_files.split(','),
        FLAGS.testing_percentage, FLAGS.testing_files.split(','),
        FLAGS.subsample_skip, FLAGS.subsample_word, FLAGS.partition_word,
        FLAGS.partition_n, FLAGS.partition_training_files.split(','),
        FLAGS.partition_validation_files.split(','), -1,
        FLAGS.testing_equalize_ratio, FLAGS.testing_max_samples,
        model_settings)

    testing_set_size = audio_processor.set_size('testing')

    # `range` rather than `xrange`: this function already relies on
    # Python 3-only syntax (star-unpacking inside a tuple display below).
    for isample in range(0, testing_set_size, FLAGS.batch_size):
        fingerprints, _, samples = audio_processor.get_data(
            FLAGS.batch_size, isample, model_settings, 0.0, 0.0,
            0.0 if FLAGS.time_shift_random else time_shift_samples,
            FLAGS.time_shift_random, 'testing', sess)

        # The last batch may be short: pad it with copies of its first row
        # so the model always sees FLAGS.batch_size inputs.  The padding is
        # discarded again below via `obtained`.
        needed = FLAGS.batch_size - fingerprints.shape[0]
        if needed > 0:
            fingerprints = np.append(fingerprints,
                                     np.repeat(fingerprints[[0], :],
                                               needed,
                                               axis=0),
                                     axis=0)
            for _ in range(needed):
                samples.append(samples[0])

        logit_vals, hidden_vals = sess.run([logits, hidden],
                                           feed_dict={
                                               fingerprint_input: fingerprints,
                                           })
        obtained = FLAGS.batch_size - needed

        # Output buffers are allocated on the first batch, once the shapes
        # of the model outputs are known.
        if isample == 0:
            samples_data = [None] * testing_set_size
        samples_data[isample:isample + obtained] = samples[:obtained]

        if FLAGS.save_activations:
            if isample == 0:
                activations = []
                for hidden_val in hidden_vals:
                    nHWC = np.shape(hidden_val)[1:]
                    activations.append(np.empty((testing_set_size, *nHWC)))
                # The logits are stored as the final entry.
                activations.append(
                    np.empty((testing_set_size, np.shape(logit_vals)[1])))
            for ihidden, hidden_val in enumerate(hidden_vals):
                activations[ihidden][isample:isample + obtained, :, :] = \
                    hidden_val[:obtained, :, :, :]
            activations[-1][isample:isample + obtained, :] = \
                logit_vals[:obtained, :]

        if FLAGS.save_fingerprints:
            if isample == 0:
                # Number of analysis windows per clip, and the number of
                # features per window implied by the flat fingerprint length.
                nW = round((FLAGS.clip_duration_ms - FLAGS.window_size_ms) /
                           FLAGS.window_stride_ms + 1)
                nH = round(np.shape(fingerprints)[1] / nW)
                input_layer = np.empty((testing_set_size, nW, nH))
            input_layer[isample:isample + obtained, :, :] = \
                np.reshape(fingerprints[:obtained, :], (obtained, nW, nH))

    if FLAGS.save_activations:
        np.savez(os.path.join(FLAGS.data_dir, 'activations.npz'),
                 *activations, samples=samples_data, labels=labels)
    if FLAGS.save_fingerprints:
        np.save(os.path.join(FLAGS.data_dir, 'fingerprints.npy'), input_layer)
def main():
    """Fine-tune a GPT-2 model on a text dataset.

    Parses the command-line arguments, builds the training (and optional
    validation) graph, restores a checkpoint, then trains until interrupted
    with Ctrl-C.  Checkpoints, text samples and validation losses are
    produced every ``save_every`` / ``sample_every`` / ``val_every`` steps.

    Raises:
        ValueError: if ``args.sample_length`` exceeds the model context size.
        SystemExit: on an unknown optimizer name, or when memory-saving
            gradients are requested together with gradient accumulation.
    """
    args = parser.parse_args()
    enc = encoder.get_encoder(args.model_name)
    hparams = model.default_hparams()
    with open(os.path.join('..//models', args.model_name,
                           'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if args.sample_length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)

    # The 345M model forces the memory-saving settings.
    if args.model_name == '345M':
        args.memory_saving_gradients = True
        if args.optimizer == 'adam':
            args.only_train_transformer_layers = True

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.graph_options.rewrite_options.layout_optimizer = \
        rewriter_config_pb2.RewriterConfig.OFF
    with tf.Session(config=config) as sess:
        context = tf.placeholder(tf.int32, [args.batch_size, None])
        context_in = randomize(context, hparams, args.noise)
        output = model.model(hparams=hparams, X=context_in)
        # Next-token prediction: logits at position t are scored against the
        # token at position t + 1.
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=context[:, 1:], logits=output['logits'][:, :-1]))

        if args.val_every > 0:
            val_context = tf.placeholder(tf.int32,
                                         [args.val_batch_size, None])
            val_output = model.model(hparams=hparams, X=val_context)
            val_loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=val_context[:, 1:],
                    logits=val_output['logits'][:, :-1]))
            val_loss_summary = tf.summary.scalar('val_loss', val_loss)

        tf_sample = sample.sample_sequence(hparams=hparams,
                                           length=args.sample_length,
                                           context=context,
                                           batch_size=args.batch_size,
                                           temperature=1.0,
                                           top_k=args.top_k,
                                           top_p=args.top_p)

        all_vars = [v for v in tf.trainable_variables() if 'model' in v.name]
        train_vars = [v for v in all_vars if '/h' in v.name
                      ] if args.only_train_transformer_layers else all_vars

        if args.optimizer == 'adam':
            opt = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
        elif args.optimizer == 'sgd':
            opt = tf.train.GradientDescentOptimizer(
                learning_rate=args.learning_rate)
        else:
            # `exit()` accepts a single argument; passing the name as a
            # second positional argument raised TypeError instead of
            # exiting with the message.
            raise SystemExit('Bad optimizer: {}'.format(args.optimizer))

        if args.accumulate_gradients > 1:
            if args.memory_saving_gradients:
                raise SystemExit(
                    "Memory saving gradients are not implemented for gradient accumulation yet."
                )
            opt = AccumulatingOptimizer(opt=opt, var_list=train_vars)
            opt_reset = opt.reset()
            opt_compute = opt.compute_gradients(loss)
            opt_apply = opt.apply_gradients()
            summary_loss = tf.summary.scalar('loss', opt_apply)
        else:
            if args.memory_saving_gradients:
                opt_grads = memory_saving_gradients.gradients(
                    loss, train_vars)
            else:
                opt_grads = tf.gradients(loss, train_vars)
            opt_grads = list(zip(opt_grads, train_vars))
            opt_apply = opt.apply_gradients(opt_grads)
            summary_loss = tf.summary.scalar('loss', loss)

        summary_lr = tf.summary.scalar('learning_rate', args.learning_rate)
        summaries = tf.summary.merge([summary_lr, summary_loss])

        summary_log = tf.summary.FileWriter(
            os.path.join(CHECKPOINT_DIR, args.run_name))

        saver = tf.train.Saver(var_list=all_vars,
                               max_to_keep=5,
                               keep_checkpoint_every_n_hours=2)
        sess.run(tf.global_variables_initializer())

        if args.restore_from == 'latest':
            ckpt = tf.train.latest_checkpoint(
                os.path.join(CHECKPOINT_DIR, args.run_name))
            if ckpt is None:
                # Get fresh GPT weights if new run.
                ckpt = tf.train.latest_checkpoint(
                    os.path.join('..//models', args.model_name))
        elif args.restore_from == 'fresh':
            ckpt = tf.train.latest_checkpoint(
                os.path.join('..//models', args.model_name))
        else:
            ckpt = tf.train.latest_checkpoint(args.restore_from)
        print('Loading checkpoint', ckpt)
        saver.restore(sess, ckpt)

        print('Loading dataset...')
        chunks = load_dataset(enc,
                              args.dataset,
                              args.combine,
                              encoding=args.encoding)
        data_sampler = Sampler(chunks)
        if args.val_every > 0:
            if args.val_dataset:
                val_chunks = load_dataset(enc,
                                          args.val_dataset,
                                          args.combine,
                                          encoding=args.encoding)
            else:
                val_chunks = chunks
        print('dataset has', data_sampler.total_size, 'tokens')
        print('Training...')

        if args.val_every > 0:
            # Sample from validation set once with fixed seed to make
            # it deterministic during training as well as across runs.
            val_data_sampler = Sampler(val_chunks, seed=1)
            val_batches = [[
                val_data_sampler.sample(1024)
                for _ in range(args.val_batch_size)
            ] for _ in range(args.val_batch_count)]

        counter = 1
        counter_path = os.path.join(CHECKPOINT_DIR, args.run_name, 'counter')
        if os.path.exists(counter_path):
            # Load the step number if we're resuming a run
            # Add 1 so we don't immediately try to save again
            with open(counter_path, 'r') as fp:
                counter = int(fp.read()) + 1

        def save():
            """Write a checkpoint and record the current step counter."""
            maketree(os.path.join(CHECKPOINT_DIR, args.run_name))
            print(
                'Saving',
                os.path.join(CHECKPOINT_DIR, args.run_name,
                             'model-{}').format(counter))
            # NOTE(review): the '..//model' prefix resolves to the parent of
            # the run directory, which differs from the path printed above
            # and from the directory searched by restore_from == 'latest'.
            # Left unchanged; confirm whether this is intentional.
            saver.save(sess,
                       os.path.join(CHECKPOINT_DIR, args.run_name,
                                    '..//model'),
                       global_step=counter)
            with open(counter_path, 'w') as fp:
                fp.write(str(counter) + '\n')

        def generate_samples():
            """Sample `args.sample_num` texts and write them to a file."""
            print('Generating samples...')
            context_tokens = data_sampler.sample(1)
            all_text = []
            index = 0
            while index < args.sample_num:
                out = sess.run(
                    tf_sample,
                    feed_dict={context: args.batch_size * [context_tokens]})
                for i in range(min(args.sample_num - index, args.batch_size)):
                    text = enc.decode(out[i])
                    text = '======== SAMPLE {} ========\n{}\n'.format(
                        index + 1, text)
                    all_text.append(text)
                    index += 1
            # Only the last generated sample is echoed to the console.
            print(text)
            maketree(os.path.join(SAMPLE_DIR, args.run_name))
            with open(os.path.join(SAMPLE_DIR, args.run_name,
                                   'samples-{}').format(counter),
                      'w',
                      encoding=args.encoding) as fp:
                fp.write('\n'.join(all_text))

        def validation():
            """Average the loss over the fixed validation batches and log it."""
            print('Calculating validation loss...')
            losses = []
            for batch in tqdm.tqdm(val_batches):
                losses.append(
                    sess.run(val_loss, feed_dict={val_context: batch}))
            v_val_loss = np.mean(losses)
            # Feed the averaged value into the loss tensor so the summary op
            # records the mean rather than a single batch.
            v_summary = sess.run(val_loss_summary,
                                 feed_dict={val_loss: v_val_loss})
            summary_log.add_summary(v_summary, counter)
            summary_log.flush()
            print('[{counter} | {time:2.2f}] validation loss = {loss:2.2f}'.
                  format(counter=counter,
                         time=time.time() - start_time,
                         loss=v_val_loss))

        def sample_batch():
            """Return one training batch of 1024-token samples."""
            return [data_sampler.sample(1024) for _ in range(args.batch_size)]

        # (weighted loss sum, weight sum) of an exponential moving average.
        avg_loss = (0.0, 0.0)
        start_time = time.time()

        try:
            while True:
                if counter % args.save_every == 0:
                    save()
                if counter % args.sample_every == 0:
                    generate_samples()
                if args.val_every > 0 and (counter % args.val_every == 0
                                           or counter == 1):
                    validation()

                if args.accumulate_gradients > 1:
                    sess.run(opt_reset)
                    for _ in range(args.accumulate_gradients):
                        sess.run(opt_compute,
                                 feed_dict={context: sample_batch()})
                    (v_loss, v_summary) = sess.run((opt_apply, summaries))
                else:
                    (_, v_loss, v_summary) = sess.run(
                        (opt_apply, loss, summaries),
                        feed_dict={context: sample_batch()})

                summary_log.add_summary(v_summary, counter)

                avg_loss = (avg_loss[0] * 0.99 + v_loss,
                            avg_loss[1] * 0.99 + 1.0)

                print(
                    '[{counter} | {time:2.2f}] loss={loss:2.2f} avg={avg:2.2f}'
                    .format(counter=counter,
                            time=time.time() - start_time,
                            loss=v_loss,
                            avg=avg_loss[0] / avg_loss[1]))

                counter += 1
        except KeyboardInterrupt:
            print('interrupted')
            save()
def run_training():
    """Build the multi-GPU model and run the training loop.

    Resumes from the checkpoint recorded under ``FLAGS.summary_dir/train``
    when one exists and ``FLAGS.restart`` is not set; otherwise the train
    directory is (re)created and training starts from step 0.  A summary is
    written every step and a checkpoint every ``FLAGS.checkpoint_steps``
    steps, up to ``FLAGS.max_steps``.
    """
    with tf.Graph().as_default():
        # Input images and labels.
        features = get_features(True, FLAGS.batch_size)
        model = f_model.multi_gpu_model
        print('so far so good!')
        result = model(features)
        # TODO(sasabour): merge jit scopes after jit scopes where enabled.
        merged = result['summary']
        train_step = result['train']

        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        sess.run(
            tf.group(tf.global_variables_initializer(),
                     tf.local_variables_initializer()))
        saver = tf.train.Saver(max_to_keep=FLAGS.keep_ckpt)

        # Decide the starting step and prepare the train directory.
        train_dir = FLAGS.summary_dir + '/train'
        prev_step = 0
        if not tf.gfile.Exists(train_dir):
            tf.gfile.MakeDirs(train_dir)
        else:
            ckpt = tf.train.get_checkpoint_state(train_dir + '/')
            print(ckpt)
            if (not FLAGS.restart) and ckpt and ckpt.model_checkpoint_path:
                print('hesllo')
                saver.restore(sess, ckpt.model_checkpoint_path)
                # The step is the suffix after the last '-' of the file name.
                ckpt_name = ckpt.model_checkpoint_path.split('/')[-1]
                prev_step = int(ckpt_name.split('-')[-1])
            else:
                # Restart requested or nothing to restore: wipe the directory.
                print('what??')
                tf.gfile.DeleteRecursively(train_dir)
                tf.gfile.MakeDirs(train_dir)

        train_writer = tf.summary.FileWriter(train_dir, sess.graph)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            # `step` counts the iterations attempted in this run; `i` is the
            # global step index continuing from the restored checkpoint.
            step = 0
            for step, i in enumerate(range(prev_step, FLAGS.max_steps),
                                     start=1):
                summary, _ = sess.run([merged, train_step])
                train_writer.add_summary(summary, i)
                completed = i + 1
                if completed % FLAGS.checkpoint_steps == 0:
                    saver.save(sess,
                               os.path.join(train_dir, 'model.ckpt'),
                               global_step=completed)
        except tf.errors.OutOfRangeError:
            print('Done training for %d steps.' % step)
        finally:
            # When done, ask the threads to stop.
            coord.request_stop()
            train_writer.close()
        # Wait for threads to finish.
        coord.join(threads)
        sess.close()