def get_available_gpus():
    """Return the names of all GPU devices visible to TensorFlow."""
    # Probe with a session that claims almost no GPU memory so the check
    # itself does not starve other processes running on the same card.
    probe_options = GPUOptions(per_process_gpu_memory_fraction=0.01,
                               allow_growth=True)
    with Session(config=ConfigProto(gpu_options=probe_options)) as sess:
        tf.logging.set_verbosity(tf.logging.WARN)
        devices = device_lib.list_local_devices()
        return [dev.name for dev in devices if dev.device_type == 'GPU']
def parallel_gpu_jobs(allow_growth=True, fraction=.5):
    """Cap the GPU memory the TensorFlow backend may use.

    Parameters
    ----------
    allow_growth : bool
        Whether TensorFlow may grow its allocation on demand.
    fraction : float
        Fraction of total GPU memory this process may claim
        (e.g. 0.5 means 4gb out of 8gb).
    """
    from tensorflow.compat.v1 import GPUOptions, ConfigProto, Session
    from tensorflow.compat.v1 import keras as K

    options = GPUOptions(allow_growth=allow_growth,
                         per_process_gpu_memory_fraction=fraction)
    # Install the capped session as the Keras backend session.
    K.backend.set_session(Session(config=ConfigProto(gpu_options=options)))
def deconv_volume(
    vol: np.ndarray,
    psf: np.ndarray,
    deconvolver: tfd_restoration.RichardsonLucyDeconvolver,
    n_iter: int,
    observer: Optional[Callable] = None,
) -> np.ndarray:
    """Richardson-Lucy deconvolution of ``vol`` with point spread function ``psf``.

    Parameters
    ----------
    vol : np.ndarray
        Input volume.
    psf : np.ndarray
        Point spread function.
    deconvolver : tfd_restoration.RichardsonLucyDeconvolver
        Initialized deconvolver (see init_rl_deconvolver).
    n_iter : int
        Number of RL iterations to run.
    observer : Optional[Callable], optional
        NOT YET IMPLEMENTED — intended per-iteration progress callback,
        with an option to save intermediate results (default None).

    Returns
    -------
    np.ndarray
        The deconvolved volume.
    """
    if observer is not None:
        warnings.warn("Observer function for iteration not yet implemented.")
    # Grow GPU memory on demand instead of reserving a fixed fraction.
    session_config = ConfigProto(log_device_placement=False,
                                 gpu_options=GPUOptions(allow_growth=True))
    acquisition = fd_data.Acquisition(data=vol, kernel=psf)
    result = deconvolver.run(acquisition, niter=n_iter,
                             session_config=session_config)
    logger.debug(f"flowdec info: {result.info}")
    return result.data
def detect_face(self, Img, image_size):
    """Detect faces in ``Img`` with MTCNN and return aligned square crops.

    Builds a fresh TF graph/session, runs the three-stage MTCNN detector,
    then crops each detection (with margin), resizes it to
    ``image_size`` x ``image_size`` and converts BGR -> RGB.

    Returns
    -------
    (faces, bb) : tuple
        ``faces`` — uint8 array of shape (n, image_size, image_size, 3);
        ``bb`` — int32 array of clipped bounding boxes (n, 4).
    """
    # MTCNN hyper-parameters.
    minsize = 20
    threshold = [0.6, 0.7, 0.7]
    factor = 0.709
    margin = 44
    gpu_memory_fraction = 1.0

    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        mem_opts = GPUOptions(
            per_process_gpu_memory_fraction=gpu_memory_fraction)
        sess = Session(config=ConfigProto(gpu_options=mem_opts,
                                          log_device_placement=False))
        with sess.as_default():
            dir_model = "./align"
            pnet, rnet, onet = align.detect_face.create_mtcnn(
                sess, dir_model)

            img_dims = np.asarray(Img.shape)[0:2]
            bounding_boxes, _ = align.detect_face.detect_face(
                Img, minsize, pnet, rnet, onet, threshold, factor)

            n_faces = len(bounding_boxes)
            faces = np.zeros((n_faces, image_size, image_size, 3),
                             dtype="uint8")
            bb = np.zeros((n_faces, 4), dtype=np.int32)
            half_margin = margin / 2
            for i in range(n_faces):
                det = np.squeeze(bounding_boxes[i, 0:4])
                # Expand by the margin, clipped to the image bounds.
                bb[i, 0] = np.maximum(det[0] - half_margin, 0)
                bb[i, 1] = np.maximum(det[1] - half_margin, 0)
                bb[i, 2] = np.minimum(det[2] + half_margin, img_dims[1])
                bb[i, 3] = np.minimum(det[3] + half_margin, img_dims[0])
                cropped = Img[bb[i, 1]:bb[i, 3], bb[i, 0]:bb[i, 2], :]
                resized = Image.fromarray(cropped).resize(
                    (image_size, image_size), Image.BILINEAR)
                faces[i, :, :, :] = cv2.cvtColor(np.asarray(resized),
                                                 cv2.COLOR_BGR2RGB)
            return faces, bb
def test_dali_tf_op(pipe_type=CaffeReadPipeline, batch_size=16, iterations=32):
    """Run the DALI TF op for several iterations and sanity-check the labels."""
    test_batch = get_batch_dali(batch_size, pipe_type, tf.int32)
    try:
        from tensorflow.compat.v1 import GPUOptions, ConfigProto, Session
    except ImportError:
        # Older TF versions don't have compat.v1 layer
        from tensorflow import GPUOptions, ConfigProto, Session

    config = ConfigProto(
        gpu_options=GPUOptions(per_process_gpu_memory_fraction=0.5))
    with Session(config=config) as sess:
        for _ in range(iterations):
            imgs, labels = sess.run(test_batch)
            # Testing correctness of labels
            for label in labels:
                # labels need to be integers in the 1000-class range
                assert np.equal(np.mod(label, 1), 0).all()
                assert (label >= 0).all()
                assert (label <= 999).all()
def __init__(self):
    """Load the face models (feature, detection, landmark, attribute),
    one isolated TF graph/session per model."""
    self.path = './face_models'

    # Session config: soft placement, quiet logging, and GPU memory that
    # grows on demand instead of being reserved up front.
    mem_options = GPUOptions(allow_growth=True)
    session_config = ConfigProto(allow_soft_placement=True,
                                 log_device_placement=False,
                                 gpu_options=mem_options)

    # Separate graphs keep the four frozen models independent.
    self.face_feature_sess = Session(graph=tf.Graph(), config=session_config)
    self.face_detection_sess = Session(graph=tf.Graph(), config=session_config)
    self.face_landmark_sess = Session(graph=tf.Graph(), config=session_config)
    self.face_attribute_sess = Session(graph=tf.Graph(), config=session_config)

    # Load each frozen graph into its session.
    self.ff_pb_path = self.path + "/face_recognition_model.pb"
    self.init_feature_face()
    self.detect_pb_path = self.path + "/face_detection_model.pb"
    self.init_detection_face_tf()
    self.landmark_pb_path = self.path + "/landmark.pb"
    self.init_face_landmark_tf()
    self.attribute_pb_path = self.path + "/face_attribute.pb"
    self.init_face_attribute()
def get_tf_config(gpus=None, gpu_fraction=1, horovod=None,
                  allow_parallel_threads=True):
    """Build a tf.compat.v1 ConfigProto for training.

    Parameters
    ----------
    gpus : int or list of int, optional
        GPU id(s) to make visible; None leaves the device list untouched.
    gpu_fraction : float
        Per-process GPU memory fraction; only applied when 0 < fraction < 1.
    horovod : module, optional
        When given, the visible device list is pinned to the local rank.
    allow_parallel_threads : bool
        Set False for reproducibility (single-threaded op execution).

    Returns
    -------
    ConfigProto
    """
    intra_op_parallelism_threads = 2  # default in tensorflow
    inter_op_parallelism_threads = 5  # default in tensorflow
    if not allow_parallel_threads:
        # this is needed for reproducibility
        intra_op_parallelism_threads = 1
        inter_op_parallelism_threads = 1

    # allow_growth=True is needed for a weird behavior with CUDA 10
    # https://github.com/tensorflow/tensorflow/issues/24828
    if gpus is not None and 0 < gpu_fraction < 1:
        # a full memory fraction was the source of freezing in tensorflow 1.3.1
        gpu_options = GPUOptions(
            per_process_gpu_memory_fraction=gpu_fraction,
            allow_growth=True)
    else:
        gpu_options = GPUOptions(allow_growth=True)

    if gpus is not None:
        if isinstance(gpus, int):
            gpus = [gpus]
        gpu_options.visible_device_list = ','.join(str(g) for g in gpus)

    tf_config = ConfigProto(
        allow_soft_placement=True,
        log_device_placement=False,
        intra_op_parallelism_threads=intra_op_parallelism_threads,
        inter_op_parallelism_threads=inter_op_parallelism_threads,
        gpu_options=gpu_options
    )

    if horovod is not None:
        # Horovod: one GPU per process, pinned to the local rank.
        tf_config.gpu_options.visible_device_list = str(horovod.local_rank())
    return tf_config
import tensorflow as tf import numpy as np import cv2 from tensorflow.keras.callbacks import (ReduceLROnPlateau, EarlyStopping, ModelCheckpoint, TensorBoard) from yolov3_tf2.models import (YoloV3, YoloV3Tiny, YoloLoss, yolo_anchors, yolo_anchor_masks, yolo_tiny_anchors, yolo_tiny_anchor_masks) from yolov3_tf2.utils import freeze_all import yolov3_tf2.dataset as dataset # 慢慢使用GPU from tensorflow.compat.v1 import ConfigProto from tensorflow.compat.v1 import InteractiveSession from tensorflow.compat.v1 import GPUOptions gpu_options = GPUOptions(per_process_gpu_memory_fraction=0.6) config = ConfigProto(allow_soft_placement=True, gpu_options=gpu_options) config.gpu_options.allow_growth = True session = InteractiveSession(config=config) flags.DEFINE_string('dataset', './data/voc2012_train.tfrecord', 'path to dataset') flags.DEFINE_string('val_dataset', './data/voc2012_val.tfrecord', 'path to validation dataset') flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny') flags.DEFINE_string('weights', './checkpoints/yolov3.tf', 'path to weights file') flags.DEFINE_string('classes', './data/hats_voc2012.names', 'path to classes file') flags.DEFINE_enum( 'mode', 'fit', ['fit', 'eager_fit', 'eager_tf'], 'fit: model.fit, '
def test_fw_iter(IteratorClass, args):
    """Benchmark one DALI->TF iterator class: build one pipeline per GPU,
    run ``args.epochs`` passes (first pass is warm-up) and print throughput.

    Parameters
    ----------
    IteratorClass : type
        Either the DALI TF plugin op class or a framework iterator class.
    args : argparse.Namespace
        Expects batch_size, workers, gpus, iters, fp16, nhwc, epochs,
        print_freq attributes.
    """
    iterator_name = IteratorClass.__module__ + "." + IteratorClass.__name__
    print("Start testing {}".format(iterator_name))
    sess = None
    daliop = None
    dali_train_iter = None
    images = []
    labels = []

    # One pipeline per GPU; data_paths / PREFETCH come from module scope.
    pipes = [
        RN50Pipeline(batch_size=args.batch_size, num_threads=args.workers,
                     device_id=n, num_gpus=args.gpus, data_paths=data_paths,
                     prefetch=PREFETCH, fp16=args.fp16, nhwc=args.nhwc)
        for n in range(args.gpus)
    ]
    [pipe.build() for pipe in pipes]

    iters = args.iters
    if args.iters < 0:
        # Negative iters means "one full epoch": take the reader size and
        # round up to whole batches, then to whole per-GPU steps.
        iters = pipes[0].epoch_size("Reader")
        assert (all(pipe.epoch_size("Reader") == iters for pipe in pipes))
        iters_tmp = iters
        iters = iters // args.batch_size
        if iters_tmp != iters * args.batch_size:
            iters += 1
        iters_tmp = iters
        iters = iters // args.gpus
        if iters_tmp != iters * args.gpus:
            iters += 1

    if iterator_name == "nvidia.dali.plugin.tf.DALIIterator":
        # TF-op path: wire one DALI op per GPU into a single session graph.
        daliop = IteratorClass()
        for dev in range(args.gpus):
            with tf.device('/gpu:%i' % dev):
                if args.fp16:
                    out_type = tf.float16
                else:
                    out_type = tf.float32
                image, label = daliop(pipeline=pipes[dev],
                                      shapes=[(args.batch_size, 3, 224, 224), ()],
                                      dtypes=[out_type, tf.int32])
                images.append(image)
                labels.append(label)
        gpu_options = GPUOptions(per_process_gpu_memory_fraction=0.8)
        config = ConfigProto(gpu_options=gpu_options)
        sess = Session(config=config)

    end = time.time()
    for i in range(args.epochs):
        if i == 0:
            print("Warm up")
        else:
            print("Test run " + str(i))
        data_time = AverageMeter()

        if iterator_name == "nvidia.dali.plugin.tf.DALIIterator":
            assert sess != None
            for j in range(iters):
                res = sess.run([images, labels])
                data_time.update(time.time() - end)
                if j % args.print_freq == 0:
                    print(
                        "{} {}/ {}, avg time: {} [s], worst time: {} [s], speed: {} [img/s]"
                        .format(iterator_name, j + 1, iters, data_time.avg,
                                data_time.max_val,
                                args.gpus * args.batch_size / data_time.avg))
                end = time.time()
        else:
            # Framework-iterator path: iterate the wrapper directly.
            dali_train_iter = IteratorClass(pipes,
                                            pipes[0].epoch_size("Reader"))
            j = 0
            for it in iter(dali_train_iter):
                data_time.update(time.time() - end)
                if j % args.print_freq == 0:
                    print(
                        "{} {}/ {}, avg time: {} [s], worst time: {} [s], speed: {} [img/s]"
                        .format(iterator_name, j + 1, iters, data_time.avg,
                                data_time.max_val,
                                args.gpus * args.batch_size / data_time.avg))
                end = time.time()
                j = j + 1
                if j > iters:
                    break
def train(infer_func, params):
    """Train an image classifier with tf.estimator, Horovod and DALI input.

    Parameters
    ----------
    infer_func : callable
        Model-building function passed through to ``_cnn_model_function``.
    params : dict
        Training configuration; keys unpacked below.
    """
    image_width = params['image_width']
    image_height = params['image_height']
    image_format = params['image_format']
    batch_size = params['batch_size']
    distort_color = params['distort_color']
    data_dir = params['data_dir']
    data_idx_dir = params['data_idx_dir']
    log_dir = params['log_dir']
    precision = params['precision']
    momentum = params['momentum']
    learning_rate_init = params['learning_rate_init']
    learning_rate_power = params['learning_rate_power']
    weight_decay = params['weight_decay']
    loss_scale = params['loss_scale']
    larc_eta = params['larc_eta']
    larc_mode = params['larc_mode']
    num_iter = params['num_iter']
    checkpoint_secs = params['checkpoint_secs']
    display_every = params['display_every']
    iter_unit = params['iter_unit']
    dali_cpu = params['dali_cpu']
    epoch_evaluation = params['epoch_evaluation']
    use_xla = params['use_xla']

    # Determinism is not fully supported by all TF ops.
    # Disabling until remaining wrinkles can be ironed out.
    deterministic = False
    if deterministic:
        tf.set_random_seed(2 * (1 + hvd.rank()))
        random.seed(3 * (1 + hvd.rank()))
        np.random.seed(2)

    # Normalize "" to None so the Estimator/glob logic below can test `is None`.
    log_dir = None if log_dir == "" else log_dir
    data_dir = None if data_dir == "" else data_dir
    data_idx_dir = None if data_idx_dir == "" else data_idx_dir

    global_batch_size = batch_size * hvd.size()
    if data_dir is not None:
        filename_pattern = os.path.join(data_dir, '%s-*')
        train_filenames = sorted(tf.gfile.Glob(filename_pattern % 'train'))
        num_training_samples = _get_num_records(train_filenames)
    else:
        # No data dir: pretend one global batch is the whole dataset.
        num_training_samples = global_batch_size

    train_idx_filenames = None
    if data_idx_dir is not None:
        filename_pattern = os.path.join(data_idx_dir, '%s-*')
        train_idx_filenames = sorted(tf.gfile.Glob(filename_pattern % 'train'))

    # Convert the requested duration into steps / epochs / LR-decay steps.
    if iter_unit.lower() == 'epoch':
        nstep = num_training_samples * num_iter // global_batch_size
        num_epochs = num_iter
        decay_steps = nstep
    else:
        nstep = num_iter
        num_epochs = max(nstep * global_batch_size // num_training_samples, 1)
        # Decay schedule sized as if training ran for 90 epochs.
        decay_steps = 90 * num_training_samples \
            // global_batch_size
    nstep_per_epoch = num_training_samples // global_batch_size

    # Horovod: pin GPU to be used to process local rank (one GPU per process)
    gpu_options = GPUOptions(per_process_gpu_memory_fraction=0.7)
    config = ConfigProto(gpu_options=gpu_options)
    if use_xla:
        config.graph_options.optimizer_options.global_jit_level = (
            tf.OptimizerOptions.ON_1)
    #config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(hvd.local_rank())
    config.gpu_options.force_gpu_compatible = True  # Force pinned memory
    config.intra_op_parallelism_threads = 1  # Avoid pool of Eigen threads
    config.inter_op_parallelism_threads = max(2, 40 // hvd.size() - 2)

    classifier = tf.estimator.Estimator(
        model_fn=_cnn_model_function,
        model_dir=log_dir,
        params={
            'model': infer_func,
            'format': image_format,
            'dtype': tf.float16 if precision == 'fp16' else tf.float32,
            'momentum': momentum,
            'learning_rate_init': learning_rate_init,
            'learning_rate_power': learning_rate_power,
            'decay_steps': decay_steps,
            'weight_decay': weight_decay,
            'loss_scale': loss_scale,
            'larc_eta': larc_eta,
            'larc_mode': larc_mode,
            'deterministic': deterministic,
            'n_classes': 1000,
            'dali_cpu': dali_cpu,
        },
        config=tf.estimator.RunConfig(
            tf_random_seed=2 * (1 + hvd.rank()) if deterministic else None,
            session_config=config,
            # Only rank 0 writes checkpoints.
            save_checkpoints_secs=checkpoint_secs if hvd.rank() == 0 else None,
            save_checkpoints_steps=nstep if hvd.rank() == 0 else None,
            keep_checkpoint_every_n_hours=3))

    print("Training")

    if not deterministic:
        num_preproc_threads = 4
    else:
        num_preproc_threads = 1

    training_hooks = [
        hvd.BroadcastGlobalVariablesHook(0),
        _PrefillStagingAreasHook()
    ]
    if hvd.rank() == 0:
        # Only rank 0 logs throughput.
        training_hooks.append(
            _LogSessionRunHook(global_batch_size, num_training_samples,
                               display_every))

    input_func = lambda: nvutils.image_set(train_filenames,
                                           batch_size,
                                           image_height,
                                           image_width,
                                           training=True,
                                           distort_color=distort_color,
                                           deterministic=deterministic,
                                           num_threads=num_preproc_threads,
                                           dali_cpu=dali_cpu,
                                           idx_filenames=train_idx_filenames)

    if epoch_evaluation:
        classifier_eval, eval_input_func, eval_steps = create_validation_estimator(
            infer_func, params)

    try:
        if epoch_evaluation:
            # Train epoch-by-epoch, evaluating (on rank 0) after each epoch.
            for i in range(num_epochs):
                classifier.train(input_fn=input_func,
                                 steps=nstep // num_epochs,
                                 hooks=training_hooks)
                if hvd.rank() == 0:
                    eval_result = classifier_eval.evaluate(
                        input_fn=eval_input_func, steps=eval_steps)
                    print('epoch {} top1: {}%'.format(
                        i, eval_result['top1_accuracy'] * 100))
                    print('epoch {} top5: {}%'.format(
                        i, eval_result['top5_accuracy'] * 100))
        else:
            classifier.train(input_fn=input_func,
                             max_steps=nstep,
                             hooks=training_hooks)
    except KeyboardInterrupt:
        print("Keyboard interrupt")
def train_and_eval(DIR, EPOCHS=20, BS=16, IMAGE_COUNT=139,
                   VALIDATION_COUNT=134, learning_rate=0.05, beta=0.5,
                   input_size=(64, 64, 32, 1)):
    """Train a normalized 3D U-Net with a Tversky loss, then evaluate it.

    Trains on ``DIR + 'train/'`` against ``DIR + 'validation/'``, saves the
    model under ./model/, appends a summary to ./log.txt and runs
    prediction on ``DIR + 'test/'``.

    Notes
    -----
    os.environ['TF_CPP_MIN_LOG_LEVEL']
      0 = all messages are logged (default behavior)
      1 = INFO messages are not printed
      2 = INFO and WARNING messages are not printed
      3 = INFO, WARNING, and ERROR messages are not printed
    """
    # session setting: suppress INFO/WARNING and let GPU memory grow on demand.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    gpu_options = GPUOptions(per_process_gpu_memory_fraction=1.0)
    config = ConfigProto(gpu_options=gpu_options)
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    print("Number of GPUs Available: ",
          len(tf.config.experimental.list_physical_devices('GPU')))

    ## program parameter
    BASE_DIR = DIR
    TRAIN_DIR_PATH = BASE_DIR + 'train/'
    VALIDATION_DIR_PATH = BASE_DIR + 'validation/'
    seed = 1
    time_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

    # create logs
    log_dir = "logs/fit/" + time_stamp
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir,
                                                          histogram_freq=0)
    file_writer = tf.summary.create_file_writer(log_dir)

    ## training parameter
    loss_func = tversky_loss(beta)
    steps_per_epoch = 70
    input_size = input_size

    ## construct training and validation set
    training_data = PPDataGenerator(TRAIN_DIR_PATH, batch_size=BS,
                                    image_size=64)
    validating_data = PPDataGenerator(VALIDATION_DIR_PATH, batch_size=BS,
                                      image_size=64)

    ## load model
    model = unet_norm(input_size=input_size, loss_func=loss_func,
                      l_rate=learning_rate)
    print('#### Model loaded')

    ## training begin
    model.fit_generator(training_data,
                        steps_per_epoch=steps_per_epoch,
                        epochs=EPOCHS,
                        validation_data=validating_data,
                        callbacks=[tensorboard_callback])

    if not os.path.exists('./model/'):
        os.makedirs('./model/')
    model.save("model/UNet_%s.h5" % time_stamp)
    print("model saved at model/UNet_%s.h5" % time_stamp)
    # FIX: log text previously said "learninf rate".
    text = 'UNet_%s.h5\n\
loss: weighted_dice %s\n\
learning rate: %s\n\
image size: %s\n'\
        % (time_stamp, beta, learning_rate, input_size)
    with open("./log.txt", "a") as myfile:
        myfile.write(text)

    ## prediction begin
    TP_sum, FP_sum, FN_sum, TP_P_sum, FP_P_sum, FN_P_sum = predict_folder(
        model, '%stest/' % BASE_DIR, save_mode=4,
        save_dir='./result/%s' % (time_stamp))
    # Aggregate confusion counts into precision/recall (the _P variants are
    # presumably per-pixel counts — TODO confirm against predict_folder).
    eval_precision = divide(TP_sum, TP_sum + FP_sum)
    eval_recall = divide(TP_sum, TP_sum + FN_sum)
    eval_precision_P = divide(TP_P_sum, TP_P_sum + FP_P_sum)
    eval_recall_P = divide(TP_P_sum, TP_P_sum + FN_P_sum)
    text = 'Evaluation result: %s\n\
TP : %s\n\
FP : %s\n\
FN : %s\n\
Recall by pixel: %s\n\
Precision by pixel: %s\n\
Recall by area: %s\n\
Precision by area: %s\n\n\n'\
        % (time_stamp, TP_sum, FP_sum, FN_sum, eval_recall_P,
           eval_precision_P, eval_recall, eval_precision)
    with open("./log.txt", "a") as myfile:
        myfile.write(text)
    file_writer.close()
    InteractiveSession.close(session)
def train_and_eval(params):
    """Train a U-Net from a positional ``params`` list, then evaluate it.

    ``params`` layout: [0] base dir, [1] seed, [2] log dir prefix,
    [3] loss function, [4] input size tuple, [5] steps per epoch,
    [6] epochs, [7] batch size, [8] image count, [9] validation count,
    [10] learning rate.

    Notes
    -----
    os.environ['TF_CPP_MIN_LOG_LEVEL']
      0 = all messages are logged (default behavior)
      1 = INFO messages are not printed
      2 = INFO and WARNING messages are not printed
      3 = INFO, WARNING, and ERROR messages are not printed
    """
    # session setting: suppress INFO/WARNING and let GPU memory grow on demand.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    gpu_options = GPUOptions(per_process_gpu_memory_fraction=1.0)
    config = ConfigProto(gpu_options=gpu_options)
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    print("Number of GPUs Available: ",
          len(tf.config.experimental.list_physical_devices('GPU')))

    ## program parameter
    BASE_DIR = params[0]
    TRAIN_DIR_PATH = BASE_DIR + 'train/'
    VALIDATION_DIR_PATH = BASE_DIR + 'validation/'
    seed = params[1]
    time_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

    log_dir = params[2] + time_stamp
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir,
                                                          histogram_freq=0)
    file_writer = tf.summary.create_file_writer(log_dir)

    ## training parameter
    loss_func = params[3]
    input_size = params[4]
    steps_per_epoch = params[5]
    EPOCHS = params[6]
    BS = params[7]
    IMAGE_COUNT = params[8]
    VALIDATION_COUNT = params[9]
    learning_rate = params[10]

    ## construct training and validation set
    training_data = DataGenerator(TRAIN_DIR_PATH, batch_size=BS,
                                  image_size=input_size[0])
    validating_data = DataGenerator(VALIDATION_DIR_PATH, batch_size=BS,
                                    image_size=input_size[0])

    ## load model
    model = unet(input_size=input_size, loss_func=loss_func,
                 l_rate=learning_rate)
    model.summary()
    print('#### Model loaded')

    ## training begin
    model.fit_generator(training_data,
                        steps_per_epoch=steps_per_epoch,
                        epochs=EPOCHS,
                        validation_data=validating_data,
                        callbacks=[tensorboard_callback])

    if not os.path.exists('./model/'):
        os.makedirs('./model/')
    model.save("model/UNet_%s.h5" % time_stamp)
    print("model saved at model/UNet_%s.h5" % time_stamp)
    # FIX: the interpolated values were in the wrong order — the log labelled
    # steps_per_epoch as "Epoch" and EPOCHS as "Step per epoch".
    text = 'UNet_%s.h5\n\
Learning rate: %s\n\
Image size: %s\n\
Epoch: %s\n\
Batch size: %s\n\
Step per epoch: %s\n'\
        % (time_stamp, learning_rate, input_size, EPOCHS, BS,
           steps_per_epoch)
    with open("./log.txt", "a") as myfile:
        myfile.write(text)
    file_writer.close()

    ## prediction begin
    predict_folder(model, '%stest/' % BASE_DIR,
                   save_dir='./result/%s' % (time_stamp))
    # FIX: the session was previously closed BEFORE predict_folder ran the
    # model (and then closed a second time). Close it once, after prediction.
    InteractiveSession.close(session)