def main(argv=()):
  """Horovod entry point for FFN training.

  Initializes Horovod, derives a per-rank random seed, logs run/environment
  information on rank 0 only, and launches training.
  """
  del argv  # Unused.
  hvd.init()
  model_class = import_symbol(FLAGS.model_name)
  # Multiply the rank by a value large enough that tasks starting at a
  # similar time cannot end up with the same seed.
  seed = int(time.time() + hvd.rank() * 3600 * 24)
  random.seed(seed)
  if hvd.rank() == 0:
    # Only rank 0 raises verbosity so multi-process output stays readable.
    logging.set_verbosity(logging.INFO)
    logging.info('Rank: %d / %d' % (hvd.rank(), hvd.size()))
    logging.info('Random seed: %r', seed)
    logging.info('Learning rate: %r', get_learning_rate(1, FLAGS.batch_size))
    logging.info('Batch size: %d', FLAGS.batch_size)
    try:
      logging.info('Python version: {}'.format(sys.version))
      logging.info('numpy version: {}'.format(np.__version__))
      logging.info('tensorflow version: {}'.format(tf.__version__))
      logging.info('horovod version: {}'.format(hvd.__version__))
      import mkl
      # BUG FIX: mklstr was previously referenced before assignment, so the
      # NameError was swallowed by a bare `except` and the MKL version was
      # never logged. Assign first, then log.
      mklstr = mkl.get_version_string()
      logging.info('MKL: {}'.format(mklstr))
    except Exception as e:
      # Version reporting is best-effort (e.g. mkl may not be installed);
      # log the failure instead of silently ignoring everything.
      logging.info('Could not log full version info: %r', e)
  train_ffn(model_class, batch_size=FLAGS.batch_size,
            **json.loads(FLAGS.model_args))
def main(argv=()):
  """Entry point: seeds the RNG per task and starts FFN training."""
  del argv  # Unused.
  model_class = import_symbol(FLAGS.model_name)
  # Offset the seed by a full day per task so tasks launched around the
  # same time cannot collide on the same seed.
  seed = int(time.time() + FLAGS.task * 3600 * 24)
  logging.info('Random seed: %r', seed)
  random.seed(seed)
  extra_model_args = json.loads(FLAGS.model_args)
  train_ffn(model_class, batch_size=FLAGS.batch_size, **extra_model_args)
def main(argv=()):
  """Entry point: seeds the RNG, pins a single GPU, and starts training."""
  del argv  # Unused.
  model_class = import_symbol(FLAGS.model_name)
  # Offset the seed by a full day per task so tasks launched around the
  # same time cannot collide on the same seed.
  seed = int(time.time() + FLAGS.task * 3600 * 24)
  logging.info('Random seed: %r', seed)
  random.seed(seed)
  # Force TF onto a single GPU by hiding all other devices (added by julien).
  os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
  os.environ["CUDA_VISIBLE_DEVICES"] = str(FLAGS.device_ID)
  extra_model_args = json.loads(FLAGS.model_args)
  train_ffn(model_class, batch_size=FLAGS.batch_size, **extra_model_args)
def main(argv=()):
  """Horovod entry point for FFN training with optional benchmark banner."""
  del argv  # Unused.
  hvd.init()  # Initialize Horovod before anything rank-dependent.
  is_chief = hvd.rank() == 0
  if is_chief:
    print("---number of horovod ranks: ", hvd.size())
    if FLAGS.do_benchmark_test == 1:
      print("---this is a benchmark run")
  model_class = import_symbol(FLAGS.model_name)
  # Offset the seed by a full day per rank so concurrently started ranks
  # cannot end up with the same seed.
  seed = int(time.time() + hvd.rank() * 3600 * 24)
  if is_chief:
    logging.info('Random seed: %r', seed)
  random.seed(seed)
  if is_chief:
    print("train dir:", FLAGS.train_dir)
  train_ffn(model_class, batch_size=FLAGS.batch_size,
            **json.loads(FLAGS.model_args))
def main(unused_argv):
  """Runs em_mask inference over a precomputed (CloudVolume) volume.

  Builds an Estimator from FLAGS/model_args, streams overlapping FOV-sized
  chunks from the input volume through `predict`, and writes the resulting
  predictions back out via `precomputed_utils.writer`.
  """
  model_class = import_symbol(FLAGS.model_name, 'em_mask')
  model_args = json.loads(FLAGS.model_args)
  # fov_size comes from model_args as a sequence; normalize to a tuple of ints.
  fov_size = tuple([int(i) for i in model_args['fov_size']])
  if FLAGS.input_offset and FLAGS.input_size:
    # Explicit region-of-interest given as comma-separated flags.
    input_offset = np.array([int(i) for i in FLAGS.input_offset.split(',')])
    input_size = np.array([int(i) for i in FLAGS.input_size.split(',')])
  else:
    # Otherwise derive offset/size from the volume metadata.
    input_offset, input_size = precomputed_utils.get_offset_and_size(
        FLAGS.input_volume)
  if 'label_size' in model_args:
    label_size = tuple([int(i) for i in model_args['label_size']])
  else:
    # Default the label patch size to the FOV size.
    label_size = fov_size
  model_args['label_size'] = label_size
  input_mip = FLAGS.input_mip
  # NOTE(review): the volume path is prefixed with file:// — local volumes
  # only; confirm remote (gs://, s3://) inputs are not expected here.
  input_cv = CloudVolume('file://%s' % FLAGS.input_volume, mip=FLAGS.input_mip)
  resolution = input_cv.meta.resolution(FLAGS.input_mip)
  overlap = [int(i) for i in FLAGS.overlap]
  # Number of bounding boxes (chunks) the ROI decomposes into.
  num_bbox = precomputed_utils.get_num_bbox(input_offset, input_size,
                                            fov_size, overlap)
  logging.warning('num bbox: %s', num_bbox)
  num_classes = int(model_args['num_classes'])
  params = {
      'model_class': model_class,
      'model_args': model_args,
      'batch_size': FLAGS.batch_size,
      'num_classes': num_classes
  }
  mask_estimator = prepare_model(params, FLAGS.model_checkpoint,
                                 FLAGS.use_gpu)
  # Optional per-step tensor logging; currently disabled (hooks=[] below).
  tensors_to_log = {"center": "center"}
  logging_hook = tf.compat.v1.train.LoggingTensorHook(tensors=tensors_to_log,
                                                      every_n_iter=1)
  predictions = mask_estimator.predict(
      input_fn=lambda: precomputed_utils.predict_input_fn_precomputed(
          input_volume=FLAGS.input_volume,
          input_offset=input_offset,
          input_size=input_size,
          input_mip=input_mip,
          chunk_shape=fov_size,
          label_shape=label_size,
          overlap=overlap,
          batch_size=FLAGS.batch_size,
          offset=FLAGS.image_mean,   # image normalization: subtracted mean
          scale=FLAGS.image_stddev,  # image normalization: divisor
          var_threshold=FLAGS.var_threshold),
      predict_keys=['center', 'logits', 'class_prediction'],
      # hooks=[logging_hook],
      hooks=[],
      yield_single_examples=False)
  # Drain the prediction generator and write chunks back to the output volume.
  # NOTE(review): `mpi_size` is not defined in this function — presumably a
  # module-level MPI world size; verify it is set before main() runs.
  _ = precomputed_utils.writer(predictions,
                               output_volume=FLAGS.output_volume,
                               output_offset=input_offset,
                               output_size=input_size,
                               chunk_shape=fov_size,
                               label_shape=label_size,
                               resolution=resolution,
                               overlap=overlap,
                               num_iter=num_bbox // mpi_size // FLAGS.batch_size)
def main(unused_argv):
  """Trains an em_mask model with Horovod data parallelism.

  Builds a tf.estimator.Estimator whose model_fn depends on the number of
  classes, pins each Horovod process to its local GPU, and trains with a
  rank-0 variable broadcast hook. Only rank 0 writes checkpoints/summaries.
  """
  hvd.init()
  model_class = import_symbol(FLAGS.model_name, 'em_mask')
  model_args = json.loads(FLAGS.model_args)
  fov_size = tuple([int(i) for i in model_args['fov_size']])
  if 'label_size' in model_args:
    label_size = tuple([int(i) for i in model_args['label_size']])
  else:
    # Default the label patch size to the FOV size.
    label_size = fov_size
  model_args['label_size'] = label_size
  num_classes = int(model_args['num_classes'])
  if num_classes == 1:
    # Single-class output uses the binary model_fn (a regression variant
    # exists but is disabled).
    # model_fn = model_utils.mask_model_fn_regression
    model_fn = model_utils.mask_model_fn_binary
  else:
    model_fn = model_utils.mask_model_fn_classfication
  params = {
      'model_class': model_class,
      'model_args': model_args,
      'batch_size': FLAGS.batch_size,
      'num_classes': num_classes,
      'learning_rate': FLAGS.learning_rate,
      'weighted': FLAGS.weighted
  }
  # Pin each Horovod process to its local GPU (TF2-style device config).
  gpus = tf.config.experimental.list_physical_devices('GPU')
  for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
  if gpus:
    tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()], 'GPU')
  # TF1-style session config duplicating the same pinning for the estimator.
  sess_config = tf.compat.v1.ConfigProto()
  sess_config.gpu_options.allow_growth = True
  sess_config.gpu_options.visible_device_list = str(hvd.local_rank())
  # Only rank 0 writes checkpoints and summaries; other ranks get None so
  # they don't race on the same directory.
  model_dir = FLAGS.train_dir if hvd.rank() == 0 else None
  save_summary_steps = 90 if hvd.rank() == 0 else None
  save_checkpoints_secs = 540 if hvd.rank() == 0 else None
  config = tf.estimator.RunConfig(
      model_dir=model_dir,
      save_summary_steps=save_summary_steps,
      save_checkpoints_secs=save_checkpoints_secs,
      session_config=sess_config,
      keep_checkpoint_max=10,
  )
  mask_estimator = tf.estimator.Estimator(model_fn=model_fn,
                                          config=config,
                                          params=params)
  # Broadcast rank-0 initial variables to all ranks at training start.
  bcast_hook = hvd.BroadcastGlobalVariablesHook(0)
  if FLAGS.weights_volumes:
    # Per-voxel weighted training input.
    input_fn = io_utils.train_input_fn_with_weight(
        FLAGS.data_volumes,
        FLAGS.label_volumes,
        FLAGS.weights_volumes,
        FLAGS.tf_coords,
        num_classes,
        fov_size,
        label_size,
        FLAGS.batch_size,
        FLAGS.image_mean,
        FLAGS.image_stddev,
        FLAGS.rotation)
  else:
    # Alternative unweighted input pipelines that were tried previously:
    # input_fn = io_utils.train_input_fn(
    #     FLAGS.data_volumes,
    #     FLAGS.label_volumes,
    #     FLAGS.tf_coords,
    #     num_classes,
    #     fov_size,
    #     label_size,
    #     FLAGS.batch_size,
    #     FLAGS.image_mean,
    #     FLAGS.image_stddev,
    #     FLAGS.rotation)
    # input_fn = io_utils.train_input_rebalance_fn(
    input_fn = io_utils.train_input_mult_fn(
        FLAGS.data_volumes,
        FLAGS.label_volumes,
        FLAGS.tf_coords,
        num_classes,
        fov_size,
        label_size,
        FLAGS.batch_size,
        FLAGS.image_mean,
        FLAGS.image_stddev,
        FLAGS.rotation,
        False)
  mask_estimator.train(input_fn=input_fn,
                       steps=FLAGS.max_steps,
                       hooks=[bcast_hook])