def run(experiment_id, restore_path, config_file, bit, unquant_layers):
    """Prepare the environment and configuration, then profile a model.

    Args:
        experiment_id: Experiment to load the saved config from. When falsy,
            the fixed id "profile" is used and the config comes solely from
            ``config_file``.
        restore_path: Checkpoint path prefix. When ``None`` and an experiment
            id is given, it is looked up in the experiment checkpoint dir.
        config_file: Optional config file; merged over the experiment config
            when an experiment id is given.
        bit: Forwarded unchanged to ``_profile``.
        unquant_layers: Forwarded unchanged to ``_profile``.

    Raises:
        Exception: If both ``config_file`` and ``experiment_id`` are ``None``,
            or if the checkpoint index file of an experiment is missing.
    """
    if experiment_id is None and config_file is None:
        raise Exception("config_file or experiment_id are required")

    if not experiment_id:
        # No experiment to read from: profile straight from the config file.
        experiment_id = "profile"
        environment.init(experiment_id)
        config = config_util.load(config_file)
    else:
        environment.init(experiment_id)
        config = config_util.load_from_experiment()
        if config_file:
            overrides = config_util.load(config_file)
            config = config_util.merge(config, overrides)

        if restore_path is None:
            latest = executor.search_restore_filename(
                environment.CHECKPOINTS_DIR)
            restore_path = os.path.join(environment.CHECKPOINTS_DIR, latest)

        index_file = "{}.index".format(restore_path)
        if not os.path.exists(index_file):
            raise Exception(
                "restore file {} dont exists.".format(restore_path))

    executor.init_logging(config)
    config_util.display(config)

    _profile(config, restore_path, bit, unquant_layers)
def run(experiment_id,
        restore_path=None,
        image_size=(None, None),
        image=DEFAULT_INFERENCE_TEST_DATA_IMAGE,
        config_file=None):
    """Load an experiment's config, force batch size 1 and export the model.

    Args:
        experiment_id: Experiment whose saved config is loaded.
        restore_path: Checkpoint path forwarded to ``_export``.
        image_size: Pair used to override the configured image size; the
            default ``(None, None)`` leaves the configured size untouched.
        image: Forwarded unchanged to ``_export``.
        config_file: Optional config file merged over the experiment config.

    Returns:
        Whatever ``_export`` returns.
    """
    environment.init(experiment_id)

    config = config_util.load_from_experiment()
    if config_file:
        overrides = config_util.load(config_file)
        config = config_util.merge(config, overrides)

    # Export always runs with a batch of one.
    config.BATCH_SIZE = 1
    config.NETWORK.BATCH_SIZE = 1
    config.DATASET.BATCH_SIZE = 1

    size_overridden = list(image_size) != [None, None]
    if size_overridden:
        config.IMAGE_SIZE = list(image_size)
        config.NETWORK.IMAGE_SIZE = list(image_size)

        # Keep the pre-processing pipeline in step with the new size.
        if config.PRE_PROCESSOR:
            config.PRE_PROCESSOR.set_image_size(image_size)

        # Same for the post-processing pipeline.
        if config.POST_PROCESSOR:
            config.POST_PROCESSOR.set_image_size(image_size)

        print("Override IMAGE_SIZE", config.IMAGE_SIZE)

    executor.init_logging(config)
    config_util.display(config)

    return _export(config, restore_path, image)
def run_server(server_info, experiment_id, config_file, restore_path):
    """Load a trained model and serve it over a TCP socket forever.

    Every accepted connection is handed to ``receive_and_send`` on its own
    daemon thread together with the shared ``Inference`` instance.

    Args:
        server_info: Address passed to ``socket.bind``; it is also unpacked
            into the "boot: {}:{}" message, so it must hold two items.
        experiment_id: Experiment whose saved config is loaded.
        config_file: Optional config file merged over the experiment config.
        restore_path: Checkpoint path. When ``None`` it is looked up in the
            experiment checkpoint directory.
    """
    environment.init(experiment_id)

    # The experiment config is always the base. Previously the merge branch
    # read ``config`` before it had been assigned, so passing a config file
    # crashed with UnboundLocalError.
    config = config_util.load_from_experiment()
    if config_file:
        config = config_util.merge(config, config_util.load(config_file))

    if restore_path is None:
        restore_file = search_restore_filename(environment.CHECKPOINTS_DIR)
        restore_path = os.path.join(environment.CHECKPOINTS_DIR, restore_file)

    inference_model = Inference(config, restore_path)

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind(server_info)
        s.listen(32)
        print("boot: {}:{}".format(*server_info))

        while True:
            client_conn, client_addr = s.accept()
            # "\033[K" clears the current terminal line, "\r" rewinds to its
            # start, so the status line is overwritten per connection.
            print("\033[Kfrom: {}:{}".format(*client_addr), end="\r")
            try:
                th = threading.Thread(
                    target=receive_and_send,
                    args=(client_conn, inference_model),
                    daemon=True,
                )
                th.start()
            except BrokenPipeError:
                # NOTE(review): an error raised inside the worker thread does
                # not propagate here; this only guards the start() call.
                print("Send data aborted!")
def run(config_file, tunable_id, local_dir):
    """Run a hyper-parameter search described by ``config_file``.

    The config's ``TUNE_SPACE`` and ``TUNE_SPEC`` entries are converted to
    plain dictionaries, the spec is completed with the trainable id, the
    absolute config path, the output directory and a trial-name creator, and
    the experiment is run with HyperOpt search plus an async HyperBand
    scheduler. The best result is printed.

    Args:
        config_file: Path of the config file, joined onto the current
            working directory before being handed to each trial.
        tunable_id: Name under which ``TrainTunable`` is registered.
        local_dir: Stored in the spec under ``'local_dir'``.
    """
    register_trainable(tunable_id, TrainTunable)
    lm_config = config_util.load(config_file)

    def easydict_to_dict(config):
        """Return ``config`` with every EasyDict level turned into a dict."""
        plain = dict(config) if isinstance(config, EasyDict) else config
        for name in plain:
            entry = plain[name]
            if isinstance(entry, EasyDict):
                entry = easydict_to_dict(entry)
            plain[name] = entry
        return plain

    tune_space = easydict_to_dict(lm_config['TUNE_SPACE'])
    tune_spec = easydict_to_dict(lm_config['TUNE_SPEC'])
    tune_spec.update({
        'run': tunable_id,
        'config': {'lm_config': os.path.join(os.getcwd(), config_file)},
        'local_dir': local_dir,
        'trial_name_creator': ray.tune.function(trial_str_creator),
    })

    # Expecting use of gpus to do parameter search
    cpu_budget = multiprocessing.cpu_count() // 2
    gpu_budget = max(get_num_gpu(), 1)
    ray.init(num_cpus=cpu_budget, num_gpus=gpu_budget)

    algo = HyperOptSearch(
        tune_space,
        max_concurrent=4,
        reward_attr="mean_accuracy",
    )
    scheduler = AsyncHyperBandScheduler(
        time_attr="training_iteration",
        reward_attr="mean_accuracy",
        max_t=200,
    )
    trials = run_experiments(
        experiments={'exp_tune': tune_spec},
        search_alg=algo,
        scheduler=scheduler,
    )
    best = get_best_result(trials, metric="mean_accuracy", param='config')
    print("The best result is", best)
def _setup(self, config):
    """Build the datasets, the TensorFlow graph ops and the session.

    Stores on ``self``: the loaded config, train/validation datasets,
    placeholders, ``global_step``, the train and metric ops, the session,
    an iteration counter and a ``tf.train.Saver``.

    Args:
        config: Not read in this body; the config path and the per-trial
            values are taken from ``self.config`` instead.
    """
    self.lm_config = config_util.load(self.config['lm_config'])
    executor.init_logging(self.lm_config)

    model_class = self.lm_config.NETWORK_CLASS
    # Network keyword arguments are the lower-cased NETWORK config entries,
    # then adjusted by update_parameters_for_each_trial using self.config.
    network_kwargs = {key.lower(): val for key, val in self.lm_config.NETWORK.items()}
    network_kwargs = update_parameters_for_each_trial(network_kwargs, self.config)

    # No distributed training was implemented, therefore rank set to 0
    self.train_dataset = setup_dataset(self.lm_config, "train", 0)
    self.validation_dataset = setup_dataset(self.lm_config, "validation", 0)

    # The constructor arguments depend on which package the network class
    # lives in: detection adds num_max_boxes, segmentation adds label_colors.
    if model_class.__module__.startswith("lmnet.networks.object_detection"):
        model = model_class(
            classes=self.train_dataset.classes,
            num_max_boxes=self.train_dataset.num_max_boxes,
            is_debug=self.lm_config.IS_DEBUG,
            **network_kwargs,
        )
    elif model_class.__module__.startswith("lmnet.networks.segmentation"):
        model = model_class(
            classes=self.train_dataset.classes,
            label_colors=self.train_dataset.label_colors,
            is_debug=self.lm_config.IS_DEBUG,
            **network_kwargs,
        )
    else:
        model = model_class(
            classes=self.train_dataset.classes,
            is_debug=self.lm_config.IS_DEBUG,
            **network_kwargs,
        )

    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    self.is_training_placeholder = tf.placeholder(tf.bool, name="is_training_placeholder")
    self.images_placeholder, self.labels_placeholder = model.placeholders()

    output = model.inference(self.images_placeholder, self.is_training_placeholder)
    # Only the object-detection loss also receives the is_training flag.
    if model_class.__module__.startswith("lmnet.networks.object_detection"):
        loss = model.loss(output, self.labels_placeholder, self.is_training_placeholder)
    else:
        loss = model.loss(output, self.labels_placeholder)
    opt = model.optimizer(self.global_step)

    train_op = model.train(loss, opt, self.global_step)
    metrics_ops_dict, metrics_update_op = model.metrics(output, self.labels_placeholder)

    self.train_op = train_op
    self.metrics_ops_dict = metrics_ops_dict
    self.metrics_update_op = metrics_update_op

    # The initializer ops are created after the model ops above; the local
    # initializer is kept on self under the name reset_metrics_op.
    init_op = tf.global_variables_initializer()
    self.reset_metrics_op = tf.local_variables_initializer()

    # allow_growth=True is passed through tf.GPUOptions into the session.
    session_config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(allow_growth=True))
    self.sess = tf.Session(config=session_config)
    self.sess.run([init_op, self.reset_metrics_op])
    self.iterations = 0
    self.saver = tf.train.Saver()
def test_build_tfds_classification():
    """Build the TFDS classification fixture, train on it, then verify it.

    Verification covers the number of samples per epoch of both subsets and
    the type and shape of every batch returned by ``feed()``.
    """
    environment.setup_test_environment()

    # Build the TFDS dataset from the fixture config.
    config_file = "tests/fixtures/configs/for_build_tfds_classification.py"
    run(config_file, overwrite=True)

    # The freshly built dataset must be loadable with the same config file.
    experiment_name = "tfds_classification"
    train_run(None, None, config_file, experiment_name, recreate=True)

    # Expected number of samples in each subset of the fixture.
    expected_counts = [("train", 3), ("validation", 2)]

    config = config_util.load(config_file)

    datasets = []
    for subset, _ in expected_counts:
        dataset = setup_dataset(
            TFDSClassification,
            subset=subset,
            batch_size=config.BATCH_SIZE,
            pre_processor=config.PRE_PROCESSOR,
            **config.DATASET.TFDS_KWARGS
        )
        datasets.append(dataset)

    for dataset, (_, expected_num) in zip(datasets, expected_counts):
        assert dataset.num_per_epoch == expected_num

    for dataset, (_, expected_num) in zip(datasets, expected_counts):
        for _ in range(expected_num):
            images, labels = dataset.feed()

            assert isinstance(images, np.ndarray)
            assert images.shape[0] == config.BATCH_SIZE
            assert images.shape[1] == config.IMAGE_SIZE[0]
            assert images.shape[2] == config.IMAGE_SIZE[1]
            assert images.shape[3] == 3

            assert isinstance(labels, np.ndarray)
            assert labels.shape[0] == config.BATCH_SIZE
            assert labels.shape[1] == dataset.num_classes
def _get_tfds_settings(config_file):
    """Read the TFDS build settings out of a config file.

    Args:
        config_file: Path handed to ``config_util.load``.

    Returns:
        A 4-tuple ``(dataset_class, dataset_kwargs, dataset_name, data_dir)``.
        ``dataset_kwargs`` holds the lower-cased ``DATASET`` entries without
        ``tfds_kwargs``; ``data_dir`` has ``~`` expanded.

    Raises:
        ValueError: If the ``DATASET`` section has no ``tfds_kwargs`` entry.
    """
    settings = config_util.load(config_file)
    dataset_class = settings.DATASET_CLASS

    dataset_kwargs = {}
    for name, value in settings.DATASET.items():
        dataset_kwargs[name.lower()] = value

    if "tfds_kwargs" in dataset_kwargs:
        tfds_kwargs = dataset_kwargs.pop("tfds_kwargs")
        dataset_name = tfds_kwargs["name"]
        data_dir = os.path.expanduser(tfds_kwargs["data_dir"])
        return dataset_class, dataset_kwargs, dataset_name, data_dir

    raise ValueError(
        "The given config file does not contain settings for building TFDS datasets.\n"
        "Please see help messages (python executor/build_tfds.py -h) for detail."
    )
def main(network, dataset, config_file, experiment_id, restore_path):
    """Evaluate an experiment, optionally overriding parts of its config.

    Args:
        network: Optional network name; when truthy the loaded class
            replaces ``config.NETWORK_CLASS``.
        dataset: Optional dataset name; when truthy the loaded class
            replaces ``config.DATASET_CLASS``.
        config_file: Optional config file merged over the experiment config.
        experiment_id: Experiment whose saved config is loaded.
        restore_path: Forwarded unchanged to ``evaluate``.
    """
    environment.init(experiment_id)

    config = config_util.load_from_experiment()
    if config_file:
        overrides = config_util.load(config_file)
        config = config_util.merge(config, overrides)

    # Command-line class overrides win over whatever the config names.
    if network:
        config.NETWORK_CLASS = module_loader.load_network_class(network)
    if dataset:
        config.DATASET_CLASS = module_loader.load_dataset_class(dataset)

    executor.init_logging(config)
    config_util.display(config)

    evaluate(config, restore_path)
def run(network, dataset, config_file, experiment_id, recreate):
    """Set up an experiment directory from a config file and start training.

    Args:
        network: Optional network name; when truthy the loaded class
            replaces ``config.NETWORK_CLASS``.
        dataset: Optional dataset name; when truthy the loaded class
            replaces ``config.DATASET_CLASS``.
        config_file: Config file to load; it is also copied into the
            experiment directory.
        experiment_id: Experiment the environment is initialised for.
        recreate: Forwarded unchanged to ``executor.prepare_dirs``.
    """
    environment.init(experiment_id)
    config = config_util.load(config_file)

    # Command-line class overrides win over whatever the config names.
    if network:
        config.NETWORK_CLASS = module_loader.load_network_class(network)
    if dataset:
        config.DATASET_CLASS = module_loader.load_dataset_class(dataset)

    config_util.display(config)
    executor.init_logging(config)

    # Record both the original file and the resolved config next to the run.
    executor.prepare_dirs(recreate)
    config_util.copy_to_experiment_dir(config_file)
    config_util.save_yaml(environment.EXPERIMENT_DIR, config)

    start_training(config)
def main(model):
    """Convert darknet weights for one of the supported models.

    Args:
        model: Either ``"yolov2"`` or ``"darknet19"``.

    Raises:
        ValueError: If ``model`` is not a supported name. Previously an
            unknown name left the settings unbound and failed later with
            UnboundLocalError.
    """
    # model name -> (weight file, experiment id, config file)
    presets = {
        "yolov2": (
            'inputs/yolo-voc.weights',
            "convert_weight_from_darknet/yolo_v2",
            "configs/convert_weight_from_darknet/yolo_v2.py",
        ),
        "darknet19": (
            'inputs/darknet19_448.weights',
            "convert_weight_from_darknet/darknet19",
            "configs/convert_weight_from_darknet/darknet19.py",
        ),
    }
    if model not in presets:
        raise ValueError(
            "Unsupported model {!r}; expected one of {}.".format(
                model, sorted(presets)))
    weight_file, experiment_id, config_file = presets[model]

    # The experiment directory is always rebuilt from scratch.
    recreate = True
    environment.init(experiment_id)
    executor.prepare_dirs(recreate)

    config = config_util.load(config_file)
    config_util.display(config)

    config_util.copy_to_experiment_dir(config_file)

    convert(config, weight_file)
def run(input_dir, output_dir, experiment_id, config_file, restore_path, save_images):
    """Validate the inputs of a prediction run and hand over to ``_run``.

    Args:
        input_dir: Directory that must already exist.
        output_dir: Forwarded unchanged to ``_run``.
        experiment_id: Experiment whose saved config is loaded.
        config_file: Optional config file merged over the experiment config.
        restore_path: Checkpoint path prefix. When ``None`` it is looked up
            in the experiment checkpoint directory.
        save_images: Forwarded unchanged to ``_run``.

    Raises:
        Exception: If ``input_dir`` is not a directory or the checkpoint
            index file does not exist.
    """
    environment.init(experiment_id)

    config = config_util.load_from_experiment()
    if config_file:
        overrides = config_util.load(config_file)
        config = config_util.merge(config, overrides)

    input_dir_exists = os.path.isdir(input_dir)
    if not input_dir_exists:
        raise Exception("Input directory {} does not exist.".format(input_dir))

    if restore_path is None:
        latest = search_restore_filename(environment.CHECKPOINTS_DIR)
        restore_path = os.path.join(environment.CHECKPOINTS_DIR, latest)

    print("Restore from {}".format(restore_path))

    index_file = "{}.index".format(restore_path)
    if not os.path.exists(index_file):
        raise Exception("restore file {} dont exists.".format(restore_path))

    print("---- start predict ----")
    _run(input_dir, output_dir, config, restore_path, save_images)
    print("---- end predict ----")
# NOTE(review): the next three statements are the tail of a method whose
# beginning lies outside this chunk; they are left exactly as found.
        self.sess = tf.Session(graph=graph, config=session_config)
        self.sess.run(init_op)
        saver.restore(self.sess, restore_path)

    def __call__(self, input_data):
        """Run the network on ``input_data`` and time the session call.

        Returns:
            A pair ``(output, calc_time)`` where ``calc_time`` is the
            wall-clock duration of ``self.sess.run`` in seconds.
        """
        # The input is multiplied by 1/255 before being fed
        # (presumably 8-bit pixel values scaled to [0, 1] - confirm).
        feed_dict = {self.images_placeholder: input_data * (1 / 255.0)}
        t_begin = time.time()
        output = self.sess.run(self.output_op, feed_dict=feed_dict)
        calc_time = time.time() - t_begin
        return output, calc_time


if __name__ == '__main__':
    # NOTE(review): ``args`` is not defined in the visible part of the file;
    # presumably parsed command-line arguments - confirm.
    environment.init(args.experiment_id)
    config = config_util.load_from_experiment()
    print(config)
    if args.config_file is not None:
        config = config_util.merge(config, config_util.load(args.config_file))

    # Without an explicit restore path, look one up in the checkpoint dir.
    if args.restore_path is None:
        restore_file = search_restore_filename(environment.CHECKPOINTS_DIR)
        restore_path = os.path.join(environment.CHECKPOINTS_DIR, restore_file)
    else:
        restore_path = args.restore_path

    print("Restore from {}".format(restore_path))

    inference_model = Inference(config, restore_path)

    # The demo window is named after the checkpoint file.
    window_name = os.path.basename(restore_path)
    run_demo(inference_model, diff_step=args.diff_step, window_name=window_name)
def _run(config_file, experiment_id, restore_path, image_size, step_size, cpu):
    """Measure inference latency and print a summary report.

    Args:
        config_file: Config file; merged over the experiment config when an
            experiment id is given, otherwise the sole config source.
        experiment_id: Experiment to load from. When falsy, the fixed id
            "measure_latency" is used.
        restore_path: Checkpoint path prefix. When ``None`` and an
            experiment id is given, it is looked up in the checkpoint dir.
        image_size: Pair overriding the configured image size; a pair of
            ``None`` values leaves the configured size untouched.
        step_size: Forwarded to ``_measure_time`` and reported as the
            number of executions.
        cpu: Only used for the report, which prints ``not cpu`` as
            "use gpu by network".

    Raises:
        Exception: If the checkpoint index file of an experiment is missing.
    """
    if not experiment_id:
        # No experiment to read from: measure straight from the config file.
        experiment_id = "measure_latency"
        environment.init(experiment_id)
        config = config_util.load(config_file)
    else:
        environment.init(experiment_id)
        config = config_util.load_from_experiment()
        if config_file:
            overrides = config_util.load(config_file)
            config = config_util.merge(config, overrides)

        if restore_path is None:
            latest = executor.search_restore_filename(
                environment.CHECKPOINTS_DIR)
            restore_path = os.path.join(environment.CHECKPOINTS_DIR, latest)

        index_file = "{}.index".format(restore_path)
        if not os.path.exists(index_file):
            raise Exception(
                "restore file {} dont exists.".format(restore_path))

    # Latency is measured one sample at a time.
    config.BATCH_SIZE = 1
    config.NETWORK.BATCH_SIZE = 1
    config.DATASET.BATCH_SIZE = 1

    size_overridden = list(image_size) != [None, None]
    if size_overridden:
        config.IMAGE_SIZE = list(image_size)
        config.NETWORK.IMAGE_SIZE = list(image_size)

        # Keep the pre-processing pipeline in step with the new size.
        if config.PRE_PROCESSOR:
            config.PRE_PROCESSOR.set_image_size(image_size)

        # Same for the post-processing pipeline.
        if config.POST_PROCESSOR:
            config.POST_PROCESSOR.set_image_size(image_size)

        print("Override IMAGE_SIZE", config.IMAGE_SIZE)

    executor.init_logging(config)
    config_util.display(config)

    overall_times, only_network_times = _measure_time(config, restore_path, step_size)
    overall_times = np.array(overall_times)
    only_network_times = np.array(only_network_times)

    # Collect the non-empty physical_device_desc of every local device.
    devices = []
    for device in device_lib.list_local_devices():
        if device.physical_device_desc:
            devices.append(device.physical_device_desc)

    def _report_values(times):
        """Return total and latency stats (msec) followed by FPS stats."""
        return (
            # total
            np.sum(times) * 1000,
            # latency
            np.mean(times) * 1000,
            np.std(times) * 1000,
            np.min(times) * 1000,
            np.max(times) * 1000,
            # FPS
            np.mean(1 / times),
            np.std(1 / times),
            np.min(1 / times),
            np.max(1 / times),
        )

    template = """ ---- measure latency result ---- total number of execution (number of samples): {} network: {} use gpu by network: {} image size: {} devices: {} * overall (include pre-post-process which execute on cpu) total time: {:.4f} msec latency mean (SD=standard deviation): {:.4f} (SD={:.4f}) msec, min: {:.4f} msec, max: {:.4f} msec FPS mean (SD=standard deviation): {:.4f} (SD={:.4f}), min: {:.4f}, max: {:.4f} * network only (exclude pre-post-process): total time: {:.4f} msec latency mean (SD=standard deviation): {:.4f} (SD={:.4f}) msec, min: {:.4f} msec, max: {:.4f} msec FPS mean (SD=standard deviation): {:.4f} (SD={:.4f}), min: {:.4f}, max: {:.4f} ---- measure latency result ---- """
    message = template.format(
        step_size,
        config.NETWORK_CLASS.__name__,
        not cpu,
        config.IMAGE_SIZE,
        devices,
        # overall
        *_report_values(overall_times),
        # network only
        *_report_values(only_network_times)
    )

    print(message)