def read_data_from_file(self, filename):
    # The reader is expected to set each field's flag to True once the
    # corresponding dataset has been read.
    field_dict = {self._input_field_name: False, self._target_field_name: False}
    data = hdf5_utils.read_hdf5_file_to_numpy_dict(filename, field_dict)
    if self._input_field_name is not None:
        assert field_dict[self._input_field_name]
    if self._target_field_name is not None:
        assert field_dict[self._target_field_name]
    return data

def read_samples_from_file(filename):
    stacked_samples, attr_dict = hdf5_utils.read_hdf5_file_to_numpy_dict(filename, read_attributes=True)
    # Convert field-wise stacked arrays into a list of per-sample dicts.
    samples = []
    for key in stacked_samples:
        for i in range(len(stacked_samples[key])):
            if len(samples) <= i:
                samples.append({})
            samples[i][key] = stacked_samples[key][i, ...]
    return samples, attr_dict
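
# A minimal round-trip sketch for read_samples_from_file. The file name and the
# field names below are illustrative only; the writer helper is the same
# hdf5_utils.write_numpy_dict_to_hdf5_file used in the examples further down.
import numpy as np
import hdf5_utils

stacked = {
    "location": np.zeros((10, 3), dtype=np.float32),
    "score": np.arange(10, dtype=np.float32),
}
hdf5_utils.write_numpy_dict_to_hdf5_file("samples_example.hdf5", stacked)
samples, attr_dict = read_samples_from_file("samples_example.hdf5")
# Each entry of "samples" is a per-sample dict holding one row of every stacked field.
assert len(samples) == 10
assert samples[0]["location"].shape == (3,)
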
def run(args):
    if args.input_path is not None:
        input_path = args.input_path
        input_files = list(
            file_helpers.input_filename_generator_hdf5(
                input_path, file_helpers.DEFAULT_HDF5_PATTERN))
    else:
        input_list_file = args.input_list_file
        with open(input_list_file, "r") as fin:
            input_files = [l.strip() for l in fin.readlines()]

    dataset_kwargs = {}
    if args.compression:
        dataset_kwargs.update({"compression": args.compression})
        if args.compression_level >= 0:
            dataset_kwargs.update({"compression_opts": args.compression_level})

    print("Counting {} input files".format(len(input_files)))

    for i, input_file in enumerate(input_files):
        print("Reading input file #{} out of {}".format(i, len(input_files)))
        field_dict = None
        data, attr_dict = hdf5_utils.read_hdf5_file_to_numpy_dict(
            input_file, field_dict, read_attributes=True)
        print("Writing {} samples with new compression settings".format(
            data[list(data.keys())[0]].shape[0]))
        hdf5_utils.write_numpy_dict_to_hdf5_file(input_file + "_recompressed",
                                                 data, attr_dict,
                                                 **dataset_kwargs)
        if args.check_written_samples:
            print("Reading samples from file {}".format(input_file +
                                                        "_recompressed"))
            written_data, written_attr_dict = hdf5_utils.read_hdf5_file_to_numpy_dict(
                input_file + "_recompressed", field_dict, read_attributes=True)
            for key in data:
                assert (np.all(data[key] == written_data[key]))
            for key in attr_dict:
                assert (np.all(attr_dict[key] == written_attr_dict[key]))
        os.remove(input_file)
        os.rename(input_file + "_recompressed", input_file)
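
# A possible command-line setup for the recompression run() above. This is only a
# sketch: the flag names mirror the attributes that run() reads (input_path,
# input_list_file, compression, compression_level, check_written_samples) and are
# not taken from the original script's argument parser.
import argparse

def make_recompression_arg_parser():
    parser = argparse.ArgumentParser(
        description="Rewrite HDF5 files with new compression settings")
    parser.add_argument("--input-path", dest="input_path", default=None)
    parser.add_argument("--input-list-file", dest="input_list_file", default=None)
    parser.add_argument("--compression", default="gzip")
    parser.add_argument("--compression-level", dest="compression_level",
                        type=int, default=-1)
    parser.add_argument("--check-written-samples", dest="check_written_samples",
                        action="store_true")
    return parser

# Example invocation: run(make_recompression_arg_parser().parse_args())
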
def run(args):
    if args.input_path is not None:
        input_path = args.input_path
        input_files = list(
            file_helpers.input_filename_generator_hdf5(
                input_path, file_helpers.DEFAULT_HDF5_PATTERN))
    else:
        input_list_file = args.input_list_file
        with open(input_list_file, "r") as fin:
            input_files = [l.strip() for l in fin.readlines()]

    print("Counting {} input files".format(len(input_files)))

    # Count total number of samples
    total_num_samples = 0
    for i, input_file in enumerate(input_files):
        print("Reading input file #{} out of {} ({})".format(
            i, len(input_files), input_file))
        field_dict = {"scores": False}
        data, attr = hdf5_utils.read_hdf5_file_to_numpy_dict(
            input_file, field_dict, read_attributes=True)
        total_num_samples += data["scores"].shape[0]

    print("Total number of samples: {}".format(total_num_samples))
def run(args):
    # Create environment
    client_id = args.client_id
    with open(args.environment_config, "r") as fin:
        environment_config = yaml.safe_load(fin)
    environment = env_factory.create_environment_from_config(
        environment_config, client_id, use_openai_wrapper=True)

    result = environment.base.get_mapper().perform_info()
    map_resolution = result.resolution
    axis_mode = environment_config["collect_data"]["axis_mode"]
    forward_factor = float(
        environment_config["collect_data"]["forward_factor"])
    downsample_to_grid = environment_config["collect_data"][
        "downsample_to_grid"]
    raycast_max_range = float(environment_config["octomap"]["max_range"])
    logger.info("map_resolution={}".format(map_resolution))
    logger.info("axis_mode={}".format(axis_mode))
    logger.info("forward_factor={}".format(forward_factor))
    logger.info("downsample_to_grid={}".format(downsample_to_grid))
    logger.info("raycast_max_range={}".format(raycast_max_range))

    environment.base.get_engine().disable_input()

    pose_list = []

    def before_reset_hook(env):
        pose = pose_list[0]
        print("Resetting episode with pose {}".format(pose))
        return pose

    def after_reset_hook(env):
        logger.info("Env reset in pose {}".format(env.base.get_pose()))

    environment.base.before_reset_hooks.register(before_reset_hook,
                                                 environment)
    environment.base.after_reset_hooks.register(after_reset_hook, environment)

    input_path = os.path.dirname(args.input_filename_prefix)
    print("Input path: {}".format(input_path))
    input_filename_pattern = r"{:s}_(\d+)\.hdf5".format(
        os.path.basename(args.input_filename_prefix))
    input_filenames_and_matches = file_utils.get_matching_filenames(
        input_filename_pattern, path=input_path, return_match_objects=True)
    print("Number of input files: {}".format(len(input_filenames_and_matches)))

    only_episode = args.episode
    max_steps = args.max_steps

    for i, (input_filename,
            filename_match) in enumerate(input_filenames_and_matches):
        input_episode_dict = hdf5_utils.read_hdf5_file_to_numpy_dict(
            os.path.join(input_path, input_filename))
        episode_poses = [environment.base.Pose(location, orientation_rpy) for location, orientation_rpy \
                          in zip(input_episode_dict["location"], input_episode_dict["orientation_rpy"])]
        if max_steps is not None:
            episode_poses = episode_poses[:max_steps]

        del pose_list[:]
        pose_list.extend(episode_poses)

        episode = int(filename_match.group(1))
        if only_episode is not None and episode != only_episode:
            continue

        logger.info("Running episode #{} from input file {}".format(
            episode, input_filename))

        run_episode(environment,
                    pose_list,
                    downsample_to_grid=downsample_to_grid,
                    measure_timing=args.measure_timing)

        episode += 1
def run(args):
    # Read config file
    topic_cmdline_mappings = {"tensorflow": "tf"}
    topics = ["tensorflow", "io", "training", "data"]
    cfg = configuration.get_config_from_cmdline(args, topics,
                                                topic_cmdline_mappings)
    if args.config is not None:
        with open(args.config, "r") as config_file:
            tmp_cfg = yaml.safe_load(config_file)
            configuration.update_config_from_other(cfg, tmp_cfg)

    # Read model config
    if "model" in cfg:
        model_config = cfg["model"]
    elif args.model_config is not None:
        model_config = {}
    else:
        logger.fatal(
            "ERROR: Model configuration must be in the general config file or "
            "provided in a separate model config file."
        )
        import sys
        sys.exit(1)

    if args.model_config is not None:
        with open(args.model_config, "r") as config_file:
            tmp_model_config = yaml.safe_load(config_file)
            configuration.update_config_from_other(model_config,
                                                   tmp_model_config)

    cfg = AttributeDict.convert_deep(cfg)
    model_config = AttributeDict.convert_deep(model_config)

    if args.hdf5_data_stats_path is not None:
        logger.info("Loading data stats from HDF5 file")
        data_stats_dict = hdf5_utils.read_hdf5_file_to_numpy_dict(
            args.hdf5_data_stats_path)
    else:
        data_stats_dict = None
    if args.use_train_data or data_stats_dict is None:
        logger.info("Creating train dataflow")
        train_dataflow = input_pipeline.InputAndTargetDataFlow(
            cfg.data.train_path,
            cfg.data,
            shuffle_lmdb=args.shuffle,
            override_data_stats=data_stats_dict,
            verbose=True)
        data_stats_dict = train_dataflow.get_data_stats()
        if args.use_train_data:
            dataflow = train_dataflow
    if not args.use_train_data:
        assert cfg.data.test_path is not None, "Test data path has to be specified if not using train data"
        logger.info("Creating test dataflow")
        dataflow = input_pipeline.InputAndTargetDataFlow(
            cfg.data.test_path,
            cfg.data,
            shuffle_lmdb=args.shuffle,
            override_data_stats=data_stats_dict,
            verbose=True)

    logger.info("# samples in dataset: {}".format(dataflow.size()))

    logger.info("Input and target shapes:")
    dataflow.reset_state()
    first_sample = next(dataflow.get_data())
    tensor_shapes = [tensor.shape for tensor in first_sample]
    tensor_dtypes = [tensor.dtype for tensor in first_sample]
    logger.info("  Shape of input: {}".format(first_sample[0].shape))
    logger.info("  Type of input: {}".format(first_sample[0].dtype))
    logger.info("  Shape of target: {}".format(first_sample[1].shape))
    logger.info("  Type of target: {}".format(first_sample[1].dtype))

    # Create tensorflow session
    logger.info("Creating tensorflow session")
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=cfg.tensorflow.gpu_memory_fraction)
    tf_config = tf.ConfigProto(gpu_options=gpu_options)
    tf_config.intra_op_parallelism_threads = cfg.tensorflow.intra_op_parallelism
    tf_config.inter_op_parallelism_threads = cfg.tensorflow.inter_op_parallelism
    tf_config.log_device_placement = cfg.tensorflow.log_device_placement
    # The session stays open for the evaluation loop below and is closed in the
    # "finally" clause at the end of this function.
    sess = tf.Session(config=tf_config)
    coord = tf.train.Coordinator()

    # def signal_handler(signal, frame):
    #     sess.close()
    #
    # batch_size = 1
    #
    # sample_stats = dataflow.get_sample_stats()
    # pipeline = input_pipeline.TFDataFlowPipeline(
    #     dataflow.get_batch_dataflow(), tensor_shapes, tensor_dtypes, sess, coord, batch_size,
    #         cfg.tensorflow, is_training=False, sample_stats=sample_stats, is_batch_dataflow=True)
    #
    # Create model
    #
    # with tf.device("/gpu:0"):
    #     with tf.variable_scope("model"):
    #             model = models.Model(model_config,
    #                                  pipeline.tensors_batch[0],
    #                                  pipeline.tensors[1],
    #                                  is_training=False,
    #                                  verbose=args.verbose)

    input_placeholder = tf.placeholder(dtype=tensor_dtypes[0],
                                       shape=(1, ) + tensor_shapes[0],
                                       name="Input")
    target_placeholder = tf.placeholder(dtype=tensor_dtypes[1],
                                        shape=(1, ) + tensor_shapes[1],
                                        name="Target")
    gpu_device_name = tf_utils.gpu_device_name()
    print(tf_utils.get_available_cpu_ids(),
          tf_utils.get_available_cpu_names())
    print(tf_utils.get_available_gpu_ids(),
          tf_utils.get_available_gpu_names())
    print(gpu_device_name)
    with tf.device(gpu_device_name):
        with tf.variable_scope("model"):
            model = models.Model(model_config,
                                 input_placeholder,
                                 target_placeholder,
                                 is_training=False,
                                 verbose=args.verbose)

    try:
        saver = tf.train.Saver(model.global_variables)

        if args.check_numerics:
            # Run numeric checks on all model checkpoints
            if args.checkpoint is None:
                ckpt = tf.train.get_checkpoint_state(args.model_dir)
                checkpoint_paths = ckpt.all_model_checkpoint_paths
            else:
                checkpoint_path = os.path.join(args.model_dir, args.checkpoint)
                checkpoint_paths = [checkpoint_path]
            for checkpoint_path in checkpoint_paths:
                if args.verbose:
                    logger.info(
                        "Checking numerics on model checkpoint {}".format(
                            checkpoint_path))
                saver.restore(sess, checkpoint_path)
                for var in model.variables:
                    if args.verbose:
                        logger.info("  Checking tensor {}".format(var.name))
                    sess.run(
                        tf.check_numerics(
                            var, "Numeric check for tensor {} failed".format(
                                var.name)))
            return

        # Restore model
        if args.checkpoint is None:
            logger.info("Reading latest checkpoint from {}".format(
                args.model_dir))
            ckpt = tf.train.get_checkpoint_state(args.model_dir)
            if ckpt is None:
                raise IOError("No previous checkpoint found at {}".format(
                    args.model_dir))
            else:
                logger.info('Found previous checkpoint... restoring')
                checkpoint_path = ckpt.model_checkpoint_path
        else:
            checkpoint_path = os.path.join(args.model_dir, args.checkpoint)
        if checkpoint_path is not None:
            logger.info("Trying to restore model from checkpoint {}".format(
                checkpoint_path))
            saver.restore(sess, checkpoint_path)

        # custom_threads = []
        # pipeline.start()
        # custom_threads.extend(pipeline.threads)
        # # Start data provider threads
        # custom_threads.extend(tf.train.start_queue_runners(sess=sess))

        sess.graph.finalize()

        # Running statistics
        stats = None

        denorm_target_list = []
        denorm_output_list = []

        logger.info("Starting evaluating")
        for i, (input, target) in enumerate(dataflow.get_data()):
            if args.verbose:
                logger.info("  sample # {}".format(i))

            if stats is None:
                stats = AttributeDict()
                stats.output = math_utils.SinglePassStatistics(target.shape)
                stats.target = math_utils.SinglePassStatistics(target.shape)
                stats.diff = math_utils.SinglePassStatistics(target.shape)
                stats.squared_diff = math_utils.SinglePassStatistics(
                    target.shape)
                stats.loss = math_utils.SinglePassStatistics(target.shape)

            denorm_target = dataflow.input_and_target_retriever.denormalize_target(
                target)

            input_batch = input[np.newaxis, ...]
            target_batch = target[np.newaxis, ...]
            loss_v, loss_min_v, loss_max_v, output_batch = sess.run(
                [model.loss, model.loss_min, model.loss_max, model.output],
                feed_dict={
                    input_placeholder: input_batch,
                    target_placeholder: target_batch
                })
            output = output_batch[0, ...]
            denorm_output = dataflow.input_and_target_retriever.denormalize_target(
                output)
            diff = denorm_output - denorm_target
            squared_diff = np.square(diff)
            if args.verbose:
                logger.info("Output={}, Target={}, Diff={}, Diff^2={}".format(
                    denorm_output, denorm_target, diff, squared_diff))
                logger.info("  loss: {}, min loss: {}, max loss: {}".format(
                    loss_v, loss_min_v, loss_max_v))
            if np.any(diff > 80):
                import time
                time.sleep(5)
            # Update stats
            stats.output.add_value(denorm_output)
            stats.target.add_value(denorm_target)
            stats.diff.add_value(diff)
            stats.squared_diff.add_value(squared_diff)
            stats.loss.add_value(loss_v)

            denorm_output_list.append(denorm_output)
            denorm_target_list.append(denorm_target)

            if i % 100 == 0 and i > 0:
                logger.info("-----------")
                logger.info("Statistics after {} samples:".format(i + 1))
                for key in stats:
                    logger.info(
                        "  {:s}: mean={:.4f}, stddev={:.4f}, min={:.4f}, max={:.4f}"
                        .format(key, stats[key].mean[0], stats[key].stddev[0],
                                float(stats[key].min), float(stats[key].max)))
                    logger.info("-----------")

                import scipy.stats
                correlation, pvalue = scipy.stats.pearsonr(
                    np.array(denorm_target_list), np.array(denorm_output_list))
                logger.info("Pearson correlation: {} [p={}]".format(
                    correlation, pvalue))
                correlation, pvalue = scipy.stats.spearmanr(
                    np.array(denorm_target_list), np.array(denorm_output_list))
                logger.info("Spearman correlation: {} [p={}]".format(
                    correlation, pvalue))
                obj = {
                    "a": np.array(denorm_target_list),
                    "b": np.array(denorm_output_list)
                }
                np.savez("spearman.npz", **obj)
                hdf5_utils.write_numpy_dict_to_hdf5_file("spearman.hdf5", obj)

            if args.visualize:
                import visualization
                fig = 1
                fig = visualization.plot_grid(input[..., 2],
                                              input[..., 3],
                                              title_prefix="input",
                                              show=False,
                                              fig_offset=fig)
                # fig = visualization.plot_grid(record.in_grid_3d[..., 6], record.in_grid_3d[..., 7], title_prefix="in_grid_3d", show=False, fig_offset=fig)
                visualization.show(stop=True)

    except Exception as exc:
        logger.info("Exception in evaluation oop: {}".format(exc))
        traceback.print_exc()
        coord.request_stop(exc)
        raise exc
    finally:
        logger.info("Requesting stop")
        coord.request_stop()
        # pipeline.stop()
        # coord.join(custom_threads, stop_grace_period_secs=(2 * cfg.io.timeout))
        sess.close()
def run(args):
    # Create environment
    client_id = args.client_id
    with open(args.environment_config, "r") as fin:
        environment_config = yaml.safe_load(fin)
    environment = env_factory.create_environment_from_config(
        environment_config, client_id, use_openai_wrapper=True)

    result = environment.base.get_mapper().perform_info()
    map_resolution = result.resolution
    axis_mode = environment_config["collect_data"]["axis_mode"]
    forward_factor = float(
        environment_config["collect_data"]["forward_factor"])
    downsample_to_grid = environment_config["collect_data"][
        "downsample_to_grid"]
    raycast_max_range = float(environment_config["octomap"]["max_range"])
    logger.info("map_resolution={}".format(map_resolution))
    logger.info("axis_mode={}".format(axis_mode))
    logger.info("forward_factor={}".format(forward_factor))
    logger.info("downsample_to_grid={}".format(downsample_to_grid))
    logger.info("raycast_max_range={}".format(raycast_max_range))

    environment.base.get_engine().disable_input()

    pose_list = []

    def before_reset_hook(env):
        pose = pose_list[0]
        print("Resetting episode with pose {}".format(pose))
        return pose

    def after_reset_hook(env):
        logger.info("Env reset in pose {}".format(env.base.get_pose()))

    environment.base.before_reset_hooks.register(before_reset_hook,
                                                 environment)
    environment.base.after_reset_hooks.register(after_reset_hook, environment)

    input_path = os.path.dirname(args.input_filename_prefix)
    print("Input path: {}".format(input_path))
    input_filename_pattern = r"{:s}_(\d+)\.hdf5".format(
        os.path.basename(args.input_filename_prefix))
    input_filenames_and_matches = file_utils.get_matching_filenames(
        input_filename_pattern, path=input_path, return_match_objects=True)
    print("Number of input files: {}".format(len(input_filenames_and_matches)))

    for i, (input_filename,
            filename_match) in enumerate(input_filenames_and_matches):
        input_episode_dict = hdf5_utils.read_hdf5_file_to_numpy_dict(
            os.path.join(input_path, input_filename))
        del pose_list[:]
        pose_list.extend([environment.base.Pose(location, orientation_rpy) for location, orientation_rpy \
                           in zip(input_episode_dict["location"], input_episode_dict["orientation_rpy"])])

        episode = int(filename_match.group(1))

        if args.output_filename_prefix:
            hdf5_filename = "{:s}_{:d}.hdf5".format(
                args.output_filename_prefix, episode)
            logger.info("File name for episode {}: {}".format(
                episode, hdf5_filename))
            if os.path.isfile(hdf5_filename):
                logger.info("File '{}' already exists. Skipping.".format(
                    hdf5_filename))
                continue

        logger.info("Running episode #{} from input file {}".format(
            episode, input_filename))

        output = run_episode(episode,
                             environment,
                             input_episode_dict,
                             args.reset_interval,
                             args.reset_score_threshold,
                             downsample_to_grid=downsample_to_grid,
                             measure_timing=args.measure_timing)

        if args.output_filename_prefix:
            locations = [pose.location() for pose in output["pose"]]
            orientation_rpys = [
                pose.orientation_rpy() for pose in output["pose"]
            ]
            hdf5_dict = {
                "score": np.asarray(output["score"]),
                "computed_reward": np.asarray(output["computed_reward"]),
                "true_reward": np.asarray(output["true_reward"]),
                "location": np.asarray(locations),
                "orientation_rpy": np.asarray(orientation_rpys),
            }
            if os.path.isfile(hdf5_filename):
                raise RuntimeError(
                    "ERROR: Output file '{}' already exists".format(
                        hdf5_filename))
            if not args.dry_run:
                hdf5_utils.write_numpy_dict_to_hdf5_file(
                    hdf5_filename, hdf5_dict)
                logger.info(
                    "Wrote output to HDF5 file: {}".format(hdf5_filename))
        episode += 1
def read_data_statistics(stats_filename):
    statistics_dict = hdf5_utils.read_hdf5_file_to_numpy_dict(stats_filename)
    return statistics_dict