import datetime
import logging
import os
import pickle

import pytz

# NOTE: the project-specific names used below (TrackDatabase, Dataset,
# init_logging, load_config, parse_args, the show_* breakdown printers, the
# split_* helpers, dataset_db_path, print_counts, print_cameras,
# validate_datasets, ...) are assumed to be imported from the surrounding
# project's modules.


def main():
    init_logging()
    config = load_config()
    build_config = config.build
    db = TrackDatabase(os.path.join(config.tracks_folder, "dataset.hdf5"))
    dataset = Dataset(db, "dataset", config)
    tracks_loaded, total_tracks = dataset.load_tracks()
    print(
        "Loaded {}/{} tracks, found {:.1f}k segments".format(
            tracks_loaded, total_tracks, len(dataset.segments) / 1000
        )
    )
    # Summarise anything that was filtered out while loading.
    for key, value in dataset.filtered_stats.items():
        if value != 0:
            print(" {} filtered {}".format(key, value))
    print()
    show_tracks_breakdown(dataset)
    print()
    show_segments_breakdown(dataset)
    print()
    show_cameras_breakdown(dataset)
    print()
    print("Splitting data set into train / validation")
    datasets = split_dataset_by_cameras(db, dataset, build_config)
    # if build_config.use_previous_split:
    #     split = get_previous_validation_bins(build_config.previous_split)
    #     datasets = split_dataset(db, dataset, build_config, split)
    # else:
    #     datasets = split_dataset(db, dataset, build_config)
    with open(dataset_db_path(config), "wb") as f:
        pickle.dump(datasets, f)
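# main() pickles the split datasets to dataset_db_path(config). A minimal
# sketch of that helper, assuming it simply names a .dat file inside the
# configured tracks folder (the real definition lives elsewhere in the
# project; "datasets.dat" mirrors the older version further below that
# writes DATASET_FOLDER/datasets.dat):
def dataset_db_path(config):
    return os.path.join(config.tracks_folder, "datasets.dat")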
def test_dataset(db, config, date):
    test = Dataset(db, "test", config)
    tracks_loaded, total_tracks = test.load_tracks(shuffle=True, after_date=date)
    print("Test Loaded {}/{} tracks".format(tracks_loaded, total_tracks))
    for key, value in test.filtered_stats.items():
        if value != 0:
            print("Test {} filtered {}".format(key, value))
    return test
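# A minimal usage sketch (assumed wiring, not part of the original): hold out
# everything recorded in the last week as the test split, matching the
# seven-day default that main() applies further below.
def build_test_split(db, config):
    cutoff = datetime.datetime.now(pytz.utc) - datetime.timedelta(days=7)
    return test_dataset(db, config, cutoff)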
def main():
    init_logging()
    args = parse_args()
    config = load_config(args.config_file)
    # return
    # import yaml
    #
    # with open("defualtstest.yml", "w") as f:
    #     yaml.dump(config.as_dict(), f)
    test_clips = config.build.test_clips()
    if test_clips is None:
        test_clips = []
    logging.info("# of test clips: %s", len(test_clips))
    db_file = os.path.join(config.tracks_folder, "dataset.hdf5")
    dataset = Dataset(
        db_file, "dataset", config, consecutive_segments=args.consecutive_segments
    )
    tracks_loaded, total_tracks = dataset.load_tracks()
    dataset.labels.sort()
    print(
        "Loaded {}/{} tracks, found {:.1f}k segments".format(
            tracks_loaded, total_tracks, len(dataset.segments) / 1000
        )
    )
    for key, value in dataset.filtered_stats.items():
        if value != 0:
            print(" {} filtered {}".format(key, value))
    print()
    show_tracks_breakdown(dataset)
    print()
    show_segments_breakdown(dataset)
    print()
    show_sample_frames_breakdown(dataset)
    print()
    show_cameras_breakdown(dataset)
    print()
    print("Splitting data set into train / validation")
    datasets = split_randomly(db_file, dataset, config, args, test_clips)
    validate_datasets(datasets, test_clips, args.date)
    print_counts(dataset, *datasets)
    base_dir = config.tracks_folder
    for ds in datasets:
        ds.saveto_numpy(base_dir)
    # Drop in-memory samples and the database handle before pickling each split.
    for ds in datasets:
        ds.clear_samples()
        ds.db = None
        out_path = f"{os.path.join(base_dir, ds.name)}.dat"
        logging.info("saving to %s", out_path)
        with open(out_path, "wb") as f:
            pickle.dump(ds, f)
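# The main() above delegates leak-checking to validate_datasets(). A
# hypothetical sketch of the invariants it is expected to enforce (attribute
# names like clip_ids / track_ids are assumptions, not the project's API):
def validate_datasets_sketch(datasets, test_clips, date):
    # date is accepted to mirror the real call site; unused in this sketch.
    train, validation, test = datasets
    # Test clips must only ever appear in the test split.
    for ds in (train, validation):
        assert not set(ds.clip_ids) & set(test_clips), f"test clip leaked into {ds.name}"
    # No track may appear in more than one split.
    seen = set()
    for ds in datasets:
        ids = set(ds.track_ids)
        assert not ids & seen, "track shared between splits"
        seen |= ids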
def main():
    global dataset
    global db
    db = TrackDatabase(os.path.join(DATASET_FOLDER, "dataset.hdf5"))
    dataset = Dataset(db, "dataset")
    total_tracks = len(db.get_all_track_ids())
    tracks_loaded = dataset.load_tracks(track_filter)
    print(
        "Loaded {}/{} tracks, found {:.1f}k segments".format(
            tracks_loaded, total_tracks, len(dataset.segments) / 1000
        )
    )
    for key, value in filtered_stats.items():
        if value != 0:
            print(" {} filtered {}".format(key, value))
    print()
    dataset.labels = sorted(set(dataset.tracks_by_label.keys()))
    show_tracks_breakdown()
    print()
    show_segments_breakdown()
    print()
    show_cameras_breakdown()
    print()
    print("Splitting data set into train / validation")
    if USE_PREVIOUS_SPLIT:
        split = get_bin_split("template.dat")
        datasets = split_dataset_days(split)
    else:
        datasets = split_dataset_days()
    with open(os.path.join(DATASET_FOLDER, "datasets.dat"), "wb") as f:
        pickle.dump(datasets, f)
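# This older version passes a track_filter callback into load_tracks() and
# tallies rejections in the module-level filtered_stats dict. A hypothetical
# sketch of such a callback; the metadata keys, the MIN_TRACK_FRAMES
# threshold, and the "return True to reject" convention are all assumptions,
# and filtered_stats is assumed to be a collections.Counter (or similar).
MIN_TRACK_FRAMES = 9  # assumed threshold, for illustration only

def track_filter(clip_meta, track_meta):
    if track_meta.get("tag") is None:
        filtered_stats["no tag"] += 1
        return True  # reject: track was never labelled
    if track_meta.get("frames", 0) < MIN_TRACK_FRAMES:
        filtered_stats["too short"] += 1
        return True  # reject: too few frames to cut segments from
    return False  # keep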
def main():
    init_logging()
    args = parse_args()
    config = load_config(args.config_file)
    db = TrackDatabase(os.path.join(config.tracks_folder, "dataset.hdf5"))
    dataset = Dataset(
        db, "dataset", config, consecutive_segments=args.consecutive_segments
    )
    tracks_loaded, total_tracks = dataset.load_tracks(before_date=args.date)
    print(
        "Loaded {}/{} tracks, found {:.1f}k segments".format(
            tracks_loaded, total_tracks, len(dataset.segments) / 1000
        )
    )
    for key, value in dataset.filtered_stats.items():
        if value != 0:
            print(" {} filtered {}".format(key, value))
    print()
    show_tracks_breakdown(dataset)
    print()
    show_segments_breakdown(dataset)
    print()
    show_important_frames_breakdown(dataset)
    print()
    show_cameras_breakdown(dataset)
    print()
    print("Splitting data set into train / validation")
    datasets = split_dataset_by_cameras(db, dataset, config, args)
    # NOTE: when no cutoff date is supplied, everything has already been
    # loaded above; this seven-day default only bounds the test split.
    if args.date is None:
        args.date = datetime.datetime.now(pytz.utc) - datetime.timedelta(days=7)
    test = test_dataset(db, config, args.date)
    datasets = (*datasets, test)
    print_counts(dataset, *datasets)
    print_cameras(*datasets)
    with open(dataset_db_path(config), "wb") as f:
        pickle.dump(datasets, f)
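# A rough sketch of the per-split summary print_counts() produces, reusing the
# tracks_by_label mapping seen in the older version above (the exact output
# format is an assumption):
def print_counts_sketch(dataset, *datasets):
    for label in dataset.labels:
        row = [
            "{} {}".format(ds.name, len(ds.tracks_by_label.get(label, [])))
            for ds in datasets
        ]
        print(label, " | ".join(row))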