def post_import_videos_job(dataset_name, path, method, logs_path=None, minutes=0):
    dataset_name = quote(dataset_name)
    if logs_path is None:
        logs_path = path
        # Since 'path' probably contains a query, like ending with '*.mkv', this should be removed
        if not (logs_path[-1] == '/'):
            logs_path = right_remove(logs_path, logs_path.split('/')[-1])

    dc = DatasetConfig(dataset_name)
    if dc.exists:
        resolution = dc.get('video_resolution')
        fps = dc.get('video_fps')
        cmd = [python_path, "import_videos.py",
               "--query={}".format(path),
               "--dataset={}".format(dataset_name),
               "--resolution={}".format(resolution),
               "--method={}".format(method),
               "--fps={}".format(fps),
               "--logs={}".format(logs_path),
               "--minutes={}".format(minutes)]

        job_id = jm.run(cmd, "import_videos")
        if job_id:
            return (job_id, 202)
        else:
            return (NoContent, 503)
    else:
        return (NoContent, 404)
def post_autoannotate_job(dataset_name, import_datasets="", epochs=75, resolution="(640,480,3)"):
    dataset_name = quote(dataset_name)
    resolution = quote(resolution)

    dc = DatasetConfig(dataset_name)
    if dc.exists:
        cmd = [python_path, "autoannotate.py",
               "--dataset={}".format(dataset_name),
               "--input_shape={}".format(resolution),
               "--image_shape={}".format(dc.get('video_resolution')),
               "--epochs={}".format(epochs)]

        if import_datasets:
            import_datasets = quote(import_datasets)
            cmd.append("--import_datasets={}".format(import_datasets))

        job_id = jm.run(cmd, "autoannotate")
        if job_id:
            return (job_id, 202)
        else:
            return (NoContent, 503)
    else:
        return (NoContent, 404)
def post_train_detector_job(dataset_name, run_name, epochs, import_datasets=""):
    dataset_name = quote(dataset_name)
    run_name = quote(run_name)

    rc = RunConfig(dataset_name, run_name)
    dc = DatasetConfig(dataset_name)
    if rc.exists and dc.exists:
        cmd = [python_path, "training_script.py",
               "--name={}".format(dataset_name),
               "--experiment={}".format(run_name),
               "--input_shape={}".format(rc.get('detector_resolution')),
               "--train_data_dir=fjlfbwjefrlbwelrfb_man_we_need_a_better_detector_codebase",
               "--batch_size={}".format(rc.get('detection_training_batch_size')),
               "--image_shape={}".format(dc.get('video_resolution')),
               "--epochs={}".format(epochs)]

        if import_datasets:
            import_datasets = quote(import_datasets)
            cmd.append("--import_datasets={}".format(import_datasets))

        job_id = jm.run(cmd, "train_detector")
        if job_id:
            return (job_id, 202)
        else:
            return (NoContent, 503)
    else:
        return (NoContent, 404)
def get_dataset_config(dataset_name):
    dataset_name = quote(dataset_name)
    dc = DatasetConfig(dataset_name)
    if dc.exists:
        return dc.get_data()
    else:
        return (NoContent, 404)
def post_point_tracks_job(dataset_name, visualize, overwrite):
    assert type(visualize) == bool
    assert type(overwrite) == bool

    cmd = "findvids"
    if not overwrite:
        cmd = "continue"

    dataset_name = quote(dataset_name)
    dc = DatasetConfig(dataset_name)
    if dc.exists:
        cmd = [python_path, "klt.py",
               "--cmd={}".format(cmd),
               "--dataset={}".format(dataset_name),
               "--imsize={}".format(dc.get('point_track_resolution')),
               "--visualize={}".format(visualize)]

        job_id = jm.run(cmd, "point_tracks")
        if job_id:
            return (job_id, 202)
        else:
            return (NoContent, 503)
    else:
        return (NoContent, 404)
def main(cmd, dataset, run, conf, make_videos):
    from pathlib import Path

    if make_videos:
        from visualize_tracking import render_video
        from config import DatasetConfig
        from apply_mask import Masker

        mask = Masker(dataset)
        dc = DatasetConfig(dataset)

    config_path = runs_path / "{}_{}".format(dataset, run) / "world_tracking_optimization.pklz"
    if config_path.is_file():
        config = load(config_path)
    else:
        # raise(ValueError("No world tracking optimized configuration exists at {}".format(config_path)))
        config = WorldTrackingConfig(default_config)

    calib = Calibration(dataset)
    munkres = Munkres()
    ts = Timestamps(dataset)

    start_stop = None

    if cmd == "findvids":
        vidnames = (datasets_path / dataset / "videos").glob('*.mkv')
        vidnames = [x.stem for x in vidnames]
        vidnames.sort()

        outfolder = runs_path / "{}_{}".format(dataset, run) / "tracks_world"
        mkdir(outfolder)
    else:
        vidnames = [cmd]
        outfolder = Path('./')
        start_stop = (0, 500)

    for v in vidnames:
        print_flush(v)
        out_path = outfolder / (v + '_tracks.pklz')

        print_flush("Loading data...")
        det_path = runs_path / "{}_{}".format(dataset, run) / "detections_world" / (v + '_world.csv')
        detections3D = pd.read_csv(det_path)

        klt_path = det_path.with_name(det_path.stem + '_klt.pklz')
        klts = load(klt_path)

        print_flush("Tracking...")
        tracks = make_tracks(dataset, v, detections3D, klts, munkres, ts, calib, config,
                             start_stop=start_stop)

        print_flush("Saving tracks...")
        save(tracks, out_path)

        if make_videos:
            vidpath = datasets_path / dataset / "videos" / (v + '.mkv')
            print_flush("Rendering video...")
            render_video(tracks, vidpath, out_path.with_suffix('.mp4'),
                         calib=calib, mask=mask, fps=dc.get('video_fps'))

    print_flush("Done!")
def main(cmd, dataset, run, conf, make_videos):
    if make_videos:
        from visualize_tracking import render_video
        from config import DatasetConfig
        from apply_mask import Masker

        mask = Masker(dataset)
        dc = DatasetConfig(dataset)

    config_path = "{rp}{ds}_{rn}/world_tracking_optimization.pklz".format(rp=runs_path, ds=dataset, rn=run)
    if isfile(config_path):
        config = load(config_path)
    else:
        # raise(ValueError("No world tracking optimized configuration exists at {}".format(config_path)))
        config = WorldTrackingConfig(default_config)

    calib = Calibration(dataset)
    munkres = Munkres()
    ts = Timestamps(dataset)

    start_stop = None

    if cmd == "findvids":
        from glob import glob
        vidnames = glob('{dsp}{ds}/videos/*.mkv'.format(dsp=datasets_path, ds=dataset))
        vidnames = [right_remove(x.split('/')[-1], '.mkv') for x in vidnames]
        vidnames.sort()

        outfolder = '{}{}_{}/tracks_world/'.format(runs_path, dataset, run)
        mkdir(outfolder)
    else:
        vidnames = [cmd]
        outfolder = './'
        start_stop = (0, 500)

    for v in vidnames:
        print_flush(v)
        out_path = "{of}{v}_tracks.pklz".format(of=outfolder, v=v)

        print_flush("Loading data...")
        det_path = "{rp}{ds}_{rn}/detections_world/{v}_world.csv".format(rp=runs_path, ds=dataset, rn=run, v=v)
        detections3D = pd.read_csv(det_path)

        klt_path = det_path.replace('.csv', '_klt.pklz')
        klts = load(klt_path)

        print_flush("Tracking...")
        tracks = make_tracks(dataset, v, detections3D, klts, munkres, ts, calib, config,
                             start_stop=start_stop)

        print_flush("Saving tracks...")
        save(tracks, out_path)

        if make_videos:
            vidpath = "{dsp}{ds}/videos/{v}.mkv".format(dsp=datasets_path, ds=dataset, v=v)
            print_flush("Rendering video...")
            render_video(tracks, vidpath, out_path.replace('.pklz', '.mp4'),
                         calib=calib, mask=mask, fps=dc.get('video_fps'))

    print_flush("Done!")
def post_dataset_config(dataset_name, dataset_config):
    dataset_name = quote(dataset_name)
    dc = DatasetConfig(dataset_name)
    if dc.set_data(dataset_config):
        dc.save()
        return (NoContent, 200)
    else:
        return ("Could not interpret dataset configuration. Is some required parameter missing? Is video resolution divisible by 16?", 500)
def get_dataset(id):
    # The OpenML id may arrive as a string; parse it with int() directly
    ds = openml.datasets.get_dataset(int(id))
    X, y, categorical_indicator, attribute_names = ds.get_data(
        dataset_format='dataframe', target=ds.default_target_attribute)

    if ds.qualities['NumberOfMissingValues'] > 100000 or X.shape[1] > 500000:
        return None, None, None

    dataset_conf = DatasetConfig({'openml': id, 'train_path': None})
    dataset_conf.format = ds.format
    dataset_conf.class_column = ds.default_target_attribute
    dataset_conf.name = '{}_{}_{}'.format(ds.name, int(id), time.time())
    return X, y, dataset_conf
def load_openml(dataset_conf: DatasetConfig) -> pd.DataFrame:
    LOGGER.info("Loading openml dataset {}".format(dataset_conf.openml))
    ds = openml.datasets.get_dataset(dataset_conf.openml)
    X, y, categorical_indicator, attribute_names = ds.get_data(
        dataset_format='dataframe', target=ds.default_target_attribute)
    df = pd.concat([X, y], axis=1)

    # Fix configuration
    dataset_conf.format = ds.format
    dataset_conf.class_column = ds.default_target_attribute
    dataset_conf.name = '{}_{}_{}'.format(ds.name, dataset_conf.openml, time.time())
    return df
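# A minimal usage sketch (an assumption, mirroring the DatasetConfig
# construction used in get_dataset above): load_openml returns the combined
# dataframe and also fills in format, class_column and name on the config
# object it was given.
from config import DatasetConfig
from data import load_openml

conf = DatasetConfig({'openml': 1510, 'train_path': None})
df = load_openml(conf)
print(conf.name, conf.class_column, df.shape)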
def get_annotation_slideshow(dataset_name):
    dataset_name = quote(dataset_name)
    dc = DatasetConfig(dataset_name)
    if dc.exists:
        imsize = dc.get('video_resolution')
        outpath = datasets_path / dataset_name / "slideshow.mp4"

        res = slideshow(dataset_name, outpath)
        if not res:
            return ("Failed to make slideshow", 404)
        else:
            vid = send_file(str(outpath), mimetype='video/mp4')
            return (vid, 200)
    else:
        return ("Dataset does not exist", 404)
def get_progress(dataset_name, run_name):
    dataset_name = quote(dataset_name)
    run_name = quote(run_name)

    ds_path = "{dsp}{ds}/".format(dsp=datasets_path, ds=dataset_name)
    if isdir(ds_path):
        progress = dict()
        progress['has_config'] = isfile(ds_path + 'config.txt')
        if progress['has_config']:
            dc = DatasetConfig(dataset_name)

        progress['has_mask'] = isfile(ds_path + 'mask.png')
        progress['has_classnames'] = isfile(ds_path + 'classes.txt')
        progress['has_calibration'] = isfile(ds_path + 'calib.tacal')
        progress['number_of_timestamp_logs'] = len(glob(ds_path + 'logs/*.log'))
        progress['number_of_videos'] = len(glob(ds_path + 'videos/*.mkv'))
        progress['training_frames_to_annotate'] = len(glob(ds_path + 'objects/train/*/*.jpg'))
        progress['training_frames_annotated'] = len(glob(ds_path + 'objects/train/*/*.txt'))
        progress['videos_with_point_tracks_computed'] = len(glob(ds_path + 'klt/*.pklz'))
        progress['videos_with_point_tracks_visualized'] = len(glob(ds_path + 'klt/*.mp4'))
        progress['all_runs'] = [x.split('/')[-1].split('_')[-1]
                                for x in glob("{rp}{ds}_*".format(rp=runs_path, ds=dataset_name))]

        run_path = "{rp}{ds}_{rn}/".format(rp=runs_path, ds=dataset_name, rn=run_name)
        if isdir(run_path):
            progress['has_this_run'] = True

            rprogress = dict()
            rprogress['has_pretrained_weights'] = isfile(ssd_path + '/weights_SSD300.hdf5')
            rprogress['videos_with_detected_objects'] = len(glob(run_path + 'csv/*.csv'))
            rprogress['videos_with_detected_objects_visualized'] = len(glob(run_path + 'detections/*.mp4'))
            rprogress['videos_with_detected_objects_in_world_coordinates'] = len(glob(run_path + 'detections_world/*.csv'))
            rprogress['videos_with_detected_objects_in_world_coordinates_visualized'] = len(glob(run_path + 'detections_world/*.mp4'))
            rprogress['stored_weight_files'] = len(glob(run_path + 'checkpoints/*.hdf5'))
            rprogress['videos_with_pixel_coordinate_tracks'] = len(glob(run_path + 'tracks/*.pklz'))
            rprogress['videos_with_pixel_coordinate_tracks_visualized'] = len(glob(run_path + 'tracks/*.mp4'))
            rprogress['videos_with_world_coordinate_tracks'] = len(glob(run_path + 'tracks_world/*.pklz'))
            rprogress['videos_with_world_coordinate_tracks_visualized'] = len(glob(run_path + 'tracks_world/*.mp4'))
            rprogress['has_optimized_world_tracking'] = isfile(run_path + 'world_tracking_optimization.pklz')
            rprogress['has_visualized_optimized_world_tracking'] = isfile(run_path + 'world_tracking_optimization.mp4')
            rprogress['has_world_tracking_ground_truth'] = isfile(run_path + 'world_trajectory_gt.csv')
            rprogress['track_zips'] = [x.split('/')[-1] for x in glob(run_path + 'track_zips/*.zip')]

            all_progress = {'dataset': progress, 'run': rprogress}
        else:
            progress['has_this_run'] = False
            all_progress = {'dataset': progress}

        return (all_progress, 200)
    else:
        return ("Dataset does not exist", 404)
def post_prepare_annotations_job(dataset_name, less_night=True):
    assert type(less_night) == bool

    dataset_name = quote(dataset_name)
    dc = DatasetConfig(dataset_name)
    if dc.exists:
        cmd = [python_path, "annotation_preparation.py",
               "--dataset={}".format(dataset_name),
               "--num_ims={}".format(dc.get('images_to_annotate')),
               "--ims_per_vid={}".format(dc.get('images_to_annotate_per_video')),
               "--train_amount={}".format(dc.get('annotation_train_split')),
               "--night={}".format(less_night)]

        job_id = jm.run(cmd, "prepare_annotations")
        if job_id:
            return (job_id, 202)
        else:
            return (NoContent, 503)
    else:
        return (NoContent, 404)
def _get_context_dialog(self, dialog: TidyDialog):
    """Get context dialog.

    Note: The last utterance of the context dialog is the system response.

    Args:
        dialog (TidyDialog): Dialog.

    Returns:
        texts: Texts (dialog_context_size + 1, dialog_text_max_len).
        text_lengths: Text lengths (dialog_context_size + 1, ).
        images: Images (dialog_context_size + 1, pos_images_max_num,
                3, image_size, image_size).
        utter_type (int): The type of the last user utterance.

    """
    # Text.
    text_list: List[List[int]] = [utter.text for utter in dialog]
    text_length_list: List[int] = [utter.text_len for utter in dialog]

    # Text tensors.
    texts = torch.stack(tuple(torch.tensor(text) for text in text_list))
    # (dialog_context_size + 1, dialog_text_max_len)
    text_lengths = torch.tensor(text_length_list)
    # (dialog_context_size + 1, )

    # Image.
    image_list = [[] for _ in range(DatasetConfig.dialog_context_size + 1)]
    for idx, utter in enumerate(dialog):
        for img_id in utter.pos_images:
            path = self.image_paths[img_id]
            if path:
                path = join(DatasetConfig.image_data_directory, path)
            else:
                path = ''
            if path and isfile(path):
                try:
                    raw_image = Image.open(path).convert("RGB")
                    image = DatasetConfig.transform(raw_image)
                    image_list[idx].append(image)
                except OSError:
                    image_list[idx].append(Dataset.EMPTY_IMAGE)
            else:
                image_list[idx].append(Dataset.EMPTY_IMAGE)
    images = torch.stack(list(map(torch.stack, image_list)))
    # (dialog_context_size + 1, pos_images_max_num, 3, image_size, image_size)

    # Utterance type.
    utter_type = dialog[-2].utter_type
    return texts, text_lengths, images, utter_type
def _get_images_product_texts(self, image_ids: List[int], num_products: int):
    """Get images and product texts of a response.

    Args:
        image_ids (List[int]): Image ids.
        num_products (int): Number of images (max images).

    Returns:
        num_products (int): Number of products (exclude padding).
        images: Images (num_products, 3, image_size, image_size).
        product_texts: Product texts (num_products, product_text_max_len).
        product_text_lengths: Product text lengths (num_products, ).

    """
    images = []
    product_texts = []
    product_text_lengths = []
    for img_id in image_ids:
        if img_id == 0:
            break
        image_name = self.image_paths[img_id]
        image_path = join(DatasetConfig.image_data_directory, image_name)
        product_path = get_product_path(image_name)

        # Image.
        raw_image = Image.open(image_path).convert("RGB")
        image = DatasetConfig.transform(raw_image)
        images.append(image)

        # Text.
        text = Dataset._get_product_text(product_path)
        text = [self.dialog_vocab.get(word, UNK_ID)
                for word in word_tokenize(text)]
        text, text_len = pad_or_clip_text(text, DatasetConfig.product_text_max_len)
        product_texts.append(text)
        product_text_lengths.append(text_len)

    # Padding.
    num_pads = num_products - len(images)
    images.extend([self.EMPTY_IMAGE] * num_pads)
    product_texts.extend([self.EMPTY_PRODUCT_TEXT] * num_pads)
    product_text_lengths.extend([1] * num_pads)

    # To tensors.
    num_products = len(images)
    images = torch.stack(images)
    product_texts = torch.stack(list(map(torch.tensor, product_texts)))
    product_text_lengths = torch.tensor(product_text_lengths)
    return num_products, images, product_texts, product_text_lengths
def generate_tidy_data_file(raw_data: RawData, task: int, mode: int):
    """Generate tidy data file.

    Args:
        raw_data (RawData): Raw data.
        task (int): A single task.
        mode (int): A single mode.

    """
    # If the item file already exists, print a warning and return.
    item_file_name: str = DatasetConfig.get_dialog_filename(task, mode)
    if isfile(item_file_name):
        print('Warning: Tidy data file {} exists.'.format(item_file_name))
        return

    # Get raw data dialogs according to its mode.
    dialogs: List[Dialog] = None
    if mode == TRAIN_MODE:
        dialogs = raw_data.train_dialogs
    if mode == VALID_MODE:
        dialogs = raw_data.valid_dialogs
    if mode == TEST_MODE:
        dialogs = raw_data.test_dialogs
    assert dialogs is not None

    # if task & KNOWLEDGE_TASK:
    #     ordinal_number = {raw_data.dialog_vocab[key]: value for key, value in
    #                       DatasetConfig.ordinal_number.items()}

    tidy_dialogs: List[TidyDialog] = []
    for item_idx, dialog in enumerate(dialogs):
        print('Getting items from dialogs {}/{}'.format(item_idx + 1, len(dialogs)))

        # Get items according to different TASKS.
        if task == INTENTION_TASK:
            # Standardize dialog first.
            std_dialog: Dialog = standardized_dialog(dialog)
            tidy_dialogs.extend(get_intention_task_items(std_dialog))
        elif task == TEXT_TASK:
            tidy_dialogs.extend(get_text_task_items(dialog))
        elif task == RECOMMEND_TASK:
            tidy_dialogs.extend(get_recommend_task_items(raw_data.obj_id, dialog))
        elif task == KNOWLEDGE_TASK:
            items = get_knowledge_items(dialog,
                                        # ordinal_number,
                                        KNOWLEDGE_TASK)
            tidy_dialogs.extend(items)

    # Save as pickle file.
    # print('Not saving for now')
    save_pkl(tidy_dialogs, 'tidy_dialogs', item_file_name)
def post_visualize_detections_job(dataset_name, run_name, confidence_threshold, coords):
    dataset_name = quote(dataset_name)
    run_name = quote(run_name)

    rc = RunConfig(dataset_name, run_name)
    dc = DatasetConfig(dataset_name)
    if rc.exists and dc.exists:
        cmd = [python_path, "visualize_detections.py",
               "--cmd=findvids",
               "--dataset={}".format(dataset_name),
               "--run={}".format(run_name),
               "--res={}".format(rc.get("detector_resolution")),
               "--conf={}".format(confidence_threshold),
               "--fps={}".format(dc.get('video_fps')),
               "--coords={}".format(coords)]

        job_id = jm.run(cmd, "visualize_detections")
        if job_id:
            return (job_id, 202)
        else:
            return (NoContent, 503)
    else:
        return (NoContent, 404)
def post_detections_to_world_coordinates_job(dataset_name, run_name, make_videos):
    dataset_name = quote(dataset_name)
    run_name = quote(run_name)

    rc = RunConfig(dataset_name, run_name)
    dc = DatasetConfig(dataset_name)
    if rc.exists and dc.exists:
        cmd = [python_path, "detections_world.py",
               "--cmd=findvids",
               "--dataset={}".format(dataset_name),
               "--run={}".format(run_name),
               "--make_videos={}".format(make_videos),
               "--ssdres={}".format(rc.get("detector_resolution")),
               "--vidres={}".format(dc.get('video_resolution')),
               "--kltres={}".format(dc.get('point_track_resolution'))]

        job_id = jm.run(cmd, "detections_to_world")
        if job_id:
            return (job_id, 202)
        else:
            return (NoContent, 503)
    else:
        return (NoContent, 404)
def post_rare_class_mining_job(dataset_name, class_name, confidence, time_distance,
                               time_sampling, import_datasets="", epochs=75,
                               resolution="(300,300,3)"):
    dataset_name = quote(dataset_name)
    class_name = quote(class_name)
    resolution = quote(resolution)

    dc = DatasetConfig(dataset_name)
    if dc.exists:
        cmd = [python_path, "rare_class_mining.py",
               "--dataset={}".format(dataset_name),
               "--class_name={}".format(class_name),
               "--confidence={}".format(confidence),
               "--time_dist={}".format(time_distance),
               "--sampling_rate={}".format(time_sampling),
               "--epochs={}".format(epochs),
               "--input_shape={}".format(resolution),
               "--image_shape={}".format(dc.get('video_resolution'))]

        if import_datasets:
            import_datasets = quote(import_datasets)
            cmd.append("--import_datasets={}".format(import_datasets))

        job_id = jm.run(cmd, "rare_class_mining")
        if job_id:
            return (job_id, 202)
        else:
            return (NoContent, 503)
    else:
        return ("Dataset does not exist or is not configured", 404)
def _enter_data(args):
    from data import load_data, load_openml

    core = _get_core(args)
    dataset_conf = DatasetConfig(args)

    if dataset_conf.openml is not None:
        df = load_openml(dataset_conf)
    elif dataset_conf.train_path is not None:
        df = load_data(dataset_conf.train_path)
    else:
        raise ValueError('Neither --openml nor --trainpath given.')

    class_column = dataset_conf.class_column
    dataset = core.add_dataset(df, class_column, depth=0, name=dataset_conf.name)
    return dataset.id
def post_tracking_world_coordinates_job(dataset_name, run_name, confidence_threshold, make_videos):
    dataset_name = quote(dataset_name)
    run_name = quote(run_name)

    rc = RunConfig(dataset_name, run_name)
    dc = DatasetConfig(dataset_name)
    if rc.exists and dc.exists:
        cmd = [python_path, "tracking_world.py",
               "--cmd=findvids",
               "--dataset={}".format(dataset_name),
               "--run={}".format(run_name),
               "--conf={}".format(confidence_threshold),
               "--make_videos={}".format(make_videos)]

        job_id = jm.run(cmd, "tracking_world_coordinates")
        if job_id:
            return (job_id, 202)
        else:
            return (NoContent, 503)
    else:
        return (NoContent, 404)
def post_optimize_tracking_world_coordinates_job(csv_ground_truth_file, dataset_name,
                                                 run_name, date, detection_id,
                                                 class_name_conversion, visualize,
                                                 patience, per_iteration):
    dataset_name = quote(dataset_name)
    run_name = quote(run_name)

    rc = RunConfig(dataset_name, run_name)
    dc = DatasetConfig(dataset_name)
    if rc.exists and dc.exists:
        this_run_path = runs_path / "{dn}_{rn}".format(dn=dataset_name, rn=run_name)
        csv_path = this_run_path / "world_trajectory_gt.csv"

        try:
            gt = csv_ground_truth_file.decode('utf-8')
        except (UnicodeDecodeError, AttributeError):
            return ("Could not parse .csv file as UTF-8", 400)
        else:
            with csv_path.open('w') as f:
                f.write(gt)

        cmd = [python_path, "tracking_world_optimization.py",
               "--dataset={}".format(dataset_name),
               "--run={}".format(run_name),
               "--date={}".format(date),
               "--gt_csv={}".format(csv_path),
               "--det_id={}".format(detection_id),
               "--gt_class_name_conversion={}".format(class_name_conversion),
               "--visualize={}".format(visualize),
               "--patience={}".format(patience),
               "--per_iteration={}".format(per_iteration)]

        job_id = jm.run(cmd, "optimize_tracking_world_coordinates")
        if job_id:
            return (job_id, 202)
        else:
            return (NoContent, 503)
    else:
        s = dataset_name + '_' + run_name
        return (s, 404)
def main(dataset, run, date, gt_csv, det_id, gt_class_name_conversion,
         visualize, patience, per_iteration):
    dc = DatasetConfig(dataset)
    vidres = dc.get('video_resolution')
    width, height, _ = vidres
    if width > height:
        stack_axis = 'v'
    else:
        stack_axis = 'h'

    if gt_class_name_conversion is None:
        print_flush("Not converting class names")
    else:
        print_flush("Using class conversion:")
        print_flush(gt_class_name_conversion)
        gt_class_name_conversion = json.loads(gt_class_name_conversion)
        assert type(gt_class_name_conversion) == dict

    print_flush("Interpreting ground truth...")
    gt = interpret_tracks_gt(dataset, date, det_id, gt_csv)

    print_flush("Optimizing...")
    config_min = {
        'time_drop_thresh': 0.1,  # in seconds
        'time_region_check_thresh': 0.1,  # in seconds
        'creation_too_close_thresh': 1,  # in metres
        'is_too_close_thresh': {'default': 0.2, 'bicycle_bicycle': 0.1},  # in metres
        'incorrect_class_cost': {'default': 100,
                                 'bicycle_person': 3,
                                 'person_bicycle': 3},  # unitless? Compared with WorldTrack.cost output
        'cost_thresh': {'default': 5, 'bicycle': 5},  # unitless? Compared with WorldTrack.cost output
        'mask_margin': 0,  # in pixels, how close to the borders of the interesting region a track can be
        'cost_dist_weight': 0.5,
        'cost_dir_weight': 0.5,
    }

    config_max = {
        'time_drop_thresh': 7.0,  # in seconds
        'time_region_check_thresh': 2.0,  # in seconds
        'creation_too_close_thresh': 10,  # in metres
        'is_too_close_thresh': {'default': 3.0, 'bicycle_bicycle': 2.0},  # in metres
        'incorrect_class_cost': {'default': 123456789123456789,
                                 'bicycle_person': 30,
                                 'person_bicycle': 30},  # unitless? Compared with WorldTrack.cost output
        'cost_thresh': {'default': 25, 'bicycle': 35},  # unitless? Compared with WorldTrack.cost output
        'mask_margin': 15,  # in pixels, how close to the borders of the interesting region a track can be
        'cost_dist_weight': 2.0,
        'cost_dir_weight': 2.0,
    }

    config_min, config_max = map(WorldTrackingConfig, (config_min, config_max))

    base_path = runs_path / "{}_{}".format(dataset, run)
    plot_path = base_path / 'world_tracking_optimization.png'
    config, tracks = optimize_tracking(config_min, config_max, dataset, run, gt,
                                       gt_class_name_conversion, plot_path=plot_path,
                                       patience=patience, n=per_iteration)

    save(config, base_path / 'world_tracking_optimization.pklz')

    if visualize:
        print_flush("Visualizing...")
        visualize_tracks(base_path / 'world_tracking_optimization.mp4', dataset,
                         gt, tracks, stack_axis=stack_axis)

    print_flush("Done!")
def visualize_tracks(outvidpath, dataset, gts, tracks=None, stack_axis='v'):
    import imageio as iio
    from visualize_tracking import _draw_world, draw_world
    from visualize import class_colors
    from apply_mask import Masker
    from config import DatasetConfig

    if tracks is not None:
        calib = Calibration(dataset)

        # Reset IDs, sorted by the time each track first appears
        tracks = sorted(tracks, key=lambda x: x.history[0][0])
        i = 1  # running counter for the new global track IDs
        for track in tracks:
            track.id = i
            i += 1

    dc = DatasetConfig(dataset)

    gts_by_vid = split_lambda(gts, lambda x: x[0])
    assert len(gts_by_vid) == 1
    vid = list(gts_by_vid.keys())[0]

    n_colors = 50
    colors = class_colors(n_colors)

    mask = Masker(dataset)

    with iio.get_writer(outvidpath, fps=dc.get('video_fps')) as outvid:
        with iio.get_reader(datasets_path / dataset / "videos" / (vid + '.mkv')) as invid:
            gt_by_frame = split_lambda(gts, lambda x: x[1])
            fns = list(gt_by_frame.keys())
            fns.sort()

            for fn in fns:
                gts_frame = gt_by_frame[fn]

                frame = invid.get_data(fn)
                frame = mask.mask(frame, alpha=0.5)

                if tracks is not None:
                    tracks_frame = frame.copy()

                for gt in gts_frame:
                    vid, fn, t, x, y, i, c, px, py = gt
                    text = "{} {}".format(c, i)
                    col = colors[i % n_colors]
                    frame = _draw_world(frame, text, px, py, col)

                if tracks is not None:
                    for track in tracks:
                        draw_world(tracks_frame, track, fn, colors[track.id % n_colors], calib)

                    if stack_axis == 'h':
                        frame = np.hstack((frame, tracks_frame))
                    elif stack_axis == 'v':
                        frame = np.vstack((frame, tracks_frame))
                    else:
                        raise ValueError("Incorrect stack axis {}, try 'h' or 'v'".format(stack_axis))

                outvid.append_data(frame)
def main(dataset, run, videos):
    # Note: This main function only works for world coordinate tracks!

    calib = Calibration(dataset)
    dc = DatasetConfig(dataset)
    masker = Masker(dataset)

    if videos == 'all':
        from glob import glob
        files = glob('{rp}{ds}_{r}/tracks_world/*_tracks.pklz'.format(rp=runs_path, ds=dataset, r=run))
        video_names = [right_remove(x.split('/')[-1], '_tracks.pklz') for x in files]
    elif videos.startswith('random:'):
        num = int(left_remove(videos, 'random:'))

        from glob import glob
        files = glob('{rp}{ds}_{r}/tracks_world/*_tracks.pklz'.format(rp=runs_path, ds=dataset, r=run))
        all_video_names = [right_remove(x.split('/')[-1], '_tracks.pklz') for x in files]

        video_names = []
        while len(video_names) < num:
            video_name = choice(all_video_names)
            if video_name not in video_names:
                video_names.append(video_name)

            # Just in case the user wants more videos than there are
            if len(video_names) == len(all_video_names):
                break
    else:
        # Assumes the user types one or more videos, separated by commas with no spaces
        video_names = videos.split(',')

        # In case the user includes file endings
        video_names = [right_remove(x.rstrip(), '.mkv') for x in video_names]

        # In case the user includes spaces
        video_names = [x.strip(' ') for x in video_names]

    print_flush("Chosen videos: ")
    print_flush(str(video_names))

    for video_name in video_names:
        print_flush(video_name)
        print_flush("Loading...")
        tracks = load('{rp}{ds}_{r}/tracks_world/{v}_tracks.pklz'.format(rp=runs_path, ds=dataset, r=run, v=video_name))

        vidpath = "{dsp}{ds}/videos/{v}.mkv".format(dsp=datasets_path, ds=dataset, v=video_name)
        if not isfile(vidpath):
            raise ValueError("Incorrect input {}".format(videos))

        outvidpath = '{rp}{ds}_{r}/tracks_world/{v}_tracks.mp4'.format(rp=runs_path, ds=dataset, r=run, v=video_name)

        print_flush("Rendering...")
        render_video(tracks, vidpath, outvidpath, mask=masker, id_mode="global",
                     calib=calib, fps=dc.get('video_fps'))

    print_flush("Done!")
import logging

import pandas as pd
import numpy as np

from config import DatasetConfig
from data import load_openml
from database import Database
from metafeatures import MetaFeatures
from utilities import logloss, multiclass_roc_auc_score

logging.basicConfig(level=logging.DEBUG)

"""Database Credentials"""
# db = Database('sqlite', 'ml-base.db')
db = Database('postgres', 'april_dump', 'postgres', 'postgres', '192.168.50.4', 5432)

"""Load new dataset from OpenML, CSV or ARFF"""
config = DatasetConfig({'openml': 1510, 'train_path': None})
df = load_openml(config)
# df = pd.read_csv('')
# with open('amldata.arff') as f:
#     df = a2p.load(f)

"""Set class column to be predicted"""
class_column = 'Class'

"""Calculate Meta-Features for new Dataset"""
mf, success = MetaFeatures().calculate(df=df, class_column=class_column)

"""Transform Meta-Features into a Vector"""
new_mf_vec = pd.DataFrame(mf, index=['i']).to_numpy()

"""Save all Datasets from DB in a DataFrame"""
engine = db.engine
with engine.connect() as conn:
    datasets = pd.read_sql_query('''SELECT id, nr_inst, nr_attr, nr_class,
                                    nr_missing_values, pct_missing_values,
                                    nr_inst_mv, pct_inst_mv, nr_attr_mv,
def get_progress(dataset_name, run_name):
    dataset_name = quote(dataset_name)
    run_name = quote(run_name)

    ds_path = datasets_path / dataset_name
    if ds_path.is_dir():
        progress = dict()
        progress['has_config'] = (ds_path / 'config.txt').is_file()
        if progress['has_config']:
            dc = DatasetConfig(dataset_name)

        progress['has_mask'] = (ds_path / 'mask.png').is_file()
        progress['has_classnames'] = (ds_path / 'classes.txt').is_file()
        progress['has_calibration'] = (ds_path / 'calib.tacal').is_file()
        progress['number_of_timestamp_logs'] = len(list((ds_path / "logs").glob('*.log')))
        progress['number_of_videos'] = len(list((ds_path / 'videos').glob('*.mkv')))
        progress['training_frames_to_annotate'] = len(list((ds_path / "objects" / "train").glob('*/*.jpg')))
        progress['training_frames_annotated'] = len(list((ds_path / "objects" / "train").glob('*/*.txt')))
        progress['videos_with_point_tracks_computed'] = len(list((ds_path / "klt").glob('*.pklz')))
        progress['videos_with_point_tracks_visualized'] = len(list((ds_path / "klt").glob('*.mp4')))
        progress['all_runs'] = [x.stem.split('_')[-1]
                                for x in runs_path.glob(dataset_name + '_*')]

        run_path = runs_path / (dataset_name + '_' + run_name)
        if run_path.is_dir():
            progress['has_this_run'] = True

            rprogress = dict()
            rprogress['has_pretrained_weights'] = (ssd_path / 'weights_SSD300.hdf5').is_file()
            rprogress['videos_with_detected_objects'] = len(list(run_path.glob('csv/*.csv')))
            rprogress['videos_with_detected_objects_visualized'] = len(list(run_path.glob('detections/*.mp4')))
            rprogress['videos_with_detected_objects_in_world_coordinates'] = len(list(run_path.glob('detections_world/*.csv')))
            rprogress['videos_with_detected_objects_in_world_coordinates_visualized'] = len(list(run_path.glob('detections_world/*.mp4')))
            rprogress['stored_weight_files'] = len(list(run_path.glob('checkpoints/*.hdf5')))
            rprogress['videos_with_pixel_coordinate_tracks'] = len(list(run_path.glob('tracks/*.pklz')))
            rprogress['videos_with_pixel_coordinate_tracks_visualized'] = len(list(run_path.glob('tracks/*.mp4')))
            rprogress['videos_with_world_coordinate_tracks'] = len(list(run_path.glob('tracks_world/*.pklz')))
            rprogress['videos_with_world_coordinate_tracks_visualized'] = len(list(run_path.glob('tracks_world/*.mp4')))
            rprogress['has_optimized_world_tracking'] = (run_path / 'world_tracking_optimization.pklz').is_file()
            rprogress['has_visualized_optimized_world_tracking'] = (run_path / 'world_tracking_optimization.mp4').is_file()
            rprogress['has_world_tracking_ground_truth'] = (run_path / 'world_trajectory_gt.csv').is_file()
            rprogress['track_zips'] = [x.name for x in run_path.glob('track_zips/*.zip')]

            all_progress = {'dataset': progress, 'run': rprogress}
        else:
            progress['has_this_run'] = False
            all_progress = {'dataset': progress}

        return (all_progress, 200)
    else:
        return ("Dataset does not exist", 404)
def main(dataset, times, images_per_time, interval):
    # Quotes are added around 'times' by the quote function in server.py,
    # to make sure it is a single argument instead of being split by the spaces
    times = times.strip("'")

    ts = Timestamps(dataset)

    dc = DatasetConfig(dataset)
    fps = dc.get('video_fps')
    half_interval = int((fps * interval) / 2)  # in frames

    timestrings = times.split(',')
    for timestring in timestrings:
        print_flush(timestring)

        # Interpret the requested time, which can look like '2017-05-16 00:49:04.954000'
        splot = timestring.split(' ')
        date = splot[0].split('-')
        time = splot[1].replace('.', ':').split(':')

        year, month, day = map(int, date)
        hour, minute, second, microsecond = map(int, time)

        timestamp = datetime(year, month, day, hour, minute, second, microsecond)
        vid_name, frame_num = ts.get_frame_number(timestamp)
        print_flush("Time found to be {}, frame {}".format(vid_name, frame_num))

        if vid_name is None:
            raise ValueError("This timestamp was incorrect: {} Could it be before the first video?".format(timestring))

        video_path = datasets_path / dataset / "videos" / (vid_name + '.mkv')
        annot_folder = datasets_path / dataset / "objects" / "train" / vid_name
        log_path = annot_folder / 'frames.log'

        if not log_path.is_file():
            with log_path.open('w') as f:
                f.write("{}.mkv\n".format(vid_name))

        # See which frames were already annotated, to start at the right index
        already_ims = list(annot_folder.glob('*.jpg'))
        if already_ims:
            already_nums = [int(x.stem) for x in already_ims]
            i = max(already_nums) + 1
        else:
            i = 1

        with iio.get_reader(video_path) as vid:
            # Find start and stop positions, in frames
            start = frame_num - half_interval
            if start < 0:
                start = 0

            stop = frame_num + half_interval
            if stop >= len(vid):
                stop = len(vid) - 1

            with open(log_path, 'a') as log:
                # Choose frames to extract
                frame_nums = np.linspace(start, stop, images_per_time).astype(int).tolist()
                frame_nums = sorted(set(frame_nums))  # Remove duplicates

                for frame_num in frame_nums:
                    frame = vid.get_data(frame_num)
                    log.write("{} ".format(frame_num))
                    impath = annot_folder / "{}.jpg".format(i)
                    imsave(impath, frame)
                    i += 1
                    print_flush("> Written {}".format(impath))

    print_flush("Done!")
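# A minimal sketch (not from the source): the manual date/time parsing above is
# equivalent to datetime.strptime with an explicit format string, assuming every
# requested time includes a microsecond part, as in '2017-05-16 00:49:04.954000'.
from datetime import datetime

def parse_timestring(timestring):
    """Parse a timestring like '2017-05-16 00:49:04.954000' into a datetime."""
    return datetime.strptime(timestring, '%Y-%m-%d %H:%M:%S.%f')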
def main(dataset, run, n_clips, clip_length):
    dc = DatasetConfig(dataset)
    rc = RunConfig(dataset, run)
    mask = Masker(dataset)
    classes = get_classnames(dataset)
    num_classes = len(classes) + 1
    calib = Calibration(dataset)

    dataset_path = "{dsp}{ds}/".format(dsp=datasets_path, ds=dataset)
    run_path = "{rp}{ds}_{r}/".format(rp=runs_path, ds=dataset, r=run)

    # Grab a bunch of videos
    vids_query = "{dsp}videos/*.mkv".format(dsp=dataset_path)
    all_vids = glob(vids_query)
    all_vids = [right_remove(x.split('/')[-1], '.mkv') for x in all_vids]
    all_vids.sort()

    vids = []

    if n_clips > len(all_vids):
        n_clips = len(all_vids)

    if n_clips == len(all_vids):
        vids = all_vids
    else:
        while len(vids) < n_clips:
            vid = choice(all_vids)
            if vid not in vids:
                vids.append(vid)

    print_flush(vids)

    # Find out what has been run on all of these videos, what to include
    include_klt = True
    include_pixeldets = True
    include_worlddets = True
    include_worldtracks = True

    klts = []
    pixeldets = []
    worlddets = []
    worldtracks = []

    # Point tracks need to be converted for faster access
    vidres = dc.get('video_resolution')
    kltres = dc.get('point_track_resolution')

    class KLTConfig(object):
        klt_x_factor = 0
        klt_y_factor = 0

    klt_config = KLTConfig()
    klt_config.klt_x_factor = vidres[0] / kltres[0]
    klt_config.klt_y_factor = vidres[1] / kltres[1]

    ssdres = rc.get('detector_resolution')
    x_scale = vidres[0] / ssdres[0]
    y_scale = vidres[1] / ssdres[1]

    colors = class_colors(num_classes)

    for vid in vids:
        f = get_klt_path(dataset_path, vid)
        if not isfile(f):
            include_klt = False
        else:
            klt = load(f)
            klt, klt_frames = convert_klt(klt, klt_config)
            pts = (klt, klt_frames, class_colors(n_cols_klts))
            klts.append(pts)

        f = get_pixeldet_path(run_path, vid)
        if not isfile(f):
            include_pixeldets = False
        else:
            dets = pd.read_csv(f)
            pixeldets.append((dets, colors, x_scale, y_scale))

        f = get_worlddet_path(run_path, vid)
        if not isfile(f):
            include_worlddets = False
        else:
            dets = pd.read_csv(f)
            worlddets.append((dets, colors, calib))

        f = get_worldtracks_path(run_path, vid)
        if not isfile(f):
            include_worldtracks = False
        else:
            tracks = load(f)
            worldtracks.append((tracks, class_colors(n_cols_tracks), calib))

    print_flush("Point tracks: {}".format(include_klt))
    print_flush("Pixel coordinate detections: {}".format(include_pixeldets))
    print_flush("World coordinate detections: {}".format(include_worlddets))
    print_flush("World coordinate tracks: {}".format(include_worldtracks))

    # Decide where to start and stop in the videos
    clip_length = clip_length * dc.get('video_fps')  # convert from seconds to frames
    print_flush("Clip length in frames: {}".format(clip_length))

    clips = []
    for vid in vids:
        start, stop = make_clip(vid, clip_length, dataset_path)
        clips.append((start, stop))

    incs = [include_klt, include_pixeldets, include_worlddets, include_worldtracks]
    funs = [klt_frame, pixeldet_frame, worlddet_frame, worldtracks_frame]
    dats = [klts, pixeldets, worlddets, worldtracks]
    nams = ["Point tracks", "Detections in pixel coordinates",
            "Detections in world coordinates", "Tracks in world coordinates"]

    print_flush(clips)

    with iio.get_writer("{trp}summary.mp4".format(trp=run_path), fps=dc.get('video_fps')) as outvid:
        for i_vid, vid in enumerate(vids):
            print_flush(vid)
            old_prog = 0

            with iio.get_reader("{dsp}videos/{v}.mkv".format(dsp=dataset_path, v=vid)) as invid:
                start, stop = clips[i_vid]
                for i_frame in range(start, stop):
                    frame = invid.get_data(i_frame)
                    pieces = []

                    for inc, fun, dat, nam in zip(incs, funs, dats, nams):
                        if inc:
                            piece = fun(dat[i_vid], mask.mask(frame.copy(), alpha=0.5), i_frame)
                            draw_text(piece, vid, i_frame, nam)
                            pieces.append(piece)

                    outvid.append_data(join(pieces))

                    prog = float(i_frame - start) / (stop - start)
                    if prog - old_prog > 0.1:
                        print_flush("{}%".format(round(prog * 100)))
                        old_prog = prog

    print_flush("Done!")
def _get_parser():
    logging_args = argparse.ArgumentParser(add_help=False)
    logging_args.add_argument('-v', '--verbose', action='count', default=0)
    logging_args.add_argument('-l', '--logfile')

    parser = argparse.ArgumentParser(description='Meta-Learning Base Command Line Interface',
                                     parents=[logging_args])
    subparsers = parser.add_subparsers(title='action', help='Action to perform')
    parser.set_defaults(action=None)

    # Common Arguments
    sql_args = SQLConfig.get_parser()
    s3_args = S3Config.get_parser()
    log_args = LogConfig.get_parser()
    dataset_args = DatasetConfig.get_parser()
    generic_args = GenericConfig.get_parser()

    # Enter Data Parser
    enter_data_parents = [logging_args, sql_args, s3_args, dataset_args, log_args, generic_args]
    enter_data = subparsers.add_parser('enter_data', parents=enter_data_parents,
                                       help='Add a Dataset and trigger a Datarun on it.')
    enter_data.set_defaults(action=_enter_data)

    # Worker Args
    worker_args = argparse.ArgumentParser(add_help=False)
    worker_args.add_argument('--cloud-mode', action='store_true', default=False,
                             help='Whether to run this worker in cloud mode')
    worker_args.add_argument('--no-save', dest='save_files', action='store_false',
                             help="don't save models and metrics at all")
    computation_args = WorkerConfig.get_parser()

    # Worker
    worker_parents = [logging_args, worker_args, sql_args, s3_args, log_args,
                      generic_args, computation_args]
    worker = subparsers.add_parser('worker', parents=worker_parents,
                                   help='Start a single worker in foreground.')
    worker.set_defaults(action=_work)
    worker.add_argument('--datasets', help='Only train on datasets with these ids', nargs='+')
    worker.add_argument('--total-time', help='Number of seconds to run worker', type=int)

    # Export Data
    export_data = [logging_args, log_args, sql_args, generic_args]
    export_pipelines = subparsers.add_parser('export_pipelines', parents=export_data,
                                             help='Export all pipelines to a DataFrame')
    export_pipelines.set_defaults(action=_export_pipelines)

    export_datasets = subparsers.add_parser('export_datasets', parents=export_data,
                                            help='Export all datasets to a DataFrame')
    export_datasets.set_defaults(action=_export_datasets)

    return parser
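# Example invocations (a sketch; 'cli.py' is a hypothetical entry point, the
# subcommand names come from the subparsers above, and '--openml'/'--trainpath'
# are inferred from the error message in _enter_data):
#
#   python cli.py enter_data --openml 1510
#   python cli.py enter_data --trainpath data/train.csv
#   python cli.py worker --total-time 3600 --no-save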