def post_import_videos_job(dataset_name, path, method, logs_path=None, minutes=0):
    dataset_name = quote(dataset_name)
    if logs_path is None:
        logs_path = path
        # Since 'path' probably contains a query, like ending with '*.mkv', this should be removed
        if not (logs_path[-1] == '/'):
            logs_path = right_remove(logs_path, logs_path.split('/')[-1])

    dc = DatasetConfig(dataset_name)
    if dc.exists:
        resolution = dc.get('video_resolution')
        fps = dc.get('video_fps')
        cmd = [python_path, "import_videos.py",
               "--query={}".format(path),
               "--dataset={}".format(dataset_name),
               "--resolution={}".format(resolution),
               "--method={}".format(method),
               "--fps={}".format(fps),
               "--logs={}".format(logs_path),
               "--minutes={}".format(minutes)]

        job_id = jm.run(cmd, "import_videos")
        if job_id:
            return (job_id, 202)
        else:
            return (NoContent, 503)
    else:
        return (NoContent, 404)
def post_autoannotate_job(dataset_name, import_datasets="", epochs=75, resolution="(640,480,3)"):
    dataset_name = quote(dataset_name)
    resolution = quote(resolution)

    dc = DatasetConfig(dataset_name)
    if dc.exists:
        cmd = [python_path, "autoannotate.py",
               "--dataset={}".format(dataset_name),
               "--input_shape={}".format(resolution),
               "--image_shape={}".format(dc.get('video_resolution')),
               "--epochs={}".format(epochs)]

        if import_datasets:
            import_datasets = quote(import_datasets)
            cmd.append("--import_datasets={}".format(import_datasets))

        job_id = jm.run(cmd, "autoannotate")
        if job_id:
            return (job_id, 202)
        else:
            return (NoContent, 503)
    else:
        return (NoContent, 404)
def post_train_detector_job(dataset_name, run_name, epochs, import_datasets=""):
    dataset_name = quote(dataset_name)
    run_name = quote(run_name)

    rc = RunConfig(dataset_name, run_name)
    dc = DatasetConfig(dataset_name)
    if rc.exists and dc.exists:
        cmd = [python_path, "training_script.py",
               "--name={}".format(dataset_name),
               "--experiment={}".format(run_name),
               "--input_shape={}".format(rc.get('detector_resolution')),
               "--train_data_dir=fjlfbwjefrlbwelrfb_man_we_need_a_better_detector_codebase",
               "--batch_size={}".format(rc.get('detection_training_batch_size')),
               "--image_shape={}".format(dc.get('video_resolution')),
               "--epochs={}".format(epochs)]

        if import_datasets:
            import_datasets = quote(import_datasets)
            cmd.append("--import_datasets={}".format(import_datasets))

        job_id = jm.run(cmd, "train_detector")
        if job_id:
            return (job_id, 202)
        else:
            return (NoContent, 503)
    else:
        return (NoContent, 404)
def get_dataset_config(dataset_name):
    dataset_name = quote(dataset_name)
    dc = DatasetConfig(dataset_name)
    if dc.exists:
        return dc.get_data()
    else:
        return (NoContent, 404)
def post_point_tracks_job(dataset_name, visualize, overwrite):
    assert type(visualize) == bool
    assert type(overwrite) == bool

    cmd = "findvids"
    if not overwrite:
        cmd = "continue"

    dataset_name = quote(dataset_name)
    dc = DatasetConfig(dataset_name)
    if dc.exists:
        cmd = [python_path, "klt.py",
               "--cmd={}".format(cmd),
               "--dataset={}".format(dataset_name),
               "--imsize={}".format(dc.get('point_track_resolution')),
               "--visualize={}".format(visualize)]

        job_id = jm.run(cmd, "point_tracks")
        if job_id:
            return (job_id, 202)
        else:
            return (NoContent, 503)
    else:
        return (NoContent, 404)
def main(cmd, dataset, run, conf, make_videos):
    from pathlib import Path

    if make_videos:
        from visualize_tracking import render_video
        from config import DatasetConfig
        from apply_mask import Masker

        mask = Masker(dataset)
        dc = DatasetConfig(dataset)

    config_path = runs_path / "{}_{}".format(dataset, run) / "world_tracking_optimization.pklz"
    if config_path.is_file():
        config = load(config_path)
    else:
        # raise(ValueError("No world tracking optimized configuration exists at {}".format(config_path)))
        config = WorldTrackingConfig(default_config)

    calib = Calibration(dataset)
    munkres = Munkres()
    ts = Timestamps(dataset)

    start_stop = None

    if cmd == "findvids":
        vidnames = (datasets_path / dataset / "videos").glob('*.mkv')
        vidnames = [x.stem for x in vidnames]
        vidnames.sort()

        outfolder = runs_path / "{}_{}".format(dataset, run) / "tracks_world"
        mkdir(outfolder)
    else:
        vidnames = [cmd]
        outfolder = Path('./')
        start_stop = (0, 500)

    for v in vidnames:
        print_flush(v)
        out_path = outfolder / (v + '_tracks.pklz')

        print_flush("Loading data...")
        det_path = runs_path / "{}_{}".format(dataset, run) / "detections_world" / (v + '_world.csv')
        detections3D = pd.read_csv(det_path)

        klt_path = det_path.with_name(det_path.stem + '_klt.pklz')
        klts = load(klt_path)

        print_flush("Tracking...")
        tracks = make_tracks(dataset, v, detections3D, klts, munkres, ts, calib, config,
                             start_stop=start_stop)

        print_flush("Saving tracks...")
        save(tracks, out_path)

        if make_videos:
            vidpath = datasets_path / dataset / "videos" / (v + '.mkv')
            print_flush("Rendering video...")
            render_video(tracks, vidpath, out_path.with_suffix('.mp4'),
                         calib=calib, mask=mask, fps=dc.get('video_fps'))

    print_flush("Done!")
def main(cmd, dataset, run, conf, make_videos):
    if make_videos:
        from visualize_tracking import render_video
        from config import DatasetConfig
        from apply_mask import Masker

        mask = Masker(dataset)
        dc = DatasetConfig(dataset)

    config_path = "{rp}{ds}_{rn}/world_tracking_optimization.pklz".format(rp=runs_path, ds=dataset, rn=run)
    if isfile(config_path):
        config = load(config_path)
    else:
        # raise(ValueError("No world tracking optimized configuration exists at {}".format(config_path)))
        config = WorldTrackingConfig(default_config)

    calib = Calibration(dataset)
    munkres = Munkres()
    ts = Timestamps(dataset)

    start_stop = None

    if cmd == "findvids":
        from glob import glob
        vidnames = glob('{dsp}{ds}/videos/*.mkv'.format(dsp=datasets_path, ds=dataset))
        vidnames = [right_remove(x.split('/')[-1], '.mkv') for x in vidnames]
        vidnames.sort()

        outfolder = '{}{}_{}/tracks_world/'.format(runs_path, dataset, run)
        mkdir(outfolder)
    else:
        vidnames = [cmd]
        outfolder = './'
        start_stop = (0, 500)

    for v in vidnames:
        print_flush(v)
        out_path = "{of}{v}_tracks.pklz".format(of=outfolder, v=v)

        print_flush("Loading data...")
        det_path = "{rp}{ds}_{rn}/detections_world/{v}_world.csv".format(rp=runs_path, ds=dataset, rn=run, v=v)
        detections3D = pd.read_csv(det_path)

        klt_path = det_path.replace('.csv', '_klt.pklz')
        klts = load(klt_path)

        print_flush("Tracking...")
        tracks = make_tracks(dataset, v, detections3D, klts, munkres, ts, calib, config,
                             start_stop=start_stop)

        print_flush("Saving tracks...")
        save(tracks, out_path)

        if make_videos:
            vidpath = "{dsp}{ds}/videos/{v}.mkv".format(dsp=datasets_path, ds=dataset, v=v)
            print_flush("Rendering video...")
            render_video(tracks, vidpath, out_path.replace('.pklz', '.mp4'),
                         calib=calib, mask=mask, fps=dc.get('video_fps'))

    print_flush("Done!")
def post_dataset_config(dataset_name, dataset_config):
    dataset_name = quote(dataset_name)
    dc = DatasetConfig(dataset_name)
    if dc.set_data(dataset_config):
        dc.save()
        return (NoContent, 200)
    else:
        return ("Could not interpret dataset configuration. Is some required parameter missing? Is video resolution divisible by 16?", 500)
def get_dataset(id):
    # The OpenML id may arrive as a string; parse it with int() directly
    ds = openml.datasets.get_dataset(int(id))
    X, y, categorical_indicator, attribute_names = ds.get_data(
        dataset_format='dataframe', target=ds.default_target_attribute)

    if ds.qualities['NumberOfMissingValues'] > 100000 or X.shape[1] > 500000:
        return None, None, None

    dataset_conf = DatasetConfig({'openml': id, 'train_path': None})
    dataset_conf.format = ds.format
    dataset_conf.class_column = ds.default_target_attribute
    dataset_conf.name = '{}_{}_{}'.format(ds.name, int(id), time.time())
    return X, y, dataset_conf
def load_openml(dataset_conf: DatasetConfig) -> pd.DataFrame:
    LOGGER.info("Loading openml dataset {}".format(dataset_conf.openml))
    ds = openml.datasets.get_dataset(dataset_conf.openml)
    X, y, categorical_indicator, attribute_names = ds.get_data(
        dataset_format='dataframe', target=ds.default_target_attribute)
    df = pd.concat([X, y], axis=1)

    # Fix configuration
    dataset_conf.format = ds.format
    dataset_conf.class_column = ds.default_target_attribute
    dataset_conf.name = '{}_{}_{}'.format(ds.name, dataset_conf.openml, time.time())
    return df
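# A minimal usage sketch (an assumption, mirroring the DatasetConfig
# construction used in get_dataset above): load_openml returns the combined
# dataframe and also fills in format, class_column and name on the config
# object it was given.
from config import DatasetConfig
from data import load_openml

conf = DatasetConfig({'openml': 1510, 'train_path': None})
df = load_openml(conf)
print(conf.name, conf.class_column, df.shape)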
def get_annotation_slideshow(dataset_name):
    dataset_name = quote(dataset_name)
    dc = DatasetConfig(dataset_name)
    if dc.exists:
        imsize = dc.get('video_resolution')
        outpath = datasets_path / dataset_name / "slideshow.mp4"

        res = slideshow(dataset_name, outpath)
        if not res:
            return ("Failed to make slideshow", 404)
        else:
            vid = send_file(str(outpath), mimetype='video/mp4')
            return (vid, 200)
    else:
        return ("Dataset does not exist", 404)
def get_progress(dataset_name, run_name):
    dataset_name = quote(dataset_name)
    run_name = quote(run_name)

    ds_path = "{dsp}{ds}/".format(dsp=datasets_path, ds=dataset_name)
    if isdir(ds_path):
        progress = dict()
        progress['has_config'] = isfile(ds_path + 'config.txt')
        if progress['has_config']:
            dc = DatasetConfig(dataset_name)

        progress['has_mask'] = isfile(ds_path + 'mask.png')
        progress['has_classnames'] = isfile(ds_path + 'classes.txt')
        progress['has_calibration'] = isfile(ds_path + 'calib.tacal')
        progress['number_of_timestamp_logs'] = len(glob(ds_path + 'logs/*.log'))
        progress['number_of_videos'] = len(glob(ds_path + 'videos/*.mkv'))
        progress['training_frames_to_annotate'] = len(glob(ds_path + 'objects/train/*/*.jpg'))
        progress['training_frames_annotated'] = len(glob(ds_path + 'objects/train/*/*.txt'))
        progress['videos_with_point_tracks_computed'] = len(glob(ds_path + 'klt/*.pklz'))
        progress['videos_with_point_tracks_visualized'] = len(glob(ds_path + 'klt/*.mp4'))
        progress['all_runs'] = [x.split('/')[-1].split('_')[-1]
                                for x in glob("{rp}{ds}_*".format(rp=runs_path, ds=dataset_name))]

        run_path = "{rp}{ds}_{rn}/".format(rp=runs_path, ds=dataset_name, rn=run_name)
        if isdir(run_path):
            progress['has_this_run'] = True

            rprogress = dict()
            rprogress['has_pretrained_weights'] = isfile(ssd_path + '/weights_SSD300.hdf5')
            rprogress['videos_with_detected_objects'] = len(glob(run_path + 'csv/*.csv'))
            rprogress['videos_with_detected_objects_visualized'] = len(glob(run_path + 'detections/*.mp4'))
            rprogress['videos_with_detected_objects_in_world_coordinates'] = len(glob(run_path + 'detections_world/*.csv'))
            rprogress['videos_with_detected_objects_in_world_coordinates_visualized'] = len(glob(run_path + 'detections_world/*.mp4'))
            rprogress['stored_weight_files'] = len(glob(run_path + 'checkpoints/*.hdf5'))
            rprogress['videos_with_pixel_coordinate_tracks'] = len(glob(run_path + 'tracks/*.pklz'))
            rprogress['videos_with_pixel_coordinate_tracks_visualized'] = len(glob(run_path + 'tracks/*.mp4'))
            rprogress['videos_with_world_coordinate_tracks'] = len(glob(run_path + 'tracks_world/*.pklz'))
            rprogress['videos_with_world_coordinate_tracks_visualized'] = len(glob(run_path + 'tracks_world/*.mp4'))
            rprogress['has_optimized_world_tracking'] = isfile(run_path + 'world_tracking_optimization.pklz')
            rprogress['has_visualized_optimized_world_tracking'] = isfile(run_path + 'world_tracking_optimization.mp4')
            rprogress['has_world_tracking_ground_truth'] = isfile(run_path + 'world_trajectory_gt.csv')
            rprogress['track_zips'] = [x.split('/')[-1] for x in glob(run_path + 'track_zips/*.zip')]

            all_progress = {'dataset': progress, 'run': rprogress}
        else:
            progress['has_this_run'] = False
            all_progress = {'dataset': progress}

        return (all_progress, 200)
    else:
        return ("Dataset does not exist", 404)
def post_prepare_annotations_job(dataset_name, less_night=True):
    assert type(less_night) == bool

    dataset_name = quote(dataset_name)
    dc = DatasetConfig(dataset_name)
    if dc.exists:
        cmd = [python_path, "annotation_preparation.py",
               "--dataset={}".format(dataset_name),
               "--num_ims={}".format(dc.get('images_to_annotate')),
               "--ims_per_vid={}".format(dc.get('images_to_annotate_per_video')),
               "--train_amount={}".format(dc.get('annotation_train_split')),
               "--night={}".format(less_night)]

        job_id = jm.run(cmd, "prepare_annotations")
        if job_id:
            return (job_id, 202)
        else:
            return (NoContent, 503)
    else:
        return (NoContent, 404)
def _get_context_dialog(self, dialog: TidyDialog):
    """Get context dialog.

    Note: The last utterance of the context dialog is the system response.

    Args:
        dialog (TidyDialog): Dialog.

    Returns:
        texts: Texts (dialog_context_size + 1, dialog_text_max_len).
        text_lengths: Text lengths (dialog_context_size + 1, ).
        images: Images (dialog_context_size + 1, pos_images_max_num,
                3, image_size, image_size).
        utter_type (int): The type of the last user utterance.

    """
    # Text.
    text_list: List[List[int]] = [utter.text for utter in dialog]
    text_length_list: List[int] = [utter.text_len for utter in dialog]

    # Text tensors.
    texts = torch.stack(tuple(torch.tensor(text) for text in text_list))
    # (dialog_context_size + 1, dialog_text_max_len)
    text_lengths = torch.tensor(text_length_list)
    # (dialog_context_size + 1, )

    # Image.
    image_list = [[] for _ in range(DatasetConfig.dialog_context_size + 1)]
    for idx, utter in enumerate(dialog):
        for img_id in utter.pos_images:
            path = self.image_paths[img_id]
            if path:
                path = join(DatasetConfig.image_data_directory, path)
            else:
                path = ''
            if path and isfile(path):
                try:
                    raw_image = Image.open(path).convert("RGB")
                    image = DatasetConfig.transform(raw_image)
                    image_list[idx].append(image)
                except OSError:
                    image_list[idx].append(Dataset.EMPTY_IMAGE)
            else:
                image_list[idx].append(Dataset.EMPTY_IMAGE)
    images = torch.stack(list(map(torch.stack, image_list)))
    # (dialog_context_size + 1, pos_images_max_num, 3, image_size, image_size)

    # Utterance type.
    utter_type = dialog[-2].utter_type
    return texts, text_lengths, images, utter_type
def _get_images_product_texts(self, image_ids: List[int], num_products: int):
    """Get images and product texts of a response.

    Args:
        image_ids (List[int]): Image ids.
        num_products (int): Number of images (max images).

    Returns:
        num_products (int): Number of products (exclude padding).
        images: Images (num_products, 3, image_size, image_size).
        product_texts: Product texts (num_products, product_text_max_len).
        product_text_lengths: Product text lengths (num_products, ).

    """
    images = []
    product_texts = []
    product_text_lengths = []
    for img_id in image_ids:
        if img_id == 0:
            break
        image_name = self.image_paths[img_id]
        image_path = join(DatasetConfig.image_data_directory, image_name)
        product_path = get_product_path(image_name)

        # Image.
        raw_image = Image.open(image_path).convert("RGB")
        image = DatasetConfig.transform(raw_image)
        images.append(image)

        # Text.
        text = Dataset._get_product_text(product_path)
        text = [self.dialog_vocab.get(word, UNK_ID)
                for word in word_tokenize(text)]
        text, text_len = pad_or_clip_text(text, DatasetConfig.product_text_max_len)
        product_texts.append(text)
        product_text_lengths.append(text_len)

    # Padding.
    num_pads = num_products - len(images)
    images.extend([self.EMPTY_IMAGE] * num_pads)
    product_texts.extend([self.EMPTY_PRODUCT_TEXT] * num_pads)
    product_text_lengths.extend([1] * num_pads)

    # To tensors.
    num_products = len(images)
    images = torch.stack(images)
    product_texts = torch.stack(list(map(torch.tensor, product_texts)))
    product_text_lengths = torch.tensor(product_text_lengths)
    return num_products, images, product_texts, product_text_lengths
def generate_tidy_data_file(raw_data: RawData, task: int, mode: int):
    """Generate tidy data file.

    Args:
        raw_data (RawData): Raw data.
        task (int): A single task.
        mode (int): A single mode.

    """
    # If the item file already exists, print a warning and return.
    item_file_name: str = DatasetConfig.get_dialog_filename(task, mode)
    if isfile(item_file_name):
        print('Warning: Tidy data file {} exists.'.format(item_file_name))
        return

    # Get raw data dialogs according to its mode.
    dialogs: List[Dialog] = None
    if mode == TRAIN_MODE:
        dialogs = raw_data.train_dialogs
    if mode == VALID_MODE:
        dialogs = raw_data.valid_dialogs
    if mode == TEST_MODE:
        dialogs = raw_data.test_dialogs
    assert dialogs is not None

    # if task & KNOWLEDGE_TASK:
    #     ordinal_number = {raw_data.dialog_vocab[key]: value for key, value in
    #                       DatasetConfig.ordinal_number.items()}

    tidy_dialogs: List[TidyDialog] = []
    for item_idx, dialog in enumerate(dialogs):
        print('Getting items from dialogs {}/{}'.format(item_idx + 1, len(dialogs)))

        # Get items according to different TASKS.
        if task == INTENTION_TASK:
            # Standardize dialog first.
            std_dialog: Dialog = standardized_dialog(dialog)
            tidy_dialogs.extend(get_intention_task_items(std_dialog))
        elif task == TEXT_TASK:
            tidy_dialogs.extend(get_text_task_items(dialog))
        elif task == RECOMMEND_TASK:
            tidy_dialogs.extend(get_recommend_task_items(raw_data.obj_id, dialog))
        elif task == KNOWLEDGE_TASK:
            items = get_knowledge_items(dialog,
                                        # ordinal_number,
                                        KNOWLEDGE_TASK)
            tidy_dialogs.extend(items)

    # Save as pickle file.
    # print('Not saving for now')
    save_pkl(tidy_dialogs, 'tidy_dialogs', item_file_name)
def post_visualize_detections_job(dataset_name, run_name, confidence_threshold, coords):
    dataset_name = quote(dataset_name)
    run_name = quote(run_name)

    rc = RunConfig(dataset_name, run_name)
    dc = DatasetConfig(dataset_name)
    if rc.exists and dc.exists:
        cmd = [python_path, "visualize_detections.py",
               "--cmd=findvids",
               "--dataset={}".format(dataset_name),
               "--run={}".format(run_name),
               "--res={}".format(rc.get("detector_resolution")),
               "--conf={}".format(confidence_threshold),
               "--fps={}".format(dc.get('video_fps')),
               "--coords={}".format(coords)]

        job_id = jm.run(cmd, "visualize_detections")
        if job_id:
            return (job_id, 202)
        else:
            return (NoContent, 503)
    else:
        return (NoContent, 404)
def post_detections_to_world_coordinates_job(dataset_name, run_name, make_videos):
    dataset_name = quote(dataset_name)
    run_name = quote(run_name)

    rc = RunConfig(dataset_name, run_name)
    dc = DatasetConfig(dataset_name)
    if rc.exists and dc.exists:
        cmd = [python_path, "detections_world.py",
               "--cmd=findvids",
               "--dataset={}".format(dataset_name),
               "--run={}".format(run_name),
               "--make_videos={}".format(make_videos),
               "--ssdres={}".format(rc.get("detector_resolution")),
               "--vidres={}".format(dc.get('video_resolution')),
               "--kltres={}".format(dc.get('point_track_resolution'))]

        job_id = jm.run(cmd, "detections_to_world")
        if job_id:
            return (job_id, 202)
        else:
            return (NoContent, 503)
    else:
        return (NoContent, 404)
def post_rare_class_mining_job(dataset_name, class_name, confidence, time_distance,
                               time_sampling, import_datasets="", epochs=75,
                               resolution="(300,300,3)"):
    dataset_name = quote(dataset_name)
    class_name = quote(class_name)
    resolution = quote(resolution)

    dc = DatasetConfig(dataset_name)
    if dc.exists:
        cmd = [python_path, "rare_class_mining.py",
               "--dataset={}".format(dataset_name),
               "--class_name={}".format(class_name),
               "--confidence={}".format(confidence),
               "--time_dist={}".format(time_distance),
               "--sampling_rate={}".format(time_sampling),
               "--epochs={}".format(epochs),
               "--input_shape={}".format(resolution),
               "--image_shape={}".format(dc.get('video_resolution'))]

        if import_datasets:
            import_datasets = quote(import_datasets)
            cmd.append("--import_datasets={}".format(import_datasets))

        job_id = jm.run(cmd, "rare_class_mining")
        if job_id:
            return (job_id, 202)
        else:
            return (NoContent, 503)
    else:
        return ("Dataset does not exist or is not configured", 404)
def _enter_data(args):
    from data import load_data, load_openml

    core = _get_core(args)
    dataset_conf = DatasetConfig(args)

    if dataset_conf.openml is not None:
        df = load_openml(dataset_conf)
    elif dataset_conf.train_path is not None:
        df = load_data(dataset_conf.train_path)
    else:
        raise ValueError('Neither --openml nor --trainpath given.')

    class_column = dataset_conf.class_column
    dataset = core.add_dataset(df, class_column, depth=0, name=dataset_conf.name)
    return dataset.id
def post_tracking_world_coordinates_job(dataset_name, run_name, confidence_threshold, make_videos):
    dataset_name = quote(dataset_name)
    run_name = quote(run_name)

    rc = RunConfig(dataset_name, run_name)
    dc = DatasetConfig(dataset_name)
    if rc.exists and dc.exists:
        cmd = [python_path, "tracking_world.py",
               "--cmd=findvids",
               "--dataset={}".format(dataset_name),
               "--run={}".format(run_name),
               "--conf={}".format(confidence_threshold),
               "--make_videos={}".format(make_videos)]

        job_id = jm.run(cmd, "tracking_world_coordinates")
        if job_id:
            return (job_id, 202)
        else:
            return (NoContent, 503)
    else:
        return (NoContent, 404)
def post_optimize_tracking_world_coordinates_job(csv_ground_truth_file, dataset_name,
                                                 run_name, date, detection_id,
                                                 class_name_conversion, visualize,
                                                 patience, per_iteration):
    dataset_name = quote(dataset_name)
    run_name = quote(run_name)

    rc = RunConfig(dataset_name, run_name)
    dc = DatasetConfig(dataset_name)
    if rc.exists and dc.exists:
        this_run_path = runs_path / "{dn}_{rn}".format(dn=dataset_name, rn=run_name)
        csv_path = this_run_path / "world_trajectory_gt.csv"

        try:
            gt = csv_ground_truth_file.decode('utf-8')
        except (UnicodeDecodeError, AttributeError):
            return ("Could not parse .csv file as UTF-8", 400)
        else:
            with csv_path.open('w') as f:
                f.write(gt)

        cmd = [python_path, "tracking_world_optimization.py",
               "--dataset={}".format(dataset_name),
               "--run={}".format(run_name),
               "--date={}".format(date),
               "--gt_csv={}".format(csv_path),
               "--det_id={}".format(detection_id),
               "--gt_class_name_conversion={}".format(class_name_conversion),
               "--visualize={}".format(visualize),
               "--patience={}".format(patience),
               "--per_iteration={}".format(per_iteration)]

        job_id = jm.run(cmd, "optimize_tracking_world_coordinates")
        if job_id:
            return (job_id, 202)
        else:
            return (NoContent, 503)
    else:
        s = dataset_name + '_' + run_name
        return (s, 404)
def main(dataset, run, date, gt_csv, det_id, gt_class_name_conversion,
         visualize, patience, per_iteration):
    dc = DatasetConfig(dataset)
    vidres = dc.get('video_resolution')
    width, height, _ = vidres
    if width > height:
        stack_axis = 'v'
    else:
        stack_axis = 'h'

    if gt_class_name_conversion is None:
        print_flush("Not converting class names")
    else:
        print_flush("Using class conversion:")
        print_flush(gt_class_name_conversion)
        gt_class_name_conversion = json.loads(gt_class_name_conversion)
        assert type(gt_class_name_conversion) == dict

    print_flush("Interpreting ground truth...")
    gt = interpret_tracks_gt(dataset, date, det_id, gt_csv)

    print_flush("Optimizing...")
    config_min = {
        'time_drop_thresh': 0.1,  # in seconds
        'time_region_check_thresh': 0.1,  # in seconds
        'creation_too_close_thresh': 1,  # in metres
        'is_too_close_thresh': {'default': 0.2, 'bicycle_bicycle': 0.1},  # in metres
        'incorrect_class_cost': {'default': 100,
                                 'bicycle_person': 3,
                                 'person_bicycle': 3},  # unitless? Compared with WorldTrack.cost output
        'cost_thresh': {'default': 5, 'bicycle': 5},  # unitless? Compared with WorldTrack.cost output
        'mask_margin': 0,  # in pixels, how close to the borders of the interesting region a track can be
        'cost_dist_weight': 0.5,
        'cost_dir_weight': 0.5,
    }

    config_max = {
        'time_drop_thresh': 7.0,  # in seconds
        'time_region_check_thresh': 2.0,  # in seconds
        'creation_too_close_thresh': 10,  # in metres
        'is_too_close_thresh': {'default': 3.0, 'bicycle_bicycle': 2.0},  # in metres
        'incorrect_class_cost': {'default': 123456789123456789,
                                 'bicycle_person': 30,
                                 'person_bicycle': 30},  # unitless? Compared with WorldTrack.cost output
        'cost_thresh': {'default': 25, 'bicycle': 35},  # unitless? Compared with WorldTrack.cost output
        'mask_margin': 15,  # in pixels, how close to the borders of the interesting region a track can be
        'cost_dist_weight': 2.0,
        'cost_dir_weight': 2.0,
    }

    config_min, config_max = map(WorldTrackingConfig, (config_min, config_max))

    base_path = runs_path / "{}_{}".format(dataset, run)
    plot_path = base_path / 'world_tracking_optimization.png'
    config, tracks = optimize_tracking(config_min, config_max, dataset, run, gt,
                                       gt_class_name_conversion, plot_path=plot_path,
                                       patience=patience, n=per_iteration)

    save(config, base_path / 'world_tracking_optimization.pklz')

    if visualize:
        print_flush("Visualizing...")
        visualize_tracks(base_path / 'world_tracking_optimization.mp4', dataset,
                         gt, tracks, stack_axis=stack_axis)

    print_flush("Done!")
def visualize_tracks(outvidpath, dataset, gts, tracks=None, stack_axis='v'):
    import imageio as iio
    from visualize_tracking import _draw_world, draw_world
    from visualize import class_colors
    from apply_mask import Masker
    from config import DatasetConfig

    if tracks is not None:
        calib = Calibration(dataset)

        # Reset IDs, sorted by the time each track first appears
        tracks = sorted(tracks, key=lambda x: x.history[0][0])
        i = 1  # running counter for the new global track IDs
        for track in tracks:
            track.id = i
            i += 1

    dc = DatasetConfig(dataset)

    gts_by_vid = split_lambda(gts, lambda x: x[0])
    assert len(gts_by_vid) == 1
    vid = list(gts_by_vid.keys())[0]

    n_colors = 50
    colors = class_colors(n_colors)

    mask = Masker(dataset)

    with iio.get_writer(outvidpath, fps=dc.get('video_fps')) as outvid:
        with iio.get_reader(datasets_path / dataset / "videos" / (vid + '.mkv')) as invid:
            gt_by_frame = split_lambda(gts, lambda x: x[1])
            fns = list(gt_by_frame.keys())
            fns.sort()

            for fn in fns:
                gts_frame = gt_by_frame[fn]

                frame = invid.get_data(fn)
                frame = mask.mask(frame, alpha=0.5)

                if tracks is not None:
                    tracks_frame = frame.copy()

                for gt in gts_frame:
                    vid, fn, t, x, y, i, c, px, py = gt
                    text = "{} {}".format(c, i)
                    col = colors[i % n_colors]
                    frame = _draw_world(frame, text, px, py, col)

                if tracks is not None:
                    for track in tracks:
                        draw_world(tracks_frame, track, fn, colors[track.id % n_colors], calib)

                    if stack_axis == 'h':
                        frame = np.hstack((frame, tracks_frame))
                    elif stack_axis == 'v':
                        frame = np.vstack((frame, tracks_frame))
                    else:
                        raise ValueError("Incorrect stack axis {}, try 'h' or 'v'".format(stack_axis))

                outvid.append_data(frame)
def main(dataset, run, videos):
    # Note: This main function only works for world coordinate tracks!

    calib = Calibration(dataset)
    dc = DatasetConfig(dataset)
    masker = Masker(dataset)

    if videos == 'all':
        from glob import glob
        files = glob('{rp}{ds}_{r}/tracks_world/*_tracks.pklz'.format(rp=runs_path, ds=dataset, r=run))
        video_names = [right_remove(x.split('/')[-1], '_tracks.pklz') for x in files]
    elif videos.startswith('random:'):
        num = int(left_remove(videos, 'random:'))

        from glob import glob
        files = glob('{rp}{ds}_{r}/tracks_world/*_tracks.pklz'.format(rp=runs_path, ds=dataset, r=run))
        all_video_names = [right_remove(x.split('/')[-1], '_tracks.pklz') for x in files]

        video_names = []
        while len(video_names) < num:
            video_name = choice(all_video_names)
            if video_name not in video_names:
                video_names.append(video_name)

            # Just in case the user wants more videos than there are
            if len(video_names) == len(all_video_names):
                break
    else:
        # Assumes the user types one or more videos, separated by commas with no spaces
        video_names = videos.split(',')

        # In case the user includes file endings
        video_names = [right_remove(x.rstrip(), '.mkv') for x in video_names]

        # In case the user includes spaces
        video_names = [x.strip(' ') for x in video_names]

    print_flush("Chosen videos: ")
    print_flush(str(video_names))

    for video_name in video_names:
        print_flush(video_name)
        print_flush("Loading...")
        tracks = load('{rp}{ds}_{r}/tracks_world/{v}_tracks.pklz'.format(rp=runs_path, ds=dataset, r=run, v=video_name))

        vidpath = "{dsp}{ds}/videos/{v}.mkv".format(dsp=datasets_path, ds=dataset, v=video_name)
        if not isfile(vidpath):
            raise ValueError("Incorrect input {}".format(videos))

        outvidpath = '{rp}{ds}_{r}/tracks_world/{v}_tracks.mp4'.format(rp=runs_path, ds=dataset, r=run, v=video_name)

        print_flush("Rendering...")
        render_video(tracks, vidpath, outvidpath, mask=masker, id_mode="global",
                     calib=calib, fps=dc.get('video_fps'))

    print_flush("Done!")
import logging

import pandas as pd
import numpy as np

from config import DatasetConfig
from data import load_openml
from database import Database
from metafeatures import MetaFeatures
from utilities import logloss, multiclass_roc_auc_score

logging.basicConfig(level=logging.DEBUG)

"""Database Credentials"""
# db = Database('sqlite', 'ml-base.db')
db = Database('postgres', 'april_dump', 'postgres', 'postgres', '192.168.50.4', 5432)

"""Load new dataset from OpenML, CSV or ARFF"""
config = DatasetConfig({'openml': 1510, 'train_path': None})
df = load_openml(config)
# df = pd.read_csv('')
# with open('amldata.arff') as f:
#     df = a2p.load(f)

"""Set class column to be predicted"""
class_column = 'Class'

"""Calculate Meta-Features for new Dataset"""
mf, success = MetaFeatures().calculate(df=df, class_column=class_column)

"""Transform Meta-Features into a Vector"""
new_mf_vec = pd.DataFrame(mf, index=['i']).to_numpy()

"""Save all Datasets from DB in a DataFrame"""
engine = db.engine
with engine.connect() as conn:
    datasets = pd.read_sql_query('''SELECT id, nr_inst, nr_attr, nr_class,
                                    nr_missing_values, pct_missing_values,
                                    nr_inst_mv, pct_inst_mv, nr_attr_mv,
def get_progress(dataset_name, run_name):
    dataset_name = quote(dataset_name)
    run_name = quote(run_name)

    ds_path = datasets_path / dataset_name
    if ds_path.is_dir():
        progress = dict()
        progress['has_config'] = (ds_path / 'config.txt').is_file()
        if progress['has_config']:
            dc = DatasetConfig(dataset_name)

        progress['has_mask'] = (ds_path / 'mask.png').is_file()
        progress['has_classnames'] = (ds_path / 'classes.txt').is_file()
        progress['has_calibration'] = (ds_path / 'calib.tacal').is_file()
        progress['number_of_timestamp_logs'] = len(list((ds_path / "logs").glob('*.log')))
        progress['number_of_videos'] = len(list((ds_path / 'videos').glob('*.mkv')))
        progress['training_frames_to_annotate'] = len(list((ds_path / "objects" / "train").glob('*/*.jpg')))
        progress['training_frames_annotated'] = len(list((ds_path / "objects" / "train").glob('*/*.txt')))
        progress['videos_with_point_tracks_computed'] = len(list((ds_path / "klt").glob('*.pklz')))
        progress['videos_with_point_tracks_visualized'] = len(list((ds_path / "klt").glob('*.mp4')))
        progress['all_runs'] = [x.stem.split('_')[-1]
                                for x in runs_path.glob(dataset_name + '_*')]

        run_path = runs_path / (dataset_name + '_' + run_name)
        if run_path.is_dir():
            progress['has_this_run'] = True

            rprogress = dict()
            rprogress['has_pretrained_weights'] = (ssd_path / 'weights_SSD300.hdf5').is_file()
            rprogress['videos_with_detected_objects'] = len(list(run_path.glob('csv/*.csv')))
            rprogress['videos_with_detected_objects_visualized'] = len(list(run_path.glob('detections/*.mp4')))
            rprogress['videos_with_detected_objects_in_world_coordinates'] = len(list(run_path.glob('detections_world/*.csv')))
            rprogress['videos_with_detected_objects_in_world_coordinates_visualized'] = len(list(run_path.glob('detections_world/*.mp4')))
            rprogress['stored_weight_files'] = len(list(run_path.glob('checkpoints/*.hdf5')))
            rprogress['videos_with_pixel_coordinate_tracks'] = len(list(run_path.glob('tracks/*.pklz')))
            rprogress['videos_with_pixel_coordinate_tracks_visualized'] = len(list(run_path.glob('tracks/*.mp4')))
            rprogress['videos_with_world_coordinate_tracks'] = len(list(run_path.glob('tracks_world/*.pklz')))
            rprogress['videos_with_world_coordinate_tracks_visualized'] = len(list(run_path.glob('tracks_world/*.mp4')))
            rprogress['has_optimized_world_tracking'] = (run_path / 'world_tracking_optimization.pklz').is_file()
            rprogress['has_visualized_optimized_world_tracking'] = (run_path / 'world_tracking_optimization.mp4').is_file()
            rprogress['has_world_tracking_ground_truth'] = (run_path / 'world_trajectory_gt.csv').is_file()
            rprogress['track_zips'] = [x.name for x in run_path.glob('track_zips/*.zip')]

            all_progress = {'dataset': progress, 'run': rprogress}
        else:
            progress['has_this_run'] = False
            all_progress = {'dataset': progress}

        return (all_progress, 200)
    else:
        return ("Dataset does not exist", 404)
def main(dataset, times, images_per_time, interval):
    # Quotes are added around 'times' by the quote function in server.py,
    # to make sure it is a single argument instead of being split by the spaces
    times = times.strip("'")

    ts = Timestamps(dataset)

    dc = DatasetConfig(dataset)
    fps = dc.get('video_fps')
    half_interval = int((fps * interval) / 2)  # in frames

    timestrings = times.split(',')
    for timestring in timestrings:
        print_flush(timestring)

        # Interpret the requested time, which can look like '2017-05-16 00:49:04.954000'
        splot = timestring.split(' ')
        date = splot[0].split('-')
        time = splot[1].replace('.', ':').split(':')

        year, month, day = map(int, date)
        hour, minute, second, microsecond = map(int, time)

        timestamp = datetime(year, month, day, hour, minute, second, microsecond)
        vid_name, frame_num = ts.get_frame_number(timestamp)
        print_flush("Time found to be {}, frame {}".format(vid_name, frame_num))

        if vid_name is None:
            raise ValueError("This timestamp was incorrect: {} Could it be before the first video?".format(timestring))

        video_path = datasets_path / dataset / "videos" / (vid_name + '.mkv')
        annot_folder = datasets_path / dataset / "objects" / "train" / vid_name
        log_path = annot_folder / 'frames.log'

        if not log_path.is_file():
            with log_path.open('w') as f:
                f.write("{}.mkv\n".format(vid_name))

        # See which frames were already annotated, to start at the right index
        already_ims = list(annot_folder.glob('*.jpg'))
        if already_ims:
            already_nums = [int(x.stem) for x in already_ims]
            i = max(already_nums) + 1
        else:
            i = 1

        with iio.get_reader(video_path) as vid:
            # Find start and stop positions, in frames
            start = frame_num - half_interval
            if start < 0:
                start = 0

            stop = frame_num + half_interval
            if stop >= len(vid):
                stop = len(vid) - 1

            with open(log_path, 'a') as log:
                # Choose frames to extract
                frame_nums = np.linspace(start, stop, images_per_time).astype(int).tolist()
                frame_nums = sorted(set(frame_nums))  # Remove duplicates

                for frame_num in frame_nums:
                    frame = vid.get_data(frame_num)
                    log.write("{} ".format(frame_num))
                    impath = annot_folder / "{}.jpg".format(i)
                    imsave(impath, frame)
                    i += 1
                    print_flush("> Written {}".format(impath))

    print_flush("Done!")
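# A minimal sketch (not from the source): the manual date/time parsing above is
# equivalent to datetime.strptime with an explicit format string, assuming every
# requested time includes a microsecond part, as in '2017-05-16 00:49:04.954000'.
from datetime import datetime

def parse_timestring(timestring):
    """Parse a timestring like '2017-05-16 00:49:04.954000' into a datetime."""
    return datetime.strptime(timestring, '%Y-%m-%d %H:%M:%S.%f')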
def main(dataset, run, n_clips, clip_length):
    dc = DatasetConfig(dataset)
    rc = RunConfig(dataset, run)
    mask = Masker(dataset)
    classes = get_classnames(dataset)
    num_classes = len(classes) + 1
    calib = Calibration(dataset)

    dataset_path = "{dsp}{ds}/".format(dsp=datasets_path, ds=dataset)
    run_path = "{rp}{ds}_{r}/".format(rp=runs_path, ds=dataset, r=run)

    # Grab a bunch of videos
    vids_query = "{dsp}videos/*.mkv".format(dsp=dataset_path)
    all_vids = glob(vids_query)
    all_vids = [right_remove(x.split('/')[-1], '.mkv') for x in all_vids]
    all_vids.sort()

    vids = []

    if n_clips > len(all_vids):
        n_clips = len(all_vids)

    if n_clips == len(all_vids):
        vids = all_vids
    else:
        while len(vids) < n_clips:
            vid = choice(all_vids)
            if vid not in vids:
                vids.append(vid)

    print_flush(vids)

    # Find out what has been run on all of these videos, what to include
    include_klt = True
    include_pixeldets = True
    include_worlddets = True
    include_worldtracks = True

    klts = []
    pixeldets = []
    worlddets = []
    worldtracks = []

    # Point tracks need to be converted for faster access
    vidres = dc.get('video_resolution')
    kltres = dc.get('point_track_resolution')

    class KLTConfig(object):
        klt_x_factor = 0
        klt_y_factor = 0

    klt_config = KLTConfig()
    klt_config.klt_x_factor = vidres[0] / kltres[0]
    klt_config.klt_y_factor = vidres[1] / kltres[1]

    ssdres = rc.get('detector_resolution')
    x_scale = vidres[0] / ssdres[0]
    y_scale = vidres[1] / ssdres[1]

    colors = class_colors(num_classes)

    for vid in vids:
        f = get_klt_path(dataset_path, vid)
        if not isfile(f):
            include_klt = False
        else:
            klt = load(f)
            klt, klt_frames = convert_klt(klt, klt_config)
            pts = (klt, klt_frames, class_colors(n_cols_klts))
            klts.append(pts)

        f = get_pixeldet_path(run_path, vid)
        if not isfile(f):
            include_pixeldets = False
        else:
            dets = pd.read_csv(f)
            pixeldets.append((dets, colors, x_scale, y_scale))

        f = get_worlddet_path(run_path, vid)
        if not isfile(f):
            include_worlddets = False
        else:
            dets = pd.read_csv(f)
            worlddets.append((dets, colors, calib))

        f = get_worldtracks_path(run_path, vid)
        if not isfile(f):
            include_worldtracks = False
        else:
            tracks = load(f)
            worldtracks.append((tracks, class_colors(n_cols_tracks), calib))

    print_flush("Point tracks: {}".format(include_klt))
    print_flush("Pixel coordinate detections: {}".format(include_pixeldets))
    print_flush("World coordinate detections: {}".format(include_worlddets))
    print_flush("World coordinate tracks: {}".format(include_worldtracks))

    # Decide where to start and stop in the videos
    clip_length = clip_length * dc.get('video_fps')  # convert from seconds to frames
    print_flush("Clip length in frames: {}".format(clip_length))

    clips = []
    for vid in vids:
        start, stop = make_clip(vid, clip_length, dataset_path)
        clips.append((start, stop))

    incs = [include_klt, include_pixeldets, include_worlddets, include_worldtracks]
    funs = [klt_frame, pixeldet_frame, worlddet_frame, worldtracks_frame]
    dats = [klts, pixeldets, worlddets, worldtracks]
    nams = ["Point tracks", "Detections in pixel coordinates",
            "Detections in world coordinates", "Tracks in world coordinates"]

    print_flush(clips)

    with iio.get_writer("{trp}summary.mp4".format(trp=run_path), fps=dc.get('video_fps')) as outvid:
        for i_vid, vid in enumerate(vids):
            print_flush(vid)
            old_prog = 0

            with iio.get_reader("{dsp}videos/{v}.mkv".format(dsp=dataset_path, v=vid)) as invid:
                start, stop = clips[i_vid]
                for i_frame in range(start, stop):
                    frame = invid.get_data(i_frame)
                    pieces = []

                    for inc, fun, dat, nam in zip(incs, funs, dats, nams):
                        if inc:
                            piece = fun(dat[i_vid], mask.mask(frame.copy(), alpha=0.5), i_frame)
                            draw_text(piece, vid, i_frame, nam)
                            pieces.append(piece)

                    outvid.append_data(join(pieces))

                    prog = float(i_frame - start) / (stop - start)
                    if prog - old_prog > 0.1:
                        print_flush("{}%".format(round(prog * 100)))
                        old_prog = prog

    print_flush("Done!")
def _get_parser():
    logging_args = argparse.ArgumentParser(add_help=False)
    logging_args.add_argument('-v', '--verbose', action='count', default=0)
    logging_args.add_argument('-l', '--logfile')

    parser = argparse.ArgumentParser(description='Meta-Learning Base Command Line Interface',
                                     parents=[logging_args])
    subparsers = parser.add_subparsers(title='action', help='Action to perform')
    parser.set_defaults(action=None)

    # Common Arguments
    sql_args = SQLConfig.get_parser()
    s3_args = S3Config.get_parser()
    log_args = LogConfig.get_parser()
    dataset_args = DatasetConfig.get_parser()
    generic_args = GenericConfig.get_parser()

    # Enter Data Parser
    enter_data_parents = [logging_args, sql_args, s3_args, dataset_args, log_args, generic_args]
    enter_data = subparsers.add_parser('enter_data', parents=enter_data_parents,
                                       help='Add a Dataset and trigger a Datarun on it.')
    enter_data.set_defaults(action=_enter_data)

    # Worker Args
    worker_args = argparse.ArgumentParser(add_help=False)
    worker_args.add_argument('--cloud-mode', action='store_true', default=False,
                             help='Whether to run this worker in cloud mode')
    worker_args.add_argument('--no-save', dest='save_files', action='store_false',
                             help="don't save models and metrics at all")
    computation_args = WorkerConfig.get_parser()

    # Worker
    worker_parents = [logging_args, worker_args, sql_args, s3_args, log_args,
                      generic_args, computation_args]
    worker = subparsers.add_parser('worker', parents=worker_parents,
                                   help='Start a single worker in foreground.')
    worker.set_defaults(action=_work)
    worker.add_argument('--datasets', help='Only train on datasets with these ids', nargs='+')
    worker.add_argument('--total-time', help='Number of seconds to run worker', type=int)

    # Export Data
    export_data = [logging_args, log_args, sql_args, generic_args]
    export_pipelines = subparsers.add_parser('export_pipelines', parents=export_data,
                                             help='Export all pipelines to a DataFrame')
    export_pipelines.set_defaults(action=_export_pipelines)

    export_datasets = subparsers.add_parser('export_datasets', parents=export_data,
                                            help='Export all datasets to a DataFrame')
    export_datasets.set_defaults(action=_export_datasets)

    return parser
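# Example invocations (a sketch; 'cli.py' is a hypothetical entry point, the
# subcommand names come from the subparsers above, and '--openml'/'--trainpath'
# are inferred from the error message in _enter_data):
#
#   python cli.py enter_data --openml 1510
#   python cli.py enter_data --trainpath data/train.csv
#   python cli.py worker --total-time 3600 --no-save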