Ejemplo n.º 1
0
def parse(basepath, dataset, resolution):
    """ Parses a detections CSV file and draws the detections on their images.
        This is used for showing object detector results on the validation set used
        during training. Detections are read from 'detections_0.csv' inside basepath,
        and one annotated image per unique 'filename' value is written to the
        'visualize' subfolder of basepath. Anything matched by a glob of that
        subfolder is removed before the new images are written.

        Arguments:
        basepath   -- path to folder with CSV files, with or without a trailing separator
        dataset    -- name of the dataset used, used for finding the correct mask
        resolution -- indexable whose first two elements are passed to cv2.resize
                      as the target size of each image before masking and drawing

        Raises:
        IOError -- if an image listed in the CSV file cannot be read
    """
    colors = class_colors()
    masker = Masker(dataset)

    # os.path.join keeps an existing trailing separator and adds a missing one,
    # so both 'folder/' and 'folder' resolve to the same files.
    csvpath = os.path.join(basepath, 'detections_0.csv')
    res = pd.read_csv(csvpath)

    outpath = os.path.join(basepath, 'visualize/')
    if not os.path.exists(outpath):
        os.makedirs(outpath)
    else:
        # Start from a clean folder so results from earlier runs don't linger
        for old in glob(outpath + '*'):
            os.remove(old)

    files = res['filename'].unique()
    for i, filename in enumerate(files):
        df = res.loc[res['filename'] == filename]
        impath = df['filename'].iloc[0]

        im = cv2.imread(impath)
        if im is None:
            # cv2.imread reports failure by returning None rather than raising,
            # which would otherwise surface as an obscure error inside cv2.resize
            raise IOError("Could not read image {}".format(impath))

        im = cv2.resize(im, (resolution[0], resolution[1]))
        im = masker.mask(im)
        im = draw(im, df, colors)

        # Prefix with a running number (starting at 1) to keep names unique
        outfilepath = "{}{}".format(
            outpath, '{}_{}'.format(1 + i,
                                    filename.split('/')[-1]))
        cv2.imwrite(outfilepath, im)
        print(outfilepath)
Ejemplo n.º 2
0
def detections_video(detections, videopath, outvideopath, classnames, dataset, res, fps=15, conf_thresh=0.75, show_frame_number=True, coords='pixels'):
    """ Writes a copy of a video where the detections of each frame are drawn

    Every frame is masked, resized to res, annotated with the detections whose
    'frame_number' equals the index of the frame, and appended to the output
    video. If res is not what make_divisible(..., 16) returns for it, the frame
    is placed in the top left corner of a larger white canvas of that size.

    Arguments:
    detections        -- the detections as a pandas table, with a 'frame_number' column
    videopath         -- path to input video
    outvideopath      -- path to output video showing the detections
    classnames        -- list of all the classes
    dataset           -- name of the dataset
    res               -- resolution of output video and coordinates in csv file (assumed to be the same). Probably SSD resolution if performed on direct csv files, and probably the video resolution if performed on csv files with world coordinates
    fps               -- frames-per-second of output video
    conf_thresh       -- forwarded to draw. Detections with confidences below this are not shown in output video. Set to negative to not visualize confidences, or set to 0.0 to show all of them.
    show_frame_number -- writes the frame number in the top left corner of the video
    coords            -- coordinate system of detections; a Calibration is only created for 'world'
    """

    masker = Masker(dataset)

    # The calibration is only needed for world coordinates
    calib = Calibration(dataset) if coords == 'world' else None

    colors = class_colors(len(classnames) + 1)

    in_width, in_height = res[0], res[1]
    outwidth = make_divisible(in_width, 16)
    outheight = make_divisible(in_height, 16)
    needs_padding = (outwidth != in_width) or (outheight != in_height)

    with io.get_reader(videopath) as vid, io.get_writer(outvideopath, fps=fps) as outvid:
        for frame_index, frame in enumerate(vid):
            frame = masker.mask(frame, alpha=0.5)
            frame = cv2.resize(frame, (in_width, in_height))

            in_this_frame = detections[detections['frame_number'] == frame_index]
            if len(in_this_frame):
                frame = draw(frame, in_this_frame, colors, conf_thresh=conf_thresh, coords=coords, calib=calib)

            if needs_padding:
                # White canvas with the frame in its top left corner
                canvas = np.full((outheight, outwidth, 3), 255, dtype=np.uint8)
                canvas[0:in_height, 0:in_width, :] = frame
                frame = canvas

            if show_frame_number:
                label = 'Frame {}'.format(frame_index)
                cv2.putText(frame, label, (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255), 1, cv2.LINE_AA)

            outvid.append_data(frame)

            # Progress report every 500 frames
            if frame_index % 500 == 0:
                print_flush("Frame {}".format(frame_index))
Ejemplo n.º 3
0
    def generate(self, train=True, do_shuffle=True):
        """ Endlessly yields batches of (preprocessed images, assigned targets).

        Arguments:
        train      -- if True, iterates self.train_keys and applies the training
                      augmentations (crop, color jitter, lighting, flips);
                      otherwise iterates self.val_keys without augmentation
        do_shuffle -- if True, the chosen key list is shuffled in place at the
                      start of every pass

        A partially filled batch is carried over to the next pass over the keys.
        """
        batch_images, batch_targets = [], []
        while True:
            keys = self.train_keys if train else self.val_keys
            if do_shuffle:
                shuffle(keys)

            for key in keys:
                img_path = self.path_prefix + key

                # The dataset name decides which mask to use; it is taken from
                # the fourth '/'-separated component of the image path
                dataset = img_path.split('/')[3]
                if dataset not in self.maskers:
                    # Create each Masker only once and reuse it afterwards
                    self.maskers[dataset] = Masker(dataset)
                masker = self.maskers[dataset]

                raw = imread(img_path)
                img = masker.mask(raw).astype('float32')

                y = np.vstack(self.gt.loc[key].y_true)

                if train and self.do_crop:
                    img, y = self.random_sized_crop(img, y)

                img = imresize(img, self.image_size).astype('float32')

                if train:
                    # The jitter operations are applied in a random order
                    shuffle(self.color_jitter)
                    for jitter in self.color_jitter:
                        img = jitter(img)
                    if self.lighting_std:
                        img = self.lighting(img)
                    if self.hflip_prob > 0:
                        img, y = self.horizontal_flip(img, y)
                    if self.vflip_prob > 0:
                        img, y = self.vertical_flip(img, y)

                batch_images.append(img)
                batch_targets.append(self.bbox_util.assign_boxes(y))

                if len(batch_targets) == self.batch_size:
                    images = np.array(batch_images)
                    encoded = np.array(batch_targets)
                    # Reset before yielding so the next batch starts empty
                    batch_images, batch_targets = [], []
                    yield preprocess_input(images), encoded
Ejemplo n.º 4
0
def main(cmd, dataset, imsize, visualize):
    """ Runs klt_save on the .mkv videos of a dataset.

    Arguments:
    cmd       -- "findvids" to process every video in the dataset's "videos"
                 folder, or "continue" to skip videos that already have a
                 .pklz file in the "klt" folder. Anything else raises ValueError.
    dataset   -- name of the dataset
    imsize    -- resolution, in whatever form parse_resolution accepts
    visualize -- if truthy, a '<video>_klt.mp4' path is also passed to klt_save
    """
    imsize = parse_resolution(imsize)

    mask = Masker(dataset)

    if cmd not in ("findvids", "continue"):
        raise (ValueError())

    vidfolder = datasets_path / dataset / "videos"
    kltfolder = datasets_path / dataset / "klt"
    mkdir(kltfolder)

    allvids = sorted(vidfolder.glob('*.mkv'))

    if cmd == "continue":
        # Videos whose output file already exists are not processed again
        finished = {p.stem for p in kltfolder.glob('*.pklz')}
        allvids = [p for p in allvids if p.stem not in finished]

    for vidpath in allvids:
        datpath = kltfolder / (vidpath.stem + '.pklz')

        outvidpath = None
        message = "{}   ->   {}".format(vidpath, datpath)
        if visualize:
            outvidpath = datpath.with_name(datpath.stem + '_klt.mp4')
            message = "{}   ->   {} & {}".format(vidpath, datpath,
                                                 outvidpath)
        print_flush(message)

        klt_save(vidpath, datpath, imsize, mask, outvidpath)

    print_flush("Done!")
Ejemplo n.º 5
0
def main(cmd, dataset, run, conf, make_videos):
    """ Builds world coordinate tracks for one or all videos of a dataset.

    Arguments:
    cmd         -- "findvids" to process every .mkv video of the dataset and
                   store the tracks in the run's 'tracks_world' folder;
                   any other value is treated as the name of a single video,
                   which is tracked with start_stop=(0,500) and saved in the
                   current folder
    dataset     -- name of the dataset
    run         -- name of the training run
    conf        -- not used by this function
    make_videos -- if truthy, a video is rendered for each set of tracks
    """
    if make_videos:
        # Only imported when videos are actually rendered
        from visualize_tracking import render_video
        from config import DatasetConfig
        from apply_mask import Masker

        mask = Masker(dataset)
        dc = DatasetConfig(dataset)

    config_path = "{rp}{ds}_{rn}/world_tracking_optimization.pklz".format(rp=runs_path, ds=dataset, rn=run)
    # Without an optimized configuration for this run, the default one is used
    config = load(config_path) if isfile(config_path) else WorldTrackingConfig(default_config)

    calib = Calibration(dataset)
    munkres = Munkres()
    ts = Timestamps(dataset)

    if cmd == "findvids":
        from glob import glob
        found = glob('{dsp}{ds}/videos/*.mkv'.format(dsp=datasets_path, ds=dataset))
        vidnames = sorted(right_remove(path.split('/')[-1], '.mkv') for path in found)

        outfolder = '{}{}_{}/tracks_world/'.format(runs_path, dataset, run)
        mkdir(outfolder)
        start_stop = None
    else:
        vidnames = [cmd]
        outfolder = './'
        start_stop = (0,500)

    for vidname in vidnames:
        print_flush(vidname)
        out_path = "{of}{v}_tracks.pklz".format(of=outfolder, v=vidname)

        print_flush("Loading data...")
        det_path = "{rp}{ds}_{rn}/detections_world/{v}_world.csv".format(rp=runs_path, ds=dataset, rn=run, v=vidname)
        detections3D = pd.read_csv(det_path)
        klts = load(det_path.replace('.csv', '_klt.pklz'))

        print_flush("Tracking...")
        tracks = make_tracks(dataset, vidname, detections3D, klts, munkres, ts, calib, config, start_stop=start_stop)

        print_flush("Saving tracks...")
        save(tracks, out_path)

        if not make_videos:
            continue

        vidpath = "{dsp}{ds}/videos/{v}.mkv".format(dsp=datasets_path, ds=dataset, v=vidname)
        print_flush("Rendering video...")
        render_video(tracks, vidpath, out_path.replace('.pklz','.mp4'), calib=calib, mask=mask, fps=dc.get('video_fps'))

    print_flush("Done!")
Ejemplo n.º 6
0
def main(cmd, dataset, run, conf, make_videos):
    """ Builds world coordinate tracks for one or all videos of a dataset.

    Arguments:
    cmd         -- "findvids" to process every .mkv video of the dataset and
                   store the tracks in the run's "tracks_world" folder;
                   any other value is treated as the name of a single video,
                   which is tracked with start_stop=(0,500) and saved in the
                   current folder
    dataset     -- name of the dataset
    run         -- name of the training run
    conf        -- not used by this function
    make_videos -- if truthy, a video is rendered for each set of tracks
    """
    from pathlib import Path

    if make_videos:
        # Only imported when videos are actually rendered
        from visualize_tracking import render_video
        from config import DatasetConfig
        from apply_mask import Masker

        mask = Masker(dataset)
        dc = DatasetConfig(dataset)

    run_folder = runs_path / "{}_{}".format(dataset,run)
    videos_folder = datasets_path / dataset / "videos"

    config_path = run_folder / "world_tracking_optimization.pklz"
    # Without an optimized configuration for this run, the default one is used
    config = load(config_path) if config_path.is_file() else WorldTrackingConfig(default_config)

    calib = Calibration(dataset)
    munkres = Munkres()
    ts = Timestamps(dataset)

    if cmd == "findvids":
        vidnames = sorted(p.stem for p in videos_folder.glob('*.mkv'))

        outfolder = run_folder / "tracks_world"
        mkdir(outfolder)
        start_stop = None
    else:
        vidnames = [cmd]
        outfolder = Path('./')
        start_stop = (0,500)

    for vidname in vidnames:
        print_flush(vidname)
        out_path = outfolder / (vidname+'_tracks.pklz')

        print_flush("Loading data...")
        det_path = run_folder / "detections_world" / (vidname+'_world.csv')
        detections3D = pd.read_csv(det_path)

        # The point tracks are stored next to the detections
        klts = load(det_path.with_name(det_path.stem + '_klt.pklz'))

        print_flush("Tracking...")
        tracks = make_tracks(dataset, vidname, detections3D, klts, munkres, ts, calib, config, start_stop=start_stop)

        print_flush("Saving tracks...")
        save(tracks, out_path)

        if not make_videos:
            continue

        vidpath = videos_folder / (vidname+'.mkv')
        print_flush("Rendering video...")
        render_video(tracks, vidpath, out_path.with_suffix('.mp4'), calib=calib, mask=mask, fps=dc.get('video_fps'))

    print_flush("Done!")
Ejemplo n.º 7
0
def slideshow(dataset, outpath, fps=10, repeat=20):
    """ Writes a video showing each image of a dataset with its detections drawn.

    Arguments:
    dataset -- name of the dataset
    outpath -- path of the video to write
    fps     -- frames-per-second of the output video
    repeat  -- number of consecutive frames each image is shown for

    Returns False, without writing anything, if the loaded detections contain
    no image files. Returns True otherwise.
    """
    dets = LoadDetections().custom(dataset)

    imfiles = list(set(dets.image_file))
    if not imfiles:
        return False

    cc = class_colors()
    mask = Masker(dataset)
    classnames = get_classnames(dataset)

    with io.get_writer(outpath, fps=fps) as vid:
        for imfile in imfiles:
            # The "class_name" and "class_index" columns are missing, so
            # they are derived from the "type" column
            d = dets[dets.image_file == imfile].rename(
                index=str, columns={"type": "class_name"})
            d['class_index'] = [
                classnames.index(cname) + 1 for cname in d['class_name']
            ]

            im = mask.mask(io.imread(imfile), alpha=0.5)

            # The image size is passed on as the scale of the coordinates
            im_height, im_width = im.shape[0], im.shape[1]
            frame = draw(im,
                         d,
                         cc,
                         conf_thresh=-1.0,
                         x_scale=float(im_width),
                         y_scale=float(im_height))

            for _ in range(repeat):
                vid.append_data(frame)

    return True
Ejemplo n.º 8
0
def main(cmd, dataset, imsize, visualize):
    """ Runs klt_save on the .mkv videos of a dataset.

    Arguments:
    cmd       -- "findvids" to process every video in the dataset's videos
                 folder, or "continue" to skip videos that already have a
                 .pklz file in the klt folder. Anything else raises ValueError.
    dataset   -- name of the dataset
    imsize    -- resolution, in whatever form parse_resolution accepts
    visualize -- if truthy, a '<video>_klt.mp4' path is also passed to klt_save
    """
    imsize = parse_resolution(imsize)

    mask = Masker(dataset)

    if cmd not in ("findvids", "continue"):
        raise (ValueError())

    def last_part(path):
        # File name without the folders in front of it
        return path.split('/')[-1]

    vidfolder = "{}{}/videos/".format(datasets_path, dataset)
    kltfolder = "{}{}/klt/".format(datasets_path, dataset)
    mkdir(kltfolder)

    allvids = glob(vidfolder + "*.mkv")
    allvids.sort()

    if cmd == "continue":
        # Videos whose output file already exists are not processed again
        existing = [
            right_remove(last_part(p), '.pklz')
            for p in sorted(glob(kltfolder + "*.pklz"))
        ]
        allvids = [
            p for p in allvids
            if right_remove(last_part(p), '.mkv') not in existing
        ]

    for vidpath in allvids:
        datpath = kltfolder + last_part(vidpath).replace('.mkv', '.pklz')

        outvidpath = None
        message = "{}   ->   {}".format(vidpath, datpath)
        if visualize:
            outvidpath = datpath.replace('.pklz', '_klt.mp4')
            message = "{}   ->   {} & {}".format(vidpath, datpath,
                                                 outvidpath)
        print_flush(message)

        klt_save(vidpath, datpath, imsize, mask, outvidpath)

    print_flush("Done!")
Ejemplo n.º 9
0
def test_on_video(model,
                  name,
                  experiment,
                  videopath,
                  outvideopath,
                  classnames,
                  batch_size=32,
                  input_shape=(480, 640, 3),
                  soft=False,
                  width=480,
                  height=640,
                  conf_thresh=0.75,
                  csv_conf_thresh=0.75):
    """ Applies a trained SSD model to a video

    The detections are always written to a CSV file. If outvideopath ends in
    '.csv' that file is the only output; otherwise a video with the detections
    drawn is written to outvideopath and the CSV file gets the same path with
    the suffix replaced by '.csv'.

    Arguments:
    model           -- the SSD model, e.g. from get_model
    name            -- name of dataset
    experiment      -- name of training run (not used by this function)
    videopath       -- path to input video
    outvideopath    -- path to output video showing the detections, or to a csv file
    classnames      -- list of all the classes
    batch_size      -- number of images processed in parallel, lower this if you get out-of-memory errors
    input_shape     -- size of images fed to SSD, indexed here as (width, height, channels)
    soft            -- Whether to do soft NMS or normal NMS
    width           -- Width to scale detections with (can be set to 1 if detections are already on right scale)
    height          -- Height to scale detections with (can be set to 1 if detections are already on right scale)
    conf_thresh     -- Detections with confidences below this are not shown in output video. Set to negative to not visualize confidences.
    csv_conf_thresh -- Detections with confidences below this are ignored. This should be same as conf_thresh unless conf_thresh is negative.

    """
    masker = Masker(name)

    num_classes = len(classnames) + 1
    colors = class_colors(num_classes)

    make_vid = True
    suffix = outvideopath.split('.')[-1]
    if suffix == 'csv':
        make_vid = False
        csvpath = outvideopath
    else:
        csvpath = outvideopath.replace('.{}'.format(suffix), '.csv')

    print_flush('Generating priors')
    im_in = np.random.random(
        (1, input_shape[1], input_shape[0], input_shape[2]))
    priors = model.predict(im_in, batch_size=1)[0, :, -8:]
    bbox_util = BBoxUtility(num_classes, priors)

    det_columns = [
        'class_index', 'confidence', 'xmin', 'ymin', 'xmax', 'ymax'
    ]
    all_detections = []
    outvid = None

    def process_batch(batch_inputs, batch_frames, batch_frame_numbers):
        # Runs the detector on one batch, stores its detections and, when a
        # video is being made, draws them and appends the frames to it
        batch = np.array(batch_inputs).astype(np.float64)
        batch = preprocess_input(batch)

        preds = model.predict(batch, batch_size=batch_size, verbose=0)
        results = bbox_util.detection_out(preds, soft=soft)

        for result, frame, frame_number in zip(results, batch_frames,
                                               batch_frame_numbers):
            # Empty results are replaced by an all-zero row so vstack works
            result = [
                r if len(r) > 0 else np.zeros((1, 6)) for r in result
            ]
            raw_detections = pd.DataFrame(np.vstack(result),
                                          columns=det_columns)

            rescale(raw_detections, 'xmin', width)
            rescale(raw_detections, 'xmax', width)
            rescale(raw_detections, 'ymin', height)
            rescale(raw_detections, 'ymax', height)
            rescale(raw_detections, 'class_index', 1)

            ci = raw_detections['class_index']
            raw_detections['class_name'] = [
                classnames[int(x) - 1] for x in ci
            ]

            raw_detections['frame_number'] = frame_number
            all_detections.append(raw_detections[
                raw_detections.confidence > csv_conf_thresh])

            if make_vid:
                frame = draw(frame,
                             raw_detections,
                             colors,
                             conf_thresh=conf_thresh)
                outvid.append_data(frame)

    inputs = []
    frames = []
    frame_numbers = []

    vid = io.get_reader(videopath)
    try:
        if make_vid:
            outvid = io.get_writer(outvideopath, fps=30)

        for i, frame in enumerate(vid):
            frame = masker.mask(frame)
            resized = cv2.resize(frame, (input_shape[0], input_shape[1]))

            frames.append(frame.copy())
            inputs.append(resized)
            # Stored frame numbers are the frame index plus one, which is the
            # numbering this function has always written to the CSV file
            frame_numbers.append(i + 1)

            if len(inputs) == batch_size:
                process_batch(inputs, frames, frame_numbers)
                frames = []
                inputs = []
                frame_numbers = []

            if i % (10 * batch_size) == 0:
                print_flush(i)

        # Frames left over after the last full batch would otherwise be
        # missing from both the CSV file and the output video
        if inputs:
            process_batch(inputs, frames, frame_numbers)
    finally:
        # Closing the writer is what finalizes the output video file
        vid.close()
        if outvid is not None:
            outvid.close()

    if all_detections:
        detections = pd.concat(all_detections)
    else:
        # pd.concat raises on an empty list (video without frames); write a
        # CSV file with just the header instead
        detections = pd.DataFrame(columns=det_columns +
                                  ['class_name', 'frame_number'])

    detections.to_csv(csvpath)
Ejemplo n.º 10
0
def main(dataset, run, n_clips, clip_length):
    """ Renders 'summary.mp4' in the run folder, showing clips from some videos.

    For each chosen video, one clip is rendered where every frame is drawn
    once per available kind of data (point tracks, pixel coordinate
    detections, world coordinate detections, world coordinate tracks) and the
    resulting pieces are combined with join. A kind of data is only included
    if its file exists for every chosen video.

    Arguments:
    dataset     -- name of the dataset
    run         -- name of the training run
    n_clips     -- number of videos to take clips from; limited to the number
                   of videos available, in which case all of them are used
    clip_length -- length of each clip in seconds
    """
    dc = DatasetConfig(dataset)
    rc = RunConfig(dataset, run)
    mask = Masker(dataset)
    classes = get_classnames(dataset)
    num_classes = len(classes) + 1
    calib = Calibration(dataset)

    dataset_path = "{dsp}{ds}/".format(dsp=datasets_path, ds=dataset)
    run_path = "{rp}{ds}_{r}/".format(rp=runs_path, ds=dataset, r=run)

    # Names of all the videos in the dataset, without folder and ending
    found = glob("{dsp}videos/*.mkv".format(dsp=dataset_path))
    all_vids = sorted(right_remove(p.split('/')[-1], '.mkv') for p in found)

    if n_clips > len(all_vids):
        n_clips = len(all_vids)

    if n_clips == len(all_vids):
        vids = all_vids
    else:
        # Draw random videos until enough different ones have been picked
        vids = []
        while len(vids) < n_clips:
            candidate = choice(all_vids)
            if candidate in vids:
                continue
            vids.append(candidate)

    print_flush(vids)

    # Each kind of data is included only if it exists for all chosen videos
    include_klt = True
    include_pixeldets = True
    include_worlddets = True
    include_worldtracks = True

    klts, pixeldets, worlddets, worldtracks = [], [], [], []

    # Point tracks need to be converted for faster access
    vidres = dc.get('video_resolution')
    kltres = dc.get('point_track_resolution')

    class KLTConfig(object):
        klt_x_factor = 0
        klt_y_factor = 0

    klt_config = KLTConfig()
    klt_config.klt_x_factor = vidres[0] / kltres[0]
    klt_config.klt_y_factor = vidres[1] / kltres[1]

    # Scale between detector resolution and video resolution
    ssdres = rc.get('detector_resolution')
    x_scale = vidres[0] / ssdres[0]
    y_scale = vidres[1] / ssdres[1]

    colors = class_colors(num_classes)

    for vid in vids:
        klt_file = get_klt_path(dataset_path, vid)
        if isfile(klt_file):
            klt, klt_frames = convert_klt(load(klt_file), klt_config)
            klts.append((klt, klt_frames, class_colors(n_cols_klts)))
        else:
            include_klt = False

        pixeldet_file = get_pixeldet_path(run_path, vid)
        if isfile(pixeldet_file):
            pixeldets.append(
                (pd.read_csv(pixeldet_file), colors, x_scale, y_scale))
        else:
            include_pixeldets = False

        worlddet_file = get_worlddet_path(run_path, vid)
        if isfile(worlddet_file):
            worlddets.append((pd.read_csv(worlddet_file), colors, calib))
        else:
            include_worlddets = False

        worldtracks_file = get_worldtracks_path(run_path, vid)
        if isfile(worldtracks_file):
            worldtracks.append(
                (load(worldtracks_file), class_colors(n_cols_tracks), calib))
        else:
            include_worldtracks = False

    print_flush("Point tracks: {}".format(include_klt))
    print_flush("Pixel coordinate detections: {}".format(include_pixeldets))
    print_flush("World coordinate detections: {}".format(include_worlddets))
    print_flush("World coordinate tracks: {}".format(include_worldtracks))

    # Convert the clip length from seconds to frames
    frames_per_clip = clip_length * dc.get('video_fps')

    print_flush("Clip length in frames: {}".format(frames_per_clip))

    # Decide where to start and stop in the videos
    clips = []
    for vid in vids:
        first, last = make_clip(vid, frames_per_clip, dataset_path)
        clips.append((first, last))

    # Drawing function, per-video data and title of each kind of data
    layers = [
        (include_klt, klt_frame, klts, "Point tracks"),
        (include_pixeldets, pixeldet_frame, pixeldets,
         "Detections in pixel coordinates"),
        (include_worlddets, worlddet_frame, worlddets,
         "Detections in world coordinates"),
        (include_worldtracks, worldtracks_frame, worldtracks,
         "Tracks in world coordinates"),
    ]
    active_layers = [(fun, dat, nam) for inc, fun, dat, nam in layers if inc]

    print_flush(clips)

    with iio.get_writer("{trp}summary.mp4".format(trp=run_path),
                        fps=dc.get('video_fps')) as outvid:
        for i_vid, vid in enumerate(vids):
            print_flush(vid)
            old_prog = 0

            inpath = "{dsp}videos/{v}.mkv".format(dsp=dataset_path, v=vid)
            with iio.get_reader(inpath) as invid:
                start, stop = clips[i_vid]
                for i_frame in range(start, stop):
                    frame = invid.get_data(i_frame)

                    pieces = []
                    for fun, dat, nam in active_layers:
                        # Every piece is drawn on its own masked copy
                        masked = mask.mask(frame.copy(), alpha=0.5)
                        piece = fun(dat[i_vid], masked, i_frame)
                        draw_text(piece, vid, i_frame, nam)
                        pieces.append(piece)

                    outvid.append_data(join(pieces))

                    # Report progress roughly every ten percent of the clip
                    prog = float(i_frame - start) / (stop - start)
                    if prog - old_prog > 0.1:
                        print_flush("{}%".format(round(prog * 100)))
                        old_prog = prog

    print_flush("Done!")
Ejemplo n.º 11
0
def autoannotate(dataset, import_datasets, input_shape, image_shape,
                 batch_size, batch_size2, epochs, frozen_layers):
    """ Trains a detector and writes its detections as '.auto' annotation files.

    A model is trained with train (using train_amount=1.0) and then applied,
    with normal NMS, to every image returned by get_images_to_autoannotate.
    For each image, the (at most 128) most confident detections are written
    to a file with the same path as the image but the suffix '.auto'.

    Arguments:
    dataset         -- name of the dataset
    import_datasets -- passed on to train
    input_shape     -- resolution of the images fed to the detector, in
                       whatever form parse_resolution accepts
    image_shape     -- parsed with parse_resolution but otherwise not used here
    batch_size      -- batch size passed on to train
    batch_size2     -- batch size used when running the detector on the images
    epochs          -- passed on to train
    frozen_layers   -- passed on to train
    """
    soft = False

    classes = get_classnames(dataset)

    input_shape = parse_resolution(input_shape)
    image_shape = parse_resolution(image_shape)

    model, bbox_util = train(dataset,
                             import_datasets,
                             input_shape,
                             batch_size,
                             epochs,
                             frozen_layers,
                             train_amount=1.0)

    print_flush("Auto-annotating...")
    masker = Masker(dataset)

    det_columns = [
        'class_index', 'confidence', 'xmin', 'ymin', 'xmax', 'ymax'
    ]

    # Number of detections kept per image. Sorting by confidence and keeping
    # the top ones seems to be more robust than a hard-coded confidence
    # threshold. Note that a confidence threshold can be chosen in the
    # annotation web UI
    max_dets = 128

    batch = []
    batch_paths = []
    to_annotate = get_images_to_autoannotate(dataset)

    # rep_last needed since we use large batches, for speed, to make sure we run on all images
    for impath in rep_last(to_annotate, batch_size2):
        masked = masker.mask(iio.imread(impath))
        batch.append(cv2.resize(masked, (input_shape[0], input_shape[1])))
        batch_paths.append(impath)

        if len(batch) != batch_size2:
            continue

        prepared = preprocess_input(np.array(batch).astype(np.float64))
        preds = model.predict(prepared, batch_size=batch_size2, verbose=0)
        results = bbox_util.detection_out(preds, soft=soft)

        for result, res_path in zip(results, batch_paths):
            # Empty results are replaced by an all-zero row so vstack works.
            # NOTE(review): these placeholder rows (class index 0, confidence
            # 0) can end up in the written file - confirm this is intended
            stacked = np.vstack(
                [r if len(r) > 0 else np.zeros((1, 6)) for r in result])
            raw_detections = pd.DataFrame(stacked, columns=det_columns)

            auto_path = res_path.with_suffix('.auto')

            # Most confident detections first
            dets = list(pandas_loop(raw_detections))
            dets.sort(key=lambda x: 1.0 - x['confidence'])
            dets = dets[:max_dets]

            with auto_path.open('w') as f:
                for det in dets:
                    class_index = int(det['class_index'])
                    # Boxes are written as center, width and height
                    center_x = round((det['xmin'] + det['xmax']) / 2, 4)
                    center_y = round((det['ymin'] + det['ymax']) / 2, 4)
                    box_w = round(det['xmax'] - det['xmin'], 4)
                    box_h = round(det['ymax'] - det['ymin'], 4)
                    line = "{index} {cx} {cy} {w} {h} conf:{conf} {cn}\n".format(
                        index=class_index,
                        cx=center_x,
                        cy=center_y,
                        w=box_w,
                        h=box_h,
                        conf=round(det['confidence'], 4),
                        cn=classes[class_index - 1])
                    f.write(line)
            print_flush("Wrote {}".format(auto_path))

        batch = []
        batch_paths = []

    assert (not batch)  # If this fails, not all images were processed!
    print_flush("Done!")
Ejemplo n.º 12
0
def main(dataset, run, videos):
    """ Renders videos of the world coordinate tracks of a run.

    Note: This main function only works for world coordinate tracks!

    Arguments:
    dataset -- name of the dataset
    run     -- name of the training run
    videos  -- which videos to render: 'all' for every video with a tracks
               file in the run's tracks_world folder, 'random:N' for N
               randomly chosen ones of those (or all of them, if there are
               fewer than N), or one or more video names separated by commas.
               Surrounding whitespace and a trailing '.mkv' are ignored for
               names given explicitly.

    Raises:
    ValueError -- if the video file of a chosen video does not exist
    """
    calib = Calibration(dataset)
    dc = DatasetConfig(dataset)
    masker = Masker(dataset)

    def videos_with_tracks():
        # Names of all videos that have a tracks file in this run
        from glob import glob
        files = glob('{rp}{ds}_{r}/tracks_world/*_tracks.pklz'.format(
            rp=runs_path, ds=dataset, r=run))
        return [
            right_remove(x.split('/')[-1], '_tracks.pklz') for x in files
        ]

    if videos == 'all':
        video_names = videos_with_tracks()
    elif videos.startswith('random:'):
        all_video_names = videos_with_tracks()

        # Never ask for more videos than there are. This also keeps choice
        # from being called on an empty list when no tracks exist.
        num = min(int(left_remove(videos, 'random:')), len(all_video_names))

        video_names = []
        while len(video_names) < num:
            video_name = choice(all_video_names)
            if video_name not in video_names:
                video_names.append(video_name)
    else:
        # Assumes the user types one or more videos, separated by commas.
        # Whitespace is stripped before the ending is removed, so that names
        # like 'video.mkv ' are handled too.
        video_names = [
            right_remove(x.strip(), '.mkv') for x in videos.split(',')
        ]

    print_flush("Chosen videos: ")
    print_flush(str(video_names))
    for video_name in video_names:
        print_flush(video_name)
        print_flush("Loading...")
        tracks = load('{rp}{ds}_{r}/tracks_world/{v}_tracks.pklz'.format(
            rp=runs_path, ds=dataset, r=run, v=video_name))
        vidpath = "{dsp}{ds}/videos/{v}.mkv".format(dsp=datasets_path,
                                                    ds=dataset,
                                                    v=video_name)

        if not isfile(vidpath):
            raise (ValueError("Incorrect input {}".format(videos)))

        outvidpath = '{rp}{ds}_{r}/tracks_world/{v}_tracks.mp4'.format(
            rp=runs_path, ds=dataset, r=run, v=video_name)

        print_flush("Rendering...")
        render_video(tracks,
                     vidpath,
                     outvidpath,
                     mask=masker,
                     id_mode="global",
                     calib=calib,
                     fps=dc.get('video_fps'))

    print_flush("Done!")
Ejemplo n.º 13
0
def main(cmd, dataset, run, vidres, ssdres, kltres, conf, make_videos):
    """ Builds tracks from the detections and point tracks of one or all videos.

    Arguments:
    cmd         -- "findvids" to process every .mkv video of the dataset and
                   store the tracks in the run's 'tracks' folder; any other
                   value is treated as the name of a single video, whose
                   tracks are stored in the current folder
    dataset     -- name of the dataset
    run         -- name of the training run
    vidres      -- video resolution, in whatever form parse_resolution accepts
    ssdres      -- detector resolution, in whatever form parse_resolution accepts
    kltres      -- point track resolution, in whatever form parse_resolution accepts
    conf        -- passed on to Config
    make_videos -- if truthy, a video is rendered for each video with tracks
    """
    from storage import load, save
    from folder import datasets_path, runs_path, mkdir
    from pathlib import Path

    mask = Masker(dataset)

    run_folder = runs_path / '{}_{}'.format(dataset, run)
    videos_folder = datasets_path / dataset / "videos"

    if cmd == "findvids":
        vidnames = sorted(p.stem for p in videos_folder.glob('*.mkv'))
        outfolder = run_folder / 'tracks'
    else:
        vidnames = [cmd]
        outfolder = Path('./')

    vidres = parse_resolution(vidres)
    ssdres = parse_resolution(ssdres)
    kltres = parse_resolution(kltres)

    # Factors for scaling detections from detector to video resolution
    x_factor = float(vidres[0]) / ssdres[0]
    y_factor = float(vidres[1]) / ssdres[1]
    scaling = (('xmin', x_factor), ('xmax', x_factor),
               ('ymin', y_factor), ('ymax', y_factor))

    c = Config(vidres, kltres, conf)

    mkdir(outfolder)

    for vidname in vidnames:
        detections = pd.read_csv(run_folder / "csv" / (vidname + '.csv'))
        for column, factor in scaling:
            detections[column] = round(detections[column] * factor).astype(int)

        klt = load(datasets_path / dataset / "klt" / (vidname + '.pklz'))
        klt, klt_frames = convert_klt(klt, c)

        if len(detections) > 0:
            tracks = build_tracks(detections, klt, klt_frames, c)
            print_flush("{}  tracks done".format(vidname))
            save(tracks, outfolder / '{}_tracks.pklz'.format(vidname))
        else:
            tracks = []
            print_flush(
                "{}  skipping tracking, because there were no detections".
                format(vidname))

        if not make_videos:
            continue

        if not tracks:
            print_flush(
                "{}  skipping video rendering, because there were no tracks"
                .format(vidname))
            continue

        from visualize_tracking import render_video
        render_video(tracks,
                     videos_folder / (vidname + '.mkv'),
                     outfolder / (vidname + "_tracks.mp4"),
                     mask=mask)
        print_flush("{}  video done".format(vidname))

    print_flush("Done!")
Ejemplo n.º 14
0
def _too_close(when, others, min_dist):
    """Return True if `when` is within `min_dist` seconds of any time in `others`."""
    return any(abs((when - other).total_seconds()) <= min_dist
               for other in others)


def rare_class_mining(dataset, class_name, time_dist, sampling_rate,
                      import_datasets, input_shape, image_shape, batch_size,
                      batch_size2, epochs, frozen_layers, confidence):
    """Mine not-yet-annotated video frames and add selected ones to the
    training images of a dataset.

    The function works in four stages:

    1. For every ``*.mkv`` video of the dataset, sample candidate frames
       every `sampling_rate` seconds, rejecting frames that are already
       annotated or lie within `time_dist` seconds of an annotated or
       previously chosen frame.
    2. Train an object detector on the existing annotations via `train`.
    3. Run the candidate frames through `process` in batches.
    4. Write every image returned by `process` as the next numbered ``.jpg``
       in the video's training folder and append its frame number to that
       folder's ``frames.log``.

    Arguments:
    dataset         -- name of the dataset (folder name under `datasets_path`)
    class_name      -- class to look for; forwarded to `process`
    time_dist       -- minimum distance, in seconds, that a candidate must
                       keep to annotated and already chosen frames
    sampling_rate   -- step, in seconds, between sampled timestamps
    import_datasets -- forwarded to `train`
    input_shape     -- resolution spec for `parse_resolution`; elements 0 and
                       1 are used as the (width, height) frames are resized to
    image_shape     -- resolution spec; parsed but not otherwise used here
    batch_size      -- forwarded to `train`
    batch_size2     -- number of frames collected before each `process` call
    epochs          -- forwarded to `train`
    frozen_layers   -- forwarded to `train`
    confidence      -- forwarded to `process`

    Exits the interpreter with status 1 if no candidate frame is found in
    any video.
    """
    soft = False
    classes = get_classnames(dataset)

    ts = Timestamps(dataset)

    # Find all videos in dataset
    vidnames = list((datasets_path / dataset / "videos").glob('*.mkv'))

    all_found = []

    for v in vidnames:

        # Find video length from log file (computing this from the video file is too slow).
        # The length is the first space-separated token of the last non-empty line.
        log_lines = (datasets_path / dataset / "logs" /
                     v.with_suffix('.log').name).read_text().split('\n')
        last_line = [line for line in log_lines if line][-1]
        v_len = int(last_line.split(' ')[0])

        print_flush("{} of length {}".format(v, v_len))

        # Find existing annotations. The first token is the video name; the
        # rest are frame numbers (str.split() never yields empty tokens).
        frames_log = (datasets_path / dataset / "objects" / "train" / v.stem /
                      "frames.log").read_text().split()
        annotated = [int(x) for x in frames_log[1:]]
        annotated_frames = set(annotated)  # O(1) membership inside the loop
        print_flush("Avoiding the following existing frames: ")
        print_flush(str(annotated))

        curr_time = ts.get(v.stem, 0)

        annotated_times = [ts.get(v.stem, x) for x in annotated]

        found = []
        found_times = []

        while True:
            # Sample in time
            curr_time += timedelta(seconds=sampling_rate)
            curr_frame = ts.get_frame_number_given_vidname(curr_time, v.stem)

            if curr_frame >= v_len:
                # We have reached the end of the video
                break

            if curr_frame in annotated_frames:
                continue

            # Check if we are too close to existing annotations
            if _too_close(curr_time, annotated_times, time_dist):
                continue

            # Check if we are too close to any previously chosen interesting frames
            if _too_close(curr_time, found_times, time_dist):
                continue

            # This is a frame we could work with
            found.append(curr_frame)
            found_times.append(curr_time)

        all_found.append((v, found))

    print_flush("Candidate frames:")
    for v, frames in all_found:
        print("{} : {}".format(v, frames))

    if not any(frames for _, frames in all_found):
        print_flush("Found no interesting frames. Quitting...")
        import sys
        sys.exit(1)

    print_flush(
        "Starting to train object detector with existing annotations...")

    input_shape = parse_resolution(input_shape)
    # Parsed for parity with the other entry points; the result is not used
    # below, but an invalid spec still fails here as it did before.
    image_shape = parse_resolution(image_shape)

    model, bbox_util = train(dataset,
                             import_datasets,
                             input_shape,
                             batch_size,
                             epochs,
                             frozen_layers,
                             train_amount=1.0)

    print_flush(
        "Applying the model to the images to find objects of type '{}'".format(
            class_name))

    masker = Masker(dataset)
    inputs = []
    frame_nums = []
    im_origs = []
    vids = []

    # NOTE(review): `process` is expected to return (video path, frame number,
    # image) tuples, judging by how the results are unpacked below.
    found_data = []

    for v, frames in all_found:
        with iio.get_reader(v) as vid:
            for frame_number in frames:
                im_orig = vid.get_data(frame_number)
                im = masker.mask(im_orig.copy())

                resized = cv2.resize(im, (input_shape[0], input_shape[1]))
                inputs.append(resized)
                frame_nums.append(frame_number)
                im_origs.append(im_orig)
                vids.append(v)

                if len(inputs) == batch_size2:
                    tmp = process(inputs, frame_nums, im_origs, vids,
                                  confidence, class_name, soft, batch_size2,
                                  model, bbox_util, classes)

                    found_data.extend(tmp)

                    # Start fresh lists rather than clearing in place, in
                    # case `process` kept references to the old ones.
                    inputs = []
                    frame_nums = []
                    im_origs = []
                    vids = []

    if inputs:
        # There are still some leftovers
        tmp = process(inputs, frame_nums, im_origs, vids, confidence,
                      class_name, soft, len(inputs), model, bbox_util, classes)

        found_data.extend(tmp)

    print_flush("Writing images...")
    for v, f, im in found_data:
        im_folder = datasets_path / dataset / "objects" / "train" / v.stem

        # Next free image number. `default=0` keeps this working when the
        # folder does not contain any .jpg yet (plain max() would raise
        # ValueError on the empty sequence).
        im_num = max((int(p.stem) for p in im_folder.glob('*.jpg')),
                     default=0) + 1
        im_path = im_folder / "{}.jpg".format(im_num)

        iio.imwrite(im_path, im)
        print_flush("Written {}".format(im_path))

        # Add the new frame numbers to frames.log for this video
        flog = im_folder / "frames.log"
        with flog.open('a') as log:
            log.write(str(f) + ' ')

    print_flush("Done!")
Ejemplo n.º 15
0
def main(cmd, dataset, run, vidres, ssdres, kltres, conf, make_videos):
    """Build tracks from detections and KLT data, one video at a time.

    Arguments:
    cmd         -- either "findvids", to process every .mkv video found in the
                   dataset's videos folder (output goes to the run's tracks
                   folder), or a single video name (output goes to './')
    dataset     -- name of the dataset
    run         -- name of the training run whose detection CSVs are used
    vidres      -- video resolution spec, parsed by parse_resolution
    ssdres      -- detector resolution spec, parsed by parse_resolution
    kltres      -- KLT resolution spec, parsed by parse_resolution
    conf        -- passed on to Config together with vidres and kltres
    make_videos -- if truthy, render a video of the tracks for every video
                   that produced any
    """
    from storage import load, save
    from folder import datasets_path, runs_path

    mask = Masker(dataset)

    if cmd != "findvids":
        vidnames = [cmd]
        outfolder = './'
    else:
        from glob import glob
        video_files = glob('{}{}/videos/*.mkv'.format(datasets_path, dataset))
        # Keep only the bare video name: no folders, no extension
        vidnames = sorted(
            right_remove(path.split('/')[-1], '.mkv') for path in video_files)

        outfolder = '{}{}_{}/tracks/'.format(runs_path, dataset, run)

    vidres = parse_resolution(vidres)
    ssdres = parse_resolution(ssdres)
    kltres = parse_resolution(kltres)

    # Detections are in detector resolution; tracks are built in video resolution
    x_factor = float(vidres[0]) / ssdres[0]
    y_factor = float(vidres[1]) / ssdres[1]
    scaling = (
        ('xmin', x_factor),
        ('xmax', x_factor),
        ('ymin', y_factor),
        ('ymax', y_factor),
    )

    c = Config(vidres, kltres, conf)

    from folder import mkdir
    mkdir(outfolder)

    for vidname in vidnames:
        csv_path = '{}{}_{}/csv/{}.csv'.format(runs_path, dataset, run,
                                               vidname)
        detections = pd.read_csv(csv_path)
        for column, factor in scaling:
            detections[column] = round(detections[column] *
                                       factor).astype(int)

        klt_path = '{}{}/klt/{}.pklz'.format(datasets_path, dataset, vidname)
        klt, klt_frames = convert_klt(load(klt_path), c)

        if len(detections) > 0:
            tracks = build_tracks(detections, klt, klt_frames, c)
            print_flush("{}  tracks done".format(vidname))
            save(tracks, '{}{}_tracks.pklz'.format(outfolder, vidname))
        else:
            tracks = []
            print_flush(
                "{}  skipping tracking, because there were no detections".
                format(vidname))

        if not make_videos:
            continue

        if not tracks:
            print_flush(
                "{}  skipping video rendering, because there were no tracks"
                .format(vidname))
            continue

        from visualize_tracking import render_video
        vidpath = "{}{}/videos/{}.mkv".format(datasets_path, dataset, vidname)
        outvidpath = "{}{}_tracks.mp4".format(outfolder, vidname)
        render_video(tracks, vidpath, outvidpath, mask=mask)
        print_flush("{}  video done".format(vidname))

    print_flush("Done!")
Ejemplo n.º 16
0
def main(dataset, run, input_shape, seq_start, seq_stop, videopath, conf_thresh, i_seq, outname, batch_size):
    """Run the detector on one sequence of video frames and write the
    detections to a CSV file.

    Arguments:
    dataset     -- name of the dataset (used for class names, mask and model)
    run         -- name of the training run whose model is loaded
    input_shape -- resolution spec, parsed by parse_resolution; elements 0 and
                   1 are used as the (width, height) frames are resized to
    seq_start   -- first frame number of the sequence (inclusive)
    seq_stop    -- last frame number of the sequence (exclusive)
    videopath   -- path to the input video
    conf_thresh -- confidence threshold, forwarded to process_results
    i_seq       -- index of this sequence; for 0 the output file is
                   overwritten and a header is written, otherwise the
                   detections are appended without a header
    outname     -- path to the output CSV file
    batch_size  -- number of frames sent to the model at once

    Frame numbers at or beyond the end of the video are fed to the model as
    copies of the last frame read, and no detections are written for them.
    """

    print_flush("> Predicting...")
    classes = get_classnames(dataset)
    masker = Masker(dataset)

    input_shape = parse_resolution(input_shape)

    num_classes = len(classes)+1
    model = get_model(dataset, run, input_shape, num_classes, verbose=False)
    priors = get_priors(model, input_shape)
    bbox_util = BBoxUtility(num_classes, priors)

    width = input_shape[0]
    height = input_shape[1]

    def _predict(frames):
        # Turn a list of resized frames into a model input batch and predict
        batch = np.array(frames).astype(np.float32)
        batch = preprocess_input(batch)
        return model.predict_on_batch(batch)

    inputs = []
    outputs = []
    old_frame = None

    with io.get_reader(videopath) as vid:
        vlen = len(vid)
        for i_in_seq in range(seq_start, seq_stop):
            if i_in_seq < vlen:
                frame = vid.get_data(i_in_seq)
                frame = masker.mask(frame)
                old_frame = frame
            else:
                # Past the end of the video: pad with the last real frame
                frame = old_frame

            resized = cv2.resize(frame, (width, height))
            inputs.append(resized)

            if len(inputs) == batch_size:
                outputs.append(_predict(inputs))
                inputs = []

        if inputs:
            # The sequence length was not a multiple of batch_size. Without
            # this final, smaller batch the trailing frames would have no
            # prediction and indexing `preds` below would fail for them.
            outputs.append(_predict(inputs))

    preds = np.vstack(outputs)

    print_flush("> Processing...")
    all_detections = []

    # Only frames that actually exist in the video produce detections
    for i, frame_num in enumerate(range(seq_start, min(seq_stop, vlen))):
        pred = preds[i, :]
        pred = pred.reshape(1, pred.shape[0], pred.shape[1])
        results = bbox_util.detection_out(pred, soft=False)

        detections = process_results(results, width, height, classes, conf_thresh, frame_num)
        all_detections.append(detections)

    dets = pd.concat(all_detections)

    # For the first sequence, we should open in write mode, and then in append mode
    # This way, we still overwrite the files if this script is run multiple times
    first_sequence = (i_seq == 0)
    open_mode = 'w' if first_sequence else 'a'

    print_flush("> Writing to {} ...".format(outname))
    with open(outname, open_mode) as f:
        dets.to_csv(f, header=first_sequence)
def visualize_tracks(outvidpath, dataset, gts, tracks=None, stack_axis='v'):
    """Render a video showing ground truth and, optionally, tracks.

    For every frame number that has ground truth, the (half-transparently
    masked) video frame is drawn with the ground truth positions. If `tracks`
    is given, a second copy of the frame is drawn with the tracks and stacked
    with the ground truth frame.

    Arguments:
    outvidpath -- path of the video file to write
    dataset    -- name of the dataset (used for calibration, mask, config and
                  for locating the input video)
    gts        -- ground truth entries, each a 9-tuple
                  (video name, frame number, t, x, y, id, class, px, py);
                  all entries must belong to the same video
    tracks     -- optional list of tracks; each needs a `history` whose first
                  entry starts with a sortable value, and an assignable `id`.
                  The IDs of the passed track objects are overwritten.
    stack_axis -- 'h' to put the ground truth and track frames side by side,
                  'v' to put them on top of each other

    Raises:
    ValueError -- if `tracks` is given and `stack_axis` is neither 'h' nor 'v'
    """
    import imageio as iio
    from visualize_tracking import _draw_world, draw_world
    from visualize import class_colors
    from apply_mask import Masker
    from config import DatasetConfig

    if tracks is not None:
        calib = Calibration(dataset)

        # Reset IDs to consecutive numbers, ordered by the first element of
        # each track's first history entry.
        # NOTE(review): the original counter was never initialised (this
        # branch always raised UnboundLocalError); starting at 1 is a choice
        # made during review -- adjust if 0-based IDs are expected.
        tracks = sorted(tracks, key=lambda x: x.history[0][0])
        for new_id, track in enumerate(tracks, start=1):
            track.id = new_id

    dc = DatasetConfig(dataset)

    gts_by_vid = split_lambda(gts, lambda x: x[0])
    assert (len(gts_by_vid) == 1)
    vid = list(gts_by_vid.keys())[0]

    n_colors = 50
    colors = class_colors(n_colors)

    mask = Masker(dataset)

    with iio.get_writer(outvidpath, fps=dc.get('video_fps')) as outvid:
        with iio.get_reader(datasets_path / dataset / "videos" /
                            (vid + '.mkv')) as invid:

            gt_by_frame = split_lambda(gts, lambda x: x[1])
            fns = sorted(gt_by_frame.keys())

            for fn in fns:
                gts_frame = gt_by_frame[fn]

                frame = invid.get_data(fn)
                frame = mask.mask(frame, alpha=0.5)

                if tracks is not None:
                    # Copy before the ground truth is drawn, so the track
                    # view only shows tracks
                    tracks_frame = frame.copy()

                for gt in gts_frame:
                    # Only id, class and pixel position are needed; separate
                    # names avoid rebinding the loop's `fn` and outer `vid`
                    _, _, _, _, _, gt_id, gt_class, px, py = gt

                    text = "{} {}".format(gt_class, gt_id)
                    col = colors[gt_id % n_colors]

                    frame = _draw_world(frame, text, px, py, col)

                if tracks is not None:
                    for track in tracks:
                        draw_world(tracks_frame, track, fn,
                                   colors[track.id % n_colors], calib)

                    if stack_axis == 'h':
                        frame = np.hstack((frame, tracks_frame))
                    elif stack_axis == 'v':
                        frame = np.vstack((frame, tracks_frame))
                    else:
                        raise (ValueError(
                            "Incorrect stack axis {}, try 'h' or 'v'".format(
                                stack_axis)))

                outvid.append_data(frame)
Ejemplo n.º 18
0
def autoannotate(dataset, import_datasets, input_shape, image_shape, batch_size, batch_size2, epochs, frozen_layers):
    """Train an SSD300 detector on the existing annotations and use it to
    write automatic annotation files for the images that still need them.

    For every image returned by get_images_to_autoannotate, a text file with
    the same path but the extension '.auto' is written. Each line has the form
    "<class index> <center x> <center y> <width> <height> conf:<confidence> <class name>",
    and at most the 128 most confident detections are kept per image.

    Arguments:
    dataset         -- name of the dataset to annotate
    import_datasets -- comma-separated names of additional datasets whose
                       ground truth is used for training; may be empty
    input_shape     -- resolution spec, parsed by parse_resolution; elements
                       0, 1 and 2 are used as width, height and channels
    image_shape     -- resolution spec; parsed but not otherwise used here
    batch_size      -- batch size for training and for model.predict
    batch_size2     -- number of images collected before each prediction call
    epochs          -- number of training epochs
    frozen_layers   -- number of leading VGG layer groups (0 to 5) that are
                       excluded from training
    """
    soft = False

    input_shape = parse_resolution(input_shape)
    # Parsed so that an invalid spec still fails here; the result is unused
    image_shape = parse_resolution(image_shape)

    print_flush("Loading ground truth...")
    load_detections = LoadDetections()
    datasets = [dataset]
    if import_datasets:
        datasets.extend(import_datasets.split(','))

    detections = load_detections.custom(datasets)

    detections = detections.reset_index(drop=True)
    image_props = get_image_props(detections)
    detections = detections_add_ytrue(detections, image_props, dataset)

    detections.index = detections.image_file

    print_flush('Ground truth object counts:')
    print_flush(detections.type.value_counts())

    classes = get_classnames(dataset)
    num_classes = len(classes) + 1

    keys = sorted(detections.image_file.unique())
    shuffle(keys)

    # 90 % of the images are used for training, the rest for validation
    num_train = int(round(0.9 * len(keys)))
    train_keys = keys[:num_train]
    val_keys = keys[num_train:]

    print_flush('Loading model...')
    model = SSD300((input_shape[1],input_shape[0],input_shape[2]), num_classes=num_classes)
    model.load_weights(ssd_path+'weights_SSD300.hdf5', by_name=True)

    print_flush("Making priors...")
    im_in = np.random.random((1,input_shape[1],input_shape[0],input_shape[2]))
    priors = model.predict(im_in,batch_size=1)[0, :, -8:]
    bbox_util = BBoxUtility(num_classes, priors)

    generator_kwargs = {
        'saturation_var': 0.5,
        'brightness_var': 0.5,
        'contrast_var': 0.5,
        'lighting_std': 0.5,
        'hflip_prob': 0.5,
        'vflip_prob': 0,
        'do_crop': True,
        'crop_area_range': [0.1, 1.0],
        'aspect_ratio_range': [0.5, 2]
        }

    path_prefix = ''
    gen = Generator(detections, bbox_util, batch_size, path_prefix,
                    train_keys, val_keys,
                    (input_shape[1], input_shape[0]), **generator_kwargs)

    # freeze several layers
    freeze_groups = [
              ['input_1', 'conv1_1', 'conv1_2', 'pool1'],
              ['conv2_1', 'conv2_2', 'pool2'],
              ['conv3_1', 'conv3_2', 'conv3_3', 'pool3'],
              ['conv4_1', 'conv4_2', 'conv4_3', 'pool4'],
              ['conv5_1', 'conv5_2', 'conv5_3', 'pool5'],
              ]
    # Clamp to the available groups; a negative count must not wrap around
    # and select groups from the end of the list.
    n_frozen_groups = max(0, min(frozen_layers, 5))

    # The groups have to be flattened into individual layer names. Testing a
    # layer name against the list of groups (lists) never matches, which
    # silently left every layer trainable.
    frozen_names = {name
                    for group in freeze_groups[:n_frozen_groups]
                    for name in group}

    for layer in model.layers:
        if layer.name in frozen_names:
            layer.trainable = False

    callbacks = [LearningRateScheduler(schedule)]

    optim = keras.optimizers.Adam(lr=BASE_LR / 10)
    model.compile(optimizer=optim, loss=MultiboxLoss(num_classes, neg_pos_ratio=2.0).compute_loss)

    print_flush("Training...")
    model.fit_generator(gen.generate(True), steps_per_epoch=gen.train_batches,
                        epochs=epochs, verbose=2, callbacks=callbacks,
                        validation_data=gen.generate(False), validation_steps=gen.val_batches, workers=1)

    print_flush("Auto-annotating...")
    masker = Masker(dataset)

    inputs = []
    impaths = []
    to_annotate = get_images_to_autoannotate(dataset)

    # rep_last needed since we use large batches, for speed, to make sure we run on all images
    for impath in rep_last(to_annotate, batch_size2):
        im = iio.imread(impath)
        im = masker.mask(im)
        resized = cv2.resize(im, (input_shape[0], input_shape[1]))
        inputs.append(resized)
        impaths.append(impath)

        if len(inputs) == batch_size2:
            inputs = np.array(inputs).astype(np.float64)
            inputs = preprocess_input(inputs)

            preds = model.predict(inputs, batch_size=batch_size, verbose=0)
            results = bbox_util.detection_out(preds, soft=soft)

            for result, res_path in zip(results, impaths):
                # NOTE(review): this iterates over the entries of a single
                # image's result; confirm against detection_out's return
                # format that an image without detections is handled as
                # intended here.
                result = [r if len(r) > 0 else np.zeros((1, 6)) for r in result]
                raw_detections = pd.DataFrame(np.vstack(result), columns=['class_index', 'confidence', 'xmin', 'ymin', 'xmax', 'ymax'])

                auto_path = res_path.replace('.jpg','.auto')

                # Sort detections by confidence, keeping the top ones
                # This seems to be more robust than a hard-coded confidence threshold
                # Note that a confidence threshold can be chosen in the annotation web UI
                n = 128
                dets = [x for x in pandas_loop(raw_detections)]
                dets.sort(key=lambda x: 1.0-x['confidence'])
                if len(dets) > n:
                    dets = dets[:n]

                with open(auto_path, 'w') as f:
                    for det in dets:
                        conf = round(det['confidence'],4)
                        # Boxes are stored as center position plus size
                        line = "{index} {cx} {cy} {w} {h} conf:{conf} {cn}\n".format(index=int(det['class_index']),
                                     cx = round((det['xmin']+det['xmax'])/2,4),
                                     cy = round((det['ymin']+det['ymax'])/2,4),
                                     w = round(det['xmax']-det['xmin'],4),
                                     h = round(det['ymax']-det['ymin'],4),
                                     conf=conf,
                                     cn = classes[int(det['class_index'])-1])
                        f.write(line)
                print_flush("Wrote {}".format(auto_path))

            inputs = []
            impaths = []

    assert(not inputs) # If this fails, not all images were processed!
    print_flush("Done!")