Example #1
def annotation_data(dataset_name):
    """ Gets general annotation data for a dataset. Basically everything that the
        annotation Web UI needs, like classes and their colors, which keys to press
        for each class, and progress.
    """

    try:
        classnames = get_classnames(dataset_name)
    except FileNotFoundError:
        return None
    else:
        # 'r' is intentionally kept in this string; it is filtered out below
        # instead, since class names can also contain the letter 'r'
        all_keys = 'abcdefghijklmnopqrstuvwxyz'

        colors = class_colors(len(classnames))

        out_colors = {}
        for cn, cc in zip(classnames, colors):
            out_colors[cn] = to_hex(cc)

        keys_list = []
        keys = set()
        for cn in classnames:
            cn = cn.lower()
            success = False
            # Prefer a letter from the class name itself
            for letter in cn:
                if letter not in keys and letter != 'r':  # 'R' is reserved for cancelling
                    keys_list.append(letter)
                    keys.add(letter)
                    success = True
                    break

            # Otherwise, pick a random unused letter
            while not success:
                letter = choice(all_keys)
                if letter not in keys and letter != 'r':
                    keys.add(letter)
                    keys_list.append(letter)
                    success = True

        # Convert to JavaScript keyCodes (these equal the uppercase ASCII codes)
        key_codes_list = [ord(x) - 32 for x in keys_list]
        # Upper case is nicer to visualize in the web UI
        keys_list = [x.upper() for x in keys_list]

        train_stats = get_annotation_stats(dataset_name, 'train')
        test_stats = get_annotation_stats(dataset_name, 'test')

        out = [
            classnames, out_colors, keys_list, key_codes_list, train_stats,
            test_stats
        ]

        return out
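A hedged usage sketch, assuming this function is importable and that a dataset named 'my_dataset' exists on disk (both names are hypothetical):

data = annotation_data('my_dataset')
if data is None:
    print("No such dataset")
else:
    # Unpacking order matches the list built at the end of the function
    classnames, colors, keys, key_codes, train_stats, test_stats = data
    print(dict(zip(classnames, keys)))  # which key annotates which class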
Example #2
def main(dataset, run, res, conf, invid, outvid):
    res = parse_resolution(res)
    model = get_model(dataset, run, input_shape=(res[0], res[1], 3))
    classnames = get_classnames(dataset)
    test_on_video(model,
                  dataset,
                  run,
                  invid,
                  outvid,
                  classnames,
                  width=res[0],
                  height=res[1],
                  input_shape=(res[0], res[1], 3),
                  conf_thresh=conf,
                  csv_conf_thresh=conf)
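A possible invocation, assuming parse_resolution accepts a string like '640x480x3' (the exact resolution-string format, file names, and values below are guesses):

main(dataset='my_dataset', run='default', res='640x480x3',
     conf=0.6, invid='input.mkv', outvid='output.mp4')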
Example #3
def detections_add_ytrue(detections, image_props, dataset):
    types = get_classnames(dataset)  # use the dataset's class order, not sorted(detections.type.unique())
    y_true_parts = []
    for image_file, det in detections.groupby('image_file'):
        xymin_xymax = np.array([
            det.xmin.values, det.ymin.values, det.xmax.values, det.ymax.values
        ]).T
        classes = np.array([[d.type == t for t in types]
                            for _, d in det.iterrows()],
                           dtype=np.uint8)
        y_true_parts.append(
            pd.Series(np.concatenate([xymin_xymax, classes], axis=1).tolist(),
                      index=det.index))
    # Series.append was removed in newer pandas; concatenate the parts instead
    y_true = pd.concat(y_true_parts)
    y_true = y_true.apply(lambda x: np.array(x)[np.newaxis, :])
    detections['y_true'] = y_true
    return detections
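For reference, a sketch of one y_true entry this produces per detection row (column order taken from the concatenation above; the numbers are invented):

import numpy as np

# One detection in a dataset with three classes:
# columns are [xmin, ymin, xmax, ymax, one-hot class indicator]
entry = np.array([0.10, 0.20, 0.35, 0.60, 0.0, 1.0, 0.0])[np.newaxis, :]
print(entry.shape)  # (1, 7)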
Example #4
def main(cmd, res, dataset, run, conf, fps, coords):
    res = parse_resolution(res)
    classnames = get_classnames(dataset)

    if coords not in ("pixels", "world"):
        raise ValueError("Unknown coords: {}".format(coords))

    local_output = False
    csvs = []
    if cmd == "findvids":
        if coords == "pixels":
            query = "{rp}{ds}_{r}/csv/*.csv".format(rp=runs_path, ds=dataset, r=run)
        elif coords == "world":
            query = "{rp}{ds}_{r}/detections_world/*.csv".format(rp=runs_path, ds=dataset, r=run)
            
        found = glob(query)
        found.sort()
        csvs.extend(found)
    else:
        csvs.append(cmd)
        local_output = True
    
    if coords == "pixels":
        out_folder = '{rp}{ds}_{r}/detections/'.format(rp=runs_path, ds=dataset, r=run)
    elif coords == "world":
        out_folder = '{rp}{ds}_{r}/detections_world/'.format(rp=runs_path, ds=dataset, r=run)
        
    mkdir(out_folder)
    
    for csv_path in csvs:
        vidname = right_remove(csv_path.split('/')[-1], '.csv')
        if coords == "world":
            vidname = right_remove(vidname, '_world')
            
        vid_path = "{dsp}{ds}/videos/{v}.mkv".format(dsp=datasets_path, ds=dataset, v=vidname)

        if local_output:
            outvid_path = '{}.mp4'.format(vidname)
        else:
            outvid_path = '{}{}.mp4'.format(out_folder, vidname)        
        
        detections = pd.read_csv(csv_path)
        detections_video(detections, vid_path, outvid_path, classnames, dataset, res, fps=fps, conf_thresh=conf, coords=coords)
        print_flush(outvid_path)
    
    print_flush("Done!")
Example #5
def main(cmd, res, dataset, run, conf, fps, coords):
    res = parse_resolution(res)
    classnames = get_classnames(dataset)

    if coords not in ("pixels", "world"):
        raise ValueError("Unknown coords: {}".format(coords))

    local_output = False
    csvs = []
    if cmd == "findvids":
        if coords == "pixels":
            found = (runs_path / "{}_{}".format(dataset,run) / "csv").glob('*.csv')
        elif coords == "world":
            found = (runs_path / "{}_{}".format(dataset,run) / "detections_world").glob('*.csv')
            
        found = list(found)
        found.sort()
        csvs.extend(found)
    else:
        csvs.append(cmd)
        local_output = True
    
    if coords == "pixels":
        out_folder = runs_path / "{}_{}".format(dataset,run) / "detections"
    elif coords == "world":
        out_folder = runs_path / "{}_{}".format(dataset,run) / "detections_world"
        
    mkdir(out_folder)
    
    for csv_path in csvs:
        vidname = csv_path.stem
        if coords == "world":
            vidname = right_remove(vidname, '_world')
        
        vid_path = datasets_path / dataset / "videos" / (vidname+'.mkv')    

        if local_output:
            outvid_path = Path('.') / '{}.mp4'.format(vidname)
        else:
            outvid_path = out_folder / '{}.mp4'.format(vidname)        
        
        detections = pd.read_csv(csv_path)
        detections_video(detections, vid_path, outvid_path, classnames, dataset, res, fps=fps, conf_thresh=conf, coords=coords)
        print_flush(outvid_path)
    
    print_flush("Done!")
Example #6
def slideshow(dataset, outpath, fps=10, repeat=20):

    ld = LoadDetections()
    dets = ld.custom(dataset)

    imfiles = list(set(dets.image_file))
    if not imfiles:
        return False

    cc = class_colors()

    mask = Masker(dataset)

    classnames = get_classnames(dataset)

    with io.get_writer(outpath, fps=fps) as vid:
        for imfile in imfiles:
            d = dets[dets.image_file == imfile]

            # Add "class_name" and "class_index" columns which are missing
            d = d.rename(index=str, columns={"type": "class_name"})
            indices = [1 + classnames.index(x) for x in d['class_name']]
            d['class_index'] = indices

            im = io.imread(imfile)
            im = mask.mask(im, alpha=0.5)

            width = float(im.shape[1])
            height = float(im.shape[0])
            frame = draw(im,
                         d,
                         cc,
                         conf_thresh=-1.0,
                         x_scale=width,
                         y_scale=height)

            for i in range(repeat):
                vid.append_data(frame)

    return True
Example #7
def validate_annotation(text, dataset):
    classnames = get_classnames(dataset)

    try:
        lines = text.split('\n')
        for line in lines:
            if line.isspace() or (not line):
                continue

            splot = line.split(' ')

            int(splot[0])
            float(splot[1])
            float(splot[2])
            float(splot[3])
            float(splot[4])

            assert (splot[5][0:3] == "px:")
            assert (splot[6][0:3] == "py:")

            px = splot[5][3:]  # slice off the 'px:' prefix (str.strip removes characters, not a prefix)
            py = splot[6][3:]

            if not (px == 'auto'):
                px = px.split(',')
                assert (len(px) == 4)
                for x in px:
                    float(x)

            if not (py == 'auto'):
                py = py.split(',')
                assert (len(py) == 4)
                for y in py:
                    float(y)

            assert (splot[7] in classnames)
    except (AssertionError, ValueError, IndexError):
        return False

    return True
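The checks above define the accepted format: one object per line with a class index, four floats, px:/py: fields that are either 'auto' or four comma-separated floats, and a class name. A hedged sketch (the dataset name and class names are hypothetical):

text = ("1 0.471 0.532 0.120 0.085 px:auto py:auto car\n"
        "2 0.250 0.300 0.050 0.040 px:0.1,0.3,0.3,0.1 py:0.1,0.1,0.3,0.3 bicycle\n")
print(validate_annotation(text, 'my_dataset'))  # True only if 'car' and 'bicycle' are classnames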
Example #8
def main(dataset, run, n_clips, clip_length):
    dc = DatasetConfig(dataset)
    rc = RunConfig(dataset, run)
    mask = Masker(dataset)
    classes = get_classnames(dataset)
    num_classes = len(classes) + 1
    calib = Calibration(dataset)

    dataset_path = "{dsp}{ds}/".format(dsp=datasets_path, ds=dataset)
    run_path = "{rp}{ds}_{r}/".format(rp=runs_path, ds=dataset, r=run)

    # Grab a bunch of videos
    vids_query = "{dsp}videos/*.mkv".format(dsp=dataset_path)
    all_vids = glob(vids_query)
    all_vids = [right_remove(x.split('/')[-1], '.mkv') for x in all_vids]

    all_vids.sort()

    vids = []

    if n_clips > len(all_vids):
        n_clips = len(all_vids)

    if n_clips == len(all_vids):
        vids = all_vids
    else:
        while len(vids) < n_clips:
            vid = choice(all_vids)
            if not vid in vids:
                vids.append(vid)

    print_flush(vids)

    # Find out what has been run on all of these videos, what to include
    include_klt = True
    include_pixeldets = True
    include_worlddets = True
    include_worldtracks = True

    klts = []
    pixeldets = []
    worlddets = []
    worldtracks = []

    # Point tracks need to be converted for faster access
    vidres = dc.get('video_resolution')
    kltres = dc.get('point_track_resolution')

    class KLTConfig(object):
        klt_x_factor = 0
        klt_y_factor = 0

    klt_config = KLTConfig()
    klt_config.klt_x_factor = vidres[0] / kltres[0]
    klt_config.klt_y_factor = vidres[1] / kltres[1]

    ssdres = rc.get('detector_resolution')
    x_scale = vidres[0] / ssdres[0]
    y_scale = vidres[1] / ssdres[1]

    colors = class_colors(num_classes)

    for vid in vids:
        f = get_klt_path(dataset_path, vid)
        if not isfile(f):
            include_klt = False
        else:
            klt = load(f)
            klt, klt_frames = convert_klt(klt, klt_config)
            pts = (klt, klt_frames, class_colors(n_cols_klts))
            klts.append(pts)

        f = get_pixeldet_path(run_path, vid)
        if not isfile(f):
            include_pixeldets = False
        else:
            dets = pd.read_csv(f)

            pixeldets.append((dets, colors, x_scale, y_scale))

        f = get_worlddet_path(run_path, vid)
        if not isfile(f):
            include_worlddets = False
        else:
            dets = pd.read_csv(f)

            worlddets.append((dets, colors, calib))

        f = get_worldtracks_path(run_path, vid)
        if not isfile(f):
            include_worldtracks = False
        else:
            tracks = load(f)
            worldtracks.append((tracks, class_colors(n_cols_tracks), calib))

    print_flush("Point tracks: {}".format(include_klt))
    print_flush("Pixel coordinate detections: {}".format(include_pixeldets))
    print_flush("World coordinate detections: {}".format(include_worlddets))
    print_flush("World coordinate tracks: {}".format(include_worldtracks))

    # Decide where to start and stop in the videos
    # Convert clip length from seconds to frames
    clip_length = clip_length * dc.get('video_fps')

    print_flush("Clip length in frames: {}".format(clip_length))

    clips = []
    for vid in vids:
        start, stop = make_clip(vid, clip_length, dataset_path)
        clips.append((start, stop))

    incs = [
        include_klt, include_pixeldets, include_worlddets, include_worldtracks
    ]
    funs = [klt_frame, pixeldet_frame, worlddet_frame, worldtracks_frame]
    dats = [klts, pixeldets, worlddets, worldtracks]
    nams = [
        "Point tracks", "Detections in pixel coordinates",
        "Detections in world coordinates", "Tracks in world coordinates"
    ]

    print_flush(clips)

    with iio.get_writer("{trp}summary.mp4".format(trp=run_path),
                        fps=dc.get('video_fps')) as outvid:
        for i_vid, vid in enumerate(vids):
            print_flush(vid)
            old_prog = 0

            with iio.get_reader("{dsp}videos/{v}.mkv".format(dsp=dataset_path,
                                                             v=vid)) as invid:
                start, stop = clips[i_vid]
                for i_frame in range(start, stop):
                    frame = invid.get_data(i_frame)

                    pieces = []

                    for inc, fun, dat, nam in zip(incs, funs, dats, nams):
                        if inc:
                            piece = fun(dat[i_vid],
                                        mask.mask(frame.copy(), alpha=0.5),
                                        i_frame)
                            draw_text(piece, vid, i_frame, nam)
                            pieces.append(piece)

                    outvid.append_data(join(pieces))

                    prog = float(i_frame - start) / (stop - start)
                    if prog - old_prog > 0.1:
                        print_flush("{}%".format(round(prog * 100)))
                        old_prog = prog

    print_flush("Done!")
Example #9
    def custom(self, datasets, train=True):
        """ Parameters:
                datasets     - list of datasets to get images from. The first 
                               one in the list is the "main" one, whose classes 
                               will be used. It can also be a single string, if 
                               only a single dataset is to be used.
                train        - training or test set
        """

        if isinstance(datasets, str):
            datasets = [datasets]

        classnames = get_classnames(datasets[0])
        classnames_set = set(classnames)

        imfiles = []
        types = []
        xmins = []
        ymins = []
        xmaxs = []
        ymaxs = []

        for dataset in datasets:
            this_classnames = get_classnames(dataset)

            if train:
                trainval = 'train'
            else:
                trainval = 'val'

            all_gts = glob('{dsp}{ds}/objects/{t}/*/*.txt'.format(
                dsp=datasets_path, ds=dataset, t=trainval))

            if len(all_gts) == 0:
                raise ValueError(
                    "Dataset '{}' doesn't have any ground truth files. Is it a correct dataset? Is there an extra space or something?"
                    .format(dataset))

            for gt_txt in all_gts:
                imfile = gt_txt.replace('.txt', '.jpg')

                with open(gt_txt, 'r') as f:
                    lines = [x.strip('\n') for x in f.readlines()]

                for line in lines:
                    splot = line.split(' ')
                    imfiles.append(imfile)

                    # Since we can import from different datasets, the class name needs to be
                    # checked, and marked as 'other' (the last class) if it doesn't exist
                    #types.append(classnames[int(splot[0])-1])
                    classname = this_classnames[int(splot[0]) - 1]
                    if classname in classnames_set:
                        types.append(classname)
                    else:
                        types.append(classnames[-1])

                    xc = float(splot[1])
                    yc = float(splot[2])
                    bw = float(splot[3])
                    bh = float(splot[4])

                    xmins.append((xc - bw / 2))
                    ymins.append((yc - bh / 2))
                    xmaxs.append((xc + bw / 2))
                    ymaxs.append((yc + bh / 2))

        detections = pd.DataFrame()
        detections['image_file'] = imfiles
        detections['type'] = types
        detections['xmin'] = xmins
        detections['ymin'] = ymins
        detections['xmax'] = xmaxs
        detections['ymax'] = ymaxs

        return detections
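A hedged usage sketch (the dataset names are hypothetical), mirroring how other examples in this listing call it:

ld = LoadDetections()
detections = ld.custom(['my_dataset', 'imported_dataset'], train=True)
print(detections.type.value_counts())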
Example #10
def main(cmd, dataset, run, vidres, ssdres, kltres, make_videos):
    vidres = parse_resolution(vidres)
    ssdres = parse_resolution(ssdres)
    kltres = parse_resolution(kltres)

    x_factor = float(vidres[0]) / ssdres[0]
    y_factor = float(vidres[1]) / ssdres[1]
    det_dims = ('xmin', 'xmax', 'ymin', 'ymax')
    det_factors = (x_factor, x_factor, y_factor, y_factor)

    calib = Calibration(dataset)
    ts = Timestamps(dataset)
    class_data = get_class_data(dataset)
    class_heights = {d['name']: d['height'] for d in class_data}

    class KLTConfig(object):
        klt_x_factor = 0
        klt_y_factor = 0

    klt_config = KLTConfig()
    klt_config.klt_x_factor = vidres[0] / kltres[0]
    klt_config.klt_y_factor = vidres[1] / kltres[1]

    if cmd == "findvids":
        from glob import glob
        vidnames = glob('{dsp}{ds}/videos/*.mkv'.format(dsp=datasets_path,
                                                        ds=dataset))
        vidnames = [right_remove(x.split('/')[-1], '.mkv') for x in vidnames]
        vidnames.sort()

        outfolder = '{}{}_{}/detections_world/'.format(runs_path, dataset, run)
    else:
        vidnames = [cmd]
        outfolder = './'

    mkdir(outfolder)

    if make_videos:
        classnames = get_classnames(dataset)
        dc = DatasetConfig(dataset)
        fps = dc.get('video_fps')

    for v in vidnames:
        print_flush(v)
        detections = pd.read_csv('{}{}_{}/csv/{}.csv'.format(
            runs_path, dataset, run, v))

        # Convert pixel coordinate positions from SSD resolution to video resolution
        # because Calibration assumes video resolution coordinates
        for dim, factor in zip(det_dims, det_factors):
            detections[dim] = round(detections[dim] * factor).astype(int)

        print_flush("Converting point tracks...")
        klt = load('{}{}/klt/{}.pklz'.format(datasets_path, dataset, v))
        klt, klt_frames = convert_klt(klt, klt_config)
        pts = PointTrackStructure(klt, klt_frames, vidres[0], vidres[1])

        outpath = '{of}{v}_world.csv'.format(of=outfolder, v=v)

        print_flush("Converting to world coordinates...")
        detections3D = detections_to_3D(detections,
                                        pts,
                                        calib,
                                        ts,
                                        v,
                                        klt_save_path=outpath.replace(
                                            '.csv', '_klt.pklz'),
                                        class_heights=class_heights)

        detections3D.to_csv(outpath, float_format='%.4f')

        if make_videos:
            from visualize_detections import detections_video
            vidpath = "{dsp}{ds}/videos/{v}.mkv".format(dsp=datasets_path,
                                                        ds=dataset,
                                                        v=v)

            print_flush("Rendering video...")
            detections_video(detections3D,
                             vidpath,
                             outpath.replace('.csv', '.mp4'),
                             classnames,
                             dataset,
                             vidres,
                             fps=fps,
                             conf_thresh=0.0,
                             coords='world')

    print_flush("Done!")
Example #11
def rare_class_mining(dataset, class_name, time_dist, sampling_rate,
                      import_datasets, input_shape, image_shape, batch_size,
                      batch_size2, epochs, frozen_layers, confidence):
    soft = False
    classes = get_classnames(dataset)

    ts = Timestamps(dataset)

    # Find all videos in dataset
    vidnames = list((datasets_path / dataset / "videos").glob('*.mkv'))

    all_found = []

    for v in vidnames:

        # Find video length from log file (computing this from the video file is too slow)
        log_file = (datasets_path / dataset / "logs" /
                    v.with_suffix('.log').name).read_text().split('\n')
        last = -1
        while not log_file[last]:
            last -= 1
        last_line = log_file[last]
        v_len = int(last_line.split(' ')[0])

        print_flush("{} of length {}".format(v, v_len))

        # Find existing annotations
        frames_log = (datasets_path / dataset / "objects" / "train" / v.stem /
                      "frames.log").read_text().split()
        # Remove the first line, which is the video name, and any empty lines
        frames_log = [x for x in frames_log[1:] if x]
        annotated = [int(x) for x in frames_log]
        print_flush("Avoiding the following existing frames: ")
        print_flush(str(annotated))

        curr_time = ts.get(v.stem, 0)

        annotated_times = [ts.get(v.stem, x) for x in annotated]

        found = []
        found_times = []
        done = False

        while not done:
            # Sample in time
            curr_time += timedelta(seconds=sampling_rate)
            curr_frame = ts.get_frame_number_given_vidname(curr_time, v.stem)

            if curr_frame >= v_len:
                # We have reached the end of the video
                done = True
                continue

            if curr_frame in annotated:
                continue

            # Check if we are too close to existing annotations
            dists = [
                abs((curr_time - x).total_seconds()) for x in annotated_times
            ]
            if any([(x <= time_dist) for x in dists]):
                continue

            # Check if we are too close to any previously chosen interesting frames
            dists = [abs((curr_time - x).total_seconds()) for x in found_times]
            if any([(x <= time_dist) for x in dists]):
                continue

            # This is a frame we could work with
            found.append(curr_frame)
            found_times.append(curr_time)

        all_found.append((v, found))

    print_flush("Candidate frames:")
    found_some = False
    for f in all_found:
        v, l = f
        print("{} : {}".format(v, l))
        if l:
            found_some = True

    if not found_some:
        print_flush("Found no interesting frames. Quitting...")
        import sys
        sys.exit(1)

    print_flush(
        "Starting to train object detector with existing annotations...")

    input_shape = parse_resolution(input_shape)
    image_shape = parse_resolution(image_shape)

    model, bbox_util = train(dataset,
                             import_datasets,
                             input_shape,
                             batch_size,
                             epochs,
                             frozen_layers,
                             train_amount=1.0)

    print_flush(
        "Applying the model to the images to find objects of type '{}'".format(
            class_name))

    masker = Masker(dataset)
    inputs = []
    frame_nums = []
    im_origs = []
    vids = []

    found_data = []

    for f in all_found:
        v, l = f

        with iio.get_reader(v) as vid:
            for frame_number in l:
                im_orig = vid.get_data(frame_number)
                im = im_orig.copy()
                im = masker.mask(im)

                resized = cv2.resize(im, (input_shape[0], input_shape[1]))
                inputs.append(resized)
                frame_nums.append(frame_number)
                im_origs.append(im_orig)
                vids.append(v)

                if len(inputs) == batch_size2:
                    tmp = process(inputs, frame_nums, im_origs, vids,
                                  confidence, class_name, soft, batch_size2,
                                  model, bbox_util, classes)

                    found_data.extend(tmp)

                    inputs = []
                    frame_nums = []
                    im_origs = []
                    vids = []

    if inputs:
        # There are still some leftovers
        tmp = process(inputs, frame_nums, im_origs, vids, confidence,
                      class_name, soft, len(inputs), model, bbox_util, classes)

        found_data.extend(tmp)

    print_flush("Writing images...")
    for x in found_data:
        v, f, im = x

        im_folder = datasets_path / dataset / "objects" / "train" / v.stem
        im_num = max([int(x.stem) for x in im_folder.glob('*.jpg')]) + 1
        im_path = im_folder / "{}.jpg".format(im_num)

        iio.imwrite(im_path, im)
        print_flush("Written {}".format(im_path))

        # Add the new frame numbers to frames.log for this video
        flog = im_folder / "frames.log"
        with flog.open('a') as log:
            log.write(str(f) + ' ')

    print_flush("Done!")
Example #12
def main(cmd, dataset, run, vidres, ssdres, kltres, make_videos):
    vidres = parse_resolution(vidres)
    ssdres = parse_resolution(ssdres)
    kltres = parse_resolution(kltres)

    x_factor = float(vidres[0]) / ssdres[0]
    y_factor = float(vidres[1]) / ssdres[1]
    det_dims = ('xmin', 'xmax', 'ymin', 'ymax')
    det_factors = (x_factor, x_factor, y_factor, y_factor)

    calib = Calibration(dataset)
    ts = Timestamps(dataset)
    class_data = get_class_data(dataset)
    class_heights = {d['name']: d['height'] for d in class_data}

    class KLTConfig(object):
        klt_x_factor = 0
        klt_y_factor = 0

    klt_config = KLTConfig()
    klt_config.klt_x_factor = vidres[0] / kltres[0]
    klt_config.klt_y_factor = vidres[1] / kltres[1]

    if cmd == "findvids":
        vidnames = list((datasets_path / dataset / "videos").glob('*.mkv'))
        vidnames = [x.stem for x in vidnames]
        vidnames.sort()

        outfolder = runs_path / '{}_{}'.format(dataset, run) / 'detections_world'
    else:
        vidnames = [cmd]
        outfolder = Path('.')

    mkdir(outfolder)

    if make_videos:
        classnames = get_classnames(dataset)
        dc = DatasetConfig(dataset)
        fps = dc.get('video_fps')

    for v in vidnames:
        print_flush(v)
        detections = pd.read_csv(runs_path / '{}_{}'.format(dataset, run) /
                                 'csv' / (v + '.csv'))

        # Convert pixel coordinate positions from SSD resolution to video resolution
        # because Calibration assumes video resolution coordinates
        for dim, factor in zip(det_dims, det_factors):
            detections[dim] = round(detections[dim] * factor).astype(int)

        print_flush("Converting point tracks...")
        klt = load(datasets_path / dataset / 'klt' / (v + '.pklz'))
        klt, klt_frames = convert_klt(klt, klt_config)
        pts = PointTrackStructure(klt, klt_frames, vidres[0], vidres[1])

        outpath = outfolder / '{v}_world.csv'.format(v=v)

        print_flush("Converting to world coordinates...")
        detections3D = detections_to_3D(
            detections,
            pts,
            calib,
            ts,
            v,
            klt_save_path=outpath.with_name(outpath.stem + '_klt.pklz'),
            class_heights=class_heights)

        detections3D.to_csv(outpath, float_format='%.4f')

        if make_videos:
            from visualize_detections import detections_video
            vidpath = datasets_path / dataset / "videos" / "{}.mkv".format(v)

            print_flush("Rendering video...")
            detections_video(detections3D,
                             vidpath,
                             outpath.with_suffix('.mp4'),
                             classnames,
                             dataset,
                             vidres,
                             fps=fps,
                             conf_thresh=0.0,
                             coords='world')

    print_flush("Done!")
Example #13
def autoannotate(dataset, import_datasets, input_shape, image_shape,
                 batch_size, batch_size2, epochs, frozen_layers):

    soft = False

    classes = get_classnames(dataset)

    input_shape = parse_resolution(input_shape)
    image_shape = parse_resolution(image_shape)

    model, bbox_util = train(dataset,
                             import_datasets,
                             input_shape,
                             batch_size,
                             epochs,
                             frozen_layers,
                             train_amount=1.0)

    print_flush("Auto-annotating...")
    masker = Masker(dataset)

    inputs = []
    impaths = []
    to_annotate = get_images_to_autoannotate(dataset)

    # rep_last needed since we use large batches, for speed, to make sure we run on all images
    for impath in rep_last(to_annotate, batch_size2):
        im = iio.imread(impath)
        im = masker.mask(im)
        resized = cv2.resize(im, (input_shape[0], input_shape[1]))
        inputs.append(resized)
        impaths.append(impath)

        if len(inputs) == batch_size2:
            inputs = np.array(inputs).astype(np.float64)
            inputs = preprocess_input(inputs)

            preds = model.predict(inputs, batch_size=batch_size2, verbose=0)
            results = bbox_util.detection_out(preds, soft=soft)

            for result, res_path in zip(results, impaths):
                result = [
                    r if len(r) > 0 else np.zeros((1, 6)) for r in result
                ]
                raw_detections = pd.DataFrame(np.vstack(result),
                                              columns=[
                                                  'class_index', 'confidence',
                                                  'xmin', 'ymin', 'xmax',
                                                  'ymax'
                                              ])

                auto_path = res_path.with_suffix('.auto')

                # Sort detections by confidence, keeping the top ones
                # This seems to be more robust than a hard-coded confidence threshold
                # Note that a confidence threshold can be chosen in the annotation web UI
                n = 128
                dets = [x for x in pandas_loop(raw_detections)]
                dets.sort(key=lambda x: 1.0 - x['confidence'])
                if len(dets) > n:
                    dets = dets[:n]

                with auto_path.open('w') as f:
                    for det in dets:
                        conf = round(det['confidence'], 4)
                        line = "{index} {cx} {cy} {w} {h} conf:{conf} {cn}\n".format(
                            index=int(det['class_index']),
                            cx=round((det['xmin'] + det['xmax']) / 2, 4),
                            cy=round((det['ymin'] + det['ymax']) / 2, 4),
                            w=round(det['xmax'] - det['xmin'], 4),
                            h=round(det['ymax'] - det['ymin'], 4),
                            conf=conf,
                            cn=classes[int(det['class_index']) - 1])
                        f.write(line)
                print_flush("Wrote {}".format(auto_path))

            inputs = []
            impaths = []

    assert (not inputs)  # If this fails, not all images were processed!
    print_flush("Done!")
Example #14
def autoannotate(dataset, import_datasets, input_shape, image_shape, batch_size, batch_size2, epochs, frozen_layers):
    soft = False

    input_shape = parse_resolution(input_shape)
    image_shape = parse_resolution(image_shape)
    
    print_flush("Loading ground truth...")
    load_detections = LoadDetections()
    datasets = [dataset]
    if import_datasets:
        datasets.extend(import_datasets.split(','))

    detections = load_detections.custom(datasets)
    
    detections = detections.reset_index(drop=True)   
    image_props = get_image_props(detections)
    detections = detections_add_ytrue(detections, image_props, dataset)
    
    detections.index = detections.image_file
    
    print_flush('Ground truth object counts:')
    print_flush(detections.type.value_counts())
    
    classes = get_classnames(dataset)
    num_classes = len(classes) + 1
    
    keys = sorted(detections.image_file.unique())
    shuffle(keys)
    
    num_train = int(round(0.9 * len(keys)))
    train_keys = keys[:num_train]
    val_keys = keys[num_train:]

    print_flush('Loading model...')
    model = SSD300((input_shape[1],input_shape[0],input_shape[2]), num_classes=num_classes)  
    model.load_weights(ssd_path+'weights_SSD300.hdf5', by_name=True)
    
    print_flush("Making priors...")    
    im_in = np.random.random((1,input_shape[1],input_shape[0],input_shape[2]))
    priors = model.predict(im_in,batch_size=1)[0, :, -8:]
    bbox_util = BBoxUtility(num_classes, priors)
    
    generator_kwargs = {
        'saturation_var': 0.5,
        'brightness_var': 0.5,
        'contrast_var': 0.5,
        'lighting_std': 0.5,
        'hflip_prob': 0.5,
        'vflip_prob': 0,
        'do_crop': True,
        'crop_area_range': [0.1, 1.0],
        'aspect_ratio_range': [0.5, 2]
        }

    path_prefix = ''
    gen = Generator(detections, bbox_util, batch_size, path_prefix,
                    train_keys, val_keys,
                    (input_shape[1], input_shape[0]), **generator_kwargs)

    # freeze several layers
    freeze = [
              ['input_1', 'conv1_1', 'conv1_2', 'pool1'],
              ['conv2_1', 'conv2_2', 'pool2'],
              ['conv3_1', 'conv3_2', 'conv3_3', 'pool3'],
              ['conv4_1', 'conv4_2', 'conv4_3', 'pool4'],
              ['conv5_1', 'conv5_2', 'conv5_3', 'pool5'],
              ][:min(frozen_layers, 5)]

    for L in model.layers:
        if L.name in freeze:
            L.trainable = False
    
    callbacks = [LearningRateScheduler(schedule)]
    
    optim = keras.optimizers.Adam(lr=BASE_LR / 10)
    model.compile(optimizer=optim, loss=MultiboxLoss(num_classes, neg_pos_ratio=2.0).compute_loss)
    
    print_flush("Training...")
    history = model.fit_generator(gen.generate(True), steps_per_epoch=gen.train_batches,
                                  epochs=epochs, verbose=2, callbacks=callbacks,
                                  validation_data=gen.generate(False), validation_steps=gen.val_batches, workers=1)
  
    print_flush("Auto-annotating...")
    masker = Masker(dataset)
    
    inputs = []
    impaths = []
    to_annotate = get_images_to_autoannotate(dataset)
    
    # rep_last needed since we use large batches, for speed, to make sure we run on all images
    for impath in rep_last(to_annotate, batch_size2):
        im = iio.imread(impath)
        im = masker.mask(im)
        resized = cv2.resize(im, (input_shape[0], input_shape[1]))
        inputs.append(resized)
        impaths.append(impath)
        
        if len(inputs) == batch_size2:
            inputs = np.array(inputs).astype(np.float64)
            inputs = preprocess_input(inputs)
            
            preds = model.predict(inputs, batch_size=batch_size2, verbose=0)  # 'inputs' holds batch_size2 images
            results = bbox_util.detection_out(preds, soft=soft)
            
            for result, res_path in zip(results, impaths):
                result = [r if len(r) > 0 else np.zeros((1, 6)) for r in result]
                raw_detections = pd.DataFrame(np.vstack(result), columns=['class_index', 'confidence', 'xmin', 'ymin', 'xmax', 'ymax'])
                
                auto_path = res_path.replace('.jpg','.auto')
                
                # Sort detections by confidence, keeping the top ones
                # This seems to be more robust than a hard-coded confidence threshold
                # Note that a confidence threshold can be chosen in the annotation web UI
                n = 128
                dets = [x for x in pandas_loop(raw_detections)]
                dets.sort(key=lambda x: 1.0-x['confidence'])
                if len(dets) > n:
                    dets = dets[:n]
                
                with open(auto_path, 'w') as f:
                    for det in dets:
                        conf = round(det['confidence'],4)
                        line = "{index} {cx} {cy} {w} {h} conf:{conf} {cn}\n".format(index=int(det['class_index']),
                                     cx = round((det['xmin']+det['xmax'])/2,4),
                                     cy = round((det['ymin']+det['ymax'])/2,4),
                                     w = round(det['xmax']-det['xmin'],4),
                                     h = round(det['ymax']-det['ymin'],4),
                                     conf=conf,
                                     cn = classes[int(det['class_index'])-1])
                        f.write(line)
                print_flush("Wrote {}".format(auto_path))
                
            inputs = []
            impaths = []
            
    assert(not inputs) # If this fails, not all images were processed!
    print_flush("Done!")
Example #15
def main(dataset, run, input_shape, seq_start, seq_stop, videopath, conf_thresh, i_seq, outname, batch_size):
    
    print_flush("> Predicting...")
    classes = get_classnames(dataset)
    masker = Masker(dataset)
    
    input_shape = parse_resolution(input_shape)
    
    num_classes = len(classes)+1
    model = get_model(dataset, run, input_shape, num_classes, verbose=False)
    priors = get_priors(model, input_shape)
    bbox_util = BBoxUtility(num_classes, priors)
    
    
    width = input_shape[0]
    height = input_shape[1]
    
    inputs = []
    outputs = []
    old_frame = None
    
    with io.get_reader(videopath) as vid: 
        vlen = len(vid)
        for i_in_seq in range(seq_start, seq_stop):
            if i_in_seq < vlen:
                frame = vid.get_data(i_in_seq)
                frame = masker.mask(frame)
                old_frame = frame
            else:
                # Past the end of the video: repeat the last valid frame so the
                # final batch can still be filled
                frame = old_frame
                
            resized = cv2.resize(frame, (width, height))
            inputs.append(resized)
            
            if len(inputs) == batch_size:
                inputs2 = np.array(inputs)
                inputs2 = inputs2.astype(np.float32)
                inputs2 = preprocess_input(inputs2)
                
                y = model.predict_on_batch(inputs2)
                outputs.append(y)
                
                inputs = []     
        
    preds = np.vstack(outputs)
    
    print_flush("> Processing...")
    all_detections = []   
    seq_len = seq_stop - seq_start
         
    for i in range(seq_len):
        frame_num = i + seq_start
        
        if frame_num < vlen:           
            pred = preds[i, :]
            pred = pred.reshape(1, pred.shape[0], pred.shape[1])
            results = bbox_util.detection_out(pred, soft=False)

            detections = process_results(results, width, height, classes, conf_thresh, frame_num)
            all_detections.append(detections)
    
    dets = pd.concat(all_detections)
    
    # For the first sequence, we should open in write mode, and then in append mode
    # This way, we still overwrite the file if this script is run multiple times
    open_mode = 'a'
    include_header = False
    if i_seq == 0:
        open_mode = 'w'
        include_header = True

    print_flush("> Writing to {} ...".format(outname))    
    with open(outname, open_mode) as f:
        dets.to_csv(f, header=include_header) 
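A hedged sketch of driving this over a whole video in consecutive chunks (the argument values and resolution-string format are guesses): i_seq == 0 overwrites the CSV and later chunks append. The chunk length should be a multiple of batch_size, since frames left in an incomplete batch are never predicted:

for i_seq, start in enumerate(range(0, 3072, 512)):
    main('my_dataset', 'default', '300x300x3', start, start + 512,
         'video.mkv', 0.6, i_seq, 'detections.csv', batch_size=32)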
Example #16
def main(batch_size, max_images, epochs, name, import_datasets, frozen_layers,
         experiment, train_data_dir, input_shape, image_shape, memory_fraction,
         do_crop):
    from keras.backend.tensorflow_backend import set_session
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = memory_fraction
    set_session(tf.Session(config=config))

    run_name = "{}_{}".format(name, experiment)

    input_shape = parse_resolution(input_shape)
    image_shape = parse_resolution(image_shape)

    load_detections = LoadDetections()
    session = tf.Session()
    K.set_session(session)
    log('Started TensorFlow session')
    log('Chosen input_shape is {}'.format(input_shape))
    detections_file = runs_path / run_name / "detections.pickle"
    mkdir(runs_path / run_name)

    logging.basicConfig(filename=str(runs_path / run_name / "trainlog.log"),
                        level=logging.INFO)

    try:
        githash = subprocess.check_output(
            ['git', 'rev-parse', 'HEAD']).strip()[0:6].decode('utf-8')
        log("Git hash: {}".format(githash))
    except subprocess.CalledProcessError:
        pass

    log('Loading detections')

    datasets = [name]
    if import_datasets:
        datasets.extend(import_datasets.split(','))
        log('Using these datasets: ' + str(datasets))

    detections = load_detections.custom(datasets)

    log('Detections loaded')
    log('Calculating image properties')
    detections = detections.reset_index(drop=True)
    image_props = get_image_props(detections)
    log('Image properties created')

    log('Adding y_true to detections')
    detections = detections_add_ytrue(detections, image_props, name)

    detections.index = detections.image_file
    print(' ')
    print('Detection frequencies:')
    print(detections.type.value_counts())
    print(' ')
    classes = get_classnames(name)  # use the dataset's class order, not sorted(detections.type.unique())
    num_classes = len(classes) + 1

    log('Loading priors')

    keys = sorted(detections.image_file.unique())
    shuffle(keys)
    if max_images > 0:
        keys = keys[:max_images]
    num_train = int(round(0.9 * len(keys)))
    if num_train == len(keys):
        num_train -= 1
    train_keys = keys[:num_train]
    val_keys = keys[num_train:]
    train_keys_file = runs_path / run_name / "train_keys.pickle"
    log('Saving training keys to: {}'.format(train_keys_file))
    with train_keys_file.open('wb') as f:
        pickle.dump(train_keys, f)  # pickle the list itself, not its string representation
    val_keys_file = runs_path / run_name / "val_keys.pickle"
    log('Saving validation keys to: {}'.format(val_keys_file))
    with val_keys_file.open('wb') as f:
        pickle.dump(val_keys, f)

    log('Loading model')
    model = SSD300((input_shape[1], input_shape[0], input_shape[2]),
                   num_classes=num_classes)
    model.load_weights(ssd_path / "weights_SSD300.hdf5", by_name=True)

    log('Generating priors')
    im_in = np.random.random(
        (1, input_shape[1], input_shape[0], input_shape[2]))
    priors = model.predict(im_in, batch_size=1)[0, :, -8:]
    bbox_util = BBoxUtility(num_classes, priors)

    generator_kwargs = {
        'saturation_var': 0.5,
        'brightness_var': 0.5,
        'contrast_var': 0.5,
        'lighting_std': 0.5,
        'hflip_prob': 0.5,
        'vflip_prob': 0,
        'do_crop': do_crop,
        'crop_area_range': [0.1, 1.0],
        'aspect_ratio_range': [0.5, 2]
    }

    path_prefix = ''
    gen = Generator(detections, bbox_util, batch_size, path_prefix, train_keys,
                    val_keys, (input_shape[1], input_shape[0]),
                    **generator_kwargs)

    # freeze several layers
    # freeze = []
    freeze = [
        ['input_1', 'conv1_1', 'conv1_2', 'pool1'],
        ['conv2_1', 'conv2_2', 'pool2'],
        ['conv3_1', 'conv3_2', 'conv3_3', 'pool3'],
        ['conv4_1', 'conv4_2', 'conv4_3', 'pool4'],
        ['conv5_1', 'conv5_2', 'conv5_3', 'pool5'],
    ][:min(frozen_layers, 5)]

    for L in model.layers:
        if L.name in freeze:
            L.trainable = False
    mkdir(runs_path / run_name / "checkpoints")
    shutil.rmtree(str(runs_path / run_name / "logs"), ignore_errors=True)
    mkdir(runs_path / run_name / "logs")

    callbacks = [
        ModelCheckpoint(str(runs_path / run_name / 'checkpoints') +
                        '/weights.{epoch:02d}-{val_loss:.2f}.hdf5',
                        verbose=2,
                        save_weights_only=True),
        TensorBoard(log_dir=str(runs_path / run_name / "logs"),
                    write_graph=False),
        LearningRateScheduler(schedule)
    ]

    optim = keras.optimizers.Adam(lr=BASE_LR / 10)
    # optim = keras.optimizers.RMSprop(lr=BASE_LR / 10)
    model.compile(optimizer=optim,
                  loss=MultiboxLoss(num_classes,
                                    neg_pos_ratio=2.0).compute_loss)

    log('Running model')
    history = model.fit_generator(gen.generate(True),
                                  steps_per_epoch=gen.train_batches,
                                  epochs=epochs,
                                  verbose=2,
                                  callbacks=callbacks,
                                  validation_data=gen.generate(False),
                                  validation_steps=gen.val_batches,
                                  workers=1)
    log('Done training model')
    session.close()
    log('Session closed, starting to write results')
    results = pd.DataFrame(history.history).unstack().reset_index(0)
    results = results.rename(columns={'level_0': 'type', 0: 'value'})

    x1 = []
    y1 = []
    x2 = []
    y2 = []
    for row in pandas_loop(results):
        if row['type'] == 'loss':
            x1.append(row['_'])
            y1.append(row['value'])
        elif row['type'] == 'val_loss':
            x2.append(row['_'])
            y2.append(row['value'])

    plot_path = runs_path / run_name / "training.png"
    multi_plot([x1, x2], [y1, y2],
               plot_path,
               xlabel='epochs',
               ylabel='loss',
               title='Training',
               legend=['loss', 'validation loss'])

    results.to_csv(runs_path / run_name / "results.csv")

    log('Cleaning up non-optimal weights...')
    cleanup(name, experiment)

    log('Finished TensorFlow session')
    print_flush('Done!')
Example #17
def train(dataset,
          import_datasets,
          input_shape,
          batch_size,
          epochs,
          frozen_layers,
          train_amount=0.9):

    print_flush("Loading ground truth...")
    load_detections = LoadDetections()
    datasets = [dataset]
    if import_datasets:
        datasets.extend(import_datasets.split(','))

    detections = load_detections.custom(datasets)

    detections = detections.reset_index(drop=True)
    image_props = get_image_props(detections)
    detections = detections_add_ytrue(detections, image_props, dataset)

    detections.index = detections.image_file

    print_flush('Ground truth object counts:')
    print_flush(detections.type.value_counts())

    classes = get_classnames(dataset)
    num_classes = len(classes) + 1

    keys = sorted(detections.image_file.unique())
    shuffle(keys)

    if train_amount < 1.0:
        num_train = int(round(train_amount * len(keys)))
        train_keys = keys[:num_train]
        val_keys = keys[num_train:]
    else:
        train_keys = keys

        # Not a very good validation set, but whatever.
        # The ability to train on all the images is important when annotations are sparse,
        # like when doing autoannotation
        val_keys = [keys[0]]

    print_flush('Loading model...')
    model = SSD300((input_shape[1], input_shape[0], input_shape[2]),
                   num_classes=num_classes)
    model.load_weights(ssd_path / 'weights_SSD300.hdf5', by_name=True)

    print_flush("Making priors...")
    im_in = np.random.random(
        (1, input_shape[1], input_shape[0], input_shape[2]))
    priors = model.predict(im_in, batch_size=1)[0, :, -8:]
    bbox_util = BBoxUtility(num_classes, priors)

    generator_kwargs = {
        'saturation_var': 0.5,
        'brightness_var': 0.5,
        'contrast_var': 0.5,
        'lighting_std': 0.5,
        'hflip_prob': 0.5,
        'vflip_prob': 0,
        'do_crop': True,
        'crop_area_range': [0.1, 1.0],
        'aspect_ratio_range': [0.5, 2]
    }

    path_prefix = ''
    gen = Generator(detections, bbox_util, batch_size, path_prefix, train_keys,
                    val_keys, (input_shape[1], input_shape[0]),
                    **generator_kwargs)

    # freeze several layers
    freeze = [
        ['input_1', 'conv1_1', 'conv1_2', 'pool1'],
        ['conv2_1', 'conv2_2', 'pool2'],
        ['conv3_1', 'conv3_2', 'conv3_3', 'pool3'],
        ['conv4_1', 'conv4_2', 'conv4_3', 'pool4'],
        ['conv5_1', 'conv5_2', 'conv5_3', 'pool5'],
    ][:min(frozen_layers, 5)]

    for L in model.layers:
        if L.name in freeze:
            L.trainable = False

    callbacks = [LearningRateScheduler(schedule)]

    optim = keras.optimizers.Adam(lr=BASE_LR / 10)
    model.compile(optimizer=optim,
                  loss=MultiboxLoss(num_classes,
                                    neg_pos_ratio=2.0).compute_loss)

    print_flush("Training...")
    history = model.fit_generator(gen.generate(True),
                                  steps_per_epoch=gen.train_batches,
                                  epochs=epochs,
                                  verbose=2,
                                  callbacks=callbacks,
                                  validation_data=gen.generate(False),
                                  validation_steps=gen.val_batches,
                                  workers=1)

    return model, bbox_util