Code Example #1
File: plot_loss.py  Project: dbofseuofhust/FTMU
def smoother(y, interval=100):
	avg = MovingAverage(interval)

	for i in range(len(y)):
		avg.append(y[i])
		y[i] = avg.get_avg()
	
	return y
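
Every snippet on this page leans on YOLACT's MovingAverage helper. The sketch below is a minimal stand-in, assuming only the interface these examples actually call (add/append to push a sample, get_avg to read the windowed mean, reset to clear it); it is provided for context and is not the project's real implementation.

from collections import deque

class MovingAverage:
    """Minimal stand-in for YOLACT's MovingAverage (assumed interface)."""

    def __init__(self, max_window_size=1000):
        self.window = deque(maxlen=max_window_size)

    def add(self, elem):
        # Push a sample; the oldest one falls off once the window is full.
        self.window.append(elem)

    # plot_loss.py calls append(); treat it as an alias of add() here.
    append = add

    def get_avg(self):
        # Mean of the samples currently in the window (0 if empty).
        return sum(self.window) / len(self.window) if self.window else 0

    def reset(self):
        self.window.clear()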
Code Example #2
def evaluate(net,
             dataset,
             max_num=-1,
             during_training=False,
             cocoapi=False,
             traditional_nms=False):
    frame_times = MovingAverage()
    dataset_size = len(dataset) if max_num < 0 else min(max_num, len(dataset))
    dataset_indices = list(range(len(dataset)))
    dataset_indices = dataset_indices[:dataset_size]
    progress_bar = ProgressBar(40, dataset_size)

    # For each class and iou, stores tuples (score, isPositive)
    # Index ap_data[type][iouIdx][classIdx]
    ap_data = {
        'box': [[APDataObject() for _ in cfg.dataset.class_names]
                for _ in iou_thresholds],
        'mask': [[APDataObject() for _ in cfg.dataset.class_names]
                 for _ in iou_thresholds]
    }
    make_json = Make_json()

    for i, image_idx in enumerate(dataset_indices):
        timer.reset()

        with timer.env('Data loading'):
            img, gt, gt_masks, h, w, num_crowd = dataset.pull_item(image_idx)

            batch = img.unsqueeze(0)
            if cuda:
                batch = batch.cuda()

        with timer.env('Network forward'):
            #changed
            net_outs = net(batch)
            nms_outs = NMS(net_outs, traditional_nms)
            prep_metrics(ap_data, nms_outs, gt, gt_masks, h, w, num_crowd,
                         dataset.ids[image_idx], make_json, cocoapi)

        # First couple of images take longer because we're constructing the graph.
        # Since that's technically initialization, don't include those in the FPS calculations.
        fps = 0
        if i > 1 and not during_training:
            frame_times.add(timer.total_time())
            fps = 1 / frame_times.get_avg()

        progress = (i + 1) / dataset_size * 100
        progress_bar.set_val(i + 1)
        print('\rProcessing:  %s  %d / %d (%.2f%%)  %.2f fps  ' %
              (repr(progress_bar), i + 1, dataset_size, progress, fps),
              end='')
    table, box_row, mask_row = calc_map(ap_data)
    print(table)
    return table, box_row, mask_row
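
A small hedged illustration of the ap_data indexing convention described in the comments above (the index order is taken from the snippet itself):

# ap_data[type][iou_idx][class_idx] holds one APDataObject per
# (IoU threshold, class) pair, e.g. the box-AP accumulator for the
# first class at the first IoU threshold (typically 0.50):
box_ap_first_class = ap_data['box'][0][0]
mask_ap_first_class = ap_data['mask'][0][0]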
Code Example #3
File: eval.py  Project: prashiyn/yolact
def savevideo(net: Yolact, in_path: str, out_path: str):

    vid = cv2.VideoCapture(in_path)

    target_fps = round(vid.get(cv2.CAP_PROP_FPS))
    frame_width = round(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = round(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    num_frames = round(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*"mp4v"),
                          target_fps, (frame_width, frame_height))
    frame_freq = 10
    transform = FastBaseTransform()
    frame_times = MovingAverage()
    progress_bar = ProgressBar(30, num_frames)
    preds = None
    try:
        for i in range(num_frames):
            timer.reset()
            with timer.env('Video'):
                # process only every 10th frame
                # (care must be taken that the first frame is always read)
                frame = torch.from_numpy(vid.read()[1]).cuda().float()
                # need to adjust for multi frame
                if i % frame_freq == 0:
                    batch = transform(frame.unsqueeze(0))
                    preds = net(batch)
                current_preds = make_copy(preds)
                processed = prep_display(current_preds,
                                         frame,
                                         None,
                                         None,
                                         undo_transform=False,
                                         class_color=True)
                out.write(processed)
            if i > 1:
                frame_times.add(timer.total_time())
                fps = 1 / frame_times.get_avg()
                progress = (i + 1) / num_frames * 100
                progress_bar.set_val(i + 1)

                print(
                    '\rProcessing Frames  %s %6d / %6d (%5.2f%%)    %5.2f fps        '
                    % (repr(progress_bar), i + 1, num_frames, progress, fps),
                    end='')
    except KeyboardInterrupt:
        print('Stopping early.')

    vid.release()
    out.release()
    print()
Code Example #4
File: eval.py  Project: yasudakn/yolact
    def play_video():
        nonlocal frame_buffer, running, video_fps, is_webcam, slide_num

        video_frame_times = MovingAverage(100)
        frame_time_stabilizer = frame_time_target
        last_time = None
        stabilizer_step = 0.0005

        while running:
            frame_time_start = time.time()

            if not frame_buffer.empty():
                next_time = time.time()
                if last_time is not None:
                    video_frame_times.add(next_time - last_time)
                    video_fps = 1 / video_frame_times.get_avg()
                cv2.imshow(path, frame_buffer.get())
                last_time = next_time

            key_press = cv2.waitKey(1) & 0xff
            if key_press == 27: # Press Escape to close
                running = False
            elif key_press == ord('n'):
                if slide_num < len(bg_imgs) - 1:
                    slide_num = slide_num + 1
            elif key_press == ord('b'):
                if 0 < slide_num:
                    slide_num = slide_num - 1

            buffer_size = frame_buffer.qsize()
            if buffer_size < args.video_multiframe:
                frame_time_stabilizer += stabilizer_step
            elif buffer_size > args.video_multiframe:
                frame_time_stabilizer -= stabilizer_step
                if frame_time_stabilizer < 0:
                    frame_time_stabilizer = 0

            new_target = frame_time_stabilizer if is_webcam else max(frame_time_stabilizer, frame_time_target)

            next_frame_target = max(2 * new_target - video_frame_times.get_avg(), 0)
            target_time = frame_time_start + next_frame_target - 0.001 # Let's just subtract a millisecond to be safe
            # This gives more accurate timing than if sleeping the whole amount at once
            while time.time() < target_time:
                time.sleep(0.001)
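
Code Examples #4 to #6, and the play_video() closures in #7 and #9, all repeat the same pacing idea: nudge a stabilizer up or down based on how full the frame buffer is, then sleep in 1 ms slices until the computed target time. The sketch below isolates that controller under assumed names (buffer_size, target_multiframe); it is an illustration, not code from any of the forks.

import time

def pace_frame(frame_time_start, frame_time_stabilizer, avg_frame_time,
               buffer_size, target_multiframe, step=0.0005):
    """Sketch of the buffer-driven frame pacing used by play_video()."""
    # Producer falling behind (small buffer): allow more time per frame.
    # Producer running ahead (large buffer): tighten the per-frame budget.
    if buffer_size < target_multiframe:
        frame_time_stabilizer += step
    elif buffer_size > target_multiframe:
        frame_time_stabilizer = max(frame_time_stabilizer - step, 0)

    # Aim for twice the target minus the recent average so the running
    # average is pulled back toward the target frame time.
    next_frame_target = max(2 * frame_time_stabilizer - avg_frame_time, 0)
    target_time = frame_time_start + next_frame_target - 0.001  # 1 ms safety margin

    # Sleeping in 1 ms slices is more accurate than one long sleep.
    while time.time() < target_time:
        time.sleep(0.001)

    return frame_time_stabilizer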
Code Example #5
        def play_video():
            nonlocal frame_buffer, running, video_fps, is_webcam

            video_frame_times = MovingAverage(100)
            frame_time_stabilizer = frame_time_target
            last_time = None
            stabilizer_step = 0.0005

            while running:
                frame_time_start = time.time()

                if not frame_buffer.empty():
                    next_time = time.time()
                    if last_time is not None:
                        video_frame_times.add(next_time - last_time)
                        video_fps = 1 / video_frame_times.get_avg()
                    cv2.imshow(path, frame_buffer.get())
                    # print("how many masks ", frame_buffer.shape)
                    last_time = next_time

                #self.image_pub.publish(self.bridge.cv2_to_imgmsg(frame_buffer.get(), "bgr8"))

                if cv2.waitKey(1) == 27:  # Press Escape to close
                    running = False

                buffer_size = frame_buffer.qsize()
                if buffer_size < args.video_multiframe:
                    frame_time_stabilizer += stabilizer_step
                elif buffer_size > args.video_multiframe:
                    frame_time_stabilizer -= stabilizer_step
                    if frame_time_stabilizer < 0:
                        frame_time_stabilizer = 0

                new_target = frame_time_stabilizer if is_webcam else max(
                    frame_time_stabilizer, frame_time_target)

                next_frame_target = max(
                    2 * new_target - video_frame_times.get_avg(), 0)
                target_time = frame_time_start + next_frame_target - 0.001  # Let's just subtract a millisecond to be safe

                # This gives more accurate timing than if sleeping the whole amount at once
                while time.time() < target_time:
                    time.sleep(0.001)
Code Example #6
File: eval.py  Project: xychen9459/yolact
    def play_video():
        nonlocal frame_buffer, running, video_fps

        video_frame_times = MovingAverage(100)
        frame_time_stabilizer = frame_time_target
        last_time = None
        stabilizer_step = 0.0005

        while running:
            frame_time_start = time.time()

            if not frame_buffer.empty():
                next_time = time.time()
                if last_time is not None:
                    video_frame_times.add(next_time - last_time)
                    video_fps = 1 / video_frame_times.get_avg()
                cv2.imshow(path, frame_buffer.get())
                last_time = next_time

            if cv2.waitKey(1) == 27:  # Press Escape to close
                running = False

            buffer_size = frame_buffer.qsize()
            if buffer_size < args.video_multiframe:
                frame_time_stabilizer += stabilizer_step
            elif buffer_size > args.video_multiframe:
                frame_time_stabilizer -= stabilizer_step
                if frame_time_stabilizer < 0:
                    frame_time_stabilizer = 0

            next_frame_target = max(
                2 * max(frame_time_stabilizer, frame_time_target) -
                video_frame_times.get_avg(), 0)
            target_time = frame_time_start + next_frame_target - 0.001  # Let's just subtract a millisecond to be safe
            # This gives more accurate timing than if sleeping the whole amount at once
            while time.time() < target_time:
                time.sleep(0.001)
Code Example #7
File: eval.py  Project: li-xl/Yolact.jittor
def evalvideo(net: Yolact, path: str, out_path: str = None):
    # If the path is a digit, parse it as a webcam index
    is_webcam = path.isdigit()

    # If the input image size is constant, this makes things faster (hence why we can use it in a video setting).
    #cudnn.benchmark = True

    if is_webcam:
        vid = cv2.VideoCapture(int(path))
    else:
        vid = cv2.VideoCapture(path)

    if not vid.isOpened():
        print('Could not open video "%s"' % path)
        exit(-1)

    target_fps = round(vid.get(cv2.CAP_PROP_FPS))
    frame_width = round(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = round(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))

    if is_webcam:
        num_frames = float('inf')
    else:
        num_frames = round(vid.get(cv2.CAP_PROP_FRAME_COUNT))

    transform = FastBaseTransform()
    frame_times = MovingAverage(100)
    fps = 0
    frame_time_target = 1 / target_fps
    running = True
    fps_str = ''
    vid_done = False
    frames_displayed = 0

    if out_path is not None:
        out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*"mp4v"),
                              target_fps, (frame_width, frame_height))

    def cleanup_and_exit():
        print()
        pool.terminate()
        vid.release()
        if out_path is not None:
            out.release()
        cv2.destroyAllWindows()
        exit()

    def get_next_frame(vid):
        frames = []
        for idx in range(args.video_multiframe):
            frame = vid.read()[1]
            if frame is None:
                return frames
            frames.append(frame)
        return frames

    def transform_frame(frames):
        with jt.no_grad():
            frames = [jt.array(frame).float() for frame in frames]
            return frames, transform(jt.stack(frames, 0))

    def eval_network(inp):
        with jt.no_grad():
            frames, imgs = inp
            num_extra = 0
            while imgs.size(0) < args.video_multiframe:
                imgs = jt.contrib.concat([imgs, imgs[0].unsqueeze(0)], dim=0)
                num_extra += 1
            out = net(imgs)
            if num_extra > 0:
                out = out[:-num_extra]
            return frames, out

    def prep_frame(inp, fps_str):
        with jt.no_grad():
            frame, preds = inp
            return prep_display(preds,
                                frame,
                                None,
                                None,
                                undo_transform=False,
                                class_color=True,
                                fps_str=fps_str)

    frame_buffer = Queue()
    video_fps = 0

    # All this timing code is here to keep playback close to the target frame rate
    def play_video():
        try:
            nonlocal frame_buffer, running, video_fps, is_webcam, num_frames, frames_displayed, vid_done

            video_frame_times = MovingAverage(100)
            frame_time_stabilizer = frame_time_target
            last_time = None
            stabilizer_step = 0.0005
            progress_bar = ProgressBar(30, num_frames)

            while running:
                frame_time_start = time.time()

                if not frame_buffer.empty():
                    next_time = time.time()
                    if last_time is not None:
                        video_frame_times.add(next_time - last_time)
                        video_fps = 1 / video_frame_times.get_avg()
                    if out_path is None:
                        cv2.imshow(path, frame_buffer.get())
                    else:
                        out.write(frame_buffer.get())
                    frames_displayed += 1
                    last_time = next_time

                    if out_path is not None:
                        if video_frame_times.get_avg() == 0:
                            fps = 0
                        else:
                            fps = 1 / video_frame_times.get_avg()
                        progress = frames_displayed / num_frames * 100
                        progress_bar.set_val(frames_displayed)

                        print(
                            '\rProcessing Frames  %s %6d / %6d (%5.2f%%)    %5.2f fps        '
                            % (repr(progress_bar), frames_displayed,
                               num_frames, progress, fps),
                            end='')

                # This is split because you don't want savevideo to require cv2 display functionality (see #197)
                if out_path is None and cv2.waitKey(1) == 27:
                    # Press Escape to close
                    running = False
                if not (frames_displayed < num_frames):
                    running = False

                if not vid_done:
                    buffer_size = frame_buffer.qsize()
                    if buffer_size < args.video_multiframe:
                        frame_time_stabilizer += stabilizer_step
                    elif buffer_size > args.video_multiframe:
                        frame_time_stabilizer -= stabilizer_step
                        if frame_time_stabilizer < 0:
                            frame_time_stabilizer = 0

                    new_target = frame_time_stabilizer if is_webcam else max(
                        frame_time_stabilizer, frame_time_target)
                else:
                    new_target = frame_time_target

                next_frame_target = max(
                    2 * new_target - video_frame_times.get_avg(), 0)
                target_time = frame_time_start + next_frame_target - 0.001  # Let's just subtract a millisecond to be safe

                if out_path is None or args.emulate_playback:
                    # This gives more accurate timing than if sleeping the whole amount at once
                    while time.time() < target_time:
                        time.sleep(0.001)
                else:
                    # Let's not starve the main thread, now
                    time.sleep(0.001)
        except:
            # See issue #197 for why this is necessary
            import traceback
            traceback.print_exc()

    extract_frame = lambda x, i: (x[0][i] if x[1][i]['detection'] is None else
                                  x[0][i].to(x[1][i]['detection']['box'].device
                                             ), [x[1][i]])

    # Prime the network on the first frame because I do some thread unsafe things otherwise
    print('Initializing model.. ', end='')
    first_batch = eval_network(transform_frame(get_next_frame(vid)))
    print('Done.')

    # The sequence of functions each frame needs to go through to be processed (in reversed order)
    sequence = [prep_frame, eval_network, transform_frame]
    pool = ThreadPool(processes=len(sequence) + args.video_multiframe + 2)
    pool.apply_async(play_video)
    active_frames = [{
        'value': extract_frame(first_batch, i),
        'idx': 0
    } for i in range(len(first_batch[0]))]

    print()
    if out_path is None: print('Press Escape to close.')
    try:
        while vid.isOpened() and running:
            # Hard limit on frames in buffer so we don't run out of memory >.>
            while frame_buffer.qsize() > 100:
                time.sleep(0.001)

            start_time = time.time()

            # Start loading the next frames from the disk
            if not vid_done:
                next_frames = pool.apply_async(get_next_frame, args=(vid, ))
            else:
                next_frames = None

            if not (vid_done and len(active_frames) == 0):
                # For each frame in our active processing queue, dispatch a job
                # for that frame using the current function in the sequence
                for frame in active_frames:
                    _args = [frame['value']]
                    if frame['idx'] == 0:
                        _args.append(fps_str)
                    frame['value'] = pool.apply_async(sequence[frame['idx']],
                                                      args=_args)

                # For each frame whose job was the last in the sequence (i.e. for all final outputs)
                for frame in active_frames:
                    if frame['idx'] == 0:
                        frame_buffer.put(frame['value'].get())

                # Remove the finished frames from the processing queue
                active_frames = [x for x in active_frames if x['idx'] > 0]

                # Finish evaluating every frame in the processing queue and advance its position in the sequence
                for frame in list(reversed(active_frames)):
                    frame['value'] = frame['value'].get()
                    frame['idx'] -= 1

                    if frame['idx'] == 0:
                        # Split this up into individual threads for prep_frame since it doesn't support batch size
                        active_frames += [{
                            'value':
                            extract_frame(frame['value'], i),
                            'idx':
                            0
                        } for i in range(1, len(frame['value'][0]))]
                        frame['value'] = extract_frame(frame['value'], 0)

                # Finish loading in the next frames and add them to the processing queue
                if next_frames is not None:
                    frames = next_frames.get()
                    if len(frames) == 0:
                        vid_done = True
                    else:
                        active_frames.append({
                            'value': frames,
                            'idx': len(sequence) - 1
                        })

                # Compute FPS
                frame_times.add(time.time() - start_time)
                fps = args.video_multiframe / frame_times.get_avg()
            else:
                fps = 0

            fps_str = 'Processing FPS: %.2f | Video Playback FPS: %.2f | Frames in Buffer: %d' % (
                fps, video_fps, frame_buffer.qsize())
            if not args.display_fps:
                print('\r' + fps_str + '    ', end='')

    except KeyboardInterrupt:
        print('\nStopping..')

    cleanup_and_exit()
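
evalvideo() above threads each batch of frames through a reversed sequence of stages (transform_frame, eval_network, prep_frame) on a single ThreadPool, tracking progress with an idx counter per work item. The sketch below reproduces just that staging pattern with made-up stage functions; it is a simplified illustration, not the function's actual control flow.

from multiprocessing.pool import ThreadPool

# Stages listed in *reversed* execution order, as in evalvideo():
# an item whose idx == len(sequence) - 1 still has every stage ahead of it,
# and an item whose idx has dropped below 0 is finished.
def stage_a(x): return x + '-a'   # e.g. transform
def stage_b(x): return x + '-b'   # e.g. network forward
def stage_c(x): return x + '-c'   # e.g. prep for display
sequence = [stage_c, stage_b, stage_a]

pool = ThreadPool(processes=4)
active = [{'value': 'frame0', 'idx': len(sequence) - 1}]
outputs = []

while active:
    # Dispatch the current stage of every in-flight item asynchronously.
    for item in active:
        item['value'] = pool.apply_async(sequence[item['idx']], args=(item['value'],))
    # Collect results and advance each item one stage through the pipeline.
    for item in active:
        item['value'] = item['value'].get()
        item['idx'] -= 1
    outputs += [item['value'] for item in active if item['idx'] < 0]
    active = [item for item in active if item['idx'] >= 0]

pool.close()
print(outputs)  # ['frame0-a-b-c']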
Code Example #8
def train():
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=SSDAugmentation(MEANS))

    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                    info_file=cfg.dataset.valid_info,
                                    transform=BaseTransform(MEANS))

    # Parallel wraps the underlying module, but when saving and loading we don't want that
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder +
                                cfg.backbone.path)

    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.decay)
    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=3)

    if args.cuda:
        cudnn.benchmark = True
        net = nn.DataParallel(net).cuda()
        criterion = nn.DataParallel(criterion).cuda()

    # loss counters
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    data_loader = data.DataLoader(dataset,
                                  args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    save_path = lambda epoch, iteration: SavePath(
        cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()

    loss_types = ['B', 'C', 'M', 'P', 'D', 'E', 'S']  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    print('Begin training!')
    print()
    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            for datum in data_loader:
                # Stop once we've reached the end of this epoch (matters when resuming from start_iter)
                if iteration == (epoch + 1) * epoch_size:
                    break

                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have changed
                        for avg in loss_avgs:
                            avg.reset()

                # If a config setting was changed, remove it from the list so we don't keep checking
                if changed:
                    cfg.delayed_settings = [
                        x for x in cfg.delayed_settings if x[0] > iteration
                    ]

                # Warm up by linearly interpolating the learning rate from some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                    set_lr(optimizer, (args.lr - cfg.lr_warmup_init) *
                           (iteration / cfg.lr_warmup_until) +
                           cfg.lr_warmup_init)

                # Adjust the learning rate at the given iterations, but also if we resume from past that iteration
                while step_index < len(
                        cfg.lr_steps
                ) and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma**step_index))

                # Load training data
                # Note, for training on multiple gpus this will use the custom replicate and gather I wrote up there
                images, targets, masks, num_crowds = prepare_data(datum)

                # Forward Pass
                out = net(images)

                # Compute Loss
                optimizer.zero_grad()

                wrapper = ScatterWrapper(targets, masks, num_crowds)
                losses = criterion(out, wrapper, wrapper.make_mask())

                losses = {k: v.mean()
                          for k, v in losses.items()
                          }  # Mean here because Dataparallel
                loss = sum([losses[k] for k in losses])

                # Backprop
                loss.backward(
                )  # Do this to free up vram even if loss is not finite
                if torch.isfinite(loss).item():
                    optimizer.step()

                # Add the loss to the moving average for bookkeeping
                for k in losses:
                    loss_avgs[k].add(losses[k].item())

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(
                        datetime.timedelta(seconds=(cfg.max_iter - iteration) *
                                           time_avg.get_avg())).split('.')[0]

                    total = sum([loss_avgs[k].get_avg() for k in losses])
                    loss_labels = sum([[k, loss_avgs[k].get_avg()]
                                       for k in loss_types if k in losses], [])

                    print(('[%3d] %7d ||' + (' %s: %.3f |' * len(losses)) +
                           ' T: %.3f || ETA: %s || timer: %.3f') %
                          tuple([epoch, iteration] + loss_labels +
                                [total, eta_str, elapsed]),
                          flush=True)

                iteration += 1

                if iteration % args.save_interval == 0 and iteration != args.start_iter:
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder,
                                                     cfg.name)

                    print('Saving state, iter:', iteration)
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            print('Deleting old save...')
                            os.remove(latest)

            # This is done per epoch
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    compute_validation_map(yolact_net, val_dataset)
    except KeyboardInterrupt:
        print('Stopping early. Saving network...')

        # Delete previous copy of the interrupted network so we don't spam the weights folder
        SavePath.remove_interrupt(args.save_folder)

        yolact_net.save_weights(
            save_path(epoch,
                      repr(iteration) + '_interrupt'))
        exit()

    yolact_net.save_weights(save_path(epoch, iteration))
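
The learning-rate handling inside train() is a linear warm-up from cfg.lr_warmup_init to args.lr over the first cfg.lr_warmup_until iterations, followed by step decay at cfg.lr_steps with factor args.gamma. A self-contained sketch of that schedule (illustration only; the real code mutates the optimizer through set_lr()):

def warmup_then_step_lr(iteration, base_lr, warmup_init, warmup_until,
                        lr_steps, gamma):
    """Sketch of the schedule used in train(): linear warm-up, then step decay."""
    if warmup_until > 0 and iteration <= warmup_until:
        # Linearly interpolate from warmup_init up to base_lr.
        return (base_lr - warmup_init) * (iteration / warmup_until) + warmup_init
    # Count how many decay milestones have been passed.
    step_index = sum(1 for s in lr_steps if iteration >= s)
    return base_lr * (gamma ** step_index)

# Example: 1e-3 base LR, warm-up over the first 500 iterations,
# decay by 10x at 280k and 600k iterations.
print(warmup_then_step_lr(250, 1e-3, 1e-4, 500, (280000, 600000), 0.1))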
Code Example #9
    def play_video():
        try:
            nonlocal frame_buffer, running, video_fps, is_webcam, num_frames, frames_displayed, vid_done

            video_frame_times = MovingAverage(100)
            frame_time_stabilizer = frame_time_target
            last_time = None
            stabilizer_step = 0.0005
            progress_bar = ProgressBar(30, num_frames)

            while running:
                frame_time_start = time.time()

                if not frame_buffer.empty():
                    next_time = time.time()
                    if last_time is not None:
                        video_frame_times.add(next_time - last_time)
                        video_fps = 1 / video_frame_times.get_avg()
                    if out_path is None or os.path.isdir(out_path):
                        cv2.imshow(path, frame_buffer.get()[0])
                    else:
                        out.write(frame_buffer.get()[0])
                    frames_displayed += 1
                    last_time = next_time

                    if out_path is not None and not os.path.isdir(out_path):
                        if video_frame_times.get_avg() == 0:
                            fps = 0
                        else:
                            fps = 1 / video_frame_times.get_avg()
                        progress = frames_displayed / num_frames * 100
                        progress_bar.set_val(frames_displayed)
                        print(
                            '\rProcessing Frames  %s %6d / %6d (%5.2f%%)    %5.2f fps        '
                            % (repr(progress_bar), frames_displayed,
                               num_frames, progress, fps),
                            end='')

                if (out_path is None
                        or os.path.isdir(out_path)) and cv2.waitKey(1) == 27:
                    running = False

                if not vid_done:
                    buffer_size = frame_buffer.qsize()
                    if buffer_size < args.video_multiframe:
                        frame_time_stabilizer += stabilizer_step
                    elif buffer_size > args.video_multiframe:
                        frame_time_stabilizer -= stabilizer_step
                        if frame_time_stabilizer < 0:
                            frame_time_stabilizer = 0

                    new_target = frame_time_stabilizer if is_webcam else max(
                        frame_time_stabilizer, frame_time_target)
                else:
                    new_target = frame_time_target

                next_frame_target = max(
                    2 * new_target - video_frame_times.get_avg(), 0)
                target_time = frame_time_start + next_frame_target - 0.001

                if out_path is None or os.path.isdir(
                        out_path) or args.emulate_playback:
                    while time.time() < target_time:
                        time.sleep(0.001)
                else:
                    time.sleep(0.001)
        except:
            import traceback
            traceback.print_exc()
Code Example #10
File: train_ex.py  Project: ethan-jiang-1/yolact
def get_default_log_avgs():
    return { k: MovingAverage(100) for k in loss_types }
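
A hedged usage sketch, assuming loss_types is the same ['B', 'C', 'M', ...] list used in Code Example #8:

loss_avgs = get_default_log_avgs()

# After each training iteration, push the per-component losses
# into their windowed averages and read back the smoothed values.
loss_avgs['B'].add(1.23)
loss_avgs['C'].add(0.45)
smoothed = {k: loss_avgs[k].get_avg() for k in ('B', 'C')}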
Code Example #11
File: eval.py  Project: w55100/YOLACT
def evaluate(net: Yolact, dataset, train_mode=False):
    # net.detection.use_fast_nms = args.fast_nms
    # net.detection.use_cross_class_nms = args.cross_class_nms

    frame_times = MovingAverage()
    dataset_size = len(dataset)  # if args.max_images < 0 else min(args.max_images, len(dataset))
    progress_bar = ProgressBar(30, dataset_size)

    print()
    iou_thresholds = [x / 100 for x in range(50, 100, 5)]
    if not args.display and not args.benchmark:
        # Not displaying; compute the scores directly
        # For each class and iou, stores tuples (score, isPositive)
        # Index ap_data[type][iouIdx][classIdx]
        ap_data = {
            'box': [[APDataObject() for _ in COCO_CLASSES] for _ in iou_thresholds],
            'mask': [[APDataObject() for _ in COCO_CLASSES] for _ in iou_thresholds]
        }
        detections = Detections()

    dataset_indices = list(range(len(dataset)))
    if args.shuffle:
        random.shuffle(dataset_indices)
    elif not args.no_sort:
        # Do a deterministic shuffle based on the image ids
        #
        # I do this because on python 3.5 dictionary key order is *random*, while in 3.6 it's
        # the order of insertion. That means on python 3.6, the images come in the order they are in
        # in the annotations file. For some reason, the first images in the annotations file are
        # the hardest. To combat this, I use a hard-coded hash function based on the image ids
        # to shuffle the indices we use. That way, no matter what python version or how pycocotools
        # handles the data, we get the same result every time.
        hashed = [badhash(x) for x in dataset.ids]
        dataset_indices.sort(key=lambda x: hashed[x])

    # The remaining else case does nothing

    # Now that args.max_images has been removed, this line isn't needed either; keep it just in case.
    dataset_indices = dataset_indices[:dataset_size]

    # Main eval loop
    for it, image_idx in enumerate(dataset_indices):

        # Load Data
        img, gt, gt_masks, h, w, num_crowd = dataset.pull_item(image_idx)

        batch = torch.autograd.Variable(img.unsqueeze(0))
        if args.cuda:
            batch = batch.cuda()

        # Feed the batch into the network ('Network Extra')
        preds = net(batch)

        # Perform the meat of the operation here depending on our mode.
        if args.display:
            img_numpy = prep_display(preds, img, h, w)  # we don't do display here
        elif args.benchmark:
            prep_benchmark(preds, h, w)  # we don't do this either
        else:
            prep_metrics(ap_data, preds, img, gt, gt_masks, h, w, num_crowd, dataset.ids[image_idx], detections)

        # First couple of images take longer because we're constructing the graph.
        # Since that's technically initialization, don't include those in the FPS calculations.
        # if it > 1:
        #     frame_times.add(timer.total_time())

        # if args.display:
        #     if it > 1:
        #         print('Avg FPS: %.4f' % (1 / frame_times.get_avg()))
        #     plt.imshow(img_numpy)
        #     plt.title(str(dataset.ids[image_idx]))
        #     plt.show()
        # elif not args.no_bar:
        #     if it > 1:
        #         fps = 1 / frame_times.get_avg()
        #     else:
        #         fps = 0
        #     progress = (it + 1) / dataset_size * 100
        #     progress_bar.set_val(it + 1)
        #     print('\rProcessing Images  %s %6d / %6d (%5.2f%%)    %5.2f fps        '
        #           % (repr(progress_bar), it + 1, dataset_size, progress, fps), end='')

    if not args.display and not args.benchmark:
        print()
        if args.output_coco_json:
            print('Dumping detections...')
            if args.output_web_json:
                detections.dump_web()
            else:
                detections.dump()
        else:
            if not train_mode:
                print('Saving data...')
                with open(args.ap_data_file, 'wb') as f:
                    pickle.dump(ap_data, f)

            return calc_map(ap_data)
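
The deterministic shuffle above sorts dataset indices by badhash(image_id) so that evaluation order is reproducible across Python versions. badhash is an integer-mixing hash; the sketch below shows the idea with a generic mixer (the exact constants in the upstream function may differ):

def mix_hash(x):
    # Generic 32-bit integer mixer in the spirit of badhash():
    # spread nearby ids apart so sorting by the hash looks like a shuffle.
    x = (((x >> 16) ^ x) * 0x045d9f3b) & 0xFFFFFFFF
    x = (((x >> 16) ^ x) * 0x045d9f3b) & 0xFFFFFFFF
    x = ((x >> 16) ^ x) & 0xFFFFFFFF
    return x

image_ids = [139, 285, 632, 724]            # hypothetical COCO image ids
indices = sorted(range(len(image_ids)), key=lambda i: mix_hash(image_ids[i]))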
Code Example #12
def train(rank, args):
    if args.num_gpus > 1:
        multi_gpu_rescale(args)
    if rank == 0:
        if not os.path.exists(args.save_folder):
            os.mkdir(args.save_folder)

    # set up logger
    setup_logger(output=os.path.join(args.log_folder, cfg.name),
                 distributed_rank=rank)
    logger = logging.getLogger("yolact.train")

    w = SummaryHelper(distributed_rank=rank,
                      log_dir=os.path.join(args.log_folder, cfg.name))
    w.add_text("argv", " ".join(sys.argv))
    logger.info("Args: {}".format(" ".join(sys.argv)))
    import git
    with git.Repo(search_parent_directories=True) as repo:
        w.add_text("git_hash", repo.head.object.hexsha)
        logger.info("git hash: {}".format(repo.head.object.hexsha))

    try:
        logger.info("Initializing torch.distributed backend...")
        dist.init_process_group(backend='nccl',
                                init_method=args.dist_url,
                                world_size=args.num_gpus,
                                rank=rank)
    except Exception as e:
        logger.error("Process group URL: {}".format(args.dist_url))
        raise e

    dist.barrier()

    if torch.cuda.device_count() > 1:
        logger.info('Multiple GPUs detected! Turning off JIT.')

    collate_fn = detection_collate
    if cfg.dataset.name == 'YouTube VIS':
        dataset = YoutubeVIS(image_path=cfg.dataset.train_images,
                             info_file=cfg.dataset.train_info,
                             configs=cfg.dataset,
                             transform=SSDAugmentationVideo(MEANS))

        if cfg.dataset.joint == 'coco':
            joint_dataset = COCODetection(
                image_path=cfg.joint_dataset.train_images,
                info_file=cfg.joint_dataset.train_info,
                transform=SSDAugmentation(MEANS))
            joint_collate_fn = detection_collate

        if args.validation_epoch > 0:
            setup_eval()
            val_dataset = YoutubeVIS(image_path=cfg.dataset.valid_images,
                                     info_file=cfg.dataset.valid_info,
                                     configs=cfg.dataset,
                                     transform=BaseTransformVideo(MEANS))
        collate_fn = collate_fn_youtube_vis

    elif cfg.dataset.name == 'FlyingChairs':
        dataset = FlyingChairs(image_path=cfg.dataset.trainval_images,
                               info_file=cfg.dataset.trainval_info)

        collate_fn = collate_fn_flying_chairs

    else:
        dataset = COCODetection(image_path=cfg.dataset.train_images,
                                info_file=cfg.dataset.train_info,
                                transform=SSDAugmentation(MEANS))

        if args.validation_epoch > 0:
            setup_eval()
            val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                        info_file=cfg.dataset.valid_info,
                                        transform=BaseTransform(MEANS))

    # Set cuda device early to avoid duplicate model in master GPU
    if args.cuda:
        torch.cuda.set_device(rank)

    # Parallel wraps the underlying module, but when saving and loading we don't want that
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs.

    # use timer for experiments
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        logger.info('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume, args=args)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        logger.info('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder +
                                cfg.backbone.path)

    if cfg.flow.train_flow:
        criterion = OpticalFlowLoss()

    else:
        criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                                 pos_threshold=cfg.positive_iou_threshold,
                                 neg_threshold=cfg.negative_iou_threshold,
                                 negpos_ratio=3)

    if args.cuda:
        cudnn.benchmark = True
        net.cuda(rank)
        criterion.cuda(rank)
        net = nn.parallel.DistributedDataParallel(net,
                                                  device_ids=[rank],
                                                  output_device=rank,
                                                  broadcast_buffers=False,
                                                  find_unused_parameters=True)
        # net       = nn.DataParallel(net).cuda()
        # criterion = nn.DataParallel(criterion).cuda()

    optimizer = optim.SGD(filter(lambda x: x.requires_grad, net.parameters()),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.decay)

    # loss counters
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)
    w.set_step(iteration)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size // args.num_gpus
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    from data.sampler_utils import InfiniteSampler, build_batch_data_sampler

    infinite_sampler = InfiniteSampler(dataset,
                                       seed=args.random_seed,
                                       num_replicas=args.num_gpus,
                                       rank=rank,
                                       shuffle=True)
    train_sampler = build_batch_data_sampler(infinite_sampler,
                                             images_per_batch=args.batch_size)

    data_loader = data.DataLoader(
        dataset,
        num_workers=args.num_workers,
        collate_fn=collate_fn,
        multiprocessing_context="fork" if args.num_workers > 1 else None,
        batch_sampler=train_sampler)
    data_loader_iter = iter(data_loader)

    if cfg.dataset.joint:
        joint_infinite_sampler = InfiniteSampler(joint_dataset,
                                                 seed=args.random_seed,
                                                 num_replicas=args.num_gpus,
                                                 rank=rank,
                                                 shuffle=True)
        joint_train_sampler = build_batch_data_sampler(
            joint_infinite_sampler, images_per_batch=args.batch_size)
        joint_data_loader = data.DataLoader(
            joint_dataset,
            num_workers=args.num_workers,
            collate_fn=joint_collate_fn,
            multiprocessing_context="fork" if args.num_workers > 1 else None,
            batch_sampler=joint_train_sampler)
        joint_data_loader_iter = iter(joint_data_loader)

    dist.barrier()

    save_path = lambda epoch, iteration: SavePath(
        cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()
    data_time_avg = MovingAverage(10)

    global loss_types  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    def backward_and_log(prefix,
                         net_outs,
                         targets,
                         masks,
                         num_crowds,
                         extra_loss=None):
        optimizer.zero_grad()

        out = net_outs["pred_outs"]
        wrapper = ScatterWrapper(targets, masks, num_crowds)
        losses = criterion(out, wrapper, wrapper.make_mask())

        losses = {k: v.mean()
                  for k, v in losses.items()}  # Mean here because Dataparallel

        if extra_loss is not None:
            assert type(extra_loss) == dict
            losses.update(extra_loss)

        loss = sum([losses[k] for k in losses])

        # Backprop
        loss.backward()  # Do this to free up vram even if loss is not finite
        if torch.isfinite(loss).item():
            optimizer.step()

        # Add the loss to the moving average for bookkeeping
        for k in losses:
            loss_avgs[k].add(losses[k].item())
            w.add_scalar('{prefix}/{key}'.format(prefix=prefix, key=k),
                         losses[k].item())

        return losses

    logger.info('Begin training!')
    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            while True:
                data_start_time = time.perf_counter()
                datum = next(data_loader_iter)
                dist.barrier()
                data_end_time = time.perf_counter()
                data_time = data_end_time - data_start_time
                if iteration != args.start_iter:
                    data_time_avg.add(data_time)
                # Stop once we've reached the end of this epoch (matters when resuming from start_iter)
                if iteration == (epoch + 1) * epoch_size:
                    break

                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have changed
                        for avg in loss_avgs:
                            avg.reset()

                # If a config setting was changed, remove it from the list so we don't keep checking
                if changed:
                    cfg.delayed_settings = [
                        x for x in cfg.delayed_settings if x[0] > iteration
                    ]

                # Warm up by linearly interpolating the learning rate from some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until and cfg.lr_warmup_init < args.lr:
                    set_lr(optimizer, (args.lr - cfg.lr_warmup_init) *
                           (iteration / cfg.lr_warmup_until) +
                           cfg.lr_warmup_init)

                elif cfg.lr_schedule == 'cosine':
                    set_lr(
                        optimizer,
                        args.lr *
                        ((math.cos(math.pi * iteration / cfg.max_iter) + 1.) *
                         .5))

                # Adjust the learning rate at the given iterations, but also if we resume from past that iteration
                while cfg.lr_schedule == 'step' and step_index < len(
                        cfg.lr_steps
                ) and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma**step_index))

                global lr
                w.add_scalar('meta/lr', lr)

                if cfg.dataset.name == "FlyingChairs":
                    imgs_1, imgs_2, flows = prepare_flow_data(datum)
                    net_outs = net(None, extras=(imgs_1, imgs_2))
                    # Compute Loss
                    optimizer.zero_grad()

                    losses = criterion(net_outs, flows)

                    losses = {k: v.mean()
                              for k, v in losses.items()
                              }  # Mean here because Dataparallel
                    loss = sum([losses[k] for k in losses])

                    # Backprop
                    loss.backward(
                    )  # Do this to free up vram even if loss is not finite
                    if torch.isfinite(loss).item():
                        optimizer.step()

                    # Add the loss to the moving average for bookkeeping
                    for k in losses:
                        loss_avgs[k].add(losses[k].item())
                        w.add_scalar('loss/%s' % k, losses[k].item())

                elif cfg.dataset.joint or not cfg.dataset.is_video:
                    if cfg.dataset.joint:
                        joint_datum = next(joint_data_loader_iter)
                        dist.barrier()
                        # Load training data
                        # Note, for training on multiple gpus this will use the custom replicate and gather I wrote up there
                        images, targets, masks, num_crowds = prepare_data(
                            joint_datum)
                    else:
                        images, targets, masks, num_crowds = prepare_data(
                            datum)
                    extras = {
                        "backbone": "full",
                        "interrupt": False,
                        "moving_statistics": {
                            "aligned_feats": []
                        }
                    }
                    net_outs = net(images, extras=extras)
                    out = net_outs["pred_outs"]
                    # Compute Loss
                    optimizer.zero_grad()

                    wrapper = ScatterWrapper(targets, masks, num_crowds)
                    losses = criterion(out, wrapper, wrapper.make_mask())

                    losses = {k: v.mean()
                              for k, v in losses.items()
                              }  # Mean here because Dataparallel
                    loss = sum([losses[k] for k in losses])

                    # Backprop
                    loss.backward(
                    )  # Do this to free up vram even if loss is not finite
                    if torch.isfinite(loss).item():
                        optimizer.step()

                    # Add the loss to the moving average for bookkeeping
                    for k in losses:
                        loss_avgs[k].add(losses[k].item())
                        w.add_scalar('joint/%s' % k, losses[k].item())

                # Forward Pass
                if cfg.dataset.is_video:
                    # reference frames
                    references = []
                    moving_statistics = {"aligned_feats": [], "conf_hist": []}
                    for idx, frame in enumerate(datum[:0:-1]):
                        images, annots = frame

                        extras = {
                            "backbone": "full",
                            "interrupt": True,
                            "keep_statistics": True,
                            "moving_statistics": moving_statistics
                        }

                        with torch.no_grad():
                            net_outs = net(images, extras=extras)

                        moving_statistics["feats"] = net_outs["feats"]
                        moving_statistics["lateral"] = net_outs["lateral"]

                        keys_to_save = ("outs_phase_1", "outs_phase_2")
                        for key in set(net_outs.keys()) - set(keys_to_save):
                            del net_outs[key]
                        references.append(net_outs)

                    # key frame with annotations, but don't compute the full backbone
                    frame = datum[0]
                    images, annots = frame
                    frame = (
                        images,
                        annots,
                    )
                    images, targets, masks, num_crowds = prepare_data(frame)

                    extras = {
                        "backbone": "full",
                        "interrupt": not cfg.flow.base_backward,
                        "moving_statistics": moving_statistics
                    }
                    gt_net_outs = net(images, extras=extras)
                    if cfg.flow.base_backward:
                        losses = backward_and_log("compute", gt_net_outs,
                                                  targets, masks, num_crowds)

                    keys_to_save = ("outs_phase_1", "outs_phase_2")
                    for key in set(gt_net_outs.keys()) - set(keys_to_save):
                        del gt_net_outs[key]

                    # now do the warp
                    if len(references) > 0:
                        reference_frame = references[0]
                        extras = {
                            "backbone": "partial",
                            "moving_statistics": moving_statistics
                        }

                        net_outs = net(images, extras=extras)
                        extra_loss = yolact_net.extra_loss(
                            net_outs, gt_net_outs)

                        losses = backward_and_log("warp",
                                                  net_outs,
                                                  targets,
                                                  masks,
                                                  num_crowds,
                                                  extra_loss=extra_loss)

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time
                w.add_scalar('meta/data_time', data_time)
                w.add_scalar('meta/iter_time', elapsed)

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(
                        datetime.timedelta(seconds=(cfg.max_iter - iteration) *
                                           time_avg.get_avg())).split('.')[0]
                    if torch.cuda.is_available():
                        max_mem_mb = torch.cuda.max_memory_allocated(
                        ) / 1024.0 / 1024.0
                        # torch.cuda.reset_max_memory_allocated()
                    else:
                        max_mem_mb = None

                    logger.info("""\
eta: {eta}  epoch: {epoch}  iter: {iter}  \
{losses}  {loss_total}  \
time: {time}  data_time: {data_time}  lr: {lr}  {memory}\
""".format(eta=eta_str,
                    epoch=epoch,
                    iter=iteration,
                    losses="  ".join([
                    "{}: {:.3f}".format(k, loss_avgs[k].get_avg()) for k in losses
                    ]),
                    loss_total="T: {:.3f}".format(
                    sum([loss_avgs[k].get_avg() for k in losses])),
                    data_time="{:.3f}".format(data_time_avg.get_avg()),
                    time="{:.3f}".format(elapsed),
                    lr="{:.6f}".format(lr),
                    memory="max_mem: {:.0f}M".format(max_mem_mb)))

                if rank == 0 and iteration % 100 == 0:

                    if cfg.flow.train_flow:
                        import flowiz as fz
                        from layers.warp_utils import deform_op
                        tgt_size = (64, 64)
                        flow_size = flows.size()[2:]
                        vis_data = []
                        for pred_flow in net_outs:
                            vis_data.append(pred_flow)

                        deform_gt = deform_op(imgs_2, flows)
                        flows_pred = [
                            F.interpolate(x,
                                          size=flow_size,
                                          mode='bilinear',
                                          align_corners=False)
                            for x in net_outs
                        ]
                        deform_preds = [
                            deform_op(imgs_2, x) for x in flows_pred
                        ]

                        vis_data.append(
                            F.interpolate(flows, size=tgt_size, mode='area'))

                        vis_data = [
                            F.interpolate(flow[:1], size=tgt_size)
                            for flow in vis_data
                        ]
                        vis_data = [
                            fz.convert_from_flow(
                                flow[0].data.cpu().numpy().transpose(
                                    1, 2, 0)).transpose(
                                        2, 0, 1).astype('float32') / 255
                            for flow in vis_data
                        ]

                        def convert_image(image):
                            image = F.interpolate(image,
                                                  size=tgt_size,
                                                  mode='area')
                            image = image[0]
                            image = image.data.cpu().numpy()
                            image = image[::-1]
                            image = image.transpose(1, 2, 0)
                            image = image * np.array(STD) + np.array(MEANS)
                            image = image.transpose(2, 0, 1)
                            image = image / 255
                            image = np.clip(image, -1, 1)
                            image = image[::-1]
                            return image

                        vis_data.append(convert_image(imgs_1))
                        vis_data.append(convert_image(imgs_2))
                        vis_data.append(convert_image(deform_gt))
                        vis_data.extend(
                            [convert_image(x) for x in deform_preds])

                        vis_data_stack = np.stack(vis_data, axis=0)
                        w.add_images("preds_flow", vis_data_stack)

                    elif cfg.flow.warp_mode == "flow":
                        import flowiz as fz
                        tgt_size = (64, 64)
                        vis_data = []
                        for pred_flow, _, _ in net_outs["preds_flow"]:
                            vis_data.append(pred_flow)

                        vis_data = [
                            F.interpolate(flow[:1], size=tgt_size)
                            for flow in vis_data
                        ]
                        vis_data = [
                            fz.convert_from_flow(
                                flow[0].data.cpu().numpy().transpose(
                                    1, 2, 0)).transpose(
                                        2, 0, 1).astype('float32') / 255
                            for flow in vis_data
                        ]
                        input_image = F.interpolate(images,
                                                    size=tgt_size,
                                                    mode='area')
                        input_image = input_image[0]
                        input_image = input_image.data.cpu().numpy()
                        input_image = input_image.transpose(1, 2, 0)
                        input_image = input_image * np.array(
                            STD[::-1]) + np.array(MEANS[::-1])
                        input_image = input_image.transpose(2, 0, 1)
                        input_image = input_image / 255
                        input_image = np.clip(input_image, -1, 1)
                        vis_data.append(input_image)

                        vis_data_stack = np.stack(vis_data, axis=0)
                        w.add_images("preds_flow", vis_data_stack)

                iteration += 1
                w.set_step(iteration)

                if rank == 0 and iteration % args.save_interval == 0 and iteration != args.start_iter:
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder,
                                                     cfg.name)

                    logger.info('Saving state, iter: {}'.format(iteration))
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            logger.info('Deleting old save...')
                            os.remove(latest)

            # This is done per epoch
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    if rank == 0:
                        compute_validation_map(yolact_net, val_dataset)
                    dist.barrier()

    except KeyboardInterrupt:
        if args.interrupt_no_save:
            logger.info('No save on interrupt, just exiting...')
        elif rank == 0:
            print('Stopping early. Saving network...')
            # Delete previous copy of the interrupted network so we don't spam the weights folder
            SavePath.remove_interrupt(args.save_folder)

            yolact_net.save_weights(
                save_path(epoch,
                          repr(iteration) + '_interrupt'))
        return

    if rank == 0:
        yolact_net.save_weights(save_path(epoch, iteration))
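
The logging above estimates an ETA by multiplying the remaining iterations by a moving average of per-iteration wall time, excluding the first iteration because it includes graph setup. A minimal, self-contained sketch of that pattern; the MovingAverage here is a simplified stand-in for the project's own class, not its actual implementation.

import time
import datetime
from collections import deque

class MovingAverage:
    """Simplified stand-in: mean over the last `window` samples."""
    def __init__(self, window=1000):
        self.samples = deque(maxlen=window)

    def add(self, x):
        self.samples.append(x)

    def get_avg(self):
        return sum(self.samples) / max(len(self.samples), 1)

max_iter = 100
time_avg = MovingAverage()
last_time = time.time()

for iteration in range(max_iter):
    time.sleep(0.01)  # stand-in for one training step
    cur_time = time.time()
    if iteration != 0:  # exclude graph setup from the timing
        time_avg.add(cur_time - last_time)
    last_time = cur_time

    if iteration % 10 == 0 and iteration > 0:
        # Remaining iterations * average seconds per iteration, formatted H:MM:SS
        eta = datetime.timedelta(seconds=(max_iter - iteration) * time_avg.get_avg())
        print('iter %d  ETA %s' % (iteration, str(eta).split('.')[0]))
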
コード例 #13
ファイル: eval.py プロジェクト: Jinming-Su/STMask
def evaluate(net: STMask, dataset):
    net.detect.use_fast_nms = args.fast_nms
    cfg.mask_proto_debug = args.mask_proto_debug

    frame_times = MovingAverage()
    dataset_size = math.ceil(len(dataset) /
                             args.batch_size) if args.max_images < 0 else min(
                                 args.max_images, len(dataset))
    progress_bar = ProgressBar(30, dataset_size)

    print()

    data_loader = data.DataLoader(dataset,
                                  args.batch_size,
                                  shuffle=False,
                                  collate_fn=detection_collate,
                                  pin_memory=True)
    results = []

    try:
        # Main eval loop
        for it, data_batch in enumerate(data_loader):
            timer.reset()

            with timer.env('Load Data'):
                images, images_meta, ref_images, ref_images_meta = prepare_data(
                    data_batch, is_cuda=True, train_mode=False)
            pad_h, pad_w = images.size()[2:4]

            with timer.env('Network Extra'):
                preds = net(images,
                            img_meta=images_meta,
                            ref_x=ref_images,
                            ref_imgs_meta=ref_images_meta)

            # Perform the meat of the operation here depending on our mode.
            if it == dataset_size - 1:
                batch_size = len(dataset) % args.batch_size
            else:
                batch_size = images.size(0)

            for batch_id in range(batch_size):
                if args.display:
                    img_id = (images_meta[batch_id]['video_id'],
                              images_meta[batch_id]['frame_id'])
                    if not cfg.display_mask_single:
                        img_numpy = prep_display(
                            preds[batch_id],
                            images[batch_id],
                            pad_h,
                            pad_w,
                            img_meta=images_meta[batch_id],
                            img_ids=img_id)
                    else:
                        for p in range(
                                preds[batch_id]['detection']['box'].size(0)):
                            preds_single = {'detection': {}}
                            for k in preds[batch_id]['detection']:
                                if preds[batch_id]['detection'][
                                        k] is not None and k not in {'proto'}:
                                    preds_single['detection'][k] = preds[
                                        batch_id]['detection'][k][p]
                                else:
                                    preds_single['detection'][k] = None
                            preds_single['net'] = preds[batch_id]['net']
                            preds_single['detection'][
                                'box_ids'] = torch.tensor(-1)

                            img_numpy = prep_display(
                                preds_single,
                                images[batch_id],
                                pad_h,
                                pad_w,
                                img_meta=images_meta[batch_id],
                                img_ids=img_id)
                            plt.imshow(img_numpy)
                            plt.axis('off')
                            plt.savefig(''.join([
                                args.mask_det_file[:-12], 'out_single/',
                                str(img_id), '_',
                                str(p), '.png'
                            ]))
                            plt.clf()

                else:
                    cfg.preserve_aspect_ratio = True
                    preds_cur = postprocess_ytbvis(
                        preds[batch_id],
                        pad_h,
                        pad_w,
                        images_meta[batch_id],
                        score_threshold=cfg.eval_conf_thresh)
                    segm_results = bbox2result_with_id(preds_cur, cfg.classes)
                    results.append(segm_results)

                # First couple of images take longer because we're constructing the graph.
                # Since that's technically initialization, don't include those in the FPS calculations.
                if it > 1:
                    frame_times.add(timer.total_time() / batch_size)

                if args.display and not cfg.display_mask_single:
                    if it > 1:
                        print('Avg FPS: %.4f' % (1 / frame_times.get_avg()))
                    plt.imshow(img_numpy)
                    plt.axis('off')
                    plt.title(str(img_id))

                    root_dir = ''.join([
                        args.mask_det_file[:-12], 'out/',
                        str(images_meta[batch_id]['video_id']), '/'
                    ])
                    if not os.path.exists(root_dir):
                        os.makedirs(root_dir)
                    plt.savefig(''.join([
                        root_dir,
                        str(images_meta[batch_id]['frame_id']), '.png'
                    ]))
                    plt.clf()
                    # plt.show()
                elif not args.no_bar:
                    if it > 1: fps = 1 / frame_times.get_avg()
                    else: fps = 0
                    progress = (it + 1) / dataset_size * 100
                    progress_bar.set_val(it + 1)
                    print(
                        '\rProcessing Images  %s %6d / %6d (%5.2f%%)    %5.2f fps        '
                        % (repr(progress_bar), it + 1, dataset_size, progress,
                           fps),
                        end='')

        if not args.display and not args.benchmark:
            print()
            if args.output_json:
                print('Dumping detections...')
                results2json_videoseg(dataset, results, args.mask_det_file)

                if cfg.use_valid_sub or cfg.use_train_sub:
                    if cfg.use_valid_sub:
                        print('calculate evaluation metrics ...')
                        ann_file = cfg.valid_sub_dataset.ann_file
                    else:
                        print('calculate train_sub metrics ...')
                        ann_file = cfg.train_dataset.ann_file
                    dt_file = args.mask_det_file
                    metrics = calc_metrics(ann_file, dt_file)

                    return metrics

        elif args.benchmark:
            print()
            print()
            print('Stats for the last frame:')
            timer.print_stats()
            avg_seconds = frame_times.get_avg()
            print('Average: %5.2f fps, %5.2f ms' %
                  (1 / frame_times.get_avg(), 1000 * avg_seconds))

    except KeyboardInterrupt:
        print('Stopping...')
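
The loop above sizes the final batch with `len(dataset) % args.batch_size`, which is 0 whenever the dataset length is an exact multiple of the batch size (the validation example below patches this by forcing it to 1). A hedged sketch of a computation that avoids the special case; `it` and `num_batches` are illustrative names, not the repo's variables.

def last_batch_size(dataset_len, batch_size, it, num_batches):
    """Number of samples in batch `it` (0-based); only the final batch can be short."""
    if it == num_batches - 1:
        remainder = dataset_len % batch_size
        return remainder if remainder != 0 else batch_size
    return batch_size

# Example: 10 samples, batch size 5 -> the last of 2 batches still holds 5 samples.
assert last_batch_size(10, 5, it=1, num_batches=2) == 5
# Example: 11 samples, batch size 5 -> the last of 3 batches holds 1 sample.
assert last_batch_size(11, 5, it=2, num_batches=3) == 1
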
コード例 #14
ファイル: eval.py プロジェクト: Jinming-Su/STMask
def validation(net: STMask, valid_data=False, output_metrics_file=None):
    cfg.mask_proto_debug = args.mask_proto_debug
    if not valid_data:
        cfg.valid_sub_dataset.test_mode = True
        dataset = get_dataset(cfg.valid_sub_dataset)
    else:
        cfg.valid_dataset.test_mode = True
        dataset = get_dataset(cfg.valid_dataset)

    frame_times = MovingAverage()
    dataset_size = math.ceil(len(dataset) /
                             args.batch_size) if args.max_images < 0 else min(
                                 args.max_images, len(dataset))
    progress_bar = ProgressBar(30, dataset_size)

    print()
    data_loader = data.DataLoader(dataset,
                                  args.batch_size,
                                  shuffle=False,
                                  collate_fn=detection_collate,
                                  pin_memory=True)
    results = []

    try:
        # Main eval loop
        for it, data_batch in enumerate(data_loader):
            timer.reset()
            with timer.env('Load Data'):
                images, images_meta, ref_images, ref_images_meta = prepare_data(
                    data_batch, is_cuda=True, train_mode=False)
                pad_h, pad_w = images.size()[2:4]

            with timer.env('Network Extra'):
                preds = net(images,
                            img_meta=images_meta,
                            ref_x=ref_images,
                            ref_imgs_meta=ref_images_meta)

                if it == dataset_size - 1:
                    batch_size = len(dataset) % args.batch_size
                else:
                    batch_size = images.size(0)

                for batch_id in range(batch_size):
                    cfg.preserve_aspect_ratio = True
                    preds_cur = postprocess_ytbvis(
                        preds[batch_id],
                        pad_h,
                        pad_w,
                        images_meta[batch_id],
                        score_threshold=cfg.eval_conf_thresh)
                    segm_results = bbox2result_with_id(preds_cur, cfg.classes)
                    results.append(segm_results)

            # First couple of images take longer because we're constructing the graph.
            # Since that's technically initialization, don't include those in the FPS calculations.
            if it > 1:
                if batch_size == 0:
                    batch_size = 1
                frame_times.add(timer.total_time() / batch_size)

            if it > 1 and frame_times.get_avg() > 0:
                fps = 1 / frame_times.get_avg()
            else:
                fps = 0
            progress = (it + 1) / dataset_size * 100
            progress_bar.set_val(it + 1)
            print(
                '\rProcessing Images  %s %6d / %6d (%5.2f%%)    %5.2f fps        '
                % (repr(progress_bar), it + 1, dataset_size, progress, fps),
                end='')

        print()
        print('Dumping detections...')

        if not valid_data:
            results2json_videoseg(dataset, results, args.mask_det_file)
            print('calculate evaluation metrics ...')
            ann_file = cfg.valid_sub_dataset.ann_file
            dt_file = args.mask_det_file
            calc_metrics(ann_file, dt_file, output_file=output_metrics_file)
        else:
            results2json_videoseg(dataset, results,
                                  output_metrics_file.replace('.txt', '.json'))

    except KeyboardInterrupt:
        print('Stopping...')
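
Both STMask loops report FPS by averaging the per-batch wall time divided by the batch size, skipping the first couple of iterations because they also pay one-off graph-construction costs. A small sketch of that bookkeeping in isolation; timings here are simulated, not measured from a real network.

import time
from collections import deque

frame_times = deque(maxlen=1000)

for it in range(5):
    start = time.time()
    time.sleep(0.02)       # stand-in for loading and forwarding one batch
    batch_size = 4
    elapsed = time.time() - start

    # Skip the first two iterations: they also include setup costs.
    if it > 1:
        frame_times.append(elapsed / batch_size)

    fps = 1 / (sum(frame_times) / len(frame_times)) if frame_times else 0
    print('it %d  %.2f fps' % (it, fps))
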
コード例 #15
def train():
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=SSDAugmentation(MEANS))
    
    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                    info_file=cfg.dataset.valid_info,
                                    transform=BaseTransform(MEANS))

    # Parallel wraps the underlying module, but when saving and loading we don't want that
    yolact_net = Yolact()
    net = yolact_net
    net.train()
    print('\n--- Generator created! ---')

    # NOTE
    # I manually set the original image size and seg size to 138
    # this might change in the future, for example to 550
    if cfg.pred_seg:
        dis_size = 138
        dis_net  = Discriminator_Wgan(i_size = dis_size, s_size = dis_size)
        # Change the initialization inside the dis_net class instead
        # to set the dis net's initial parameter values
        # dis_net.apply(gan_init)
        dis_net.train()
        print('--- Discriminator created! ---\n')

    if args.log:
        log = Log(cfg.name, args.log_folder, dict(args._get_kwargs()),
            overwrite=(args.resume is None), log_gpu_stats=args.log_gpu)

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs, so disable it just to be safe.
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check    
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder + cfg.backbone.path)

    # optimizer_gen = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
    #                       weight_decay=args.decay)
    # if cfg.pred_seg:
    #     optimizer_dis = optim.SGD(dis_net.parameters(), lr=cfg.dis_lr, momentum=args.momentum,
    #                         weight_decay=args.decay)
    #     schedule_dis  = ReduceLROnPlateau(optimizer_dis, mode = 'min', patience=6, min_lr=1E-6)

    # NOTE: Using the Ranger Optimizer for the generator
    optimizer_gen     = Ranger(net.parameters(), lr = args.lr, weight_decay=args.decay)
    # optimizer_gen = optim.RMSprop(net.parameters(), lr = args.lr)

    # FIXME: Might need to modify the lr in the optimizer carefully
    # check this
    # def make_D_optimizer(cfg, model):
    # params = []
    # for key, value in model.named_parameters():
    #     if not value.requires_grad:
    #         continue
    #     lr = cfg.SOLVER.BASE_LR/5.0
    #     weight_decay = cfg.SOLVER.WEIGHT_DECAY
    #     if "bias" in key:
    #         lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR/5.0
    #         weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS
    #     params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}]

    # optimizer = torch.optim.SGD(params, lr, momentum=cfg.SOLVER.MOMENTUM)
    # return optimizer

    if cfg.pred_seg:
        optimizer_dis = optim.SGD(dis_net.parameters(), lr=cfg.dis_lr)
        # optimizer_dis = optim.RMSprop(dis_net.parameters(), lr = cfg.dis_lr)
        schedule_dis  = ReduceLROnPlateau(optimizer_dis, mode = 'min', patience=6, min_lr=1E-6)

    criterion     = MultiBoxLoss(num_classes=cfg.num_classes,
                                pos_threshold=cfg.positive_iou_threshold,
                                neg_threshold=cfg.negative_iou_threshold,
                                negpos_ratio=cfg.ohem_negpos_ratio, pred_seg=cfg.pred_seg)

    # criterion_dis = nn.BCELoss()
    # Take the advice from WGAN
    criterion_dis = DiscriminatorLoss_Maskrcnn()
    criterion_gen = GeneratorLoss_Maskrcnn()


    if args.batch_alloc is not None:
        # e.g. args.batch_alloc: 24,24
        args.batch_alloc = [int(x) for x in args.batch_alloc.split(',')]
        if sum(args.batch_alloc) != args.batch_size:
            print('Error: Batch allocation (%s) does not sum to batch size (%s).' % (args.batch_alloc, args.batch_size))
            exit(-1)

    net = CustomDataParallel(NetLoss(net, criterion, pred_seg=cfg.pred_seg))

    if args.cuda:
        net     = net.cuda()
        # NOTE
        if cfg.pred_seg:
            dis_net = nn.DataParallel(dis_net)
            dis_net = dis_net.cuda()
    
    # Initialize everything
    if not cfg.freeze_bn: yolact_net.freeze_bn() # Freeze bn so we don't kill our means
    yolact_net(torch.zeros(1, 3, cfg.max_size, cfg.max_size).cuda())

    if not cfg.freeze_bn: yolact_net.freeze_bn(True)

    # loss counters
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)
    
    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)
    # NOTE
    val_loader  = data.DataLoader(val_dataset, args.batch_size,
                                  num_workers=args.num_workers*2,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)
    
    
    save_path = lambda epoch, iteration: SavePath(cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()

    global loss_types # Forms the print order
                      # TODO: the global statement lets this function modify the module-level variable.
    loss_avgs  = { k: MovingAverage(100) for k in loss_types }

    # NOTE
    # Enable AMP
    amp_enable = cfg.amp
    scaler = torch.cuda.amp.GradScaler(enabled=amp_enable)

    print('Begin training!')
    print()
    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter

            if (epoch+1)*epoch_size < iteration:
                continue
            
            for datum in data_loader:
                # Stop if we've reached an epoch if we're resuming from start_iter
                if iteration == (epoch+1)*epoch_size:
                    break
      
                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have changed
                        for avg in loss_avgs.values():
                            avg.reset()
                
                # If a config setting was changed, remove it from the list so we don't keep checking
                if changed:
                    cfg.delayed_settings = [x for x in cfg.delayed_settings if x[0] > iteration]

                # Warm up by linearly interpolating the learning rate from some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                    set_lr(optimizer_gen, (args.lr - cfg.lr_warmup_init) * (iteration / cfg.lr_warmup_until) + cfg.lr_warmup_init)

                # Adjust the learning rate at the given iterations, but also if we resume from past that iteration
                while step_index < len(cfg.lr_steps) and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer_gen, args.lr * (args.gamma ** step_index))
                
                
                # NOTE
                if cfg.pred_seg:
                    # ====== GAN Train ======
                    # train the gen and dis in different iteration
                    # it_alter_period = iteration % (cfg.gen_iter + cfg.dis_iter)
                    # FIXME:
                    # present_time = time.time()
                    for _ in range(cfg.dis_iter):
                        # freeze_pretrain(yolact_net, freeze=False)
                        # freeze_pretrain(net, freeze=False)
                        # freeze_pretrain(dis_net, freeze=False)
                        # if it_alter_period == 0:
                        #     print('--- Generator     freeze   ---')
                        #     print('--- Discriminator training ---')

                        if cfg.amp:
                            with torch.cuda.amp.autocast():
                                # ----- Discriminator part -----
                                # seg_list  is the prediction mask
                                # can be regarded as generated images from YOLACT
                                # pred_list is the prediction label
                                # seg_list  dim: list of (138,138,instances)
                                # pred_list dim: list of (instances)
                                losses, seg_list, pred_list = net(datum)
                                seg_clas, mask_clas, b, seg_size = seg_mask_clas(seg_list, pred_list, datum)
                                
                                # input image size is [b, 3, 550, 550]
                                # downsample to       [b, 3, seg_h, seg_w]
                                image_list = [img.to(cuda0) for img in datum[0]]
                                image    = interpolate(torch.stack(image_list), size = seg_size, 
                                                            mode='bilinear',align_corners=False)

                                # Because in the discriminator training we do not
                                # want the gradient to flow back to the generator part,
                                # we detach seg_clas (mask_clas comes from the data and has no grad)
                        
                                output_pred = dis_net(img = image.detach(), seg = seg_clas.detach())
                                output_grou = dis_net(img = image.detach(), seg = mask_clas.detach())

                                # p = elem_mul_p.squeeze().permute(1,2,0).cpu().detach().numpy()
                                # g = elem_mul_g.squeeze().permute(1,2,0).cpu().detach().numpy()
                                # image = image.squeeze().permute(1,2,0).cpu().detach().numpy()
                                # from PIL import Image
                                # seg_PIL = Image.fromarray(p, 'RGB')
                                # mask_PIL = Image.fromarray(g, 'RGB')
                                # seg_PIL.save('mul_seg.png')
                                # mask_PIL.save('mul_mask.png')
                                # raise RuntimeError

                                # from matplotlib import pyplot as plt
                                # fig, (ax1, ax2) = plt.subplots(1,2)
                                # ax1.imshow(mask_show)
                                # ax2.imshow(seg_show)
                                # plt.show(block=False)
                                # plt.pause(2)
                                # plt.close()  

                                # if iteration % (cfg.gen_iter + cfg.dis_iter) == 0:
                                #     print(f'Probability of fake is fake: {output_pred.mean().item():.2f}')
                                #     print(f'Probability of real is real: {output_grou.mean().item():.2f}')

                                # 0 for Fake/Generated
                                # 1 for True/Ground Truth
                                # fake_label = torch.zeros(b)
                                # real_label = torch.ones(b)

                                # Advice of practical implementation 
                                # from https://arxiv.org/abs/1611.08408
                                # loss_pred = -criterion_dis(output_pred,target=real_label)
                                # loss_pred = criterion_dis(output_pred,target=fake_label)
                                # loss_grou = criterion_dis(output_grou,target=real_label)
                                # loss_dis  = loss_pred + loss_grou

                                # Wasserstein Distance (Earth-Mover)
                                loss_dis = criterion_dis(input=output_grou,target=output_pred)
                            
                            # Backprop the discriminator
                            # Scales loss. Calls backward() on scaled loss to create scaled gradients.
                            scaler.scale(loss_dis).backward()
                            scaler.step(optimizer_dis)
                            scaler.update()
                            optimizer_dis.zero_grad()

                            # clip the updated parameters
                            _ = [par.data.clamp_(-cfg.clip_value, cfg.clip_value) for par in dis_net.parameters()]


                            # ----- Generator part -----
                            # freeze_pretrain(yolact_net, freeze=False)
                            # freeze_pretrain(net, freeze=False)
                            # freeze_pretrain(dis_net, freeze=False)
                            # if it_alter_period == (cfg.dis_iter+1):
                            #     print('--- Generator     training ---')
                            #     print('--- Discriminator freeze   ---')

                            # FIXME:
                            # print(f'dis time pass: {time.time()-present_time:.2f}')
                            # FIXME:
                            # present_time = time.time()

                            with torch.cuda.amp.autocast():
                                losses, seg_list, pred_list = net(datum)
                                seg_clas, mask_clas, b, seg_size = seg_mask_clas(seg_list, pred_list, datum)
                                image_list = [img.to(cuda0) for img in datum[0]]
                                image      = interpolate(torch.stack(image_list), size = seg_size, 
                                                            mode='bilinear',align_corners=False)
                                # Perform forward pass of all-fake batch through D
                                # NOTE this seg_clas CANNOT be detached, so that the
                                # gradient can flow back to the generator
                                # output = dis_net(img = image, seg = seg_clas)
                                # Since log(1-D(G(x))) does not provide sufficient gradients,
                                # we want log(D(G(x))) instead; this can be achieved by
                                # using real_label as the target.
                                # This step is crucial for the discriminator's information
                                # to reach the generator.
                                # Calculate G's loss based on this output
                                # real_label = torch.ones(b)
                                # loss_gen   = criterion_dis(output,target=real_label)
                            
                                # GAN MaskRCNN
                                output_pred = dis_net(img = image, seg = seg_clas)
                                output_grou = dis_net(img = image, seg = mask_clas)

                                # Advice from WGAN
                                # loss_gen = -torch.mean(output)
                                loss_gen = criterion_gen(input=output_grou,target=output_pred)

                                # since the dis is already frozen, the gradients will only
                                # be recorded for the YOLACT part
                                losses = { k: (v).mean() for k,v in losses.items() } # Mean here because Dataparallel
                                loss = sum([losses[k] for k in losses])
                                loss += loss_gen
                            
                            # Generator backprop
                            scaler.scale(loss).backward()
                            scaler.step(optimizer_gen)
                            scaler.update()
                            optimizer_gen.zero_grad()
                            

                            # FIXME:
                            # print(f'gen time pass: {time.time()-present_time:.2f}')
                            # print('GAN part over')

                        else:
                            losses, seg_list, pred_list = net(datum)
                            seg_clas, mask_clas, b, seg_size = seg_mask_clas(seg_list, pred_list, datum)

                            image_list = [img.to(cuda0) for img in datum[0]]
                            image    = interpolate(torch.stack(image_list), size = seg_size, 
                                                        mode='bilinear',align_corners=False)

                            output_pred = dis_net(img = image.detach(), seg = seg_clas.detach())
                            output_grou = dis_net(img = image.detach(), seg = mask_clas.detach())
                            loss_dis = criterion_dis(input=output_grou,target=output_pred)

                            loss_dis.backward()
                            optimizer_dis.step()
                            optimizer_dis.zero_grad()
                            _ = [par.data.clamp_(-cfg.clip_value, cfg.clip_value) for par in dis_net.parameters()]
                        
                            # ----- Generator part -----
                            # FIXME:
                            # print(f'dis time pass: {time.time()-present_time:.2f}')
                            # FIXME:
                            # present_time = time.time()

                            losses, seg_list, pred_list = net(datum)
                            seg_clas, mask_clas, b, seg_size = seg_mask_clas(seg_list, pred_list, datum)
                            image_list = [img.to(cuda0) for img in datum[0]]
                            image      = interpolate(torch.stack(image_list), size = seg_size, 
                                                        mode='bilinear',align_corners=False)
                                                        
                            # GAN MaskRCNN
                            output_pred = dis_net(img = image, seg = seg_clas)
                            output_grou = dis_net(img = image, seg = mask_clas)

                            loss_gen = criterion_gen(input=output_grou,target=output_pred)

                            # since the dis is already frozen, the gradients will only
                            # be recorded for the YOLACT part
                            losses = { k: (v).mean() for k,v in losses.items() } # Mean here because Dataparallel
                            loss = sum([losses[k] for k in losses])
                            loss += loss_gen
                            loss.backward()
                            # Do this to free up vram even if loss is not finite
                            optimizer_gen.zero_grad()
                            if torch.isfinite(loss).item():
                                # since optimizer_gen covers YOLACT only,
                                # only the generator will be updated
                                optimizer_gen.step()       

                            # FIXME:
                            # print(f'gen time pass: {time.time()-present_time:.2f}')
                            # print('GAN part over')
                else:
                    # ====== Normal YOLACT Train ======
                    # Zero the grad to get ready to compute gradients
                    optimizer_gen.zero_grad()
                    # Forward Pass + Compute loss at the same time (see CustomDataParallel and NetLoss)
                    losses = net(datum)
                    losses = { k: (v).mean() for k,v in losses.items() } # Mean here because Dataparallel
                    loss = sum([losses[k] for k in losses])
                    # no_inf_mean removes some components from the loss, so make sure to backward through all of it
                    # all_loss = sum([v.mean() for v in losses.values()])

                    # Backprop
                    loss.backward() # Do this to free up vram even if loss is not finite
                    if torch.isfinite(loss).item():
                        optimizer_gen.step()                    
                
                # Add the loss to the moving average for bookkeeping
                _ = [loss_avgs[k].add(losses[k].item()) for k in losses]
                # for k in losses:
                #     loss_avgs[k].add(losses[k].item())

                cur_time  = time.time()
                elapsed   = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(datetime.timedelta(seconds=(cfg.max_iter-iteration) * time_avg.get_avg())).split('.')[0]
                    
                    total = sum([loss_avgs[k].get_avg() for k in losses])
                    loss_labels = sum([[k, loss_avgs[k].get_avg()] for k in loss_types if k in losses], [])
                    if cfg.pred_seg:
                        print(('[%3d] %7d ||' + (' %s: %.3f |' * len(losses)) + ' T: %.3f || ETA: %s || timer: %.3f')
                                % tuple([epoch, iteration] + loss_labels + [total, eta_str, elapsed]), flush=True)
                        # print(f'Generator loss: {loss_gen:.2f} | Discriminator loss: {loss_dis:.2f}')
                    # Loss Key:
                    #  - B: Box Localization Loss
                    #  - C: Class Confidence Loss
                    #  - M: Mask Loss
                    #  - P: Prototype Loss
                    #  - D: Coefficient Diversity Loss
                    #  - E: Class Existence Loss
                    #  - S: Semantic Segmentation Loss
                    #  - T: Total loss

                if args.log:
                    precision = 5
                    loss_info = {k: round(losses[k].item(), precision) for k in losses}
                    loss_info['T'] = round(loss.item(), precision)

                    if args.log_gpu:
                        log.log_gpu_stats = (iteration % 10 == 0) # nvidia-smi is sloooow
                        
                    log.log('train', loss=loss_info, epoch=epoch, iter=iteration,
                        lr=round(cur_lr, 10), elapsed=elapsed)

                    log.log_gpu_stats = args.log_gpu
                
                iteration += 1

                if iteration % args.save_interval == 0 and iteration != args.start_iter:
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder, cfg.name)

                    print('Saving state, iter:', iteration)
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            print('Deleting old save...')
                            os.remove(latest)
            
            # This is done per epoch
            if args.validation_epoch > 0:
                # NOTE: Validation loss
                # if cfg.pred_seg:
                #     net.eval()
                #     dis_net.eval()
                #     cfg.gan_eval = True
                #     with torch.no_grad():
                #         for datum in tqdm(val_loader, desc='GAN Validation'):
                #             losses, seg_list, pred_list = net(datum)
                #             losses, seg_list, pred_list = net(datum)
                #             # TODO: warp below as a function
                #             seg_list = [v.permute(2,1,0).contiguous() for v in seg_list]
                #             b = len(seg_list) # batch size
                #             _, seg_h, seg_w = seg_list[0].size()

                #             seg_clas    = torch.zeros(b, cfg.num_classes-1, seg_h, seg_w)
                #             mask_clas   = torch.zeros(b, cfg.num_classes-1, seg_h, seg_w)
                #             target_list = [target for target in datum[1][0]]
                #             mask_list   = [interpolate(mask.unsqueeze(0), size = (seg_h,seg_w),mode='bilinear', \
                #                             align_corners=False).squeeze() for mask in datum[1][1]]

                #             for idx in range(b):
                #                 for i, (pred, i_target) in enumerate(zip(pred_list[idx], target_list[idx])):
                #                     seg_clas[idx, pred, ...]                 += seg_list[idx][i,...]
                #                     mask_clas[idx, i_target[-1].long(), ...] += mask_list[idx][i,...]
                               
                #             seg_clas = torch.clamp(seg_clas, 0, 1)
                #             image    = interpolate(torch.stack(datum[0]), size = (seg_h,seg_w), 
                #                                         mode='bilinear',align_corners=False)
                #             real_label  = torch.ones(b)
                #             output_pred = dis_net(img = image, seg = seg_clas)
                #             output_grou = dis_net(img = image, seg = mask_clas)
                #             loss_pred   = -criterion_dis(output_pred,target=real_label)
                #             loss_grou   =  criterion_dis(output_grou,target=real_label)
                #             loss_dis    = loss_pred + loss_grou
                #         losses = { k: (v).mean() for k,v in losses.items() }
                #         loss = sum([losses[k] for k in losses])
                #         val_loss = loss - cfg.lambda_dis*loss_dis
                #         schedule_dis.step(loss_dis)
                #         lr = [group['lr'] for group in optimizer_dis.param_groups]
                #         print(f'Discriminator lr: {lr[0]}')
                #     net.train()
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    cfg.gan_eval = False
                    if cfg.pred_seg:
                        dis_net.eval()
                    compute_validation_map(epoch, iteration, yolact_net, val_dataset, log if args.log else None)
        
        # Compute validation mAP after training is finished
        compute_validation_map(epoch, iteration, yolact_net, val_dataset, log if args.log else None)
    except KeyboardInterrupt:
        if args.interrupt:
            print('Stopping early. Saving network...')
            
            # Delete previous copy of the interrupted network so we don't spam the weights folder
            SavePath.remove_interrupt(args.save_folder)
            
            yolact_net.save_weights(save_path(epoch, repr(iteration) + '_interrupt'))
        exit()

    yolact_net.save_weights(save_path(epoch, iteration))
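
The GAN branch above follows the WGAN recipe its comments reference: the critic is trained on a Wasserstein estimate of the distance between real and generated masks, its weights are clamped after every update, and the generator then minimizes the negated critic score of its own output. A minimal hedged sketch of those three steps under that reading; the Critic below is a toy stand-in, not the project's Discriminator_Wgan, and the tensors are random placeholders.

import torch
import torch.nn as nn

class Critic(nn.Module):
    """Toy stand-in: maps an image-sized tensor to a scalar score."""
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(nn.Flatten(), nn.Linear(3 * 8 * 8, 1))

    def forward(self, x):
        return self.net(x)

critic = Critic()
opt_d = torch.optim.SGD(critic.parameters(), lr=1e-3)
clip_value = 0.01

real = torch.rand(4, 3, 8, 8)   # stand-in for ground-truth masks/images
fake = torch.rand(4, 3, 8, 8)   # stand-in for generator output; in the full setup
                                # it would be detached for the critic step

# Critic step: the Wasserstein estimate is E[D(real)] - E[D(fake)]; minimize its negative.
loss_d = critic(fake).mean() - critic(real).mean()
opt_d.zero_grad()
loss_d.backward()
opt_d.step()

# Weight clipping keeps the critic roughly Lipschitz, as in the original WGAN recipe.
for p in critic.parameters():
    p.data.clamp_(-clip_value, clip_value)

# Generator step would then minimize -E[D(fake)], with gradients flowing through `fake`.
loss_g = -critic(fake).mean()
print(float(loss_d), float(loss_g))
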
コード例 #16
def evalvideo(net:Yolact, path:str):
    # If the path is a digit, parse it as a webcam index
    if path.isdigit():
        vid = cv2.VideoCapture(int(path))
    else:
        vid = cv2.VideoCapture(path)
    
    if not vid.isOpened():
        print('Could not open video "%s"' % path)
        exit(-1)
    
    net = CustomDataParallel(net).cuda()
    transform = torch.nn.DataParallel(FastBaseTransform()).cuda()
    frame_times = MovingAverage()
    fps = 0
    # The 0.8 is to account for the overhead of time.sleep
    frame_time_target = 0.8 / vid.get(cv2.CAP_PROP_FPS)

    def cleanup_and_exit():
        print()
        pool.terminate()
        vid.release()
        cv2.destroyAllWindows()
        exit()

    def get_next_frame(vid):
        return [vid.read()[1] for _ in range(args.video_multiframe)]

    def transform_frame(frames):
        with torch.no_grad():
            frames = [torch.from_numpy(frame).cuda().float() for frame in frames]
            return frames, transform(torch.stack(frames, 0))

    def eval_network(inp):
        with torch.no_grad():
            frames, imgs = inp
            return frames, net(imgs)

    def prep_frame(inp):
        with torch.no_grad():
            frame, preds = inp
            return prep_display(preds, frame, None, None, undo_transform=False, class_color=True)

    extract_frame = lambda x, i: (x[0][i] if x[1][i] is None else x[0][i].to(x[1][i]['box'].device), [x[1][i]])

    # Prime the network on the first frame because I do some thread unsafe things otherwise
    print('Initializing model... ', end='')
    eval_network(transform_frame(get_next_frame(vid)))
    print('Done.')

    # For each frame the sequence of functions it needs to go through to be processed (in reversed order)
    sequence = [prep_frame, eval_network, transform_frame]
    pool = ThreadPool(processes=len(sequence) + args.video_multiframe)

    active_frames = []

    print()
    while vid.isOpened():
        start_time = time.time()

        # Start loading the next frames from the disk
        next_frames = pool.apply_async(get_next_frame, args=(vid,))
        
        # For each frame in our active processing queue, dispatch a job
        # for that frame using the current function in the sequence
        for frame in active_frames:
            frame['value'] = pool.apply_async(sequence[frame['idx']], args=(frame['value'],))
        
        # For each frame whose job was the last in the sequence (i.e. for all final outputs)
        for frame in active_frames:
            if frame['idx'] == 0:
                # Wait here so that the frame has time to process and so that the video plays at the proper speed
                time.sleep(frame_time_target)

                cv2.imshow(path, frame['value'].get())
                if cv2.waitKey(1) == 27: # Press Escape to close
                    cleanup_and_exit()

        # Remove the finished frames from the processing queue
        active_frames = [x for x in active_frames if x['idx'] > 0]

        # Finish evaluating every frame in the processing queue and advance their position in the sequence
        for frame in list(reversed(active_frames)):
            frame['value'] = frame['value'].get()
            frame['idx'] -= 1

            if frame['idx'] == 0:
                # Split this up into individual threads for prep_frame since it doesn't support batch size
                active_frames += [{'value': extract_frame(frame['value'], i), 'idx': 0} for i in range(1, args.video_multiframe)]
                frame['value'] = extract_frame(frame['value'], 0)

        
        # Finish loading in the next frames and add them to the processing queue
        active_frames.append({'value': next_frames.get(), 'idx': len(sequence)-1})
        
        # Compute FPS
        frame_times.add(time.time() - start_time)
        fps = args.video_multiframe / frame_times.get_avg()

        print('\rAvg FPS: %.2f     ' % fps, end='')
    
    cleanup_and_exit()
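
evalvideo keeps several frames in flight at once: each frame walks backwards through a list of stages, and every stage is dispatched as an asynchronous job on a shared ThreadPool while the next frame is already being loaded. A stripped-down sketch of the same scheduling idea with dummy string-returning stages standing in for loading, preprocessing, the network, and display.

from multiprocessing.pool import ThreadPool

def load():        return 'frame'
def transform(x):  return x + '->transformed'
def evaluate(x):   return x + '->evaluated'
def display(x):    return x + '->displayed'

# Stages are listed in reverse order; 'idx' counts how many stages a frame still has to pass.
sequence = [display, evaluate, transform]
pool = ThreadPool(processes=len(sequence) + 1)
active = []

for step in range(6):
    next_item = pool.apply_async(load)                 # start loading the next frame

    for item in active:                                # advance every in-flight frame
        item['value'] = pool.apply_async(sequence[item['idx']], args=(item['value'],))

    for item in active:                                # frames at idx 0 are finished
        if item['idx'] == 0:
            print(item['value'].get())

    active = [x for x in active if x['idx'] > 0]       # drop finished frames
    for item in active:                                # collect results, move to the next stage
        item['value'] = item['value'].get()
        item['idx'] -= 1

    active.append({'value': next_item.get(), 'idx': len(sequence) - 1})

pool.terminate()
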
コード例 #17
    def evalvideo(self, net: Yolact, path: str):
        # If the path is a digit, parse it as a webcam index
        is_webcam = path.isdigit()

        if is_webcam:
            vid = cv2.VideoCapture(int(path))
        else:
            vid = cv2.VideoCapture(path)

        if not vid.isOpened():
            print('Could not open video "%s"' % path)
            exit(-1)

        net = CustomDataParallel(net).cuda()
        transform = torch.nn.DataParallel(FastBaseTransform()).cuda()
        frame_times = MovingAverage(100)
        fps = 0
        # Target the source video's frame interval
        frame_time_target = 1 / vid.get(cv2.CAP_PROP_FPS)
        running = True

        def cleanup_and_exit():
            print()
            pool.terminate()
            vid.release()
            cv2.destroyAllWindows()
            exit()

        def get_next_frame(vid):
            return [vid.read()[1] for _ in range(args.video_multiframe)]

        def transform_frame(frames):
            with torch.no_grad():
                frames = [
                    torch.from_numpy(frame).cuda().float() for frame in frames
                ]
                return frames, transform(torch.stack(frames, 0))

        def eval_network(inp):
            with torch.no_grad():
                frames, imgs = inp
                return frames, net(imgs)

        def prep_frame(inp):
            with torch.no_grad():
                frame, preds = inp
                return self.prep_display(preds,
                                         frame,
                                         None,
                                         None,
                                         undo_transform=False,
                                         class_color=True)

        frame_buffer = Queue()
        video_fps = 0

        # All this timing code is to make sure the video plays back at a steady, correct speed
        def play_video():
            nonlocal frame_buffer, running, video_fps, is_webcam

            video_frame_times = MovingAverage(100)
            frame_time_stabilizer = frame_time_target
            last_time = None
            stabilizer_step = 0.0005

            while running:
                frame_time_start = time.time()

                if not frame_buffer.empty():
                    next_time = time.time()
                    if last_time is not None:
                        video_frame_times.add(next_time - last_time)
                        video_fps = 1 / video_frame_times.get_avg()
                    cv2.imshow(path, frame_buffer.get())
                    last_time = next_time

                #self.image_pub.publish(self.bridge.cv2_to_imgmsg(frame_buffer.get(), "bgr8"))

                if cv2.waitKey(1) == 27:  # Press Escape to close
                    running = False

                buffer_size = frame_buffer.qsize()
                if buffer_size < args.video_multiframe:
                    frame_time_stabilizer += stabilizer_step
                elif buffer_size > args.video_multiframe:
                    frame_time_stabilizer -= stabilizer_step
                    if frame_time_stabilizer < 0:
                        frame_time_stabilizer = 0

                new_target = frame_time_stabilizer if is_webcam else max(
                    frame_time_stabilizer, frame_time_target)

                next_frame_target = max(
                    2 * new_target - video_frame_times.get_avg(), 0)
                target_time = frame_time_start + next_frame_target - 0.001  # Let's just subtract a millisecond to be safe

                # This gives more accurate timing than if sleeping the whole amount at once
                while time.time() < target_time:
                    time.sleep(0.001)

        extract_frame = lambda x, i: (x[0][i] if x[1][i][
            'detection'] is None else x[0][i].to(x[1][i]['detection']['box'].
                                                 device), [x[1][i]])

        # Prime the network on the first frame because I do some thread unsafe things otherwise
        print('Initializing model... ', end='')
        eval_network(transform_frame(get_next_frame(vid)))
        print('Done.')

        # For each frame the sequence of functions it needs to go through to be processed (in reversed order)
        sequence = [prep_frame, eval_network, transform_frame]
        pool = ThreadPool(processes=len(sequence) + args.video_multiframe + 2)
        pool.apply_async(play_video)

        active_frames = []

        print()

        while vid.isOpened() and running:
            start_time = time.time()

            # Start loading the next frames from the disk
            next_frames = pool.apply_async(get_next_frame, args=(vid, ))

            # For each frame in our active processing queue, dispatch a job
            # for that frame using the current function in the sequence
            for frame in active_frames:
                frame['value'] = pool.apply_async(sequence[frame['idx']],
                                                  args=(frame['value'], ))

            # For each frame whose job was the last in the sequence (i.e. for all final outputs)
            for frame in active_frames:
                if frame['idx'] == 0:
                    frame_buffer.put(frame['value'].get())

            # Remove the finished frames from the processing queue
            active_frames = [x for x in active_frames if x['idx'] > 0]

            # Finish evaluating every frame in the processing queue and advance their position in the sequence
            for frame in list(reversed(active_frames)):
                frame['value'] = frame['value'].get()
                frame['idx'] -= 1

                if frame['idx'] == 0:
                    # Split this up into individual threads for prep_frame since it doesn't support batch size
                    active_frames += [{
                        'value': extract_frame(frame['value'], i),
                        'idx': 0
                    } for i in range(1, args.video_multiframe)]
                    # active_frames += [{'value': extract_frame(frame['value'], i), 'idx': 0} for i in range(1, len(frame['value'][0]))]
                    frame['value'] = extract_frame(frame['value'], 0)

            # Finish loading in the next frames and add them to the processing queue
            active_frames.append({
                'value': next_frames.get(),
                'idx': len(sequence) - 1
            })

            # Compute FPS
            frame_times.add(time.time() - start_time)
            fps = args.video_multiframe / frame_times.get_avg()

            print(
                '\rProcessing FPS: %.2f | Video Playback FPS: %.2f | Frames in Buffer: %d    '
                % (fps, video_fps, frame_buffer.qsize()),
                end='')

        cleanup_and_exit()
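
The playback thread above nudges its target frame time up or down depending on how full the frame buffer is, then sleeps in 1 ms slices until the target instant, which paces the display more accurately than one long sleep. A tiny sketch of that sleep-until-target idea with an illustrative 30 fps target.

import time

def sleep_until(target_time, slice_s=0.001):
    """Sleep in short slices until `target_time` (seconds, as from time.time())."""
    while time.time() < target_time:
        time.sleep(slice_s)

target_fps = 30.0
frame_time_target = 1.0 / target_fps

start = time.time()
for frame_idx in range(5):
    frame_start = time.time()
    # ... render / show the frame here ...
    # Subtract a millisecond, as the original does, to err on the early side.
    sleep_until(frame_start + frame_time_target - 0.001)

print('played 5 frames in %.3f s' % (time.time() - start))
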
コード例 #18
ファイル: train.py プロジェクト: weitaoatvison/Yolact_minimal
    criterion = nn.DataParallel(criterion).cuda()

dataset = COCODetection(image_path=cfg.dataset.train_images,
                        info_file=cfg.dataset.train_info,
                        augmentation=SSDAugmentation())

data_loader = data.DataLoader(dataset,
                              cfg.batch_size,
                              num_workers=8,
                              shuffle=True,
                              collate_fn=detection_collate,
                              pin_memory=True)

step_index = 0
start_step = resume_step if args.resume else 0
batch_time = MovingAverage()
loss_types = ['B', 'C', 'M', 'S']
loss_avgs = {k: MovingAverage() for k in loss_types}
map_tables = []
training = True
step = start_step
writer = SummaryWriter('tensorboard_log')
try:  # Use try-except to use ctrl+c to stop and save early.
    while training:
        for i, datum in enumerate(data_loader):
            if cfg.warmup_until > 0 and step <= cfg.warmup_until:  # Warm up learning rate.
                set_lr(optimizer, (cfg.lr - cfg.warmup_init) *
                       (step / cfg.warmup_until) + cfg.warmup_init)

            # Adjust the learning rate according to the current step.
            while step_index < len(
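
The warmup and decay logic in this snippet (and in コード例 #15 and #21) amounts to: linearly interpolate the learning rate from warmup_init up to the base LR over the first warmup_until steps, then multiply by gamma once for every milestone in lr_steps that has been passed. A self-contained sketch of that schedule; the constants are illustrative, not the repos' defaults.

def learning_rate(step, base_lr=1e-3, warmup_init=1e-4, warmup_until=500,
                  lr_steps=(280000, 360000, 400000), gamma=0.1):
    # Linear warmup: interpolate from warmup_init up to base_lr.
    if warmup_until > 0 and step <= warmup_until:
        return (base_lr - warmup_init) * (step / warmup_until) + warmup_init
    # Step decay: multiply by gamma for every milestone already passed.
    step_index = sum(1 for s in lr_steps if step >= s)
    return base_lr * (gamma ** step_index)

for step in (0, 250, 500, 100000, 300000, 410000):
    print(step, learning_rate(step))
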
コード例 #19
ファイル: eval.py プロジェクト: zengtiwei/CIoU
        else:
            evalimage(net, args.image)
        return
    elif args.images is not None:
        inp, out = args.images.split(':')
        evalimages(net, inp, out)
        return
    elif args.video is not None:
        if ':' in args.video:
            inp, out = args.video.split(':')
            evalvideo(net, inp, out)
        else:
            evalvideo(net, args.video)
        return

    frame_times = MovingAverage()
    dataset_size = len(dataset) if args.max_images < 0 else min(args.max_images, len(dataset))
    progress_bar = ProgressBar(30, dataset_size)

    print()

    if not args.display and not args.benchmark:
        # For each class and iou, stores tuples (score, isPositive)
        # Index ap_data[type][iouIdx][classIdx]
        ap_data = {
            'box' : [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds],
            'mask': [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds]
        }
        detections = Detections()
    else:
        timer.disable('Load Data')
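
The ap_data structure above keeps one accumulator per (IoU threshold, class) holding (score, is_positive) pairs plus a ground-truth count; average precision is then the area under the precision-recall curve built from those pairs. A minimal sketch of such an accumulator for illustration only; it is not the repo's APDataObject, which typically also interpolates the precision envelope.

class APAccumulator:
    def __init__(self):
        self.data = []          # (confidence score, is_true_positive)
        self.num_gt = 0         # number of ground-truth instances for this class

    def push(self, score, is_positive):
        self.data.append((score, is_positive))

    def add_gt(self, n=1):
        self.num_gt += n

    def average_precision(self):
        if self.num_gt == 0:
            return 0.0
        tp, fp, precisions, recalls = 0, 0, [], []
        # Sweep detections from most to least confident, accumulating TP/FP counts.
        for score, positive in sorted(self.data, key=lambda x: -x[0]):
            tp += positive
            fp += not positive
            precisions.append(tp / (tp + fp))
            recalls.append(tp / self.num_gt)
        # Area under the precision-recall curve, integrated over recall.
        ap, prev_recall = 0.0, 0.0
        for p, r in zip(precisions, recalls):
            ap += p * (r - prev_recall)
            prev_recall = r
        return ap

acc = APAccumulator()
acc.add_gt(2)
acc.push(0.9, True)
acc.push(0.8, False)
acc.push(0.6, True)
print('AP: %.3f' % acc.average_precision())
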
コード例 #20
    # GPU
    net = net.cuda()
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

    x = torch.zeros((1, 3, cfg.max_size, cfg.max_size))
    y = net(x)

    for p in net.prediction_layers:
        print(p.last_conv_size)

    print()
    for k, a in y.items():
        print(k + ': ', a.size(), torch.sum(a))
    exit()
    
    net(x)
    # timer.disable('pass2')
    avg = MovingAverage()
    try:
        while True:
            timer.reset()
            with timer.env('everything else'):
                net(x)
            avg.add(timer.total_time())
            print('\033[2J') # Clear the console
            timer.print_stats()
            print('Avg fps: %.2f\tAvg ms: %.2f         ' % (1/avg.get_avg(), avg.get_avg()*1000))
    except KeyboardInterrupt:
        pass
Code example #21
0
File: train_ex.py Project: ethan-jiang-1/yolact
def train(args, dataset, val_dataset, data_loader, yolact_net, netloss, optimizer, log):
    # loss counters
    #loc_loss = 0
    #conf_loss = 0
    iteration = max(args.start_iter, 0)
    last_time = time.time()
    
    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    time_avg = MovingAverage()

    #global loss_types # Forms the print order
    loss_avgs  = get_default_log_avgs()

    epoch_size = len(dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    print('Begin training!')
    print("num_epochs", num_epochs)
 
    for epoch in range(num_epochs):
        # Resume from start_iter
        if (epoch+1)*epoch_size < iteration:
            continue
        
        for datum in data_loader:
            # Stop if we've reached an epoch if we're resuming from start_iter
            if iteration == (epoch+1)*epoch_size:
                break

            # Stop at the configured number of iterations even if mid-epoch
            if iteration == cfg.max_iter:
                break

            update_cfg_lr(iteration, optimizer, loss_avgs, args)

            # Warm up by linearly interpolating the learning rate from some smaller value
            if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                set_lr(optimizer, (args.lr - cfg.lr_warmup_init) * (iteration / cfg.lr_warmup_until) + cfg.lr_warmup_init)
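                # e.g. with lr=1e-3, lr_warmup_init=1e-4 and lr_warmup_until=500, iteration 250
                # gives (1e-3 - 1e-4) * 0.5 + 1e-4 = 5.5e-4, i.e. the lr ramps up linearly.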

            # Adjust the learning rate at the given iterations, but also if we resume from past that iteration
            while step_index < len(cfg.lr_steps) and iteration >= cfg.lr_steps[step_index]:
                step_index += 1
                set_lr(optimizer, args.lr * (args.gamma ** step_index))
            
            # Zero the grad to get ready to compute gradients
            optimizer.zero_grad()

            # Forward Pass + Compute loss at the same time (see CustomDataParallel and NetLoss)
            losses = netloss(datum)
            
            losses = { k: (v).mean() for k,v in losses.items() } # Mean here because Dataparallel
            loss = sum([losses[k] for k in losses])
            
            # no_inf_mean removes some components from the loss, so make sure to backward through all of it
            # all_loss = sum([v.mean() for v in losses.values()])

            # Backprop
            loss.backward() # Do this to free up vram even if loss is not finite
            if torch.isfinite(loss).item():
                optimizer.step()
            
            # Add the loss to the moving average for bookkeeping
            for k in losses:
                loss_avgs[k].add(losses[k].item())

            cur_time  = time.time()
            elapsed   = cur_time - last_time
            last_time = cur_time

            # Exclude graph setup from the timing information
            if iteration != args.start_iter:
                time_avg.add(elapsed)

            if iteration % 10 == 0:
                prompt_progress(epoch, iteration, elapsed, time_avg, loss_avgs, losses)

            if args.log:
                log_iteration(log, losses, loss, iteration, epoch, elapsed, args)
            
            iteration += 1

            if iteration % args.save_interval == 0 and iteration != args.start_iter:
                save_yolact_net(yolact_net, args, iteration, epoch)
        
        # This is done per epoch
        if args.validation_epoch > 0:
            if epoch % args.validation_epoch == 0 and epoch > 0:
                compute_validation_map(epoch, iteration, yolact_net, val_dataset, log if args.log else None)

        if sig_num is not None:
            print("#r# break traning loop due to sig_num", sig_num)
            break
    
    # Compute validation mAP after training is finished
    compute_validation_map(epoch, iteration, yolact_net, val_dataset, log if args.log else None)

    saved_pathname = None
    if sig_num is not None:
        saved_pathname = save_yolact_net(yolact_net, args, iteration, epoch, mode="sig_num")
    else:
        saved_pathname = save_yolact_net(yolact_net, args, iteration, epoch, mode="final")

    return saved_pathname 
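
# Both training loops above adjust the optimizer through a set_lr helper that is not shown in
# these snippets. A minimal sketch of what such a helper usually does in PyTorch (an assumption,
# not the repositories' exact code):
def set_lr(optimizer, new_lr):
    # Write the new learning rate into every parameter group of the optimizer.
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr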
Code example #22
0
class yolact_prediction(object):
    def __init__(self):

        parser = argparse.ArgumentParser(description='YOLACT Predict in ROS')
        parser.add_argument(
            '--visual_top_k',
            default=100,
            type=int,
            help='Further restrict the number of predictions to parse')
        parser.add_argument('--traditional_nms',
                            default=False,
                            action='store_true',
                            help='Whether to use traditional nms.')
        parser.add_argument('--hide_mask',
                            default=False,
                            action='store_true',
                            help='Whether to display masks')
        parser.add_argument('--hide_bbox',
                            default=True,
                            action='store_true',
                            help='Whether to display bboxes')
        parser.add_argument('--hide_score',
                            default=True,
                            action='store_true',
                            help='Whether to display scores')
        parser.add_argument(
            '--show_lincomb',
            default=False,
            action='store_true',
            help='Whether to show the generating process of masks.')
        parser.add_argument(
            '--no_crop',
            default=False,
            action='store_true',
            help='Do not crop output masks with the predicted bounding box.')
        parser.add_argument('--real_time',
                            default=True,
                            action='store_true',
                            help='Show the detection results in real time.')
        parser.add_argument(
            '--visual_thre',
            default=0.3,
            type=float,
            help='Detections with a score under this threshold will be removed.'
        )
        self.args = parser.parse_args()

        r = rospkg.RosPack()
        self.bridge = CvBridge()

        self.path = r.get_path('yolact_prediction')
        model_name = "/src/weights/best_89.48_res101_custom_610000.pth"
        strs = model_name.split('_')
        config = strs[-3] + "_" + strs[-2] + "_config"
        update_config(config)
        print("Using " + config + " according to the trained_model.")

        with torch.no_grad():

            self.cuda = torch.cuda.is_available()
            if self.cuda:
                cudnn.benchmark = True
                cudnn.fastest = True
                torch.set_default_tensor_type('torch.cuda.FloatTensor')
            else:
                torch.set_default_tensor_type('torch.FloatTensor')

            self.net = Yolact()
            self.net.load_weights(self.path + model_name, self.cuda)
            print('Model loaded.')

            if self.cuda:
                self.net = self.net.cuda()

            self.time_here = 0
            self.frame_times = MovingAverage()

            #### Publisher
            self.rgb_pub = rospy.Publisher("Yolact_predict_img/",
                                           Image,
                                           queue_size=1)

            image_sub = rospy.Subscriber("/camera/color/image_raw",
                                         Image,
                                         self.img_cb,
                                         queue_size=1)

            print("============ Ready ============")

    def img_cb(self, rgb_data):

        self.rgb_data = rgb_data

        if self.rgb_data is not None:
            cv_image = self.bridge.imgmsg_to_cv2(self.rgb_data, "bgr8")

            predict_img = self.predict(cv_image)

            self.rgb_pub.publish(self.bridge.cv2_to_imgmsg(
                predict_img, "bgr8"))

            self.rgb_data = None

    def predict(self, img):

        rgb_origin = img
        img_numpy = img

        img = torch.from_numpy(img.copy()).float()
        img = img.cuda()

        img_h, img_w = img.shape[0], img.shape[1]
        img_trans = FastBaseTransform()(img.unsqueeze(0))

        net_outs = self.net(img_trans)
        nms_outs = NMS(net_outs, 0)

        results = after_nms(nms_outs,
                            img_h,
                            img_w,
                            crop_masks=not self.args.no_crop,
                            visual_thre=self.args.visual_thre)
        torch.cuda.synchronize()

        temp = self.time_here
        self.time_here = time.time()

        self.frame_times.add(self.time_here - temp)
        fps = 1 / self.frame_times.get_avg()

        frame_numpy = draw_img(results,
                               img,
                               self.args,
                               class_color=True,
                               fps=fps)

        return frame_numpy

    def onShutdown(self):
        rospy.loginfo("Shutdown.")
        torch.cuda.empty_cache()
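
# A minimal sketch of how a node such as yolact_prediction above is typically launched
# (a hypothetical entry point, not part of the original snippet):
if __name__ == '__main__':
    rospy.init_node('yolact_prediction', anonymous=False)
    node = yolact_prediction()
    rospy.on_shutdown(node.onShutdown)  # free GPU memory when the node is killed
    rospy.spin()  # hand control to ROS; img_cb runs for every incoming image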
Code example #23
0
def evalvideo(net: Sewer, path: str, out_path: str = None):
    is_webcam = path.isdigit()

    cudnn.benchmark = True

    if is_webcam:
        vid = cv2.VideoCapture(int(path))
    else:
        vid = cv2.VideoCapture(path)

    if not vid.isOpened():
        print('Could not open video "%s"' % path)
        exit(-1)

    target_fps = round(vid.get(cv2.CAP_PROP_FPS))
    frame_width = round(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = round(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))

    if is_webcam:
        num_frames = float('inf')
    else:
        num_frames = round(vid.get(cv2.CAP_PROP_FRAME_COUNT))

    net = CustomDataParallel(net).cuda()
    transform = torch.nn.DataParallel(FastBaseTransform()).cuda()
    frame_times = MovingAverage(100)
    fps = 0
    frame_time_target = 1 / target_fps
    running = True
    fps_str = ''
    vid_done = False
    frames_displayed = 0
    out_images_path = path.split('.')[0]

    base_name = os.path.splitext(os.path.basename(path))[0]
    createFolder(out_images_path)
    ori_folder = os.path.join(out_images_path, '#ori')
    res_folder = os.path.join(out_images_path, '#res')

    createFolder(ori_folder)
    createFolder(res_folder)

    if out_path is not None and not os.path.isdir(out_path):
        out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'avc1'),
                              target_fps, (frame_width, frame_height))

    def cleanup_and_exit():
        print()
        pool.terminate()
        vid.release()
        if out_path is not None and not os.path.isdir(out_path):
            out.release()
        cv2.destroyAllWindows()
        #exit()

    def get_next_frame(vid):
        frames = []
        for idx in range(args.video_multiframe):
            frame = vid.read()[1]
            if frame is None:
                return frames
            frames.append(frame)
        return frames

    def transform_frame(frames):
        with torch.no_grad():
            frames = [
                torch.from_numpy(frame).cuda().float() for frame in frames
            ]
            return frames, transform(torch.stack(frames, 0))

    def eval_network(inp):
        with torch.no_grad():
            frames, imgs = inp
            num_extra = 0
            while imgs.size(0) < args.video_multiframe:
                imgs = torch.cat([imgs, imgs[0].unsqueeze(0)], dim=0)
                num_extra += 1
            out = net(imgs)
            if num_extra > 0:
                out = out[:-num_extra]
            return frames, out

    def prep_frame(inp, fps_str, save_folder=None):
        with torch.no_grad():
            frame, preds = inp
            return prep_display_for_video(preds,
                                          frame,
                                          save_folder=save_folder,
                                          undo_transform=False,
                                          class_color=True,
                                          fps_str=fps_str,
                                          override_args=args)

    frame_buffer = Queue()
    video_fps = 0

    def play_video():
        try:
            nonlocal frame_buffer, running, video_fps, is_webcam, num_frames, frames_displayed, vid_done

            video_frame_times = MovingAverage(100)
            frame_time_stabilizer = frame_time_target
            last_time = None
            stabilizer_step = 0.0005
            progress_bar = ProgressBar(30, num_frames)

            while running:
                frame_time_start = time.time()

                if not frame_buffer.empty():
                    next_time = time.time()
                    if last_time is not None:
                        video_frame_times.add(next_time - last_time)
                        video_fps = 1 / video_frame_times.get_avg()
                    if out_path is None or os.path.isdir(out_path):
                        cv2.imshow(path, frame_buffer.get()[0])
                    else:
                        out.write(frame_buffer.get()[0])
                    frames_displayed += 1
                    last_time = next_time

                    if out_path is not None and not os.path.isdir(out_path):
                        if video_frame_times.get_avg() == 0:
                            fps = 0
                        else:
                            fps = 1 / video_frame_times.get_avg()
                        progress = frames_displayed / num_frames * 100
                        progress_bar.set_val(frames_displayed)
                        print(
                            '\rProcessing Frames  %s %6d / %6d (%5.2f%%)    %5.2f fps        '
                            % (repr(progress_bar), frames_displayed,
                               num_frames, progress, fps),
                            end='')

                if (out_path is None
                        or os.path.isdir(out_path)) and cv2.waitKey(1) == 27:
                    running = False

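                # Adapt the target frame time to the buffer level: lengthen it when the buffer is
                # running low (playback outpacing processing) and shorten it, never below zero,
                # when frames start to pile up.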
                if not vid_done:
                    buffer_size = frame_buffer.qsize()
                    if buffer_size < args.video_multiframe:
                        frame_time_stabilizer += stabilizer_step
                    elif buffer_size > args.video_multiframe:
                        frame_time_stabilizer -= stabilizer_step
                        if frame_time_stabilizer < 0:
                            frame_time_stabilizer = 0

                    new_target = frame_time_stabilizer if is_webcam else max(
                        frame_time_stabilizer, frame_time_target)
                else:
                    new_target = frame_time_target

                next_frame_target = max(
                    2 * new_target - video_frame_times.get_avg(), 0)
                target_time = frame_time_start + next_frame_target - 0.001

                if out_path is None or os.path.isdir(
                        out_path) or args.emulate_playback:
                    while time.time() < target_time:
                        time.sleep(0.001)
                else:
                    time.sleep(0.001)
        except:
            import traceback
            traceback.print_exc()

    extract_frame = lambda x, i: (x[0][i] if x[1][i]['detection'] is None else
                                  x[0][i].to(x[1][i]['detection']['box'].device
                                             ), [x[1][i]])

    print('Initializing model... ', end='')
    first_batch = eval_network(transform_frame(get_next_frame(vid)))
    print('Done.')

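    # The stages below run last-to-first: a fresh batch of frames enters at index
    # len(sequence) - 1 (transform_frame), moves through eval_network, and finishes at
    # prep_frame (index 0), at which point the rendered frames are pushed onto frame_buffer
    # for play_video to display or write out.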
    sequence = [prep_frame, eval_network, transform_frame]
    pool = ThreadPool(processes=len(sequence) + args.video_multiframe + 2)
    pool.apply_async(play_video)
    active_frames = [{
        'value': extract_frame(first_batch, i),
        'idx': 0
    } for i in range(len(first_batch[0]))]

    print()
    if out_path is None or os.path.isdir(out_path):
        print('Press Escape to close.')
    try:
        while vid.isOpened() and running:
            while frame_buffer.qsize() > 100:
                time.sleep(0.001)

            start_time = time.time()

            if not vid_done:
                next_frames = pool.apply_async(get_next_frame, args=(vid, ))
            else:
                next_frames = None

            if not (vid_done and len(active_frames) == 0):
                for frame in active_frames:
                    _args = [frame['value']]
                    if frame['idx'] == 0:
                        _args.append(fps_str)
                        if out_path is not None:
                            _args.append([
                                out_images_path, base_name, ori_folder,
                                res_folder, frames_displayed
                            ])
                    frame['value'] = pool.apply_async(sequence[frame['idx']],
                                                      args=_args)

                for frame in active_frames:
                    if frame['idx'] == 0:
                        frame_buffer.put(frame['value'].get())

                active_frames = [x for x in active_frames if x['idx'] > 0]

                for frame in list(reversed(active_frames)):
                    frame['value'] = frame['value'].get()
                    frame['idx'] -= 1

                    if frame['idx'] == 0:
                        active_frames += [{
                            'value':
                            extract_frame(frame['value'], i),
                            'idx':
                            0
                        } for i in range(1, len(frame['value'][0]))]
                        frame['value'] = extract_frame(frame['value'], 0)

                if next_frames is not None:
                    frames = next_frames.get()
                    if len(frames) == 0:
                        vid_done = True
                    else:
                        active_frames.append({
                            'value': frames,
                            'idx': len(sequence) - 1
                        })

                frame_times.add(time.time() - start_time)
                if frame_times.get_avg() != 0:
                    fps = args.video_multiframe / frame_times.get_avg()
            else:
                fps = 0
                running = False

            fps_str = 'Processing FPS: %.2f | Video Playback FPS: %.2f | Frames in Buffer: %d' % (
                fps, video_fps, frame_buffer.qsize())
            if not args.display_fps and out_path is not None and os.path.isdir(out_path):
                print('\r' + fps_str + '    ', end='')

    except KeyboardInterrupt:
        print('\nStopping...')
        if os.path.isdir(out_images_path):
            file_csv(os.path.join(out_images_path, base_name + '.csv'),
                     result_list)

    if os.path.isdir(out_images_path):
        file_csv(os.path.join(out_images_path, base_name + '.csv'),
                 result_list)

    cleanup_and_exit()
Code example #24
0
    def __init__(self):

        parser = argparse.ArgumentParser(description='YOLACT Predict in ROS')
        parser.add_argument(
            '--visual_top_k',
            default=100,
            type=int,
            help='Further restrict the number of predictions to parse')
        parser.add_argument('--traditional_nms',
                            default=False,
                            action='store_true',
                            help='Whether to use traditional nms.')
        parser.add_argument('--hide_mask',
                            default=False,
                            action='store_true',
                            help='Whether to display masks')
        parser.add_argument('--hide_bbox',
                            default=True,
                            action='store_true',
                            help='Whether to display bboxes')
        parser.add_argument('--hide_score',
                            default=True,
                            action='store_true',
                            help='Whether to display scores')
        parser.add_argument(
            '--show_lincomb',
            default=False,
            action='store_true',
            help='Whether to show the generating process of masks.')
        parser.add_argument(
            '--no_crop',
            default=False,
            action='store_true',
            help='Do not crop output masks with the predicted bounding box.')
        parser.add_argument('--real_time',
                            default=True,
                            action='store_true',
                            help='Show the detection results in real time.')
        parser.add_argument(
            '--visual_thre',
            default=0.3,
            type=float,
            help='Detections with a score under this threshold will be removed.'
        )
        self.args = parser.parse_args()

        r = rospkg.RosPack()
        self.bridge = CvBridge()

        self.path = r.get_path('yolact_prediction')
        model_name = "/src/weights/best_89.48_res101_custom_610000.pth"
        strs = model_name.split('_')
        config = strs[-3] + "_" + strs[-2] + "_config"
        update_config(config)
        print("Using " + config + " according to the trained_model.")

        with torch.no_grad():

            self.cuda = torch.cuda.is_available()
            if self.cuda:
                cudnn.benchmark = True
                cudnn.fastest = True
                torch.set_default_tensor_type('torch.cuda.FloatTensor')
            else:
                torch.set_default_tensor_type('torch.FloatTensor')

            self.net = Yolact()
            self.net.load_weights(self.path + model_name, self.cuda)
            print('Model loaded.')

            if self.cuda:
                self.net = self.net.cuda()

            self.time_here = 0
            self.frame_times = MovingAverage()

            #### Publisher
            self.rgb_pub = rospy.Publisher("Yolact_predict_img/",
                                           Image,
                                           queue_size=1)

            image_sub = rospy.Subscriber("/camera/color/image_raw",
                                         Image,
                                         self.img_cb,
                                         queue_size=1)

            print("============ Ready ============")
Code example #25
0
File: yolact.py Project: michfelip/yolact_yx
    net = net
    # cudnn.benchmark = True
    torch.set_default_tensor_type('torch.FloatTensor')

    x = torch.zeros((1, 3, cfg.max_size, cfg.max_size))
    y = net(x)

    for p in net.prediction_layers:
        print(p.last_conv_size)

    print()
    for k, a in y.items():
        print(k + ': ', a.size(), torch.sum(a))
    exit()

    net(x)
    # timer.disable('pass2')
    avg = MovingAverage()
    try:
        while True:
            timer.reset()
            with timer.env('everything else'):
                net(x)
            avg.add(timer.total_time())
            print('\033[2J')  # Moves console cursor to 0,0
            timer.print_stats()
            print('Avg fps: %.2f\tAvg ms: %.2f         ' %
                  (1 / avg.get_avg(), avg.get_avg() * 1000))
    except KeyboardInterrupt:
        pass
Code example #26
0
File: train.py Project: ChanWoo25/namu-yolact
def train():
    #1: Create the folder where the training results will be saved.
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    #2: Prepare the train dataset through the API provided by MSCOCO.
    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=SSDAugmentation(MEANS))

    #   If train-validation is used, also prepare the eval dataset.
    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                    info_file=cfg.dataset.valid_info,
                                    transform=BaseTransform(MEANS))

    #3: Create an object of the implemented Yolact() class and set it to train mode.
    # Note: net and yolact_net share the same object stored in memory.
    #       However, net is later redefined as a combined object of Yolact and MultiBoxLoss
    #       for training, so yolact_net is kept as a separate handle in order to
    #       access only the Yolact network object.
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    #######################################################################
    ####### RESUME-related ################################################
    #4: args.log and args.resume exist to write logs during training and to
    # restart from the interruption point if training is unavoidably stopped
    # midway, so look at them in more detail only when needed.
    if args.log:
        log = Log(cfg.name,
                  args.log_folder,
                  dict(args._get_kwargs()),
                  overwrite=(args.resume is None),
                  log_gpu_stats=args.log_gpu)

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs, so disable it just to be safe.
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder +
                                cfg.backbone.path)
    #######END#############################################################
    #######################################################################

    #5: Set up the optimizer and the loss function for YOLACT.
    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.decay)
    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=cfg.ohem_negpos_ratio)

    #6: When using multiple GPUs, split the batch size across the GPUs.
    #   If the allocation does not add up to the total batch size, something is wrong, so exit.

    if args.batch_alloc is not None:
        args.batch_alloc = [int(x) for x in args.batch_alloc.split(',')]
        if sum(args.batch_alloc) != args.batch_size:
            print(
                'Error: Batch allocation (%s) does not sum to batch size (%s).'
                % (args.batch_alloc, args.batch_size))
            exit(-1)

    #7: Wrap the net configured so far together with the loss function into a more integrated net.
    #   Calling net now detects bboxes, filters them once through fast NMS,
    #   compares them with the ground truth to compute the loss, and, with
    #   multiple GPUs, automatically splits this work across the devices.
    #   yolact_net still points only to the Yolact() contained inside net.
    net = CustomDataParallel(NetLoss(net, criterion))
    if args.cuda:
        net = net.cuda()

    #8: Freeze all batch_normalization layers of yolact_net, pass a tensor of zeros
    #   through the model to initialize the parameters, and then switch the
    #   batch_normalization layers back to train mode.
    #   This is done because the author does not want to reset the running
    #   mean/variance values already stored in batch_normalization.
    if not cfg.freeze_bn:
        yolact_net.freeze_bn()  # Freeze bn so we don't kill our means
    yolact_net(torch.zeros(1, 3, cfg.max_size, cfg.max_size).cuda())
    if not cfg.freeze_bn: yolact_net.freeze_bn(True)

    #9: loss counters
    #   Create variables to hold the bbox localization loss and the class confidence loss,
    #   then work out the size of one epoch and the number of epochs from batch_size and the dataset size.
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)  # cw: so that negative input is not allowed... GOOD
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    #10: Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    #   step_index is the index used for learning rate decay.
    #   data_loader is the class that prepares and hands over the dataset in order during training;
    #   the object is created and stored here.
    step_index = 0

    data_loader = data.DataLoader(dataset,
                                  args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    #11: Define a lambda that builds the save_path used to store intermediate checkpoints
    #   at a given epoch and iteration; time_avg and loss_avgs are MovingAverage objects,
    #   declared to report the intermediate training loss as a moving average.
    save_path = lambda epoch, iteration: SavePath(
        cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()

    global loss_types  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    #12: The part where the main training loop starts (#A ~ #F).
    print('Begin training!')
    print()

    # A
    #   With try-except, training can be stopped via Ctrl+C (KeyboardInterrupt)
    #   while still saving the progress made so far.
    #   To restart from the interruption point, pass the --resume argument when running train.py.
    try:
        # Repeat for the num_epochs computed in #9.
        for epoch in range(num_epochs):
            # B
            #   If started with --resume, continue until the resume iteration is reached.
            #   Data is then pulled from data_loader and the loss is computed; if the
            #   target iteration is reached along the way, break to end this epoch.
            if (epoch + 1) * epoch_size < iteration:
                continue

            for datum in data_loader:
                # If we have trained as much as intended, stop.
                # Stop if we've reached an epoch if we're resuming from start_iter
                if iteration == (epoch + 1) * epoch_size:
                    break

                # If the configured number of iterations exceeds max_iter, finish training at max_iter.
                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Handle the case where a config value is scheduled to change at a specific iteration.
                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have changed
                        for avg in loss_avgs.values():
                            avg.reset()

                # If a config setting was changed, remove it from the list so we don't keep checking
                if changed:
                    cfg.delayed_settings = [
                        x for x in cfg.delayed_settings if x[0] > iteration
                    ]

                # C
                #   [learning rate adjustment]

                # Early in training (up to lr_warmup_until), adjust the lr to accelerate training a little.
                # Warm up by linearly interpolating the learning rate from some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                    set_lr(optimizer, (args.lr - cfg.lr_warmup_init) *
                           (iteration / cfg.lr_warmup_until) +
                           cfg.lr_warmup_init)

                #   Perform learning rate decay every time one of the specified iterations is reached.
                #   Adjust the learning rate at the given iterations, but also if we resume from past that iteration
                while step_index < len(
                        cfg.lr_steps
                ) and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma**step_index))

                # D
                #   Compute the loss function.

                # Zero the grad to get ready to compute gradients
                optimizer.zero_grad()

                #   Run forward propagation and compute this iteration's loss from the result via the loss function.
                #   For the concrete behavior, see resnet101 in Backbone.py, Yolact in yolact.py, and MultiBoxLoss in MultiBoxLoss.py.
                #   (see CustomDataParallel and NetLoss)
                losses = net(datum)

                losses = {k: (v).mean()
                          for k, v in losses.items()
                          }  # Mean here because Dataparallel
                loss = sum([losses[k] for k in losses])

                # no_inf_mean removes some components from the loss, so make sure to backward through all of it
                # all_loss = sum([v.mean() for v in losses.values()])

                # E
                #   Run backward propagation and, if the loss value is finite,
                #   apply the update to the parameters through optimizer.step().

                # Backprop
                loss.backward()

                # Do this to free up vram even if loss is not finite
                if torch.isfinite(loss).item():
                    optimizer.step()

                # F
                #   Print the elapsed time and intermediate loss values during training
                #   so that intermediate progress can be monitored.

                # Add the loss to the moving average for bookkeeping
                for k in losses:
                    loss_avgs[k].add(losses[k].item())

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(
                        datetime.timedelta(seconds=(cfg.max_iter - iteration) *
                                           time_avg.get_avg())).split('.')[0]

                    total = sum([loss_avgs[k].get_avg() for k in losses])
                    loss_labels = sum([[k, loss_avgs[k].get_avg()]
                                       for k in loss_types if k in losses], [])

                    print(('[%3d] %7d ||' + (' %s: %.3f |' * len(losses)) +
                           ' T: %.3f || ETA: %s || timer: %.3f') %
                          tuple([epoch, iteration] + loss_labels +
                                [total, eta_str, elapsed]),
                          flush=True)

                #   Write the log to a file.
                if args.log:
                    precision = 5
                    loss_info = {
                        k: round(losses[k].item(), precision)
                        for k in losses
                    }
                    loss_info['T'] = round(loss.item(), precision)

                    if args.log_gpu:
                        log.log_gpu_stats = (iteration % 10 == 0
                                             )  # nvidia-smi is sloooow

                    log.log('train',
                            loss=loss_info,
                            epoch=epoch,
                            iter=iteration,
                            lr=round(cur_lr, 10),
                            elapsed=elapsed)

                    log.log_gpu_stats = args.log_gpu
                # ~F

                # Each pass through the loop increases the iteration count by one.
                iteration += 1

                #   Periodically save the training state.
                if iteration % args.save_interval == 0 and iteration != args.start_iter:
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder,
                                                     cfg.name)

                    print('Saving state, iter:', iteration)
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            print('Deleting old save...')
                            os.remove(latest)

            # When running with train-validation, after each epoch that hits the
            # validation interval, run one validation pass and measure mAP.
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    compute_validation_map(epoch, iteration, yolact_net,
                                           val_dataset,
                                           log if args.log else None)

        # Compute validation mAP after training is finished
        compute_validation_map(epoch, iteration, yolact_net, val_dataset,
                               log if args.log else None)

    #13: If training is interrupted with Ctrl+C, save the weights to save_folder before
    #   stopping so that training can be resumed later.
    except KeyboardInterrupt:
        if args.interrupt:
            print('Stopping early. Saving network...')

            # Delete previous copy of the interrupted network so we don't spam the weights folder
            SavePath.remove_interrupt(args.save_folder)

            yolact_net.save_weights(
                save_path(epoch,
                          repr(iteration) + '_interrupt'))
        exit()

    yolact_net.save_weights(save_path(epoch, iteration))
Code example #27
0
File: eval.py Project: li-xl/Yolact.jittor
def evaluate(net: Yolact, dataset, train_mode=False):
    net.detect.use_fast_nms = args.fast_nms
    net.detect.use_cross_class_nms = args.cross_class_nms
    cfg.mask_proto_debug = args.mask_proto_debug

    # TODO Currently we do not support Fast Mask Re-scoring in evalimage, evalimages, and evalvideo
    if args.image is not None:
        if ':' in args.image:
            inp, out = args.image.split(':')
            evalimage(net, inp, out)
        else:
            evalimage(net, args.image)
        return
    elif args.images is not None:
        inp, out = args.images.split(':')
        evalimages(net, inp, out)
        return
    elif args.video is not None:
        if ':' in args.video:
            inp, out = args.video.split(':')
            evalvideo(net, inp, out)
        else:
            evalvideo(net, args.video)
        return

    frame_times = MovingAverage()
    dataset_size = len(dataset) if args.max_images < 0 else min(
        args.max_images, len(dataset))
    progress_bar = ProgressBar(30, dataset_size)

    print()

    if not args.display and not args.benchmark:
        # For each class and iou, stores tuples (score, isPositive)
        # Index ap_data[type][iouIdx][classIdx]
        ap_data = {
            'box': [[APDataObject() for _ in cfg.dataset.class_names]
                    for _ in iou_thresholds],
            'mask': [[APDataObject() for _ in cfg.dataset.class_names]
                     for _ in iou_thresholds]
        }
        detections = Detections()
    else:
        timer.disable('Load Data')

    dataset_indices = list(range(len(dataset)))

    if args.shuffle:
        random.shuffle(dataset_indices)
    elif not args.no_sort:
        # Do a deterministic shuffle based on the image ids
        #
        # I do this because on python 3.5 dictionary key order is *random*, while in 3.6 it's
        # the order of insertion. That means on python 3.6, the images come in the order they appear
        # in the annotations file. For some reason, the first images in the annotations file are
        # the hardest. To combat this, I use a hard-coded hash function based on the image ids
        # to shuffle the indices we use. That way, no matter what python version or how pycocotools
        # handles the data, we get the same result every time.
        hashed = [badhash(x) for x in dataset.ids]
        dataset_indices.sort(key=lambda x: hashed[x])

    # dataset_size=1000
    dataset_indices = dataset_indices[:dataset_size]

    try:
        # Main eval loop
        dataset.batch_size = 1
        dataset.num_workers = 1
        for it, batch in enumerate(dataset):
            timer.reset()
            image_idx, img, gt, gt_masks, h, w, num_crowd = batch[0]

            if not args.benchmark:
                gt = gt.numpy()
                gt_masks = gt_masks.numpy()
            batch = img.reshape(1, img.shape[0], img.shape[1], img.shape[2])
            # batch = jt.array([img])

            with timer.env('Network Extra'):
                preds = net(batch)

            if args.display:
                img_numpy = prep_display(preds, img, h, w)
            elif args.benchmark:
                prep_benchmark(preds, h, w)
            else:
                prep_metrics(ap_data, preds, img, gt, gt_masks, h, w,
                             num_crowd, dataset.ids[image_idx], detections)

            # First couple of images take longer because we're constructing the graph.
            # Since that's technically initialization, don't include those in the FPS calculations.
            if it > 1:
                frame_times.add(timer.total_time())

            if args.display:
                if it > 1:
                    print('Avg FPS: %.4f' % (1 / frame_times.get_avg()))
                plt.imshow(img_numpy)
                plt.title(str(dataset.ids[image_idx]))
                plt.show()
            elif not args.no_bar:
                if it > 1: fps = 1 / frame_times.get_avg()
                else: fps = 0
                progress = (it + 1) / dataset_size * 100
                progress_bar.set_val(it + 1)
                print(
                    '\rProcessing Images  %s %6d / %6d (%5.2f%%)    %5.2f fps        '
                    %
                    (repr(progress_bar), it + 1, dataset_size, progress, fps),
                    end='')

        jt.sync_all(True)

        if not args.display and not args.benchmark:
            print()
            if args.output_coco_json:
                print('Dumping detections..')
                if args.output_web_json:
                    detections.dump_web()
                else:
                    detections.dump()
            else:
                if not train_mode:
                    print('Saving data..')
                    with open(args.ap_data_file, 'wb') as f:
                        pickle.dump(ap_data, f)

                return calc_map(ap_data)
        elif args.benchmark:
            print()
            print()
            print('Stats for the last frame:')
            timer.print_stats()
            avg_seconds = frame_times.get_avg()
            print('Average: %5.2f fps, %5.2f ms' %
                  (1 / frame_times.get_avg(), 1000 * avg_seconds))
    except KeyboardInterrupt:
        print('Stopping..')
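
# The deterministic shuffle described in the comments above sorts the dataset indices by a
# hard-coded hash of each image id. A minimal sketch of that idea (the mixing constants are
# illustrative stand-ins, not necessarily the repository's own badhash):
def badhash_sketch(x):
    # Simple 32-bit integer mixing so the ordering is stable across Python versions and runs.
    x = ((x >> 16) ^ x) * 0x45d9f3b & 0xFFFFFFFF
    x = ((x >> 16) ^ x) * 0x45d9f3b & 0xFFFFFFFF
    return ((x >> 16) ^ x) & 0xFFFFFFFF

hashed = [badhash_sketch(img_id) for img_id in dataset.ids]
dataset_indices.sort(key=lambda i: hashed[i])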
Code example #28
0
    # detect videos
    elif args.video is not None:
        vid = cv2.VideoCapture('videos/' + args.video)

        target_fps = round(vid.get(cv2.CAP_PROP_FPS))
        frame_width = round(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = round(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        num_frames = round(vid.get(cv2.CAP_PROP_FRAME_COUNT))

        name = args.video.split('/')[-1]
        video_writer = cv2.VideoWriter(f'results/videos/{name}',
                                       cv2.VideoWriter_fourcc(*"mp4v"),
                                       target_fps, (frame_width, frame_height))

        frame_times = MovingAverage()
        progress_bar = ProgressBar(40, num_frames)

        time_here = 0
        fps = 0
        for i in range(num_frames):
            frame_origin = torch.from_numpy(vid.read()[1]).cuda().float()
            img_h, img_w = frame_origin.shape[0], frame_origin.shape[1]
            frame_trans = FastBaseTransform()(frame_origin.unsqueeze(0))
            net_outs = net(frame_trans)
            nms_outs = NMS(net_outs, args.traditional_nms)
            results = after_nms(nms_outs,
                                img_h,
                                img_w,
                                crop_masks=not args.no_crop,
                                visual_thre=args.visual_thre)
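            # (The snippet ends here; following the per-frame pattern of the other detection
            # examples in this collection, the loop typically finishes by timing the frame,
            # drawing the results and writing them out. A hedged sketch, reusing the names
            # defined above plus a draw_img call borrowed from the ROS example:)
            torch.cuda.synchronize()
            temp = time_here
            time_here = time.time()
            frame_times.add(time_here - temp)
            fps = 1 / frame_times.get_avg() if i > 0 else 0

            frame_numpy = draw_img(results, frame_origin, args, class_color=True, fps=fps)
            video_writer.write(frame_numpy)

            progress = (i + 1) / num_frames * 100
            progress_bar.set_val(i + 1)
            print('\rDetecting: %s %d / %d (%.2f%%)  %.2f fps' %
                  (repr(progress_bar), i + 1, num_frames, progress, fps), end='')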
Code example #29
0
File: eval.py Project: li-xl/Yolact.jittor
    def play_video():
        try:
            nonlocal frame_buffer, running, video_fps, is_webcam, num_frames, frames_displayed, vid_done

            video_frame_times = MovingAverage(100)
            frame_time_stabilizer = frame_time_target
            last_time = None
            stabilizer_step = 0.0005
            progress_bar = ProgressBar(30, num_frames)

            while running:
                frame_time_start = time.time()

                if not frame_buffer.empty():
                    next_time = time.time()
                    if last_time is not None:
                        video_frame_times.add(next_time - last_time)
                        video_fps = 1 / video_frame_times.get_avg()
                    if out_path is None:
                        cv2.imshow(path, frame_buffer.get())
                    else:
                        out.write(frame_buffer.get())
                    frames_displayed += 1
                    last_time = next_time

                    if out_path is not None:
                        if video_frame_times.get_avg() == 0:
                            fps = 0
                        else:
                            fps = 1 / video_frame_times.get_avg()
                        progress = frames_displayed / num_frames * 100
                        progress_bar.set_val(frames_displayed)

                        print(
                            '\rProcessing Frames  %s %6d / %6d (%5.2f%%)    %5.2f fps        '
                            % (repr(progress_bar), frames_displayed,
                               num_frames, progress, fps),
                            end='')

                # This is split because you don't want savevideo to require cv2 display functionality (see #197)
                if out_path is None and cv2.waitKey(1) == 27:
                    # Press Escape to close
                    running = False
                if not (frames_displayed < num_frames):
                    running = False

                if not vid_done:
                    buffer_size = frame_buffer.qsize()
                    if buffer_size < args.video_multiframe:
                        frame_time_stabilizer += stabilizer_step
                    elif buffer_size > args.video_multiframe:
                        frame_time_stabilizer -= stabilizer_step
                        if frame_time_stabilizer < 0:
                            frame_time_stabilizer = 0

                    new_target = frame_time_stabilizer if is_webcam else max(
                        frame_time_stabilizer, frame_time_target)
                else:
                    new_target = frame_time_target

                next_frame_target = max(
                    2 * new_target - video_frame_times.get_avg(), 0)
                target_time = frame_time_start + next_frame_target - 0.001  # Let's just subtract a millisecond to be safe

                if out_path is None or args.emulate_playback:
                    # This gives more accurate timing than if sleeping the whole amount at once
                    while time.time() < target_time:
                        time.sleep(0.001)
                else:
                    # Let's not starve the main thread, now
                    time.sleep(0.001)
        except:
            # See issue #197 for why this is necessary
            import traceback
            traceback.print_exc()
Code example #30
0
def train():
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    dataset = COCODetection(image_path=cfg.dataset.train_images,
                            info_file=cfg.dataset.train_info,
                            transform=SSDAugmentation(MEANS))
    print("dataset:", dataset[0])

    if args.validation_epoch > 0:
        setup_eval()
        val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                    info_file=cfg.dataset.valid_info,
                                    transform=BaseTransform(MEANS))

    # Parallel wraps the underlying module, but when saving and loading we don't want that
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    if args.log:
        log = Log(cfg.name,
                  args.log_folder,
                  dict(args._get_kwargs()),
                  overwrite=(args.resume is None),
                  log_gpu_stats=args.log_gpu)

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs, so disable it just to be safe.
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        print('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        print('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder +
                                cfg.backbone.path)

    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.decay)
    criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                             pos_threshold=cfg.positive_iou_threshold,
                             neg_threshold=cfg.negative_iou_threshold,
                             negpos_ratio=cfg.ohem_negpos_ratio)

    if args.batch_alloc is not None:
        args.batch_alloc = [int(x) for x in args.batch_alloc.split(',')]
        if sum(args.batch_alloc) != args.batch_size:
            print(
                'Error: Batch allocation (%s) does not sum to batch size (%s).'
                % (args.batch_alloc, args.batch_size))
            exit(-1)

    net = CustomDataParallel(NetLoss(net, criterion))
    if args.cuda:
        net = net.cuda()

    # Initialize everything
    if not cfg.freeze_bn:
        yolact_net.freeze_bn()  # Freeze bn so we don't kill our means
    yolact_net(torch.zeros(1, 3, cfg.max_size, cfg.max_size).cuda())
    if not cfg.freeze_bn: yolact_net.freeze_bn(True)

    # loss counters
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    data_loader = data.DataLoader(dataset,
                                  args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    save_path = lambda epoch, iteration: SavePath(
        cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()

    global loss_types  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    print('Begin training!')
    print()
    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            for datum in data_loader:
                # Stop if we've reached an epoch if we're resuming from start_iter
                if iteration == (epoch + 1) * epoch_size:
                    break

                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have changed
                        for avg in loss_avgs.values():
                            avg.reset()

                # If a config setting was changed, remove it from the list so we don't keep checking
                if changed:
                    cfg.delayed_settings = [
                        x for x in cfg.delayed_settings if x[0] > iteration
                    ]

                # Warm up by linearly interpolating the learning rate from some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until:
                    set_lr(optimizer, (args.lr - cfg.lr_warmup_init) *
                           (iteration / cfg.lr_warmup_until) +
                           cfg.lr_warmup_init)

                # Adjust the learning rate at the given iterations, but also if we resume from past that iteration
                while step_index < len(
                        cfg.lr_steps
                ) and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma**step_index))

                # Zero the grad to get ready to compute gradients
                optimizer.zero_grad()

                # Forward Pass + Compute loss at the same time (see CustomDataParallel and NetLoss)
                losses = net(datum)

                losses = {k: (v).mean()
                          for k, v in losses.items()
                          }  # Mean here because Dataparallel
                loss = sum([losses[k] for k in losses])

                # no_inf_mean removes some components from the loss, so make sure to backward through all of it
                # all_loss = sum([v.mean() for v in losses.values()])

                # Backprop
                loss.backward(
                )  # Do this to free up vram even if loss is not finite
                if torch.isfinite(loss).item():
                    optimizer.step()

                # Add the loss to the moving average for bookkeeping
                for k in losses:
                    loss_avgs[k].add(losses[k].item())

                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    eta_str = str(
                        datetime.timedelta(seconds=(cfg.max_iter - iteration) *
                                           time_avg.get_avg())).split('.')[0]

                    total = sum([loss_avgs[k].get_avg() for k in losses])
                    loss_labels = sum([[k, loss_avgs[k].get_avg()]
                                       for k in loss_types if k in losses], [])

                    print(('[%3d] %7d ||' + (' %s: %.3f |' * len(losses)) +
                           ' T: %.3f || ETA: %s || timer: %.3f') %
                          tuple([epoch, iteration] + loss_labels +
                                [total, eta_str, elapsed]),
                          flush=True)

                if args.log:
                    precision = 5
                    loss_info = {
                        k: round(losses[k].item(), precision)
                        for k in losses
                    }
                    loss_info['T'] = round(loss.item(), precision)

                    if args.log_gpu:
                        log.log_gpu_stats = (iteration % 10 == 0
                                             )  # nvidia-smi is sloooow

                    log.log('train',
                            loss=loss_info,
                            epoch=epoch,
                            iter=iteration,
                            lr=round(cur_lr, 10),
                            elapsed=elapsed)

                    log.log_gpu_stats = args.log_gpu

                iteration += 1

                if iteration % args.save_interval == 0 and iteration != args.start_iter:
                    if args.keep_latest:
                        latest = SavePath.get_latest(args.save_folder,
                                                     cfg.name)

                    print('Saving state, iter:', iteration)
                    yolact_net.save_weights(save_path(epoch, iteration))

                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            print('Deleting old save...')
                            os.remove(latest)

            # This is done per epoch
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    compute_validation_map(epoch, iteration, yolact_net,
                                           val_dataset,
                                           log if args.log else None)

        # Compute validation mAP after training is finished
        compute_validation_map(epoch, iteration, yolact_net, val_dataset,
                               log if args.log else None)
    except KeyboardInterrupt:
        if args.interrupt:
            print('Stopping early. Saving network...')

            # Delete previous copy of the interrupted network so we don't spam the weights folder
            SavePath.remove_interrupt(args.save_folder)

            yolact_net.save_weights(
                save_path(epoch,
                          repr(iteration) + '_interrupt'))
        exit()

    yolact_net.save_weights(save_path(epoch, iteration))