Example #1
0
def save_all_tensors_parser(opt, real_input_T, real_input_S, fake_slo, fake_slo_raw, fake_slo_first, real_slo, flow_ref, conf_ref, flow, weight):
    """Collect parser-stage tensors into an OrderedDict of display images.

    Takes the first batch element and last time step of each tensor,
    converts layouts/poses/flows to renderable images and returns them
    keyed by visualisation name.

    Note: the pose slice of ``real_input_S`` is modified in place
    (-1 background fill is mapped to 1).
    """
    nc = opt.label_nc_1

    # Source pose: first three channels of the last source frame;
    # replace the -1 background fill with 1 (white) in place.
    pose_ = real_input_S[0, -1, 0:3]
    pose_[pose_ == -1] = 1

    # Layout tensors -> label-map images.
    tlo_img = tensor2lo(real_input_T[0, -1, -nc:], nc, old_type=True)
    pose_img = tensor2im(pose_)
    slo_img = tensor2lo(fake_slo[0, -1], nc, old_type=True)
    slo_raw_img = tensor2lo(fake_slo_raw[0, -1], nc, old_type=True)
    slo_first_img = tensor2lo(fake_slo_first, nc, old_type=True)
    slo_real_img = tensor2lo(real_slo[0, -1], nc, old_type=True)

    # Flow fields rendered via flowiz (CHW -> HWC numpy first);
    # the flow weight map is shown without normalisation.
    flow_img = fz.convert_from_flow(flow[0, -1].permute(1, 2, 0).data.cpu().numpy())
    weight_img = tensor2im(weight[0, -1], normalize=False)
    flow_ref_img = fz.convert_from_flow(flow_ref[0, -1].permute(1, 2, 0).cpu().numpy())

    return OrderedDict([
        ('input_tlo', tlo_img),
        ('input_spose', pose_img),
        ('output_slo', slo_img),
        ('output_slo_raw', slo_raw_img),
        ('output_flow', flow_img),
        ('output_flow_weight', weight_img),
        ('output_slo_first', slo_first_img),
        ('real_slo', slo_real_img),
        ('real_flow', flow_ref_img),
    ])
Example #2
0
    def save_images(self, input1: np.ndarray, input2: np.ndarray, result: np.ndarray, target: np.ndarray, label: str):
        """Write a comparison collage to ``{self.output_folder}/{label}.png``.

        The collage stacks vertically: a blend of the two input frames on
        top, then the colourised predicted flows, then the colourised
        target flows (each row a horizontal stack over the batch).

        Args:
            input1: first input frames, shape (N, H, W, C).
            input2: second input frames, shape (N, H, W, C).
            result: predicted flow fields, one per batch element.
            target: ground-truth flow fields, one per batch element.
            label: output file basename (without extension).
        """
        # `.data.shape` also resolves on torch tensors; NOTE(review):
        # confirm whether result arrives as numpy or torch here.
        img_count = result.data.shape[0]

        # Never render more images than the configured per-collage limit.
        img_count = min(img_count, self.img_count)

        # TODO: this worked for grayscale images, rewrite to work for both
        # formats (previously: np.repeat(np.hstack(...), 3, 2)).
        in_stack1 = np.hstack(input1[:img_count, :, :, :])
        in_stack2 = np.hstack(input2[:img_count, :, :, :])

        # Colourise each flow field before stacking horizontally.
        re_stack = [convert_from_flow(result[i]) for i in range(img_count)]
        tar_stack = [convert_from_flow(target[i]) for i in range(img_count)]

        stacks = (cv2.addWeighted(in_stack1, 0.9, in_stack2, 0.3, 0),
                  np.hstack(re_stack), np.hstack(tar_stack))

        collage = np.vstack(stacks)

        # Reverse the channel axis (RGB -> BGR) because cv2.imwrite expects BGR.
        cv2.imwrite(f"{self.output_folder}/{label}.png", collage[:, :, ::-1])
Example #3
0
def save_all_tensors_cloth(opt, real_input_1, real_input_2, fg_tps, fg_dense, lo_tps, lo_dense, fg_dense_first, real_SFG, real_SFG_full, flow_tps, flow_dense, flow_total):
    """Build an OrderedDict of visualisation images for the cloth stage.

    Slices the first batch element / last time step from every tensor,
    whites out foreground pixels (sets them to 1) wherever the matching
    layout mask has no non-background label, composes a "full" output
    foreground from real and dense-warped pixels, and converts everything
    to displayable images.

    WARNING: several input tensors (``fg_tps``, ``fg_dense``, ``real_SFG``,
    slices of ``real_input_1``) are modified in place by the masking below.
    """
    #print(real_input_1.size(), real_input_2.size(), lo_dense.size())
    #turn to white background:
    #layout: label channels sliced from the conditioning inputs.
    input_tlo_ = real_input_1[0, -1, -1-opt.label_nc_2:-1]
    input_slo_ = real_input_2[0, -1, -opt.label_nc_2:]
    output_slo_tps_ = lo_tps[0, -1]
    output_slo_dense_ = lo_dense[0, -1]
    #foreground: last channel of input 1 is the target foreground image.
    input_tfg_ = real_input_1[0, -1, -1:]
    # White-out target foreground where no non-background layout label fires
    # (channel 0 is treated as background, hence the [1:] slice).
    input_tfg_[input_tlo_[1:].sum(dim=0, keepdim=True)==0] = 1
    real_sfg_ = real_SFG[0, -1]
    real_sfg_[input_slo_[1:].sum(dim=0, keepdim=True).expand_as(real_sfg_)==0] = 1
    real_sfg_full_ = real_SFG_full[0, -1]

    # Same white-out for the two predicted foregrounds, each against its
    # own predicted layout.
    output_sfg_tps_ = fg_tps[0, -1]
    output_sfg_tps_[output_slo_tps_[1:].sum(dim=0, keepdim=True).expand_as(output_sfg_tps_)==0] = 1
    output_sfg_dense_ = fg_dense[0, -1]
    output_sfg_dense_[output_slo_dense_[1:].sum(dim=0, keepdim=True).expand_as(output_sfg_dense_)==0] = 1
    # Compose the "full" output: start white, copy real pixels where the
    # source layout is empty, then copy dense-warped pixels where the source
    # layout is active AND the dense layout predicts something there.
    # NOTE(review): the ==1 test assumes one-hot layout channels — confirm.
    output_sfg_full_ = torch.ones_like(real_sfg_full_)
    output_sfg_full_[input_slo_[1:].sum(dim=0, keepdim=True).expand_as(output_sfg_full_)==0] = real_sfg_full_[input_slo_[1:].sum(dim=0, keepdim=True).expand_as(output_sfg_full_)==0]
    output_sfg_full_[(input_slo_[1:].sum(dim=0, keepdim=True).expand_as(output_sfg_full_)==1) & (output_slo_dense_[1:].sum(dim=0, keepdim=True).expand_as(output_sfg_full_)!=0)] = output_sfg_dense_[(input_slo_[1:].sum(dim=0, keepdim=True).expand_as(output_sfg_full_)==1) & (output_slo_dense_[1:].sum(dim=0, keepdim=True).expand_as(output_sfg_full_)!=0)]

    # Convert every tensor to a displayable image.
    input_slo = tensor2lo(input_slo_, opt.label_nc_2)
    real_sfg = tensor2im(real_sfg_)
    real_sfg_full = tensor2im(real_sfg_full_)
    input_tlo = tensor2lo(input_tlo_, opt.label_nc_2)
    input_tfg = tensor2im(input_tfg_)
    output_sfg_tps = tensor2im(output_sfg_tps_)
    output_sfg_dense = tensor2im(output_sfg_dense_)
    output_slo_tps = tensor2lo(output_slo_tps_, opt.label_nc_2)
    output_slo_dense = tensor2lo(output_slo_dense_, opt.label_nc_2)
    output_sfg_first = tensor2im(fg_dense_first)
    output_sfg_full = tensor2im(output_sfg_full_)
    # Flows: CHW tensor -> HWC numpy -> flowiz colour wheel.
    output_flow_tps = fz.convert_from_flow(flow_tps[0, -1].permute(1,2,0).data.cpu().numpy())
    output_flow_dense = fz.convert_from_flow(flow_dense[0, -1].permute(1,2,0).data.cpu().numpy())
    output_flow_total = fz.convert_from_flow(flow_total[0, -1].permute(1,2,0).data.cpu().numpy())
    #output_flow_tps = tensor2flow(flow_tps[0, -1])
    #output_flow_dense = tensor2flow(flow_dense[0, -1])
    #output_flow_total = tensor2flow(flow_total[0, -1])

    visual_list = [('input_slo', input_slo),
                   ('real_sfg', real_sfg),
                   ('real_sfg_full', real_sfg_full),
                   ('input_tlo', input_tlo),
                   ('input_tfg', input_tfg),
                   ('output_sfg_tps', output_sfg_tps),
                   ('output_sfg_dense', output_sfg_dense),
                   ('output_slo_tps', output_slo_tps),
                   ('output_slo_dense', output_slo_dense),
                   ('output_sfg_first', output_sfg_first),
                   ('output_sfg_full', output_sfg_full),
                   ('output_flow_tps', output_flow_tps),
                   ('output_flow_dense', output_flow_dense),
                   ('output_flow_total', output_flow_total)]
    visuals = OrderedDict(visual_list)
    return visuals
Example #4
0
 def _debug_display(self, i1, i2, flows, winpref="", wait=True):
     """Show both grayscale inputs and the colourised final flow estimate.

     Windows are named ``winpref`` + "1"/"2"/"f"; blocks on a key press
     when ``wait`` is True.
     """
     for suffix, gray in (("1", i1), ("2", i2)):
         cv2.imshow(winpref + suffix, cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR))
     # Last (finest) flow: CHW -> HWC, colourise, then RGB -> BGR for cv2.
     flow_rgb = convert_from_flow(flows[-1].transpose((1, 2, 0)))
     cv2.imshow(winpref + "f", cv2.cvtColor(flow_rgb, cv2.COLOR_RGB2BGR))
     if wait:
         cv2.waitKey()
Example #5
0
def SaveFlowImg(flows_pred, flow_label):
    """Save colourised images of multi-scale flow predictions and the label.

    Each prediction level is upsampled (coarser levels need more doublings)
    to a common resolution, then every batch element of every level is
    colourised with flowiz and written to ``ignore/visual/{batch}_{name}.png``.

    Args:
        flows_pred: tuple of flow tensors, finest to coarsest:
            (flow_pred_64, flow_pred_32, flow_pred_16, flow_pred_8, flow_pred_4).
        flow_label: ground-truth flow, average-pooled 2x below to match.
    """
    flow_label = F.avg_pool2d(flow_label, kernel_size=2)
    flow_pred_64, flow_pred_32, flow_pred_16, flow_pred_8, flow_pred_4 = flows_pred

    # Map each level name to (tensor, number of UpScale doublings);
    # this replaces six copy-pasted save stanzas with one loop.
    levels = [("pred_4", flow_pred_4, 1),
              ("pred_8", flow_pred_8, 2),
              ("pred_16", flow_pred_16, 3),
              ("pred_32", flow_pred_32, 4),
              ("pred_64", flow_pred_64, 5)]
    scaled = {}
    for name, flow, times in levels:
        for _ in range(times):
            flow = UpScale(flow)
        scaled[name] = flow
    scaled["label"] = flow_label  # saved last per batch, as before

    for batch in range(flow_label.size(0)):
        for name, flow in scaled.items():
            img = fz.convert_from_flow(
                flow[batch].permute(1, 2, 0).detach().cpu().numpy())
            Image.fromarray(img).save("ignore/visual/%d_%s.png" % (batch, name))
Example #6
0
def display(key, obj):
    """Render *obj* in Streamlit under a subheader, per the selector choice."""
    st.subheader(key)
    choice = selector(key, obj)

    if choice == "Text":
        st.text(obj)
    elif choice == "Image":
        # Map [-1, 1] data into the [0, 1] range st.image expects.
        st.image((obj + 1.0) / 2.0)
    elif choice == "Flow":
        # Imported lazily so flowiz is only required for flow display.
        import flowiz as fz

        st.image(fz.convert_from_flow(obj))
Example #7
0
def compute_flow(network, inputs):
    """Estimate optical flow between the previous and current input frame.

    Keeps the previous frame in the module-level ``last_frame``; on the
    very first call (no previous frame) a constant-255 placeholder of the
    input's shape is returned instead.
    """
    global last_frame
    current_frame = np.array(inputs["input_image"])

    if last_frame is None:
        # First frame ever seen: nothing to compare against yet.
        output = np.full(current_frame.shape, 255)
    else:
        def as_tensor(frame):
            # Reverse channel order, HWC -> CHW, scale to [0, 1].
            chw = np.array(frame)[:, :, ::-1].transpose(2, 0, 1)
            return torch.FloatTensor(chw.astype(np.float32) * (1.0 / 255.0))

        flow = estimate(network, as_tensor(last_frame), as_tensor(current_frame))
        output = flowiz.convert_from_flow(flow.numpy().transpose(1, 2, 0))

    last_frame = current_frame

    return {"output_image": output}
Example #8
0
def save_all_tensors_sampled(opt, flows_sampled_0, flows_sampled_1):
    """Colourise two triplets of sampled flow tensors for visualisation.

    Args:
        opt: options object (unused here; kept for interface parity with
            the sibling save_all_tensors_* helpers).
        flows_sampled_0: (flow_warp, flow_prev, flow) triplet for sample 0.
        flows_sampled_1: same triplet for sample 1.

    Returns:
        OrderedDict mapping '{i}_flow_{warp|prev}_vis' / '{i}_flow_vis'
        to flowiz-rendered images, sample 0 first.
    """
    def _vis(flow):
        # First batch element, CHW -> HWC numpy, then flowiz colour wheel.
        return fz.convert_from_flow(flow[0].permute(1, 2, 0).data.cpu().numpy())

    # One loop instead of six copy-pasted conversion lines; key order matches
    # the original (all of sample 0 before sample 1).
    visual_list = []
    for prefix, (flow_warp, flow_prev, flow) in (('0', flows_sampled_0),
                                                 ('1', flows_sampled_1)):
        visual_list += [(prefix + '_flow_warp_vis', _vis(flow_warp)),
                        (prefix + '_flow_prev_vis', _vis(flow_prev)),
                        (prefix + '_flow_vis', _vis(flow))]

    return OrderedDict(visual_list)
Example #9
0
def save_all_tensors_composer(opt, real_input_T, real_input_S, real_input_SFG, real_input_BG, fake_SI, fake_SI_raw, fake_SI_first, fake_SFG_full, fake_SFG_res, fake_sd, real_SI, real_SFG_full, flow_ref, conf_ref, flow, weight, modelD):
    """Build an OrderedDict of visualisation images for the composer stage.

    Slices the first batch element / last time step from each tensor,
    whites out foreground pixels outside the source layout mask, converts
    layouts/poses/foregrounds/flows to displayable images, and optionally
    draws face-region boxes when the face discriminator is enabled.

    WARNING: several input tensors (slices of ``real_input_S``,
    ``fake_SFG_full``, ``fake_SFG_res``, ``real_SFG_full``) are modified
    in place by the masking below.
    """
    #pose: first three channels; map the -1 background fill to 1 in place.
    input_spose_ = real_input_S[0, -1, 0:3]
    input_spose_[input_spose_==-1] = 1
    #layout: target layout channels sit between the pose and the last 3 (fg) channels.
    input_tlo_ = real_input_T[0, -1, -3-(opt.label_nc_3-opt.label_nc_2+1):-3]
    input_slo_ = real_input_S[0, -1, -opt.label_nc_3:]
    #foreground: last three channels of the target input.
    input_tfg_ = real_input_T[0, -1, -3:]
    #input_tfg_[input_tlo_[1:].sum(dim=0, keepdim=True).expand_as(input_tfg_)==0] = 1
    input_sfg_ = real_input_SFG[0, -1]
    #input_sfg_[input_slo_[1:opt.label_nc_2].sum(dim=0, keepdim=True).expand_as(input_sfg_)==0] = 1
    # White-out the composed foreground where no non-background layout label
    # fires (channel 0 is treated as background, hence the [1:] slice).
    output_sfg_full_ = fake_SFG_full[0, -1]
    output_sfg_full_[input_slo_[1:].sum(dim=0, keepdim=True).expand_as(output_sfg_full_)==0] = 1
    output_sfg_res_raw_ = fake_SFG_res[0, -1]
    #output_sfg_res_raw_[input_slo_[1:].sum(dim=0, keepdim=True).expand_as(output_sfg_res_raw_)==0] = 1
    # Convert the raw residual BEFORE masking; the masked version is shown separately.
    output_sfg_res_raw = tensor2im(output_sfg_res_raw_)
    # NOTE: aliases the same tensor — the masking below also mutates
    # output_sfg_res_raw_ (its image was already taken above).
    output_sfg_res_ = output_sfg_res_raw_
    output_sfg_res_[input_slo_[opt.label_nc_2:].sum(dim=0, keepdim=True).expand_as(output_sfg_res_)==0] = 1
    output_sfg_res = tensor2im(output_sfg_res_)
    real_sfg_full_ = real_SFG_full[0, -1]
    real_sfg_full_[input_slo_[1:].sum(dim=0, keepdim=True).expand_as(real_sfg_full_)==0] = 1
    #full image:
    output_si_raw_ = fake_SI_raw[0, -1]
    output_si_ = fake_SI[0, -1]
    real_si_ = real_SI[0, -1]

    # Convert every tensor to a displayable image.
    input_tlo = tensor2lo(input_tlo_, opt.label_nc_3-opt.label_nc_2+1)
    input_tfg = tensor2im(input_tfg_)
    input_spose = tensor2im(input_spose_)
    input_slo = tensor2lo(input_slo_, opt.label_nc_3)
    input_sfg = tensor2im(input_sfg_)
    input_bg = tensor2im(real_input_BG[0, -1])
    output_sfg_full = tensor2im(output_sfg_full_)
    output_si_raw = tensor2im(output_si_raw_)
    output_si = tensor2im(output_si_)
    output_si_first = tensor2im(fake_SI_first)
    output_sd = tensor2im(fake_sd[0, -1], normalize=False)
    # Flows: CHW tensor -> HWC numpy -> flowiz colour wheel.
    output_flow = fz.convert_from_flow(flow[0, -1].permute(1,2,0).data.cpu().numpy())
    #output_flow = tensor2flow(flow[0, -1])
    output_flow_weight = tensor2im(weight[0, -1], normalize=False)
    real_sfg_full = tensor2im(real_sfg_full_)
    real_si = tensor2im(real_si_)
    real_flow = fz.convert_from_flow(flow_ref[0, -1].permute(1,2,0).cpu().numpy())
    #real_flow = tensor2flow(flow_ref[0, -1])

    if opt.add_face_disc:
        # Locate the face channel in each layout and draw a rectangle:
        # white box on the target fg, black box on the composed output fg.
        ys_T, ye_T, xs_T, xe_T = modelD.module.get_face_region(real_input_T[0, -1:, -3-(opt.label_nc_3-opt.label_nc_2+1)+2:-3-(opt.label_nc_3-opt.label_nc_2+1)+3])
        ys_S, ye_S, xs_S, xe_S = modelD.module.get_face_region(real_input_S[0, -1:, -opt.label_nc_3+opt.label_nc_2+1:-opt.label_nc_3+opt.label_nc_2+2])
        if ys_S is not None and ys_T is not None:
            input_tfg[ys_T, xs_T:xe_T, :] = input_tfg[ye_T, xs_T:xe_T, :] = input_tfg[ys_T:ye_T, xs_T, :] = input_tfg[ys_T:ye_T, xe_T, :] = 255 
            output_sfg_full[ys_S, xs_S:xe_S, :] = output_sfg_full[ye_S, xs_S:xe_S, :] = output_sfg_full[ys_S:ye_S, xs_S, :] = output_sfg_full[ys_S:ye_S, xe_S, :] = 0 

    visual_list = [('input_tlo', input_tlo),
                   ('input_tfg', input_tfg),
                   ('input_spose', input_spose),
                   ('input_slo', input_slo),
                   ('input_sfg', input_sfg),
                   ('input_bg', input_bg),
                   ('output_sfg_full', output_sfg_full),
                   ('output_sfg_res_raw', output_sfg_res_raw),
                   ('output_sfg_res', output_sfg_res),
                   ('output_si_raw', output_si_raw),
                   ('output_si', output_si),
                   ('output_si_first', output_si_first),
                   ('output_sd', output_sd),
                   ('output_flow', output_flow),
                   ('output_flow_weight', output_flow_weight),
                   ('real_sfg_full', real_sfg_full),
                   ('real_si', real_si),
                   ('real_flow', real_flow)]

    visuals = OrderedDict(visual_list)
    return visuals
    # Body of a video->flow-visualisation script entry point (its `def` line
    # is not visible in this excerpt).
    args = parse_args()

    # Open the input video and mirror its fps/dimensions in the mp4 writer.
    vidcap = cv2.VideoCapture(args.input)
    FPS = int(vidcap.get(cv2.CAP_PROP_FPS))
    VID_WIDTH = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH))
    VID_HEIGHT = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    vidout = cv2.VideoWriter(args.output, cv2.VideoWriter_fourcc(*'mp4v'), FPS, (VID_WIDTH, VID_HEIGHT))
    FRAMES = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
    success = True

    for _ in tqdm(range(FRAMES)):
        success, image = vidcap.read()

        if success and 'frame2' in locals():  # We want to skip the first frame
            frame1 = frame2
            frame2 = image2tensor(image)
        else:
            # First iteration (or failed read): stash the frame and wait for a pair.
            frame2 = image2tensor(image)
            continue

        # NOTE(review): the network is re-built and its weights re-loaded on
        # every frame — hoisting these two lines above the loop would avoid
        # redundant work; confirm before changing.
        moduleNetwork = Network().cuda().eval()
        moduleNetwork.load_state_dict(torch.load(args.model))

        # Estimate flow for the pair, colourise with flowiz, append to the video.
        tensorOutput = moduleNetwork.estimate(frame1, frame2)
        flow = np.array(tensorOutput.numpy().transpose(1, 2, 0), np.float32)
        flow_img = flowiz.convert_from_flow(flow)
        vidout.write(flow_img)

    vidout.release()
    print(f"Done! \nFlowviz written to {args.output}")
Example #11
0
def main():
    """Compute optical flow for consecutive frame pairs in a directory.

    Loads a LiteFlowNet model, estimates flow for every adjacent pair of
    frames, then either displays the final flow, saves it in a .flo-style
    binary, or (with --dataset) pickles the full multi-scale flow cascade.
    """
    args = get_args()
    if args.save_path is not None:
        if not exists(args.save_path):
            mkdir(args.save_path)

    paths, frames = load_directory(args.frames)
    args.count = len(frames)

    # Alternative checkpoints: "kitti", "sintel".
    model_name = "default"
    arguments_strModel = f"/home/xbakom01/src/pytorch-liteflownet/models/network-{model_name}.pytorch"

    model = Network(arguments_strModel, 5).cuda().eval()

    # h, w, c
    for n, ((path1, path2),
            (frame1,
             frame2)) in enumerate(zip(pairwise(paths), pairwise(frames))):
        # Centre-crop, BGR -> RGB, HWC -> CHW, scale to [0, 1], move to GPU.
        frame1_t = tensor(
            cv2.cvtColor(center_crop(frame1), cv2.COLOR_BGR2RGB).transpose(
                (2, 0, 1)).astype(np.float32) / 255).cuda()
        frame2_t = tensor(
            cv2.cvtColor(center_crop(frame2), cv2.COLOR_BGR2RGB).transpose(
                (2, 0, 1)).astype(np.float32) / 255).cuda()

        flows = estimate(model, frame1_t, frame2_t)

        print(f"{n + 1}/{len(frames) - 1}", flush=True, end="\r")

        if not args.dataset:
            flow = flows[-1].cpu()

            if args.save_path is None:
                # Display the colourised flow (RGB -> BGR for cv2.imshow).
                img = convert_from_flow(flow.numpy().transpose(
                    (1, 2, 0)).astype(np.float16))
                cv2.imshow("", img[:, :, ::-1])
                cv2.waitKey()
            else:
                # Save in a .flo-style binary: "PIEH" magic bytes, then
                # width/height as int32, then the raw float32 HWC field.
                f1, f2 = list(
                    map(lambda x: basename(x).split(".")[0], [path1, path2]))
                with open(join(args.save_path, f"{f1}-{f2}.flowc"), "wb") as f:
                    np.array([80, 73, 69, 72], np.uint8).tofile(f)
                    np.array([flow.size(2), flow.size(1)], np.int32).tofile(f)
                    np.array(flow.numpy().transpose(1, 2, 0),
                             np.float32).tofile(f)
        else:
            # Save the whole cascade of multi-scale flows as a pickle.
            flow = [flow.cpu().numpy().astype(np.float16) for flow in flows]

            # Fall back to the frames directory when no save path was given.
            save_dir = args.save_path if args.save_path is not None else args.frames

            f1, f2 = list(
                map(lambda x: basename(x).split(".")[0], [path1, path2]))
            # BUG FIX: use save_dir here (was args.save_path, which makes
            # join() raise TypeError whenever --save_path is omitted).
            with open(join(save_dir, f"{f1}-{f2}.flowc"), "wb") as f:
                pickle.dump(flow, f)
Example #12
0
def show_flow(flow):
    """Colourise a CHW flow tensor and open it with the default image viewer."""
    hwc = flow.cpu().detach().numpy().transpose(1, 2, 0)
    Image.fromarray(convert_from_flow(hwc)).show()
Example #13
0
def save_flow(flow, name):
    """Colourise a CHW flow tensor and write the image to *name*."""
    hwc = flow.cpu().detach().numpy().transpose(1, 2, 0)
    Image.fromarray(convert_from_flow(hwc)).save(name)
Example #14
0
def writeFlow(name, flow):
    """Store the flow field's U/V channels as a .npy file at *name*."""
    np.save(name, fz.convert_from_flow(flow, mode='UV'))
Example #15
0
def flow_to_pil_image(tensor):
    """Colourise a CHW flow tensor and return it as a PIL image."""
    hwc = tensor.numpy().transpose(1, 2, 0)
    return Image.fromarray(fz.convert_from_flow(hwc))
Example #16
0
def train(rank, args):
    if args.num_gpus > 1:
        multi_gpu_rescale(args)
    if rank == 0:
        if not os.path.exists(args.save_folder):
            os.mkdir(args.save_folder)

    # set up logger
    setup_logger(output=os.path.join(args.log_folder, cfg.name),
                 distributed_rank=rank)
    logger = logging.getLogger("yolact.train")

    w = SummaryHelper(distributed_rank=rank,
                      log_dir=os.path.join(args.log_folder, cfg.name))
    w.add_text("argv", " ".join(sys.argv))
    logger.info("Args: {}".format(" ".join(sys.argv)))
    import git
    with git.Repo(search_parent_directories=True) as repo:
        w.add_text("git_hash", repo.head.object.hexsha)
        logger.info("git hash: {}".format(repo.head.object.hexsha))

    try:
        logger.info("Initializing torch.distributed backend...")
        dist.init_process_group(backend='nccl',
                                init_method=args.dist_url,
                                world_size=args.num_gpus,
                                rank=rank)
    except Exception as e:
        logger.error("Process group URL: {}".format(args.dist_url))
        raise e

    dist.barrier()

    if torch.cuda.device_count() > 1:
        logger.info('Multiple GPUs detected! Turning off JIT.')

    collate_fn = detection_collate
    if cfg.dataset.name == 'YouTube VIS':
        dataset = YoutubeVIS(image_path=cfg.dataset.train_images,
                             info_file=cfg.dataset.train_info,
                             configs=cfg.dataset,
                             transform=SSDAugmentationVideo(MEANS))

        if cfg.dataset.joint == 'coco':
            joint_dataset = COCODetection(
                image_path=cfg.joint_dataset.train_images,
                info_file=cfg.joint_dataset.train_info,
                transform=SSDAugmentation(MEANS))
            joint_collate_fn = detection_collate

        if args.validation_epoch > 0:
            setup_eval()
            val_dataset = YoutubeVIS(image_path=cfg.dataset.valid_images,
                                     info_file=cfg.dataset.valid_info,
                                     configs=cfg.dataset,
                                     transform=BaseTransformVideo(MEANS))
        collate_fn = collate_fn_youtube_vis

    elif cfg.dataset.name == 'FlyingChairs':
        dataset = FlyingChairs(image_path=cfg.dataset.trainval_images,
                               info_file=cfg.dataset.trainval_info)

        collate_fn = collate_fn_flying_chairs

    else:
        dataset = COCODetection(image_path=cfg.dataset.train_images,
                                info_file=cfg.dataset.train_info,
                                transform=SSDAugmentation(MEANS))

        if args.validation_epoch > 0:
            setup_eval()
            val_dataset = COCODetection(image_path=cfg.dataset.valid_images,
                                        info_file=cfg.dataset.valid_info,
                                        transform=BaseTransform(MEANS))

    # Set cuda device early to avoid duplicate model in master GPU
    if args.cuda:
        torch.cuda.set_device(rank)

    # Parallel wraps the underlying module, but when saving and loading we don't want that
    yolact_net = Yolact()
    net = yolact_net
    net.train()

    # I don't use the timer during training (I use a different timing method).
    # Apparently there's a race condition with multiple GPUs.

    # use timer for experiments
    timer.disable_all()

    # Both of these can set args.resume to None, so do them before the check
    if args.resume == 'interrupt':
        args.resume = SavePath.get_interrupt(args.save_folder)
    elif args.resume == 'latest':
        args.resume = SavePath.get_latest(args.save_folder, cfg.name)

    if args.resume is not None:
        logger.info('Resuming training, loading {}...'.format(args.resume))
        yolact_net.load_weights(args.resume, args=args)

        if args.start_iter == -1:
            args.start_iter = SavePath.from_str(args.resume).iteration
    else:
        logger.info('Initializing weights...')
        yolact_net.init_weights(backbone_path=args.save_folder +
                                cfg.backbone.path)

    if cfg.flow.train_flow:
        criterion = OpticalFlowLoss()

    else:
        criterion = MultiBoxLoss(num_classes=cfg.num_classes,
                                 pos_threshold=cfg.positive_iou_threshold,
                                 neg_threshold=cfg.negative_iou_threshold,
                                 negpos_ratio=3)

    if args.cuda:
        cudnn.benchmark = True
        net.cuda(rank)
        criterion.cuda(rank)
        net = nn.parallel.DistributedDataParallel(net,
                                                  device_ids=[rank],
                                                  output_device=rank,
                                                  broadcast_buffers=False,
                                                  find_unused_parameters=True)
        # net       = nn.DataParallel(net).cuda()
        # criterion = nn.DataParallel(criterion).cuda()

    optimizer = optim.SGD(filter(lambda x: x.requires_grad, net.parameters()),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.decay)

    # loss counters
    loc_loss = 0
    conf_loss = 0
    iteration = max(args.start_iter, 0)
    w.set_step(iteration)
    last_time = time.time()

    epoch_size = len(dataset) // args.batch_size // args.num_gpus
    num_epochs = math.ceil(cfg.max_iter / epoch_size)

    # Which learning rate adjustment step are we on? lr' = lr * gamma ^ step_index
    step_index = 0

    from data.sampler_utils import InfiniteSampler, build_batch_data_sampler

    infinite_sampler = InfiniteSampler(dataset,
                                       seed=args.random_seed,
                                       num_replicas=args.num_gpus,
                                       rank=rank,
                                       shuffle=True)
    train_sampler = build_batch_data_sampler(infinite_sampler,
                                             images_per_batch=args.batch_size)

    data_loader = data.DataLoader(
        dataset,
        num_workers=args.num_workers,
        collate_fn=collate_fn,
        multiprocessing_context="fork" if args.num_workers > 1 else None,
        batch_sampler=train_sampler)
    data_loader_iter = iter(data_loader)

    if cfg.dataset.joint:
        joint_infinite_sampler = InfiniteSampler(joint_dataset,
                                                 seed=args.random_seed,
                                                 num_replicas=args.num_gpus,
                                                 rank=rank,
                                                 shuffle=True)
        joint_train_sampler = build_batch_data_sampler(
            joint_infinite_sampler, images_per_batch=args.batch_size)
        joint_data_loader = data.DataLoader(
            joint_dataset,
            num_workers=args.num_workers,
            collate_fn=joint_collate_fn,
            multiprocessing_context="fork" if args.num_workers > 1 else None,
            batch_sampler=joint_train_sampler)
        joint_data_loader_iter = iter(joint_data_loader)

    dist.barrier()

    save_path = lambda epoch, iteration: SavePath(
        cfg.name, epoch, iteration).get_path(root=args.save_folder)
    time_avg = MovingAverage()
    data_time_avg = MovingAverage(10)

    global loss_types  # Forms the print order
    loss_avgs = {k: MovingAverage(100) for k in loss_types}

    def backward_and_log(prefix,
                         net_outs,
                         targets,
                         masks,
                         num_crowds,
                         extra_loss=None):
        optimizer.zero_grad()

        out = net_outs["pred_outs"]
        wrapper = ScatterWrapper(targets, masks, num_crowds)
        losses = criterion(out, wrapper, wrapper.make_mask())

        losses = {k: v.mean()
                  for k, v in losses.items()}  # Mean here because Dataparallel

        if extra_loss is not None:
            assert type(extra_loss) == dict
            losses.update(extra_loss)

        loss = sum([losses[k] for k in losses])

        # Backprop
        loss.backward()  # Do this to free up vram even if loss is not finite
        if torch.isfinite(loss).item():
            optimizer.step()

        # Add the loss to the moving average for bookkeeping
        for k in losses:
            loss_avgs[k].add(losses[k].item())
            w.add_scalar('{prefix}/{key}'.format(prefix=prefix, key=k),
                         losses[k].item())

        return losses

    logger.info('Begin training!')
    # try-except so you can use ctrl+c to save early and stop training
    try:
        for epoch in range(num_epochs):
            # Resume from start_iter
            if (epoch + 1) * epoch_size < iteration:
                continue

            while True:
                data_start_time = time.perf_counter()
                datum = next(data_loader_iter)
                dist.barrier()
                data_end_time = time.perf_counter()
                data_time = data_end_time - data_start_time
                if iteration != args.start_iter:
                    data_time_avg.add(data_time)
                # Stop if we've reached an epoch if we're resuming from start_iter
                if iteration == (epoch + 1) * epoch_size:
                    break

                # Stop at the configured number of iterations even if mid-epoch
                if iteration == cfg.max_iter:
                    break

                # Change a config setting if we've reached the specified iteration
                changed = False
                for change in cfg.delayed_settings:
                    if iteration >= change[0]:
                        changed = True
                        cfg.replace(change[1])

                        # Reset the loss averages because things might have changed
                        for avg in loss_avgs:
                            avg.reset()

                # If a config setting was changed, remove it from the list so we don't keep checking
                if changed:
                    cfg.delayed_settings = [
                        x for x in cfg.delayed_settings if x[0] > iteration
                    ]

                # Warm up by linearly interpolating the learning rate from some smaller value
                if cfg.lr_warmup_until > 0 and iteration <= cfg.lr_warmup_until and cfg.lr_warmup_init < args.lr:
                    set_lr(optimizer, (args.lr - cfg.lr_warmup_init) *
                           (iteration / cfg.lr_warmup_until) +
                           cfg.lr_warmup_init)

                elif cfg.lr_schedule == 'cosine':
                    set_lr(
                        optimizer,
                        args.lr *
                        ((math.cos(math.pi * iteration / cfg.max_iter) + 1.) *
                         .5))

                # Adjust the learning rate at the given iterations, but also if we resume from past that iteration
                while cfg.lr_schedule == 'step' and step_index < len(
                        cfg.lr_steps
                ) and iteration >= cfg.lr_steps[step_index]:
                    step_index += 1
                    set_lr(optimizer, args.lr * (args.gamma**step_index))

                global lr
                w.add_scalar('meta/lr', lr)

                # ---- Optical-flow pre-training branch (FlyingChairs) ----
                if cfg.dataset.name == "FlyingChairs":
                    imgs_1, imgs_2, flows = prepare_flow_data(datum)
                    # Flow-only forward pass: no detection input; the frame
                    # pair travels through `extras`.
                    net_outs = net(None, extras=(imgs_1, imgs_2))
                    # Compute Loss
                    optimizer.zero_grad()

                    losses = criterion(net_outs, flows)

                    losses = {k: v.mean()
                              for k, v in losses.items()
                              }  # Mean here because Dataparallel
                    loss = sum([losses[k] for k in losses])

                    # Backprop
                    loss.backward(
                    )  # Do this to free up vram even if loss is not finite
                    # Only apply the update when the loss is finite (guards
                    # against NaN/Inf corrupting the weights).
                    if torch.isfinite(loss).item():
                        optimizer.step()

                    # Add the loss to the moving average for bookkeeping
                    for k in losses:
                        loss_avgs[k].add(losses[k].item())
                        w.add_scalar('loss/%s' % k, losses[k].item())

                # ---- Joint / still-image detection branch ----
                elif cfg.dataset.joint or not cfg.dataset.is_video:
                    if cfg.dataset.joint:
                        joint_datum = next(joint_data_loader_iter)
                        # Keep distributed workers in lockstep before the
                        # forward pass.
                        dist.barrier()
                        # Load training data
                        # Note, for training on multiple gpus this will use the custom replicate and gather I wrote up there
                        images, targets, masks, num_crowds = prepare_data(
                            joint_datum)
                    else:
                        images, targets, masks, num_crowds = prepare_data(
                            datum)
                    # Run the full backbone; "interrupt": False means the
                    # whole network executes (cf. the video branch below).
                    extras = {
                        "backbone": "full",
                        "interrupt": False,
                        "moving_statistics": {
                            "aligned_feats": []
                        }
                    }
                    net_outs = net(images, extras=extras)
                    out = net_outs["pred_outs"]
                    # Compute Loss
                    optimizer.zero_grad()

                    wrapper = ScatterWrapper(targets, masks, num_crowds)
                    losses = criterion(out, wrapper, wrapper.make_mask())

                    losses = {k: v.mean()
                              for k, v in losses.items()
                              }  # Mean here because Dataparallel
                    loss = sum([losses[k] for k in losses])

                    # Backprop
                    loss.backward(
                    )  # Do this to free up vram even if loss is not finite
                    # Skip the optimizer step when the loss is NaN/Inf.
                    if torch.isfinite(loss).item():
                        optimizer.step()

                    # Add the loss to the moving average for bookkeeping
                    for k in losses:
                        loss_avgs[k].add(losses[k].item())
                        w.add_scalar('joint/%s' % k, losses[k].item())

                # Forward Pass
                # ---- Video branch: seed moving statistics on reference
                # frames without gradients, then train on the annotated key
                # frame (datum[0]). ----
                if cfg.dataset.is_video:
                    # reference frames
                    references = []
                    moving_statistics = {"aligned_feats": [], "conf_hist": []}
                    # datum[:0:-1] walks the clip in reverse order, excluding
                    # datum[0] — the annotated key frame handled below.
                    for idx, frame in enumerate(datum[:0:-1]):
                        images, annots = frame

                        extras = {
                            "backbone": "full",
                            "interrupt": True,
                            "keep_statistics": True,
                            "moving_statistics": moving_statistics
                        }

                        # Reference frames only populate `moving_statistics`;
                        # no gradients are needed.
                        with torch.no_grad():
                            net_outs = net(images, extras=extras)

                        moving_statistics["feats"] = net_outs["feats"]
                        moving_statistics["lateral"] = net_outs["lateral"]

                        # Drop everything except the phase outputs, presumably
                        # to free GPU memory — TODO confirm.
                        keys_to_save = ("outs_phase_1", "outs_phase_2")
                        for key in set(net_outs.keys()) - set(keys_to_save):
                            del net_outs[key]
                        references.append(net_outs)

                    # key frame with annotation, but not compute full backbone
                    frame = datum[0]
                    images, annots = frame
                    # NOTE(review): this repack of (images, annots) is a no-op.
                    frame = (
                        images,
                        annots,
                    )
                    images, targets, masks, num_crowds = prepare_data(frame)

                    extras = {
                        "backbone": "full",
                        "interrupt": not cfg.flow.base_backward,
                        "moving_statistics": moving_statistics
                    }
                    gt_net_outs = net(images, extras=extras)
                    if cfg.flow.base_backward:
                        losses = backward_and_log("compute", gt_net_outs,
                                                  targets, masks, num_crowds)

                    keys_to_save = ("outs_phase_1", "outs_phase_2")
                    for key in set(gt_net_outs.keys()) - set(keys_to_save):
                        del gt_net_outs[key]

                    # now do the warp
                    if len(references) > 0:
                        # NOTE(review): `reference_frame` is assigned but never
                        # used below — dead store; confirm before removing.
                        reference_frame = references[0]
                        extras = {
                            "backbone": "partial",
                            "moving_statistics": moving_statistics
                        }

                        net_outs = net(images, extras=extras)
                        extra_loss = yolact_net.extra_loss(
                            net_outs, gt_net_outs)

                        losses = backward_and_log("warp",
                                                  net_outs,
                                                  targets,
                                                  masks,
                                                  num_crowds,
                                                  extra_loss=extra_loss)

                # ---- Per-iteration timing and periodic console logging ----
                cur_time = time.time()
                elapsed = cur_time - last_time
                last_time = cur_time
                w.add_scalar('meta/data_time', data_time)
                w.add_scalar('meta/iter_time', elapsed)

                # Exclude graph setup from the timing information
                if iteration != args.start_iter:
                    time_avg.add(elapsed)

                if iteration % 10 == 0:
                    # ETA from the moving-average iteration time; split('.')
                    # drops the fractional-seconds part of the timedelta repr.
                    eta_str = str(
                        datetime.timedelta(seconds=(cfg.max_iter - iteration) *
                                           time_avg.get_avg())).split('.')[0]
                    if torch.cuda.is_available():
                        max_mem_mb = torch.cuda.max_memory_allocated(
                        ) / 1024.0 / 1024.0
                        # torch.cuda.reset_max_memory_allocated()
                    else:
                        # NOTE(review): max_mem_mb = None makes the
                        # "{:.0f}".format(max_mem_mb) below raise TypeError on
                        # CPU-only runs — needs a guard.
                        max_mem_mb = None

                    logger.info("""\
eta: {eta}  epoch: {epoch}  iter: {iter}  \
{losses}  {loss_total}  \
time: {time}  data_time: {data_time}  lr: {lr}  {memory}\
""".format(eta=eta_str,
                    epoch=epoch,
                    iter=iteration,
                    losses="  ".join([
                    "{}: {:.3f}".format(k, loss_avgs[k].get_avg()) for k in losses
                    ]),
                    loss_total="T: {:.3f}".format(
                    sum([loss_avgs[k].get_avg() for k in losses])),
                    data_time="{:.3f}".format(data_time_avg.get_avg()),
                    time="{:.3f}".format(elapsed),
                    lr="{:.6f}".format(lr),
                    memory="max_mem: {:.0f}M".format(max_mem_mb)))

                # ---- Periodic visualizations (rank 0, every 100 iters) ----
                if rank == 0 and iteration % 100 == 0:

                    if cfg.flow.train_flow:
                        # Visualize predicted flows against ground truth for
                        # the FlyingChairs branch.
                        import flowiz as fz
                        from layers.warp_utils import deform_op
                        tgt_size = (64, 64)
                        flow_size = flows.size()[2:]
                        vis_data = []
                        for pred_flow in net_outs:
                            vis_data.append(pred_flow)

                        # Warp imgs_2 by the ground-truth flow and by each
                        # predicted flow (upsampled to the GT resolution).
                        deform_gt = deform_op(imgs_2, flows)
                        flows_pred = [
                            F.interpolate(x,
                                          size=flow_size,
                                          mode='bilinear',
                                          align_corners=False)
                            for x in net_outs
                        ]
                        deform_preds = [
                            deform_op(imgs_2, x) for x in flows_pred
                        ]

                        vis_data.append(
                            F.interpolate(flows, size=tgt_size, mode='area'))

                        # Keep only the first sample of each batch, resized to
                        # tgt_size (default interpolate mode, i.e. nearest).
                        vis_data = [
                            F.interpolate(flow[:1], size=tgt_size)
                            for flow in vis_data
                        ]
                        # Render each flow field as an RGB image in [0, 1],
                        # CHW layout.
                        vis_data = [
                            fz.convert_from_flow(
                                flow[0].data.cpu().numpy().transpose(
                                    1, 2, 0)).transpose(
                                        2, 0, 1).astype('float32') / 255
                            for flow in vis_data
                        ]

                        def convert_image(image):
                            """Denormalize one image tensor for display.

                            Takes an (N, C, H, W) tensor, downsamples it to
                            `tgt_size`, keeps the first sample, and returns a
                            (C, H, W) float numpy array clipped to [-1, 1].
                            """
                            # Resize for visualization and move to host memory.
                            small = F.interpolate(image, size=tgt_size,
                                                  mode='area')
                            arr = small.data.cpu().numpy()[0]
                            # Reverse the channel axis, denormalize in HWC
                            # layout with STD/MEANS, then return to CHW.  The
                            # flip/flip-back pair is equivalent to indexing
                            # STD[::-1]/MEANS[::-1] directly.
                            hwc = arr[::-1].transpose(1, 2, 0)
                            hwc = hwc * np.array(STD) + np.array(MEANS)
                            scaled = hwc.transpose(2, 0, 1) / 255
                            # Clamp to a displayable range and undo the flip.
                            return np.clip(scaled, -1, 1)[::-1]

                        vis_data.append(convert_image(imgs_1))
                        vis_data.append(convert_image(imgs_2))
                        vis_data.append(convert_image(deform_gt))
                        vis_data.extend(
                            [convert_image(x) for x in deform_preds])

                        vis_data_stack = np.stack(vis_data, axis=0)
                        w.add_images("preds_flow", vis_data_stack)

                    elif cfg.flow.warp_mode == "flow":
                        # Visualize the flows predicted during video training.
                        import flowiz as fz
                        tgt_size = (64, 64)
                        vis_data = []
                        for pred_flow, _, _ in net_outs["preds_flow"]:
                            vis_data.append(pred_flow)

                        # First sample of each batch, resized to tgt_size.
                        vis_data = [
                            F.interpolate(flow[:1], size=tgt_size)
                            for flow in vis_data
                        ]
                        # Flow field -> RGB image in [0, 1], CHW layout.
                        vis_data = [
                            fz.convert_from_flow(
                                flow[0].data.cpu().numpy().transpose(
                                    1, 2, 0)).transpose(
                                        2, 0, 1).astype('float32') / 255
                            for flow in vis_data
                        ]
                        # Denormalize the input frame for display; STD/MEANS
                        # are indexed [::-1] here instead of flipping the
                        # image's channel axis.
                        input_image = F.interpolate(images,
                                                    size=tgt_size,
                                                    mode='area')
                        input_image = input_image[0]
                        input_image = input_image.data.cpu().numpy()
                        input_image = input_image.transpose(1, 2, 0)
                        input_image = input_image * np.array(
                            STD[::-1]) + np.array(MEANS[::-1])
                        input_image = input_image.transpose(2, 0, 1)
                        input_image = input_image / 255
                        input_image = np.clip(input_image, -1, 1)
                        vis_data.append(input_image)

                        vis_data_stack = np.stack(vis_data, axis=0)
                        w.add_images("preds_flow", vis_data_stack)

                iteration += 1
                w.set_step(iteration)

                # ---- Periodic checkpointing (rank 0 only) ----
                if rank == 0 and iteration % args.save_interval == 0 and iteration != args.start_iter:
                    if args.keep_latest:
                        # Remember the previous checkpoint so it can be
                        # removed after the new one is written.
                        latest = SavePath.get_latest(args.save_folder,
                                                     cfg.name)

                    logger.info('Saving state, iter: {}'.format(iteration))
                    yolact_net.save_weights(save_path(epoch, iteration))

                    # Delete the superseded checkpoint, unless it falls on a
                    # keep_latest_interval boundary.
                    if args.keep_latest and latest is not None:
                        if args.keep_latest_interval <= 0 or iteration % args.keep_latest_interval != args.save_interval:
                            logger.info('Deleting old save...')
                            os.remove(latest)

            # This is done per epoch
            if args.validation_epoch > 0:
                if epoch % args.validation_epoch == 0 and epoch > 0:
                    # Only rank 0 evaluates; the barrier keeps the remaining
                    # workers in sync until it finishes.
                    if rank == 0:
                        compute_validation_map(yolact_net, val_dataset)
                    dist.barrier()

    except KeyboardInterrupt:
        # Ctrl-C: optionally checkpoint before exiting so training can be
        # resumed from the interrupt point.
        if args.interrupt_no_save:
            logger.info('No save on interrupt, just exiting...')
        elif rank == 0:
            print('Stopping early. Saving network...')
            # Delete previous copy of the interrupted network so we don't spam the weights folder
            SavePath.remove_interrupt(args.save_folder)

            yolact_net.save_weights(
                save_path(epoch,
                          repr(iteration) + '_interrupt'))
        return

    # Normal completion: save the final weights from rank 0.
    if rank == 0:
        yolact_net.save_weights(save_path(epoch, iteration))
# Exemple #17
# 0
    output_SLO_cloth = util.tensor2lo(output_SLO_cloth_, opt.label_nc_2)

    output_SFG_cloth_tps_ = torch.ones_like(SFG_cloth_tps)
    output_SFG_cloth_tps_[SLO_cloth_tps.max(1, keepdim=True)[1].expand_as(
        output_SFG_cloth_tps_) != 0] = SFG_cloth_tps[SLO_cloth_tps.max(
            1, keepdim=True)[1].expand_as(output_SFG_cloth_tps_) != 0]
    output_SFG_cloth_tps = util.tensor2im(output_SFG_cloth_tps_)
    output_SFG_cloth_ = torch.ones_like(SFG_cloth)
    output_SFG_cloth_[(
        SLO_cloth.max(1, keepdim=True)[1].expand_as(output_SFG_cloth_) != 0
    ) & (SParsing_cloth[0, -1].expand_as(output_SFG_cloth_) != 0)] = SFG_cloth[
        (SLO_cloth.max(1, keepdim=True)[1].expand_as(output_SFG_cloth_) != 0)
        & (SParsing_cloth[0, -1].expand_as(output_SFG_cloth_) != 0)]
    output_SFG_cloth = util.tensor2im(output_SFG_cloth_)
    output_SLO_cloth_stage2 = util.tensor2lo(SLO_cloth, opt.label_nc_2)
    output_flow_tps = fz.convert_from_flow(Flow_tps[0].permute(
        1, 2, 0).cpu().numpy())
    output_flow_dense = fz.convert_from_flow(Flow_dense[0].permute(
        1, 2, 0).cpu().numpy())
    output_flow = fz.convert_from_flow(Flow[0].permute(1, 2, 0).cpu().numpy())

    output_SFG_ = torch.ones_like(SFG)
    output_SFG_[
        output_SLO_.max(0, keepdim=True)[1].expand_as(output_SFG_) != 0] = SFG[
            output_SLO_.max(0, keepdim=True)[1].expand_as(output_SFG_) != 0]
    output_SFG = util.tensor2im(output_SFG_)
    output_SFG_res = util.tensor2im(SFG_res)
    SFG_res[output_SLO_[1:opt.label_nc_2].sum(dim=0, keepdim=True).expand_as(
        SFG_res) != 0] = 1
    output_SFG_res_uncloth = util.tensor2im(SFG_res)
    output_sd = util.tensor2im(SD)
    output_SI = util.tensor2im(SI)