Example #1
def run_model(args, checkpoint, output_dir, loader=None):
  vocab = checkpoint['model_kwargs']['vocab']
  model = build_model(args, checkpoint)
  if loader is None:
    loader = build_loader(args, checkpoint)

  img_dir = makedir(output_dir, 'images')
  graph_dir = makedir(output_dir, 'graphs', args.save_graphs)
  gt_img_dir = makedir(output_dir, 'images_gt', args.save_gt_imgs)
  data_path = os.path.join(output_dir, 'data.pt')

  data = {
    'vocab': vocab,
    'objs': [],
    'masks_pred': [],
    'boxes_pred': [],
    'masks_gt': [],
    'boxes_gt': [],
    'filenames': [],
  }


  # Pick the precomputed FH feature files for the chosen model variant.
  if args.model == 'bm_FH':
    FH_dir_train = args.FH_dir_train
    FH_dir_val = args.FH_dir_val
  elif args.model == 'bm_FHrec':
    FH_dir_train = args.FHrec_dir_train
    FH_dir_val = args.FHrec_dir_val
  elif args.model == 'bm_FH64':
    FH_dir_train = args.FH64_dir_train
    FH_dir_val = args.FH64_dir_val
  elif args.model == 'bm_FHrec64':
    FH_dir_train = args.FHrec64_dir_train
    FH_dir_val = args.FHrec64_dir_val

  FH_objs_train, FH_edges_train, IDs_train = torch.load(FH_dir_train)
  FH_objs_val, FH_edges_val, IDs_val = torch.load(FH_dir_val)
  IDs_train = torch.tensor(IDs_train)
  IDs_val = torch.tensor(IDs_val)
  if args.which_data == 'train':
      IDs = IDs_train
      FH_objs = FH_objs_train
      FH_edges = FH_edges_train
  else:
      IDs = IDs_val
      FH_objs = FH_objs_val
      FH_edges = FH_edges_val


  img_idx = 0
  for batch in loader:
    # The FH loader yields 8 items: the usual 7 plus the image ids used to
    # look up the precomputed FH features, so the 6/7-item branches are not needed.
    imgs, objs, boxes, masks, triples, obj_to_img, triple_to_img, imgs_ids = [x.cuda() for x in batch]

    # Gather the precomputed FH features for the images in this batch by id.
    fh_obj, fh_edge = [], []
    for i in range(imgs_ids.shape[0]):
      idd = (IDs == imgs_ids[i].item()).nonzero()
      fh_obj.append(FH_objs[idd])
      fh_edge.append(FH_edges[idd])

    fh_obj = torch.cat(fh_obj)
    fh_edge = torch.cat(fh_edge)
    

    imgs_gt = imagenet_deprocess_batch(imgs)
    boxes_gt = None
    masks_gt = None
    if args.use_gt_boxes:
      boxes_gt = boxes
    if args.use_gt_masks:
      masks_gt = masks

    # Run the model with predicted masks
    model_out = model(objs, triples, fh_obj, fh_edge, obj_to_img, 
                          boxes_gt=boxes_gt, masks_gt=masks_gt)
    
    # model_out = model(objs, triples, obj_to_img,
    #                   boxes_gt=boxes_gt, masks_gt=masks_gt)
    
    imgs_pred, boxes_pred, masks_pred, _ = model_out
    imgs_pred = imagenet_deprocess_batch(imgs_pred)

    obj_data = [objs, boxes_pred, masks_pred]
    _, obj_data = split_graph_batch(triples, obj_data, obj_to_img,
                                    triple_to_img)
    objs, boxes_pred, masks_pred = obj_data

    obj_data_gt = [boxes.data]
    if masks is not None:
      obj_data_gt.append(masks.data)
    triples, obj_data_gt = split_graph_batch(triples, obj_data_gt,
                                       obj_to_img, triple_to_img)
    boxes_gt, masks_gt = obj_data_gt[0], None
    if masks is not None:
      masks_gt = obj_data_gt[1]

    for i in range(imgs_pred.size(0)):
      img_filename = '%04d.png' % img_idx
      if args.save_gt_imgs:
        img_gt = imgs_gt[i].numpy().transpose(1, 2, 0)
        img_gt_path = os.path.join(gt_img_dir, img_filename)
        imsave(img_gt_path, img_gt)

      img_pred = imgs_pred[i]
      img_pred_np = imgs_pred[i].numpy().transpose(1, 2, 0)
      img_path = os.path.join(img_dir, img_filename)
      imsave(img_path, img_pred_np)

      data['objs'].append(objs[i].cpu().clone())
      data['masks_pred'].append(masks_pred[i].cpu().clone())
      data['boxes_pred'].append(boxes_pred[i].cpu().clone())
      data['boxes_gt'].append(boxes_gt[i].cpu().clone())
      data['filenames'].append(img_filename)

      cur_masks_gt = None
      if masks_gt is not None:
        cur_masks_gt = masks_gt[i].cpu().clone()
      data['masks_gt'].append(cur_masks_gt)

      if args.save_graphs:
        graph_img = draw_scene_graph(vocab, objs[i], triples[i])
        graph_path = os.path.join(graph_dir, img_filename)
        imsave(graph_path, graph_img)
      
      img_idx += 1

    torch.save(data, data_path)
    print('Saved %d images' % img_idx)
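
The run_model functions in these examples call a makedir helper that is not included in the excerpt. Below is a minimal sketch of what such a helper could look like, inferred from call sites such as makedir(output_dir, 'graphs', args.save_graphs); the actual helper in the original codebase may behave differently.

import os

def makedir(base, name, flag=True):
  # Hypothetical helper: join base/name and create the directory only when
  # flag is True, returning the joined path either way.
  dir_name = os.path.join(base, name)
  if flag and not os.path.isdir(dir_name):
    os.makedirs(dir_name)
  return dir_name
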
Example #2
def check_model(args, t, loader, model):
    float_dtype = torch.cuda.FloatTensor
    long_dtype = torch.cuda.LongTensor
    num_samples = 0
    all_losses = defaultdict(list)
    total_iou = 0
    total_boxes = 0
    with torch.no_grad():
        for batch in loader:
            batch = [tensor.cuda() for tensor in batch]
            masks = None
            if len(batch) == 6:
                imgs, objs, boxes, triples, obj_to_img, triple_to_img = batch
            elif len(batch) == 7:
                imgs, objs, boxes, masks, triples, obj_to_img, triple_to_img = batch
            predicates = triples[:, 1]

            # Run the model as it has been run during training
            model_masks = masks
            model_out = model(objs,
                              triples,
                              obj_to_img,
                              boxes_gt=boxes,
                              masks_gt=model_masks)
            imgs_pred, boxes_pred, masks_pred, predicate_scores = model_out

            skip_pixel_loss = False
            total_loss, losses = calculate_model_losses(
                args, skip_pixel_loss, model, imgs, imgs_pred, boxes,
                boxes_pred, masks, masks_pred, predicates, predicate_scores)

            total_iou += jaccard(boxes_pred, boxes)
            total_boxes += boxes_pred.size(0)

            for loss_name, loss_val in losses.items():
                all_losses[loss_name].append(loss_val)
            num_samples += imgs.size(0)
            if num_samples >= args.num_val_samples:
                break

        samples = {}
        samples['gt_img'] = imgs

        model_out = model(objs,
                          triples,
                          obj_to_img,
                          boxes_gt=boxes,
                          masks_gt=masks)
        samples['gt_box_gt_mask'] = model_out[0]

        model_out = model(objs, triples, obj_to_img, boxes_gt=boxes)
        samples['gt_box_pred_mask'] = model_out[0]

        model_out = model(objs, triples, obj_to_img)
        samples['pred_box_pred_mask'] = model_out[0]

        for k, v in samples.items():
            samples[k] = imagenet_deprocess_batch(v)

        mean_losses = {k: np.mean(v) for k, v in all_losses.items()}
        avg_iou = total_iou / total_boxes

        masks_to_store = masks
        if masks_to_store is not None:
            masks_to_store = masks_to_store.data.cpu().clone()

        masks_pred_to_store = masks_pred
        if masks_pred_to_store is not None:
            masks_pred_to_store = masks_pred_to_store.data.cpu().clone()

    batch_data = {
        'objs': objs.detach().cpu().clone(),
        'boxes_gt': boxes.detach().cpu().clone(),
        'masks_gt': masks_to_store,
        'triples': triples.detach().cpu().clone(),
        'obj_to_img': obj_to_img.detach().cpu().clone(),
        'triple_to_img': triple_to_img.detach().cpu().clone(),
        'boxes_pred': boxes_pred.detach().cpu().clone(),
        'masks_pred': masks_pred_to_store
    }
    out = [mean_losses, samples, batch_data, avg_iou]

    return tuple(out)
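
check_model accumulates total_iou through a jaccard helper that is not shown here. A minimal sketch, assuming boxes are (N, 4) tensors in (x0, y0, x1, y1) format and that the helper returns the IoU of corresponding box pairs summed over the batch (the caller divides by total_boxes to get avg_iou); the original implementation may differ.

import torch

def jaccard(boxes_a, boxes_b):
    # IoU between corresponding boxes, summed over the batch.
    x0 = torch.max(boxes_a[:, 0], boxes_b[:, 0])
    y0 = torch.max(boxes_a[:, 1], boxes_b[:, 1])
    x1 = torch.min(boxes_a[:, 2], boxes_b[:, 2])
    y1 = torch.min(boxes_a[:, 3], boxes_b[:, 3])
    inter = (x1 - x0).clamp(min=0) * (y1 - y0).clamp(min=0)
    area_a = (boxes_a[:, 2] - boxes_a[:, 0]) * (boxes_a[:, 3] - boxes_a[:, 1])
    area_b = (boxes_b[:, 2] - boxes_b[:, 0]) * (boxes_b[:, 3] - boxes_b[:, 1])
    union = area_a + area_b - inter
    return (inter / union.clamp(min=1e-8)).sum()
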
Example #3
def run_model(args, checkpoint, output_dir, fn, loader=None):
    vocab = checkpoint['model_kwargs']['vocab']
    print(vocab.keys())
    print(vocab['pred_name_to_idx'])
    dic_pred = vocab[
        'pred_name_to_idx']  #{'inside': 5, 'left of': 1, '__in_image__': 0, 'right of': 2, 'below': 4, 'above': 3, 'surrounding': 6}

    model = build_model(args, checkpoint)
    if loader is None:
        loader = build_loader(args, checkpoint)

    data = {
        'vocab': vocab,
        'objs': [],
        'masks_pred': [],
        'boxes_pred': [],
        'masks_gt': [],
        'boxes_gt': [],
        'filenames': [],
    }
    which_data = args.which_data
    makedir(output_dir, which_data)
    FN_path = os.path.join(output_dir, args.which_data,
                           args.model + '_FN_%s.csv' % fn)
    f = open(FN_path, 'w')
    #  f = open('err_bm_FH_%s.csv'%fn, 'w')
    with f:
        fieldnames = [
            'imageID', 'left of', 'right of', 'above', 'below', 'inside',
            'surrounding'
        ]
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()

    class2idx = {
        "left of": 0,
        "right of": 1,
        "above": 2,
        "below": 3,
        "inside": 4,
        "surrounding": 5
    }

    idx2class = {v: k for k, v in class2idx.items()}

    #  save_dir = makedir(save_dir,fn)
    count_edge_gt = []
    count_edge_pre = []
    img_idx = 0
    ibatch = 0
    for batch in loader:
        ibatch += 1
        # The loader here yields 8 items: the usual 7 plus the image ids.
        imgs, objs, boxes, masks, triples, obj_to_img, triple_to_img, imgs_ids = [
            x.cuda() for x in batch
        ]

        imgs_gt = imagenet_deprocess_batch(imgs)
        boxes_gt = None
        masks_gt = None
        if args.use_gt_boxes:
            boxes_gt = boxes
        if args.use_gt_masks:
            masks_gt = masks

        # Run the model with predicted masks
        model_out = model(objs,
                          triples,
                          obj_to_img,
                          boxes_gt=boxes_gt,
                          masks_gt=masks_gt)
        boxes_pred, masks_pred = model_out

        obj_data = [objs, boxes_pred, masks_pred]
        _, obj_data = split_graph_batch(triples, obj_data, obj_to_img,
                                        triple_to_img)
        objs, boxes_pred, masks_pred = obj_data

        obj_data_gt = [boxes.data]
        if masks is not None:
            obj_data_gt.append(masks.data)
        triples, obj_data_gt = split_graph_batch(triples, obj_data_gt,
                                                 obj_to_img, triple_to_img)
        boxes_gt, masks_gt = obj_data_gt[0], None
        if masks is not None:
            masks_gt = obj_data_gt[1]

        for i in range(imgs_gt.size(0)):
            # for edges
            triples_i = triples[i]
            img_id = imgs_ids[i].item()
            i_edge_gt, i_edge_pre = [], []
            for k in range(triples_i.shape[0]):

                if (triples_i[k][1] != 0):

                    idx_s, idx_o = triples_i[k][0], triples_i[k][2]

                    bbxs_of_img = boxes_gt[i]
                    masks_of_img = masks_gt[i]
                    box_s, box_o = bbxs_of_img[idx_s], bbxs_of_img[idx_o]
                    mask_s, mask_o = masks_of_img[idx_s], masks_of_img[idx_o]
                    edge_gt = get_relationship(box_s, box_o, mask_s, mask_o)
                    count_edge_gt.append(edge_gt)
                    i_edge_gt.append(edge_gt)

                    bbxs_of_img = boxes_pred[i]
                    masks_of_img = masks_pred[i]
                    box_s, box_o = bbxs_of_img[idx_s], bbxs_of_img[idx_o]
                    mask_s, mask_o = masks_of_img[idx_s], masks_of_img[idx_o]
                    mask_s, mask_o = torch.round(mask_s).type(
                        torch.long), torch.round(mask_o).type(torch.long)
                    edge_pre = get_relationship(box_s, box_o, mask_s, mask_o)
                    count_edge_pre.append(edge_pre)
                    i_edge_pre.append(edge_pre)

            edges_items = list(set(i_edge_gt + i_edge_pre))
            edges_items = [idx2class[x - 1] for x in edges_items]
            dictOfclass = {
                i: edges_items[i]
                for i in range(0, len(edges_items))
            }

            cmi = confusion_matrix(i_edge_pre,
                                   i_edge_gt)  #   axis y predicted axis x true

            total = cmi.sum(axis=1)
            numacc = [cmi[i][i] for i in range(cmi.shape[0])]
            numFP = total - numacc
            cmi = cmi / cmi.sum(axis=0)
            acci = [cmi[i][i] for i in range(cmi.shape[0])]

            rowFP, rowAcc = {'imageID': img_id}, {'imageID': img_id}
            for q in range(len(dictOfclass)):
                rowFP.update({dictOfclass[q]: numFP[q]})
                rowAcc.update({dictOfclass[q]: acci[q]})

            img_idx += 1

            FN_path = os.path.join(output_dir, args.which_data,
                                   args.model + '_FN_%s.csv' % fn)
            f = open(FN_path, 'a')
            with f:
                #          'imageID', 'left of', 'right of', 'above', 'below', 'inside', 'surounding'
                writer = csv.DictWriter(f, fieldnames=fieldnames)
                writer.writerow(rowFP)

        print('%d images' % img_idx)


#    break

    print('gt', len(count_edge_gt))
    print('pre', len(count_edge_pre))
    cm = confusion_matrix(count_edge_pre, count_edge_gt)
    cm = cm / cm.sum(axis=0)
    confusion_matrix_df = pd.DataFrame(
        cm)  #.rename(columns=idx2class, index=idx2class)
    print(confusion_matrix_df)
    label = {'a': '5%', 'b': '10%', 'c': '20%', 'd': '50%', 'e': '100%'}

    acc = [
        confusion_matrix_df[i][i] for i in range(confusion_matrix_df.shape[0])
    ]
    print('acc', acc)

    edgenames = [
        'left of', 'right of', 'above', 'below', 'inside', 'surrounding'
    ]
    accTotal = {'model': label[fn]}
    for q in range(0, len(acc)):
        accTotal.update({edgenames[q]: acc[q]})

    err_path = os.path.join(output_dir, args.which_data,
                            args.model + '_acc.csv')
    fil = open(err_path, 'a')

    with fil:
        fieldnames = [
            'model', 'left of', 'right of', 'above', 'below', 'inside',
            'surrounding'
        ]

        writer = csv.DictWriter(fil, fieldnames=fieldnames)
        writer.writerow(accTotal)

    print('over')
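
The per-image bookkeeping above indexes confusion_matrix(i_edge_pre, i_edge_gt) positionally and recovers relation names with idx2class[x - 1], which only lines up when every relation id occurs in the image, and the column-wise normalization divides by zero for relations absent from the ground truth. A more defensive variant is sketched below, assuming scikit-learn's confusion_matrix and that get_relationship returns relation ids 1..6; it is an illustration, not the code used to produce the reported CSVs.

import numpy as np
from sklearn.metrics import confusion_matrix

def per_class_accuracy(pred, gt, num_classes=6):
    # Pin the label order so row/column i always corresponds to relation id i + 1,
    # even when some relations never occur in this image.
    labels = list(range(1, num_classes + 1))
    cm = confusion_matrix(pred, gt, labels=labels)  # rows: predicted, columns: true
    col_totals = cm.sum(axis=0)
    # Guard against division by zero for relations absent from the ground truth.
    acc = np.where(col_totals > 0, np.diag(cm) / np.maximum(col_totals, 1), np.nan)
    return cm, acc
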
Example #4
def main(args):
    #print(args)
    check_args(args)
    float_dtype = torch.cuda.FloatTensor
    long_dtype = torch.cuda.LongTensor

    vocab, train_loader, val_loader = build_loaders(args)
    model, model_kwargs = build_model(args, vocab)
    model.type(float_dtype)
    model = model.cuda()

    layoutgen = LayoutGenerator(args.batch_size,
                                args.max_objects_per_image + 1, 184).cuda()

    if (not os.path.exists(args.output_folder)):
        os.makedirs(args.output_folder)

    if (args.checkpoint_start_from is not None):
        model_path = args.checkpoint_start_from

        checkpoint = torch.load(model_path)

        model.load_state_dict(checkpoint['model_state'])
        layoutgen.load_state_dict(checkpoint['layout_gen'])

    num_samples = 0
    for batchnum, batch in enumerate(tqdm(val_loader)):
        imgs, objs, boxes, masks, triples, obj_to_img, triple_to_img, combined, all_num_objs = [
            x.cuda() for x in batch
        ]

        for k in range(2):
            zlist = []
            for i in range(args.batch_size):
                # torch.normal already returns a float tensor, so the extra
                # FloatTensor wrapping is not needed.
                z = torch.normal(0, 1, size=(args.max_objects_per_image + 1, 4))
                zlist.append(z)

            zlist = torch.stack(zlist).cuda()
            zlist = torch.cat((zlist, combined[:, :, 4:]), dim=2)

            feature_vectors, logit_boxes = layoutgen(zlist)  # zlist is already on the GPU
            generated_boxes = torch.sigmoid(logit_boxes)

            new_gen_boxes = torch.empty((0, 4)).cuda()
            new_feature_vecs = torch.empty((0, args.embedding_dim)).cuda()
            #print(generated_boxes[0,:,:4])

            for kb in range(args.batch_size):
                new_gen_boxes = torch.cat([
                    new_gen_boxes,
                    torch.squeeze(generated_boxes[kb, :all_num_objs[kb], :4])
                ],
                                          dim=0)
                new_feature_vecs = torch.cat([
                    new_feature_vecs,
                    torch.squeeze(feature_vectors[kb, :all_num_objs[kb], :])
                ],
                                             dim=0)

            boxes_pred = new_gen_boxes

            triples = None
            imgs_pred = model(new_feature_vecs, new_gen_boxes, triples,
                              obj_to_img)
            imgs_pred = imagenet_deprocess_batch(imgs_pred)

            for idx in range(imgs_pred.shape[0]):
                current_img = imgs_pred[idx, :, :, :].numpy().transpose(
                    1, 2, 0)
                cv2.imwrite(
                    os.path.join(
                        args.output_folder,
                        str(batchnum) + '_' + str(k) + '_' + str(idx) +
                        '.jpg'), current_img)

            num_samples += 1

        if (num_samples + 1 >= args.num_sample_imgs):
            break
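
The inner loop of main above assembles the generator input one image at a time and then flattens the padded object slots with an accumulation loop. The same steps can be collected into a single helper; this is an untested sketch, the name sample_layout and the max_objs/noise_dim parameters are introduced here for illustration only, and the tensor layout (noise in the first four channels of combined, boxes in the first four generator outputs) is assumed from the loop above.

import torch

def sample_layout(layoutgen, combined, all_num_objs, max_objs, noise_dim=4):
    # Draw box noise for every object slot, run the layout generator, and keep
    # only the valid slots of each image, concatenated into flat tensors.
    b = combined.size(0)
    geo_z = torch.randn(b, max_objs, noise_dim, device=combined.device)
    z = torch.cat((geo_z, combined[:, :, noise_dim:]), dim=2)
    feature_vectors, logit_boxes = layoutgen(z)
    boxes = torch.sigmoid(logit_boxes)  # same mapping as 1 / (1 + exp(-x))
    keep_feats = [feature_vectors[i, :all_num_objs[i], :] for i in range(b)]
    keep_boxes = [boxes[i, :all_num_objs[i], :4] for i in range(b)]
    return torch.cat(keep_feats, dim=0), torch.cat(keep_boxes, dim=0)
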
Example #5
def run_model(args, checkpoint, output_dir, fn, loader=None):
    vocab = checkpoint['model_kwargs']['vocab']
    print(vocab.keys())
    print(vocab['pred_name_to_idx'])
    dic_pred = vocab[
        'pred_name_to_idx']  #{'inside': 5, 'left of': 1, '__in_image__': 0, 'right of': 2, 'below': 4, 'above': 3, 'surrounding': 6}

    model = build_model(args, checkpoint)
    if loader is None:
        loader = build_loader(args, checkpoint)

    data = {
        'vocab': vocab,
        'objs': [],
        'masks_pred': [],
        'boxes_pred': [],
        'masks_gt': [],
        'boxes_gt': [],
        'filenames': [],
    }
    which_data = args.which_data
    # Make sure the per-split output directory exists before the CSVs are written.
    makedir(output_dir, which_data)

    FN_path = os.path.join(output_dir, args.which_data,
                           args.model + '_FN_%s.csv' % fn)
    f = open(FN_path, 'w')
    #  f = open('err_bm_FH_%s.csv'%fn, 'w')
    with f:
        fieldnames = [
            'imageID', 'left of', 'right of', 'above', 'below', 'inside',
            'surrounding'
        ]
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()

    if args.model == 'bm_FH':
        FH_dir_train = args.FH_dir_train
        FH_dir_val = args.FH_dir_val
    if args.model == 'bm_FHrec':
        FH_dir_train = args.FHrec_dir_train
        FH_dir_val = args.FHrec_dir_val

    FH_objs_train, FH_edges_train, IDs_train = torch.load(
        FH_dir_train)  #torch.load('dataFH/train_FH.npy')
    FH_objs_val, FH_edges_val, IDs_val = torch.load(
        FH_dir_val)  #torch.load('dataFH/val_FH.npy')
    IDs_train = torch.tensor(IDs_train)
    IDs_val = torch.tensor(IDs_val)
    if args.which_data == 'train':
        IDs = IDs_train
        FH_objs = FH_objs_train
        FH_edges = FH_edges_train
    else:
        IDs = IDs_val
        FH_objs = FH_objs_val
        FH_edges = FH_edges_val

    class2idx = {
        "left of": 0,
        "right of": 1,
        "above": 2,
        "below": 3,
        "inside": 4,
        "surrounding": 5
    }

    idx2class = {v: k for k, v in class2idx.items()}
    count_edge_gt = []
    count_edge_pre = []
    img_idx = 0
    ibatch = 0
    for batch in loader:
        ibatch += 1
        imgs, objs, boxes, masks, triples, obj_to_img, triple_to_img, imgs_ids = [
            x.cuda() for x in batch
        ]

        #  get FH by images within a batch
        fh_obj, fh_edge = [], []
        for i in range(imgs_ids.shape[0]):
            idd = ((IDs == imgs_ids[i].item()).nonzero())
            fh_obj_i = FH_objs[idd]
            fh_obj.append(fh_obj_i)

            fh_edge_i = FH_edges[idd]
            fh_edge.append(fh_edge_i)

        fh_obj = torch.cat(fh_obj)
        fh_edge = torch.cat(fh_edge)

        imgs_gt = imagenet_deprocess_batch(imgs)
        boxes_gt = None
        masks_gt = None
        if args.use_gt_boxes:
            boxes_gt = boxes
        if args.use_gt_masks:
            masks_gt = masks

        # Run the model with predicted masks

        model_out = model(objs,
                          triples,
                          fh_obj,
                          fh_edge,
                          obj_to_img,
                          boxes_gt=boxes_gt,
                          masks_gt=masks_gt)
        boxes_pred, masks_pred = model_out

        obj_data = [objs, boxes_pred, masks_pred]
        _, obj_data = split_graph_batch(triples, obj_data, obj_to_img,
                                        triple_to_img)
        objs, boxes_pred, masks_pred = obj_data

        obj_data_gt = [boxes.data]
        if masks is not None:
            obj_data_gt.append(masks.data)
        triples, obj_data_gt = split_graph_batch(triples, obj_data_gt,
                                                 obj_to_img, triple_to_img)
        boxes_gt, masks_gt = obj_data_gt[0], None
        if masks is not None:
            masks_gt = obj_data_gt[1]

        for i in range(imgs_gt.size(0)):
            # for edges
            triples_i = triples[i]
            img_id = imgs_ids[i].item()
            i_edge_gt, i_edge_pre = [], []
            for k in range(triples_i.shape[0]):

                if (triples_i[k][1] != 0):

                    idx_s, idx_o = triples_i[k][0], triples_i[k][2]

                    bbxs_of_img = boxes_gt[i]
                    masks_of_img = masks_gt[i]
                    box_s, box_o = bbxs_of_img[idx_s], bbxs_of_img[idx_o]
                    mask_s, mask_o = masks_of_img[idx_s], masks_of_img[idx_o]
                    edge_gt = get_relationship(box_s, box_o, mask_s, mask_o)
                    count_edge_gt.append(edge_gt)
                    i_edge_gt.append(edge_gt)

                    bbxs_of_img = boxes_pred[i]
                    masks_of_img = masks_pred[i]
                    box_s, box_o = bbxs_of_img[idx_s], bbxs_of_img[idx_o]
                    mask_s, mask_o = masks_of_img[idx_s], masks_of_img[idx_o]
                    mask_s, mask_o = torch.round(mask_s).type(
                        torch.long), torch.round(mask_o).type(torch.long)
                    edge_pre = get_relationship(box_s, box_o, mask_s, mask_o)
                    count_edge_pre.append(edge_pre)
                    i_edge_pre.append(edge_pre)

            edges_items = list(set(i_edge_gt + i_edge_pre))
            #      print(i_edge_pre, i_edge_gt,edges_items)
            edges_items = [idx2class[x - 1] for x in edges_items]
            #      print(edges_items)
            dictOfclass = {
                i: edges_items[i]
                for i in range(0, len(edges_items))
            }
            #      edges_items = {k:idx2class[edges_items[k]-1] for k in range(len(edges_items))}
            #      print(edges_items)

            cmi = confusion_matrix(i_edge_pre,
                                   i_edge_gt)  #   axis y predicted axis x true
            #      confusion_matrix_df = pd.DataFrame(cmi).rename(columns=dictOfclass, index = dictOfclass)#idx2class, index=idx2class)
            #      print(confusion_matrix_df)
            #      FP
            #      print(cmi)
            total = cmi.sum(axis=1)
            numacc = [cmi[i][i] for i in range(cmi.shape[0])]
            numFP = total - numacc
            #      acc
            cmi = cmi / cmi.sum(axis=0)
            #      print(cmi)
            acci = [cmi[i][i] for i in range(cmi.shape[0])]

            rowFP, rowAcc = {'imageID': img_id}, {'imageID': img_id}
            for q in range(len(dictOfclass)):
                rowFP.update({dictOfclass[q]: numFP[q]})
                rowAcc.update({dictOfclass[q]: acci[q]})

#      print(rowi)
            img_idx += 1

            FN_path = os.path.join(output_dir, args.which_data,
                                   args.model + '_FN_%s.csv' % fn)
            f = open(FN_path, 'a')
            #      f = open('err_bm_FH_%s.csv'%fn, 'a')
            with f:
                #          'imageID', 'left of', 'right of', 'above', 'below', 'inside', 'surounding'
                writer = csv.DictWriter(f, fieldnames=fieldnames)
                #          writer = csv.writer(f)
                writer.writerow(rowFP)
#          writer.writerow(rowAcc)
#          writer.writerow([img_id]+acci)

        print('%d images' % img_idx)


#    break

    print('gt', len(count_edge_gt))
    print('pre', len(count_edge_pre))
    cm = confusion_matrix(count_edge_pre, count_edge_gt)
    cm = cm / cm.sum(axis=0)
    confusion_matrix_df = pd.DataFrame(
        cm)  #.rename(columns=idx2class, index=idx2class)
    print(confusion_matrix_df)
    label = {'a': '5%', 'b': '10%', 'c': '20%', 'd': '50%', 'e': '100%'}

    acc = [
        confusion_matrix_df[i][i] for i in range(confusion_matrix_df.shape[0])
    ]
    print('acc', acc)
    #  np.savetxt('scores.csv', acc, delimiter=',', fmt='%s')

    #  fn = 'a'
    #  filename = 'accuracy_' + args.model + '_' + fn + '_'  + '.txt'
    #  err_path = os.path.join(output_dir,args.which_data, filename)
    #  np.savetxt(err_path, acc, delimiter=',', fmt='%s')s
    #  np.savetxt(err_path, [p for p in zip(class2idx.keys(), acc)], delimiter=',', fmt='%s')

    #  print(['bm_FH_'+fn]+acc)
    #  save in total
    edgenames = [
        'left of', 'right of', 'above', 'below', 'inside', 'surrounding'
    ]
    accTotal = {'model': label[fn]}
    for q in range(0, len(acc)):
        accTotal.update({edgenames[q]: acc[q]})

    err_path = os.path.join(output_dir, args.which_data,
                            args.model + '_acc.csv')
    fil = open(err_path, 'a')

    with fil:
        fieldnames = [
            'model', 'left of', 'right of', 'above', 'below', 'inside',
            'surrounding'
        ]

        writer = csv.DictWriter(fil, fieldnames=fieldnames)
        writer.writerow(accTotal)

    print('over')
Example #6
def main(args):
    print(args)
    check_args(args)
    float_dtype = torch.cuda.FloatTensor
    long_dtype = torch.cuda.LongTensor

    vocab, train_loader, val_loader = build_loaders(args)
    print("vocab")
    print(vocab)
    obj_dict = vocab['object_name_to_idx']
    print("vocab['object_name_to_idx']")
    print(vocab['object_name_to_idx'])
    model, model_kwargs = build_model(args, vocab)
    model.type(float_dtype)
    print(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    obj_discriminator, d_obj_kwargs = build_obj_discriminator(args, vocab)
    img_discriminator, d_img_kwargs = build_img_discriminator(args, vocab)
    gan_g_loss, gan_d_loss = get_gan_losses(args.gan_loss_type)

    if obj_discriminator is not None:
        obj_discriminator.type(float_dtype)
        obj_discriminator.train()
        print(obj_discriminator)
        optimizer_d_obj = torch.optim.Adam(obj_discriminator.parameters(),
                                           lr=args.learning_rate)

    if img_discriminator is not None:
        img_discriminator.type(float_dtype)
        img_discriminator.train()
        print(img_discriminator)
        optimizer_d_img = torch.optim.Adam(img_discriminator.parameters(),
                                           lr=args.learning_rate)

    restore_path = './'

    # if args.restore_from_checkpoint:
    #   restore_path = '%s_with_model.pt' % args.checkpoint_name
    #   restore_path = os.path.join(args.output_dir, restore_path)
    restore_path = './sg2im-models/vg128.pt'
    if restore_path is not None and os.path.isfile(restore_path):
        print('Restoring from checkpoint:')
        print(restore_path)
        checkpoint = torch.load(restore_path)
        model.load_state_dict(checkpoint['model_state'])
        #optimizer.load_state_dict(checkpoint['optim_state'])

        # if obj_discriminator is not None:
        #   obj_discriminator.load_state_dict(checkpoint['d_obj_state'])
        #   optimizer_d_obj.load_state_dict(checkpoint['d_obj_optim_state'])

        #   if img_discriminator is not None:
        #     img_discriminator.load_state_dict(checkpoint['d_img_state'])
        #     optimizer_d_img.load_state_dict(checkpoint['d_img_optim_state'])

        #   t = checkpoint['counters']['t']
        #   if 0 <= args.eval_mode_after <= t:
        #     model.eval()
        #   else:
        #     model.train()
        #   epoch = checkpoint['counters']['epoch']
        # else:
        #   t, epoch = 0, 0
        #   checkpoint = {
        #     'args': args.__dict__,
        #     'vocab': vocab,
        #     'model_kwargs': model_kwargs,
        #     'd_obj_kwargs': d_obj_kwargs,
        #     'd_img_kwargs': d_img_kwargs,
        #     'losses_ts': [],
        #     'losses': defaultdict(list),
        #     'd_losses': defaultdict(list),
        #     'checkpoint_ts': [],
        #     'train_batch_data': [],
        #     'train_samples': [],
        #     'train_iou': [],
        #     'val_batch_data': [],
        #     'val_samples': [],
        #     'val_losses': defaultdict(list),
        #     'val_iou': [],
        #     'norm_d': [],
        #     'norm_g': [],
        #     'counters': {
        #       't': None,
        #       'epoch': None,
        #     },
        #     'model_state': None, 'model_best_state': None, 'optim_state': None,
        #     'd_obj_state': None, 'd_obj_best_state': None, 'd_obj_optim_state': None,
        #     'd_img_state': None, 'd_img_best_state': None, 'd_img_optim_state': None,
        #     'best_t': [],
        #   }

        # while True:
        #   if t >= args.num_iterations:
        #     break
        #   epoch += 1
        #   print('Starting epoch %d' % epoch)

        # for batch in train_loader:
        #   if t == args.eval_mode_after:
        #     print('switching to eval mode')
        #     model.eval()
        #     optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
        #   t += 1
        #   batch = [tensor.cuda() for tensor in batch]
        #   masks = None
        #   if len(batch) == 6:
        #     imgs, objs, boxes, triples, obj_to_img, triple_to_img = batch
        #   elif len(batch) == 7:
        #     imgs, objs, boxes, masks, triples, obj_to_img, triple_to_img = batch
        #   else:
        #     assert False
        #   predicates = triples[:, 1]

        #   with timeit('forward', args.timing):
        #     model_boxes = boxes

        #     model_masks = masks
        #     model_out = model(objs, triples, obj_to_img,
        #                       boxes_gt=model_boxes, masks_gt=model_masks)
        #     imgs_pred, boxes_pred, masks_pred, predicate_scores = model_out
        #   with timeit('loss', args.timing):
        #     # Skip the pixel loss if using GT boxes
        #     skip_pixel_loss = (model_boxes is None)
        #     total_loss, losses =  calculate_model_losses(
        #                             args, skip_pixel_loss, model, imgs, imgs_pred,
        #                             boxes, boxes_pred, masks, masks_pred,
        #                             predicates, predicate_scores)

        #   if obj_discriminator is not None:
        #     scores_fake, ac_loss = obj_discriminator(imgs_pred, objs, boxes, obj_to_img)
        #     total_loss = add_loss(total_loss, ac_loss, losses, 'ac_loss',
        #                           args.ac_loss_weight)
        #     weight = args.discriminator_loss_weight * args.d_obj_weight
        #     total_loss = add_loss(total_loss, gan_g_loss(scores_fake), losses,
        #                           'g_gan_obj_loss', weight)

        #   if img_discriminator is not None:
        #     scores_fake = img_discriminator(imgs_pred)
        #     weight = args.discriminator_loss_weight * args.d_img_weight
        #     total_loss = add_loss(total_loss, gan_g_loss(scores_fake), losses,
        #                           'g_gan_img_loss', weight)

        #   losses['total_loss'] = total_loss.item()
        #   if not math.isfinite(losses['total_loss']):
        #     print('WARNING: Got loss = NaN, not backpropping')
        #     continue

        #   optimizer.zero_grad()
        #   with timeit('backward', args.timing):
        #     total_loss.backward()
        #   optimizer.step()
        #   total_loss_d = None
        #   ac_loss_real = None
        #   ac_loss_fake = None
        #   d_losses = {}

        #   if obj_discriminator is not None:
        #     d_obj_losses = LossManager()
        #     imgs_fake = imgs_pred.detach()
        #     scores_fake, ac_loss_fake = obj_discriminator(imgs_fake, objs, boxes, obj_to_img)
        #     scores_real, ac_loss_real = obj_discriminator(imgs, objs, boxes, obj_to_img)

        #     d_obj_gan_loss = gan_d_loss(scores_real, scores_fake)
        #     d_obj_losses.add_loss(d_obj_gan_loss, 'd_obj_gan_loss')
        #     d_obj_losses.add_loss(ac_loss_real, 'd_ac_loss_real')
        #     d_obj_losses.add_loss(ac_loss_fake, 'd_ac_loss_fake')

        #     optimizer_d_obj.zero_grad()
        #     d_obj_losses.total_loss.backward()
        #     optimizer_d_obj.step()

        #   if img_discriminator is not None:
        #     d_img_losses = LossManager()
        #     imgs_fake = imgs_pred.detach()
        #     scores_fake = img_discriminator(imgs_fake)
        #     scores_real = img_discriminator(imgs)

        #     d_img_gan_loss = gan_d_loss(scores_real, scores_fake)
        #     d_img_losses.add_loss(d_img_gan_loss, 'd_img_gan_loss')

        #     optimizer_d_img.zero_grad()
        #     d_img_losses.total_loss.backward()
        #     optimizer_d_img.step()

        #   if t % args.print_every == 0:
        #     print('t = %d / %d' % (t, args.num_iterations))
        #     for name, val in losses.items():
        #       print(' G [%s]: %.4f' % (name, val))
        #       checkpoint['losses'][name].append(val)
        #     checkpoint['losses_ts'].append(t)

        #     if obj_discriminator is not None:
        #       for name, val in d_obj_losses.items():
        #         print(' D_obj [%s]: %.4f' % (name, val))
        #         checkpoint['d_losses'][name].append(val)

        #     if img_discriminator is not None:
        #       for name, val in d_img_losses.items():
        #         print(' D_img [%s]: %.4f' % (name, val))
        #         checkpoint['d_losses'][name].append(val)

        #   if t % args.checkpoint_every == 0:
        #     print('checking on train')
        #     train_results = check_model(args, t, train_loader, model)
        #     t_losses, t_samples, t_batch_data, t_avg_iou = train_results

        #     checkpoint['train_batch_data'].append(t_batch_data)
        #     checkpoint['train_samples'].append(t_samples)
        #     checkpoint['checkpoint_ts'].append(t)
        #     checkpoint['train_iou'].append(t_avg_iou)

        #     print('checking on val')
        #     val_results = check_model(args, t, val_loader, model)
        #     val_losses, val_samples, val_batch_data, val_avg_iou = val_results
        #     checkpoint['val_samples'].append(val_samples)
        #     checkpoint['val_batch_data'].append(val_batch_data)
        #     checkpoint['val_iou'].append(val_avg_iou)

        #     print('train iou: ', t_avg_iou)
        #     print('val iou: ', val_avg_iou)

        #     for k, v in val_losses.items():
        #       checkpoint['val_losses'][k].append(v)
        #     checkpoint['model_state'] = model.state_dict()

        #     if obj_discriminator is not None:
        #       checkpoint['d_obj_state'] = obj_discriminator.state_dict()
        #       checkpoint['d_obj_optim_state'] = optimizer_d_obj.state_dict()

        #     if img_discriminator is not None:
        #       checkpoint['d_img_state'] = img_discriminator.state_dict()
        #       checkpoint['d_img_optim_state'] = optimizer_d_img.state_dict()

        #     checkpoint['optim_state'] = optimizer.state_dict()
        #     checkpoint['counters']['t'] = t
        #     checkpoint['counters']['epoch'] = epoch
        #     checkpoint_path = os.path.join(args.output_dir,
        #                           '%s_with_model.pt' % args.checkpoint_name)
        #     print('Saving checkpoint to ', checkpoint_path)
        #     torch.save(checkpoint, checkpoint_path)

        #     # Save another checkpoint without any model or optim state
        #     checkpoint_path = os.path.join(args.output_dir,
        #                           '%s_no_model.pt' % args.checkpoint_name)
        #     key_blacklist = ['model_state', 'optim_state', 'model_best_state',
        #                      'd_obj_state', 'd_obj_optim_state', 'd_obj_best_state',
        #                      'd_img_state', 'd_img_optim_state', 'd_img_best_state']
        #     small_checkpoint = {}
        #     for k, v in checkpoint.items():
        #       if k not in key_blacklist:
        #         small_checkpoint[k] = v
        #     torch.save(small_checkpoint, checkpoint_path)
        batch_index = 0
        print('switching to eval mode')
        model.eval()
        for val_batch in val_loader:
            batch_index += 1
            print("val_batch_index: ", batch_index)

            val_batch = [tensor.cuda() for tensor in val_batch]
            val_masks = None
            if len(val_batch) == 6:
                val_imgs, val_objs, val_boxes, val_triples, val_obj_to_img, val_triple_to_img = val_batch
            elif len(val_batch) == 7:
                val_imgs, val_objs, val_boxes, val_masks, val_triples, val_obj_to_img, val_triple_to_img = val_batch
            else:
                assert False
            predicates = val_triples[:, 1]

            with timeit('forward', args.timing):
                val_model_boxes = val_boxes

                val_model_out = model(val_objs,
                                      val_triples,
                                      val_obj_to_img,
                                      boxes_gt=val_model_boxes,
                                      masks_gt=val_masks)
                val_imgs_pred, val_boxes_pred, val_masks_pred, val_predicate_scores = val_model_out

                # Note: val_imgs is reused to hold the deprocessed predicted images.
                val_imgs = imagenet_deprocess_batch(val_imgs_pred)

                output_img_dir = "./output_batch"

                if not os.path.exists(output_img_dir):
                    os.makedirs(output_img_dir)

                print("label: ")
                print(val_objs.shape[0])
                print(val_objs)

                object_name_list = []
                for label_index in range(val_objs.shape[0]):
                    object_index = val_objs[label_index].cpu().data.numpy()

                    object_name = list(obj_dict.keys())[list(
                        obj_dict.values()).index(object_index)]
                    object_name_list.append(object_name)
                    #print("val_objs[label_index]", val_objs[label_index].cpu().data.numpy())
                    #print("object_name: ", object_name)
                print(object_name_list)
                print("val_obj_to_img")
                print(val_obj_to_img)
                print("gt_boxes: ", val_model_boxes.shape)
                print(val_model_boxes)
                # Save the generated images
                for img_index in range(val_imgs.shape[0]):
                    img_np = val_imgs[img_index].numpy().transpose(1, 2, 0)
                    img_path = os.path.join(
                        output_img_dir,
                        'img_{}_{}.png'.format('%04d' % batch_index,
                                               '%03d' % img_index))
                    cv2.imwrite(img_path, img_np)

                #print("val_imgs_pred.shape: ", val_imgs_pred.shape)
                raise Exception("hahha, gonna save val_imgs_pred")
Example #7
def run_model(args, checkpoint, output_dir, fn, loader=None):
    vocab = checkpoint['model_kwargs']['vocab']
    print(vocab.keys())
    print(vocab['pred_name_to_idx'])
    dic_pred = vocab[
        'pred_name_to_idx']  #{'inside': 5, 'left of': 1, '__in_image__': 0, 'right of': 2, 'below': 4, 'above': 3, 'surrounding': 6}

    model = build_model(args, checkpoint)
    if loader is None:
        loader = build_loader(args, checkpoint)

    data = {
        'vocab': vocab,
        'objs': [],
        'masks_pred': [],
        'boxes_pred': [],
        'masks_gt': [],
        'boxes_gt': [],
        'filenames': [],
    }
    which_data = args.which_data
    save_dir = makedir(output_dir, which_data)
    FH_objs_train, FH_edges_train, IDs_train = torch.load(
        args.FH_dir_train)  #torch.load('dataFH/train_FH.npy')
    FH_objs_val, FH_edges_val, IDs_val = torch.load(
        args.FH_dir_val)  #torch.load('dataFH/val_FH.npy')
    IDs_train = torch.tensor(IDs_train)
    IDs_val = torch.tensor(IDs_val)
    if args.which_data == 'train':
        IDs = IDs_train
        FH_objs = FH_objs_train
        FH_edges = FH_edges_train
    else:
        IDs = IDs_val
        FH_objs = FH_objs_val
        FH_edges = FH_edges_val

    # Defined once, before the loop, so the mapping is available even for an
    # empty loader (the original re-created these dicts inside the batch loop).
    class2idx = {
        "left of": 0,
        "right of": 1,
        "above": 2,
        "below": 3,
        "inside": 4,
        "surrounding": 5
    }
    idx2class = {v: k for k, v in class2idx.items()}

    count_edge_gt = []
    count_edge_pre = []
    img_idx = 0
    ibatch = 0
    for batch in loader:
        ibatch += 1
        imgs, objs, boxes, masks, triples, obj_to_img, triple_to_img, imgs_ids = [
            x.cuda() for x in batch
        ]

        #  get FH by images within a batch
        fh_obj, fh_edge = [], []
        for i in range(imgs_ids.shape[0]):
            idd = ((IDs == imgs_ids[i].item()).nonzero())
            fh_obj_i = FH_objs[idd]
            fh_obj.append(fh_obj_i)

            fh_edge_i = FH_edges[idd]
            fh_edge.append(fh_edge_i)

        fh_obj = torch.cat(fh_obj)
        fh_edge = torch.cat(fh_edge)

        imgs_gt = imagenet_deprocess_batch(imgs)
        boxes_gt = None
        masks_gt = None
        if args.use_gt_boxes:
            boxes_gt = boxes
        if args.use_gt_masks:
            masks_gt = masks

        # Run the model with predicted masks

        model_out = model(objs,
                          triples,
                          fh_obj,
                          fh_edge,
                          obj_to_img,
                          boxes_gt=boxes_gt,
                          masks_gt=masks_gt)
        boxes_pred, masks_pred = model_out

        obj_data = [objs, boxes_pred, masks_pred]
        _, obj_data = split_graph_batch(triples, obj_data, obj_to_img,
                                        triple_to_img)
        objs, boxes_pred, masks_pred = obj_data

        obj_data_gt = [boxes.data]
        if masks is not None:
            obj_data_gt.append(masks.data)
        triples, obj_data_gt = split_graph_batch(triples, obj_data_gt,
                                                 obj_to_img, triple_to_img)
        boxes_gt, masks_gt = obj_data_gt[0], None
        if masks is not None:
            masks_gt = obj_data_gt[1]

        for i in range(imgs_gt.size(0)):
            # for edges
            triples_i = triples[i]

            for k in range(triples_i.shape[0]):

                if (triples_i[k][1] != 0):

                    idx_s, idx_o = triples_i[k][0], triples_i[k][2]

                    bbxs_of_img = boxes_gt[i]
                    masks_of_img = masks_gt[i]
                    box_s, box_o = bbxs_of_img[idx_s], bbxs_of_img[idx_o]
                    mask_s, mask_o = masks_of_img[idx_s], masks_of_img[idx_o]
                    edge_gt = get_relationship(box_s, box_o, mask_s, mask_o)
                    count_edge_gt.append(edge_gt)
                    # print('gt:', triples_i[k][1].item(), edge_gt)

                    bbxs_of_img = boxes_pred[i]
                    masks_of_img = masks_pred[i]
                    box_s, box_o = bbxs_of_img[idx_s], bbxs_of_img[idx_o]
                    mask_s, mask_o = masks_of_img[idx_s], masks_of_img[idx_o]
                    mask_s, mask_o = torch.round(mask_s).type(
                        torch.long), torch.round(mask_o).type(torch.long)
                    edge_pre = get_relationship(box_s, box_o, mask_s, mask_o)
                    count_edge_pre.append(edge_pre)

            img_idx += 1

        print('%d images' % img_idx)


#    break

    print('gt', len(count_edge_gt))
    print('pre', len(count_edge_pre))
    cm = confusion_matrix(count_edge_pre, count_edge_gt)  # y, x
    cm = cm / cm.sum(axis=0)
    confusion_matrix_df = pd.DataFrame(cm).rename(columns=idx2class,
                                                  index=idx2class)
    label = {'a': '5%', 'b': '10%', 'c': '20%', 'd': '50%', 'e': '100%'}
    ax = sns.heatmap(confusion_matrix_df,
                     annot=True,
                     cmap='Blues_r',
                     vmin=0,
                     vmax=1)
    title = 'M1_bm_FH_' + args.which_data + '_' + label[fn]
    ax.set(title=title, ylabel='Predicted label', xlabel='True label')
    fig = ax.get_figure()
    filename = 'CM1_bm_FH_' + fn + '_' + args.which_data + '.png'
    CM_path = os.path.join(output_dir, args.which_data, filename)
    fig.savefig(CM_path)
    fig.clf()
    print('over')
Example #8
def run_model(args, checkpoint, output_dir, fn, loader=None):
    vocab = checkpoint['model_kwargs']['vocab']
    model = build_model(args, checkpoint)
    if loader is None:
        loader = build_loader(args, checkpoint)

    data = {
        'vocab': vocab,
        'objs': [],
        'masks_pred': [],
        'boxes_pred': [],
        'masks_gt': [],
        'boxes_gt': [],
        'filenames': [],
    }
    which_data = args.which_data
    save_dir = makedir(output_dir, which_data)
    #  save_dir = makedir(save_dir,fn)

    img_idx = 0
    ibatch = 0
    for batch in loader:
        ibatch += 1
        masks = None
        if len(batch) == 6:
            imgs, objs, boxes, triples, obj_to_img, triple_to_img = [
                x.cuda() for x in batch
            ]
        elif len(batch) == 7:
            imgs, objs, boxes, masks, triples, obj_to_img, triple_to_img = [
                x.cuda() for x in batch
            ]
        # imgs, objs, boxes, masks, triples, obj_to_img, triple_to_img, imgs_ids = [x.cuda() for x in batch]


#    imgs_print = imagenet_deprocess_batch(imgs)
#    grid = torchvision.utils.make_grid(imgs_print)
#    writer.add_image('img/real', grid, ibatch-1)
        imgs_gt = imagenet_deprocess_batch(imgs)
        boxes_gt = None
        masks_gt = None
        if args.use_gt_boxes:
            boxes_gt = boxes
        if args.use_gt_masks:
            masks_gt = masks

        # Run the model with predicted masks
        model_out = model(objs,
                          triples,
                          obj_to_img,
                          boxes_gt=boxes_gt,
                          masks_gt=masks_gt)
        # boxes_pred, masks_pred = model_out
        imgs_pred, boxes_pred, masks_pred, _ = model_out

        obj_data = [objs, boxes_pred, masks_pred]
        _, obj_data = split_graph_batch(triples, obj_data, obj_to_img,
                                        triple_to_img)
        objs, boxes_pred, masks_pred = obj_data

        obj_data_gt = [boxes.data]
        if masks is not None:
            obj_data_gt.append(masks.data)
        triples, obj_data_gt = split_graph_batch(triples, obj_data_gt,
                                                 obj_to_img, triple_to_img)
        boxes_gt, masks_gt = obj_data_gt[0], None
        if masks is not None:
            masks_gt = obj_data_gt[1]

        imgs_bbx = torch.zeros(imgs_gt.size(), dtype=torch.uint8)
        imgs_bbx_pre = torch.zeros(imgs_gt.size(), dtype=torch.uint8)
        white_bbx_gt = torch.zeros(imgs_gt.size(), dtype=torch.uint8)
        white_bbx_gtb = torch.zeros(imgs_gt.size(), dtype=torch.uint8)
        white_bbx_pre = torch.zeros(imgs_gt.size(), dtype=torch.uint8)
        white_bbx_preb = torch.zeros(imgs_gt.size(), dtype=torch.uint8)

        for i in range(imgs_gt.size(0)):

            black_gt = np.zeros([args.image_size[0], args.image_size[1], 3])
            black_gtb = np.zeros([args.image_size[0], args.image_size[1], 3])
            img = np.copy(imgs_gt[i].numpy().transpose(1, 2, 0))
            layer = np.zeros(list(args.image_size))
            masks_of_img = masks_gt[i]
            bbxs_of_img = boxes_gt[i]
            num_of_objs = bbxs_of_img.size(0)
            for j in range(num_of_objs - 1):

                #          color = tuple(np.random.randint(256, size=3))
                color = colors[j % len(colors)]
                mask = masks_of_img[j].cpu().clone().numpy()
                mask = np.round(mask)
                bbx = (bbxs_of_img[j].cpu().numpy() *
                       args.image_size[0]).astype(int)
                bbx = np.clip(bbx, 0, args.image_size[0] - 1)
                wbbx = bbx[2] - bbx[0]
                hbbx = bbx[3] - bbx[1]
                if not wbbx > 0:
                    wbbx = 1
                    print('gt', wbbx, hbbx)
                if not hbbx > 0:
                    hbbx = 1
                    print('gt', wbbx, hbbx)
                maskPIL = Image.fromarray(mask.astype(np.uint8))
                maskPIL = maskPIL.resize((wbbx, hbbx), resample=Image.BILINEAR)

                layer[bbx[1]:bbx[3], bbx[0]:bbx[2]] = np.array(maskPIL)
                img = apply_mask(img, layer, color)
                masked_imgPIL = Image.fromarray(img.astype(np.uint8))
                draw = ImageDraw.Draw(masked_imgPIL)
                draw.rectangle(bbx.tolist(), width=1, outline=color)
                img = np.array(masked_imgPIL)

                black_gt = apply_mask(black_gt, layer, color)
                masked_blackPIL = Image.fromarray(black_gt.astype(np.uint8))
                draw2 = ImageDraw.Draw(masked_blackPIL)
                draw2.rectangle(bbx.tolist(), width=1, outline=color)
                black_gt = np.array(masked_blackPIL)

                blackPIL = Image.fromarray(black_gtb.astype(np.uint8))
                draw2b = ImageDraw.Draw(blackPIL)
                draw2b.rectangle(bbx.tolist(), width=1, outline=color)
                black_gtb = np.array(blackPIL)

            imgs_bbx[i] = torchvision.transforms.ToTensor()(
                masked_imgPIL) * 255
            white_bbx_gt[i] = torchvision.transforms.ToTensor()(
                masked_blackPIL) * 255
            white_bbx_gtb[i] = torchvision.transforms.ToTensor()(
                blackPIL) * 255

            black_gt = np.zeros([args.image_size[0], args.image_size[1], 3])
            black_gtb = np.zeros([args.image_size[0], args.image_size[1], 3])
            img = np.copy(imgs_gt[i].numpy().transpose(1, 2, 0))
            layer = np.zeros(list(args.image_size))
            bbxs_of_img = boxes_pred[i]
            masks_of_img = masks_pred[i]
            num_of_objs = bbxs_of_img.size(0)
            for j in range(num_of_objs - 1):

                color = colors[j % len(colors)]

                mask = masks_of_img[j].cpu().clone().numpy()
                mask = np.round(mask)
                bbx = (bbxs_of_img[j].cpu().numpy() *
                       args.image_size[0]).astype(int)
                bbx = np.clip(bbx, 0, args.image_size[0] - 1)
                wbbx = bbx[2] - bbx[0]
                hbbx = bbx[3] - bbx[1]
                if not wbbx > 0:
                    wbbx = 1
                    print('pred', wbbx, hbbx)
                if not hbbx > 0:
                    hbbx = 1
                    print('pred', wbbx, hbbx)
                maskPIL = Image.fromarray(mask.astype(np.uint8))
                maskPIL = maskPIL.resize((wbbx, hbbx), resample=Image.BILINEAR)
                #          print('wwbx,hbbx:',wbbx, hbbx, maskPIL2.size, bbx)
                layer[bbx[1]:bbx[3], bbx[0]:bbx[2]] = np.array(maskPIL)
                img = apply_mask(img, layer, color)
                masked_imgPIL = Image.fromarray(img.astype(np.uint8))
                draw = ImageDraw.Draw(masked_imgPIL)
                draw.rectangle(bbx.tolist(), width=1, outline=color)
                img = np.array(masked_imgPIL)

                black_gt = apply_mask(black_gt, layer, color)
                masked_blackPIL = Image.fromarray(black_gt.astype(np.uint8))
                draw2 = ImageDraw.Draw(masked_blackPIL)
                draw2.rectangle(bbx.tolist(), width=1, outline=color)
                black_gt = np.array(masked_blackPIL)

                blackPIL = Image.fromarray(black_gtb.astype(np.uint8))
                draw2b = ImageDraw.Draw(blackPIL)
                draw2b.rectangle(bbx.tolist(), width=1, outline=color)
                black_gtb = np.array(blackPIL)

            imgs_bbx_pre[i] = torchvision.transforms.ToTensor()(
                masked_imgPIL) * 255
            white_bbx_pre[i] = torchvision.transforms.ToTensor()(
                masked_blackPIL) * 255
            white_bbx_preb[i] = torchvision.transforms.ToTensor()(
                blackPIL) * 255

            img_idx += 1

        imgs_orig = imagenet_deprocess_batch(imgs)
        grid1 = torchvision.utils.make_grid(imgs_orig)
        toSave = grid1
        # GT
        # imgs_grid_GT = imgs_bbx.byte()
        # grid2 = torchvision.utils.make_grid(imgs_grid_GT)

        # toSave = torch.cat((grid1,grid2),1)

        white_grid_GT = white_bbx_gt.byte()
        grid3 = torchvision.utils.make_grid(white_grid_GT)

        toSave = torch.cat((toSave, grid3), 1)

        white_grid_GTb = white_bbx_gtb.byte()
        grid3b = torchvision.utils.make_grid(white_grid_GTb)

        toSave = torch.cat((toSave, grid3b), 1)
        # PRE
        imgs_pred = imagenet_deprocess_batch(imgs_pred)
        gridx = torchvision.utils.make_grid(imgs_pred)
        toSave = torch.cat((toSave, gridx), 1)

        # imgs_grid_pre = imgs_bbx_pre.byte()
        # grid4 = torchvision.utils.make_grid(imgs_grid_pre)

        # toSave = torch.cat((toSave, grid4),1)

        white_grid_pre = white_bbx_pre.byte()
        grid5 = torchvision.utils.make_grid(white_grid_pre)

        toSave = torch.cat((toSave, grid5), 1)

        white_grid_preb = white_bbx_preb.byte()
        grid5b = torchvision.utils.make_grid(white_grid_preb)

        toSave = torch.cat((toSave, grid5b), 1)

        toSavePIL = torchvision.transforms.ToPILImage()(toSave)

        # NOTE: the save_dir and fn arguments are overridden with hard-coded values here.
        save_dir = 'output'
        fn = 'M1re'
        grids_path = os.path.join(save_dir, '%d' % img_idx + fn + '.png')
        # grids_path = os.path.join(save_dir, '%d'%img_id + fn + '.png')
        toSavePIL.save(grids_path)
        print('Saved %d images' % img_idx)
Example #9
def run_model(args, checkpoint, output_dir, loader=None):
    vocab = checkpoint['model_kwargs']['vocab']
    model = build_model(args, checkpoint)
    if loader is None:
        loader = build_loader(args, checkpoint)

    img_dir = makedir(output_dir, 'images')
    graph_dir = makedir(output_dir, 'graphs', args.save_graphs)
    gt_img_dir = makedir(output_dir, 'images_gt', args.save_gt_imgs)
    data_path = os.path.join(output_dir, 'data.pt')

    data = {
        'vocab': vocab,
        'objs': [],
        'masks_pred': [],
        'boxes_pred': [],
        'masks_gt': [],
        'boxes_gt': [],
        'filenames': [],
    }

    img_idx = 0
    for batch in loader:
        masks = None
        if len(batch) == 6:
            imgs, objs, boxes, triples, obj_to_img, triple_to_img = [
                x.cuda() for x in batch
            ]
        elif len(batch) == 7:
            imgs, objs, boxes, masks, triples, obj_to_img, triple_to_img = [
                x.cuda() for x in batch
            ]

        imgs_gt = imagenet_deprocess_batch(imgs)
        boxes_gt = None
        masks_gt = None
        if args.use_gt_boxes:
            boxes_gt = boxes
        if args.use_gt_masks:
            masks_gt = masks

        # Run the model with predicted masks
        model_out = model(objs,
                          triples,
                          obj_to_img,
                          boxes_gt=boxes_gt,
                          masks_gt=masks_gt)
        imgs_pred, boxes_pred, masks_pred, _ = model_out
        imgs_pred = imagenet_deprocess_batch(imgs_pred)

        obj_data = [objs, boxes_pred, masks_pred]
        _, obj_data = split_graph_batch(triples, obj_data, obj_to_img,
                                        triple_to_img)
        objs, boxes_pred, masks_pred = obj_data

        obj_data_gt = [boxes.data]
        if masks is not None:
            obj_data_gt.append(masks.data)
        triples, obj_data_gt = split_graph_batch(triples, obj_data_gt,
                                                 obj_to_img, triple_to_img)
        boxes_gt, masks_gt = obj_data_gt[0], None
        if masks is not None:
            masks_gt = obj_data_gt[1]

        for i in range(imgs_pred.size(0)):
            img_filename = '%04d.png' % img_idx
            if args.save_gt_imgs:
                img_gt = imgs_gt[i].numpy().transpose(1, 2, 0)
                img_gt_path = os.path.join(gt_img_dir, img_filename)
                imsave(img_gt_path, img_gt)

            img_pred = imgs_pred[i]
            img_pred_np = imgs_pred[i].numpy().transpose(1, 2, 0)
            img_path = os.path.join(img_dir, img_filename)
            imsave(img_path, img_pred_np)

            data['objs'].append(objs[i].cpu().clone())
            data['masks_pred'].append(masks_pred[i].cpu().clone())
            data['boxes_pred'].append(boxes_pred[i].cpu().clone())
            data['boxes_gt'].append(boxes_gt[i].cpu().clone())
            data['filenames'].append(img_filename)

            cur_masks_gt = None
            if masks_gt is not None:
                cur_masks_gt = masks_gt[i].cpu().clone()
            data['masks_gt'].append(cur_masks_gt)

            if args.save_graphs:
                graph_img = draw_scene_graph(vocab, objs[i], triples[i])
                graph_path = os.path.join(graph_dir, img_filename)
                imsave(graph_path, graph_img)

            img_idx += 1

        torch.save(data, data_path)
        print('Saved %d images' % img_idx)
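
For completeness, here is a minimal driver of the kind these evaluation scripts are typically launched from, assuming an argparse namespace that carries the flags the run_model functions read and a checkpoint produced by the training script; the flag names mirror the attributes used above, but the real entry point may define more options.

import argparse
import torch

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint', required=True)
    parser.add_argument('--output_dir', default='output')
    parser.add_argument('--use_gt_boxes', action='store_true')
    parser.add_argument('--use_gt_masks', action='store_true')
    parser.add_argument('--save_gt_imgs', action='store_true')
    parser.add_argument('--save_graphs', action='store_true')
    args = parser.parse_args()

    # map_location keeps loading usable on CPU-only machines, although the
    # run_model functions above move every tensor to CUDA.
    checkpoint = torch.load(args.checkpoint, map_location='cpu')
    run_model(args, checkpoint, args.output_dir)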