Example #1
def train(opt):
    date = datetime.date(datetime.now())
    logs = '../logs/'
    logdir = os.path.join(logs,str(date))
    if not os.path.exists(logdir):
        os.mkdir(logdir)
    else:
        logdir = logdir+"_"+str(np.random.randint(0,1000))
        os.mkdir(logdir)
    
    train_data = AllInOneData(opt.train_path,set='train',transforms=transforms.Compose([Normalizer(),Resizer()]))
    train_generator = torch.utils.data.DataLoader(train_data,batch_size=opt.batch_size,shuffle=True,num_workers=8,
                                                    collate_fn=collater,drop_last=True)

    valid_data = AllInOneData(opt.train_path,set='validation',transforms=transforms.Compose([Normalizer(),Resizer()]))
    valid_generator = torch.utils.data.DataLoader(valid_data,batch_size=opt.batch_size,shuffle=False,num_workers=8,
                                                    collate_fn=collater,drop_last=True)
    
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    model = EfficientDetMultiBackbone(opt.train_path,compound_coef=0,heads=opt.heads)
    model.to(device)

    min_val_loss = 10e5
    
    if opt.optim == 'Adam':
        # note: the 'Adam' option actually instantiates AdamW (decoupled weight decay)
        optimizer = torch.optim.AdamW(model.parameters(),lr=opt.lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(),lr=opt.lr,momentum = opt.momentum,nesterov=True)

    scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, opt.lr, total_steps=None, epochs=opt.epochs,
                                                    steps_per_epoch=len(train_generator), pct_start=0.1, anneal_strategy='cos',
                                                    cycle_momentum=True, base_momentum=0.85, max_momentum=0.95, 
                                                    div_factor=25.0, final_div_factor=1000.0, last_epoch=-1)

    criterion = MTLoss(heads = opt.heads, device = device)
    
    print('Model is successfully initiated')
    print(f'Targets are {opt.heads}.')
    verb_loss = 0
    writer = SummaryWriter(logdir=logdir,filename_suffix=f'Train_{"_".join(opt.heads)}',comment='try1')
    
    for epoch in range(opt.epochs):
        model.train()
        Losses = {k:[] for k in opt.heads}
        description = f'Epoch:{epoch}| Total Loss:{verb_loss}'
        progress_bar = tqdm(train_generator,desc = description)
        Total_loss = []
        for sample in progress_bar:
                        
            imgs = sample['img'].to(device)
            gt_person_bbox = sample['person_bbox'].to(device)
            gt_face_bbox = sample['face_bbox'].to(device)
            gt_pose = sample['pose'].to(device)
            gt_face_landmarks = sample['face_landmarks'].to(device)
            gt_age = sample['age'].to(device)
            gt_race = sample['race'].to(device)
            gt_gender = sample['gender'].to(device)
            gt_skin = sample['skin'].to(device)
            gt_emotions = sample['emotion'].to(device)        

            out = model(imgs)
            annot = {'person':gt_person_bbox,'gender':gt_gender,
                     'face':gt_face_bbox,'emotions':gt_emotions,
                     'face_landmarks':gt_face_landmarks,
                     'pose':gt_pose}
            
            losses, lm_mask = criterion(out,annot,out['anchors'])
            optimizer.zero_grad()
            loss = torch.sum(torch.cat(list(losses.values())))
            loss.backward()
            optimizer.step()
            scheduler.step()

            verb_loss = loss.detach().cpu().numpy()
            Total_loss.append(verb_loss)
            description = f'Epoch:{epoch}| Total Loss:{verb_loss}|'
            for k,v in losses.items():
                Losses[k].append(v.detach().cpu().numpy())
                description+=f'{k}:{round(np.mean(Losses[k]),1)}|'
            progress_bar.set_description(description)
        
        writer.add_scalar('Train/Total',round(np.mean(Total_loss),2),epoch)
        for k in Losses.keys():
            writer.add_scalar(f"Train/{k}",round(np.mean(Losses[k]),2),epoch)
        
        if epoch%opt.valid_step==0:
            im = (imgs[0]+1)/2*255
            
            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()
            pp = postprocess(imgs,
                  out['anchors'], out['person'], out['gender'],
                  regressBoxes, clipBoxes,
                  0.4, 0.4)
            
            writer.add_image_with_boxes('Train/Box_prediction',im,pp[0]['rois'],epoch)
            img2 = out['face_landmarks']
            if img2.shape[1]>3:
                img2 = img2.sum(axis=1).unsqueeze(1)*255
                lm_mask = lm_mask.sum(axis=1).unsqueeze(1)*255
            writer.add_images('Train/landmarks_prediction',img2,epoch)
            writer.add_images('Train/landmark target', lm_mask,epoch)
            
            #VALIDATION STEPS
            model.eval()
            with torch.no_grad():
                valid_Losses = {k:[] for k in opt.heads}

                val_description = f'Validation| Total Loss:{verb_loss}'
                progress_bar = tqdm(valid_generator,desc = val_description)
                Total_loss = []
                for sample in progress_bar:   
                    imgs = sample['img'].to(device)
                    gt_person_bbox = sample['person_bbox'].to(device)
                    gt_face_bbox = sample['face_bbox'].to(device)
                    gt_pose = sample['pose'].to(device)
                    gt_face_landmarks = sample['face_landmarks'].to(device)
                    gt_age = sample['age'].to(device)
                    gt_race = sample['race'].to(device)
                    gt_gender = sample['gender'].to(device)
                    gt_skin = sample['skin'].to(device)
                    gt_emotions = sample['emotion'].to(device)
                    out = model(imgs)
                    annot = {'person':gt_person_bbox,'gender':gt_gender,
                     'face':gt_face_bbox,'emotions':gt_emotions,
                     'face_landmarks':gt_face_landmarks,
                     'pose':gt_pose}

                    losses, lm_mask = criterion(out,annot,out['anchors'])

                    loss = torch.sum(torch.cat(list(losses.values())))
                    verb_loss = loss.detach().cpu().numpy()
                    Total_loss.append(verb_loss)
                    val_description = f'Validation| Total Loss:{verb_loss}|'
                    for k,v in losses.items():
                        valid_Losses[k].append(v.detach().cpu().numpy())
                        val_description+=f'{k}:{round(np.mean(valid_Losses[k]),1)}|'
                    progress_bar.set_description(val_description)

                writer.add_scalar('Validation/Total',round(np.mean(Total_loss),2),epoch)
                for k in valid_Losses.keys():
                    writer.add_scalar(f"Validation/{k}",round(np.mean(valid_Losses[k]),2),epoch)

                im = (imgs[0]+1)/2*255
                
                regressBoxes = BBoxTransform()
                clipBoxes = ClipBoxes()
                pp = postprocess(imgs,
                  out['anchors'], out['person'], out['gender'],
                  regressBoxes, clipBoxes,
                  0.4, 0.4)

                writer.add_image_with_boxes('Validation/Box_prediction',im,pp[0]['rois'],epoch)
                
                img2 = out['face_landmarks']
                if img2.shape[1]>3:
                    img2 = img2.sum(axis=1).unsqueeze(1)*255
                    lm_mask = lm_mask.sum(axis=1).unsqueeze(1)*255
                writer.add_images('Validation/landmarks_prediction',img2,epoch)
                writer.add_images('Validation/landmark target', lm_mask,epoch)

                mean_val_loss = np.mean(Total_loss)
                if mean_val_loss < min_val_loss:
                    print("The model improved and checkpoint is saved.")
                    torch.save(model.state_dict(),f'{logdir}/{opt.save_name.split(".pt")[0]}_best_epoch_{epoch}.pt')
                    min_val_loss = mean_val_loss
                

        if epoch%100==0:
            torch.save(model.state_dict(),f'{logdir}/{opt.save_name.split(".pt")[0]}_epoch_{epoch}.pt')
    torch.save(model.state_dict(),f'{logdir}/{opt.save_name.split(".pt")[0]}_last.pt')
    writer.close()
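
A minimal sketch of how train() might be invoked. The option names below are inferred from how the function reads opt; the default values are illustrative assumptions, not values from the original repository, and the project-specific pieces (AllInOneData, EfficientDetMultiBackbone, MTLoss, collater, Normalizer, Resizer, BBoxTransform, ClipBoxes, postprocess) are assumed to be importable from the surrounding code base.

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--train_path', type=str, default='../data')   # dataset root (illustrative)
    parser.add_argument('--batch_size', type=int, default=8)
    parser.add_argument('--heads', nargs='+',
                        default=['person', 'face', 'gender', 'emotions', 'face_landmarks', 'pose'])
    parser.add_argument('--optim', type=str, default='Adam')           # 'Adam' selects AdamW above
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--momentum', type=float, default=0.9)         # only used by the SGD branch
    parser.add_argument('--epochs', type=int, default=100)
    parser.add_argument('--valid_step', type=int, default=5)           # run validation every N epochs
    parser.add_argument('--save_name', type=str, default='model.pt')
    opt = parser.parse_args()
    train(opt)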
Example #2
def predict(sample_paths, args):
    model, start_epoch = build_network(snapshot=args.snapshot,
                                       backend='retinanet')
    model.eval()
    if not os.path.exists(cfg.result_path):
        os.makedirs(cfg.result_path)
    print("Begin to predict mask: ",
          time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    for sample_path in sample_paths:
        filename = sample_path.split('/')[-1].split('.')[0]

        read = kfbReader.reader()
        read.ReadInfo(sample_path, 20, False)
        width = read.getWidth()
        height = read.getHeight()
        image_shape = (width, height)

        strides, x_num, y_num = calc_split_num((width, height))

        model.eval()
        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()
        transformed_all = []
        classification_all = []
        for i in range(x_num):
            for j in range(y_num):
                x = strides[0] * i if i < x_num - 1 else image_shape[0] - cfg.patch_size[0]
                y = strides[1] * j if j < y_num - 1 else image_shape[1] - cfg.patch_size[1]

                img = read.ReadRoi(x,
                                   y,
                                   cfg.patch_size[0],
                                   cfg.patch_size[1],
                                   scale=20).copy()
                img = img.transpose((2, 0, 1))
                img = img[np.newaxis, :, :, :]
                img = img.astype(np.float32) / 255.0
                img = torch.from_numpy(img).float()
                with torch.no_grad():
                    classification, regression, anchors = model(img.cuda())
                transformed_anchors = regressBoxes(anchors, regression)
                transformed_anchors = clipBoxes(transformed_anchors)

                scores = classification
                scores_over_thresh = (scores > 0.05)[0, :, 0]

                if scores_over_thresh.sum() == 0:
                    continue

                classification = classification[0, scores_over_thresh, :]
                transformed_anchors = transformed_anchors[
                    0, scores_over_thresh, :]
                transformed_anchors[:, 0] = transformed_anchors[:, 0] + x
                transformed_anchors[:, 1] = transformed_anchors[:, 1] + y
                transformed_anchors[:, 2] = transformed_anchors[:, 2] + x
                transformed_anchors[:, 3] = transformed_anchors[:, 3] + y
                scores = scores[0, scores_over_thresh, :]
                transformed_all.append(
                    torch.cat([transformed_anchors, scores], dim=1))
                classification_all.append(classification)

        # transformed_all = torch.cat(transformed_all, dim=0)
        # classification_all = torch.cat(classification_all, dim=0)
        # anchors_num_idx = nms(transformed_all, 0.5)
        # nms_scores = classification_all[anchors_num_idx, :]
        # nms_transformed = transformed_all[anchors_num_idx, :]

        # scores = nms_scores.detach().cpu().numpy()
        # transformed = nms_transformed.detach().cpu().numpy()
        # pos_all = []
        # for i in range(scores.shape[0]):
        #     x = int(transformed[i, 0])
        #     y = int(transformed[i, 1])
        #     w = max(int(transformed[i, 2] - transformed[i, 0]), 1)
        #     h = max(int(transformed[i, 3] - transformed[i, 1]), 1)
        #     p = float(scores[i, 0])
        #     pos = {'x': x, 'y': y, 'w': w, 'h': h, 'p': p}
        #     pos_all.append(pos)

        transformed_all = torch.cat(transformed_all, dim=0)
        classification_all = torch.cat(classification_all, dim=0)
        #print("transformed_all.size(0)=", transformed_all.size(0))
        #print("classification_all.size(0)=", classification_all.size(0))
        # split detections into chunks of at most ~200k boxes so NMS stays within memory
        num = int((transformed_all.size(0) + 200000) / 200000)
        #print("num=", num)

        pos_all = []
        trans = transformed_all.chunk(num, 0)
        classi = classification_all.chunk(num, 0)

        for i in range(num):
            #print("len(trans[i]),len(classi[i])=",len(trans[i]),len(classi[i]))
            pos_all = handle_nms(trans[i], classi[i], pos_all)
            #print("len(pos_all)=", len(pos_all))

        with open(os.path.join(cfg.result_path, filename + ".json"), 'w') as f:
            json.dump(pos_all, f)

        print("Finish predict mask: ", filename,
              time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
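
handle_nms() is called above but not shown in this excerpt. A hedged sketch of what it could look like, assuming torchvision's NMS op and the same {'x','y','w','h','p'} record format used in the commented-out block and in Example #5:

import torchvision

def handle_nms(transformed, classification, pos_all, iou_threshold=0.5):
    # transformed: [N, 5] rows of (x1, y1, x2, y2, score); classification: [N, num_classes]
    boxes = transformed[:, :4]
    scores = transformed[:, 4]
    keep = torchvision.ops.nms(boxes, scores, iou_threshold)
    kept_boxes = boxes[keep].detach().cpu().numpy()
    kept_scores = classification[keep, 0].detach().cpu().numpy()
    for box, p in zip(kept_boxes, kept_scores):
        w = max(int(box[2] - box[0]), 1)
        h = max(int(box[3] - box[1]), 1)
        pos_all.append({'x': int(box[0]), 'y': int(box[1]), 'w': w, 'h': h, 'p': float(p)})
    return pos_all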
Example #3
    out = model(prim)
    person_bbox = out['person']
    face_landmarks = out['face_landmarks']
    gender = out['gender']
    gender = gender[0].detach().cpu().numpy()
    positive_ind = np.where(gender > 0.8)[0]

    gender = gender[positive_ind]
    person_bbox = person_bbox[0, positive_ind].detach().cpu().numpy().astype(
        np.float32) * 512
    face_landmarks = face_landmarks.permute(0, 2, 3, 1)
    face_landmarks = face_landmarks[0].detach().cpu().numpy().astype(
        np.float32)
    face_landmarks = face_landmarks / np.max(face_landmarks)

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()
    pp = postprocess(prim, out['anchors'], out['person'], out['gender'],
                     regressBoxes, clipBoxes, 0.8, 0.2)

    for box, gen in zip(pp[0]['rois'], pp[0]['class_ids']):
        cv2.rectangle(im_ov, (int(box[0]), int(box[1])),
                      (int(box[2]), int(box[3])), (0, 0, 255), 2)
        cv2.putText(im_ov, str(gen), (int(box[0]), int(box[1]) + 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2, cv2.LINE_AA)

    col_map = cv2.applyColorMap((255 * face_landmarks).astype(np.uint8),
                                cv2.COLORMAP_JET)
    im_show = cv2.addWeighted(im_ov, 0.5, col_map, 0.5, 0)
    while True:
        cv2.imshow('test', im_show)
        # waitKey is required for the window to refresh; press 'q' to exit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
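
The fragment above relies on prim (the network input) and im_ov (the BGR image used for drawing), neither of which is defined here. A hedged sketch of how they might be prepared; the 512x512 size and the [-1, 1] normalisation are assumptions based on the *512 box rescaling above and the (img+1)/2 display code in Example #1:

import cv2
import numpy as np
import torch

im_ov = cv2.resize(cv2.imread('test.jpg'), (512, 512))            # BGR uint8 image; path is illustrative
prim = torch.from_numpy(im_ov.astype(np.float32) / 127.5 - 1.0)   # scale pixels to [-1, 1]
prim = prim.permute(2, 0, 1).unsqueeze(0)                         # HWC -> NCHW batch of one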
Example #4
def predict(sample_path, args):
    model, start_epoch = build_network(snapshot=args.snapshot,
                                       backend='retinanet')
    model.eval()
    if not os.path.exists(cfg.result_path):
        os.makedirs(cfg.result_path)

    test_data = CervicalDataset(sample_path,
                                cfg.patch_size,
                                transform=transforms.Compose([Normalizer()]))
    test_loader = DataLoader(test_data,
                             batch_size=1,
                             shuffle=True,
                             drop_last=False,
                             collate_fn=collater,
                             num_workers=0)

    model.eval()
    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()
    with torch.no_grad():
        for idx, data in enumerate(test_loader):
            st = time.time()
            annotations = data['label'].cuda()
            classification, regression, anchors = model(data['img'].cuda())

            scores, transformed_anchors = transform_anchors(
                classification, regression, anchors, regressBoxes, clipBoxes)
            print('Elapsed time: {}'.format(time.time() - st))
            scores = scores.detach().cpu().numpy()
            transformed_anchors = transformed_anchors.detach().cpu().numpy()

            idxs = np.where(scores > 0.5)
            img = np.array(255 * data['img'][0, :, :, :]).copy()

            img[img < 0] = 0
            img[img > 255] = 255

            img = np.transpose(img, (1, 2, 0))
            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
            img_anno = img.copy()

            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                draw_caption(img, (x1, y1, x2, y2), str(scores[idxs[0][j]]))

                cv2.rectangle(img, (x1, y1), (x2, y2),
                              color=(0, 0, 255),
                              thickness=2)

            for j in range(annotations.shape[1]):
                bbox = annotations[0, j, :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                draw_caption(img_anno, (x1, y1, x2, y2), 'pos')

                cv2.rectangle(img_anno, (x1, y1), (x2, y2),
                              color=(0, 0, 255),
                              thickness=2)

            merge_img = np.hstack([img, img_anno])
            cv2.imwrite(
                os.path.join(cfg.result_path, "result" + str(idx) + ".jpg"),
                merge_img)
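
draw_caption() is used above but not defined in this excerpt. A minimal sketch of the kind of helper it is assumed to be, drawing the caption just above the top-left corner of the box (black outline with white text for readability):

import cv2
import numpy as np

def draw_caption(image, box, caption):
    b = np.array(box).astype(int)
    cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
    cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)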
Example #5
def predict(sample_paths, args):
    model, start_epoch = build_network(snapshot=args.snapshot, backend="retinanet")
    model.eval()
    if not os.path.exists(cfg.result_path):
        os.makedirs(cfg.result_path)
    print(
        "Begin to predict mask: ", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    )
    for sample_path in sample_paths:
        filename = sample_path.split("/")[-1].split(".")[0]

        read = kfbReader.reader()
        read.ReadInfo(sample_path, 20, False)
        width = read.getWidth()
        height = read.getHeight()
        image_shape = (width, height)

        strides, x_num, y_num = calc_split_num((width, height))

        model.eval()
        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()
        transformed_all = []
        classification_all = []
        for i in range(x_num // 2):
            for j in range(y_num // 2):
                x = (
                    strides[0] * i
                    if i < x_num - 1
                    else image_shape[0] - cfg.patch_size[0]
                )
                y = (
                    strides[1] * j
                    if j < y_num - 1
                    else image_shape[1] - cfg.patch_size[1]
                )

                img = read.ReadRoi(
                    x, y, cfg.patch_size[0], cfg.patch_size[1], scale=20
                ).copy()
                img = img.transpose((2, 0, 1))
                img = img[np.newaxis, :, :, :]
                img = img.astype(np.float32) / 255.0
                img = torch.from_numpy(img).float()
                with torch.no_grad():
                    classification, regression, anchors = model(img.cuda())
                transformed_anchors = regressBoxes(anchors, regression)
                transformed_anchors = clipBoxes(transformed_anchors)

                scores = classification
                scores_over_thresh = (scores > 0.05)[0, :, 0]

                if scores_over_thresh.sum() == 0:
                    continue

                classification = classification[0, scores_over_thresh, :]
                transformed_anchors = transformed_anchors[0, scores_over_thresh, :]
                transformed_anchors[:, 0] = transformed_anchors[:, 0] + x
                transformed_anchors[:, 1] = transformed_anchors[:, 1] + y
                transformed_anchors[:, 2] = transformed_anchors[:, 2] + x
                transformed_anchors[:, 3] = transformed_anchors[:, 3] + y
                scores = scores[0, scores_over_thresh, :]
                transformed_all.append(torch.cat([transformed_anchors, scores], dim=1))
                classification_all.append(classification)

        transformed_all = torch.cat(transformed_all, dim=0)
        classification_all = torch.cat(classification_all, dim=0)
        anchors_num_idx = nms(transformed_all, 0.5)
        nms_scores = classification_all[anchors_num_idx, :]
        nms_transformed = transformed_all[anchors_num_idx, :]

        scores = nms_scores.detach().cpu().numpy()
        transformed = nms_transformed.detach().cpu().numpy()
        pos_all = []
        for i in range(scores.shape[0]):
            x = int(transformed[i, 0])
            y = int(transformed[i, 1])
            w = max(int(transformed[i, 2] - transformed[i, 0]), 1)
            h = max(int(transformed[i, 3] - transformed[i, 1]), 1)
            p = float(scores[i, 0])
            pos = {"x": x, "y": y, "w": w, "h": h, "p": p}
            pos_all.append(pos)

        with open(os.path.join(cfg.result_path, filename + ".json"), "w") as f:
            json.dump(pos_all, f)

        print(
            "Finish predict mask: ",
            filename,
            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
        )
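
calc_split_num() is not shown in these excerpts. A hedged sketch of what it might compute, consistent with how its return values are used above (a sliding-window tiling where the last patch on each axis is clamped to the image border); this is an assumption, not the repository's actual implementation:

import math

def calc_split_num(image_shape, patch_size=None, overlap=0):
    # image_shape: (width, height); patch_size defaults to cfg.patch_size
    patch_size = patch_size if patch_size is not None else cfg.patch_size
    strides = (patch_size[0] - overlap, patch_size[1] - overlap)
    x_num = int(math.ceil((image_shape[0] - patch_size[0]) / strides[0])) + 1
    y_num = int(math.ceil((image_shape[1] - patch_size[1]) / strides[1])) + 1
    return strides, x_num, y_num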