# Standard-library and third-party imports used by the functions in this file.
import os
import json
import time
from datetime import datetime

import cv2
import numpy as np
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from tqdm import tqdm
from tensorboardX import SummaryWriter  # assumed source, matching the logdir= keyword used below

# Project-local helpers (datasets, model, losses, box utilities, kfbReader, cfg, etc.)
# are assumed to be importable from the surrounding repository and are not shown here.


def train(opt):
    # Create a dated log directory; fall back to a random suffix if it already exists.
    date = datetime.date(datetime.now())
    logs = '../logs/'
    logdir = os.path.join(logs, str(date))
    if not os.path.exists(logdir):
        os.mkdir(logdir)
    else:
        logdir = logdir + "_" + str(np.random.randint(0, 1000))
        os.mkdir(logdir)

    train_data = AllInOneData(opt.train_path, set='train',
                              transforms=transforms.Compose([Normalizer(), Resizer()]))
    train_generator = DataLoader(train_data, batch_size=opt.batch_size,
                                 shuffle=True, num_workers=8,
                                 collate_fn=collater, drop_last=True)

    valid_data = AllInOneData(opt.train_path, set='validation',
                              transforms=transforms.Compose([Normalizer(), Resizer()]))
    valid_generator = DataLoader(valid_data, batch_size=opt.batch_size,
                                 shuffle=False, num_workers=8,
                                 collate_fn=collater, drop_last=True)

    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    model = EfficientDetMultiBackbone(opt.train_path, compound_coef=0, heads=opt.heads)
    model.to(device)

    min_val_loss = 10e5

    if opt.optim == 'Adam':
        optimizer = torch.optim.AdamW(model.parameters(), lr=opt.lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(), lr=opt.lr,
                                    momentum=opt.momentum, nesterov=True)
    # One-cycle schedule stepped once per batch (steps_per_epoch batches per epoch).
    scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, opt.lr,
                                                    total_steps=None,
                                                    epochs=opt.epochs,
                                                    steps_per_epoch=len(train_generator),
                                                    pct_start=0.1,
                                                    anneal_strategy='cos',
                                                    cycle_momentum=True,
                                                    base_momentum=0.85,
                                                    max_momentum=0.95,
                                                    div_factor=25.0,
                                                    final_div_factor=1000.0,
                                                    last_epoch=-1)
    criterion = MTLoss(heads=opt.heads, device=device)

    print('Model is successfully initiated')
    print(f'Targets are {opt.heads}.')

    verb_loss = 0
    writer = SummaryWriter(logdir=logdir,
                           filename_suffix=f'Train_{"_".join(opt.heads)}',
                           comment='try1')

    for epoch in range(opt.epochs):
        model.train()
        Losses = {k: [] for k in opt.heads}
        description = f'Epoch:{epoch}| Total Loss:{verb_loss}'
        progress_bar = tqdm(train_generator, desc=description)
        Total_loss = []
        for sample in progress_bar:
            imgs = sample['img'].to(device)
            gt_person_bbox = sample['person_bbox'].to(device)
            gt_face_bbox = sample['face_bbox'].to(device)
            gt_pose = sample['pose'].to(device)
            gt_face_landmarks = sample['face_landmarks'].to(device)
            gt_age = sample['age'].to(device)
            gt_race = sample['race'].to(device)
            gt_gender = sample['gender'].to(device)
            gt_skin = sample['skin'].to(device)
            gt_emotions = sample['emotion'].to(device)

            out = model(imgs)
            annot = {'person': gt_person_bbox, 'gender': gt_gender,
                     'face': gt_face_bbox, 'emotions': gt_emotions,
                     'face_landmarks': gt_face_landmarks,
                     'pose': gt_pose}

            losses, lm_mask = criterion(out, annot, out['anchors'])
            # Sum the per-head losses into a single scalar for the backward pass.
            loss = torch.sum(torch.cat(list(losses.values())))
            loss.backward()
            optimizer.step()
            scheduler.step()

            verb_loss = loss.detach().cpu().numpy()
            Total_loss.append(verb_loss)
            description = f'Epoch:{epoch}| Total Loss:{verb_loss}|'
            for k, v in losses.items():
                Losses[k].append(v.detach().cpu().numpy())
                description += f'{k}:{round(np.mean(Losses[k]), 1)}|'
            progress_bar.set_description(description)
            optimizer.zero_grad()

        writer.add_scalar('Train/Total', round(np.mean(Total_loss), 2), epoch)
        for k in Losses.keys():
            writer.add_scalar(f"Train/{k}", round(np.mean(Losses[k]), 2), epoch)

        if epoch % opt.valid_step == 0:
            # Log a qualitative snapshot of the last training batch.
            im = (imgs[0] + 1) / 2 * 255
            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()
            pp = postprocess(imgs, out['anchors'], out['person'], out['gender'],
                             regressBoxes, clipBoxes, 0.4, 0.4)
            writer.add_image_with_boxes('Train/Box_prediction', im, pp[0]['rois'], epoch)
            img2 = out['face_landmarks']
            if img2.shape[1] > 3:
                img2 = img2.sum(axis=1).unsqueeze(1) * 255
                lm_mask = lm_mask.sum(axis=1).unsqueeze(1) * 255
            writer.add_images('Train/landmarks_prediction', img2, epoch)
            writer.add_images('Train/landmark target', lm_mask, epoch)

            # VALIDATION STEPS
            model.eval()
            with torch.no_grad():
                valid_Losses = {k: [] for k in opt.heads}
                val_description = f'Validation| Total Loss:{verb_loss}'
                progress_bar = tqdm(valid_generator, desc=val_description)
                Total_loss = []
                for sample in progress_bar:
                    imgs = sample['img'].to(device)
                    gt_person_bbox = sample['person_bbox'].to(device)
                    gt_face_bbox = sample['face_bbox'].to(device)
                    gt_pose = sample['pose'].to(device)
                    gt_face_landmarks = sample['face_landmarks'].to(device)
                    gt_age = sample['age'].to(device)
                    gt_race = sample['race'].to(device)
                    gt_gender = sample['gender'].to(device)
                    gt_skin = sample['skin'].to(device)
                    gt_emotions = sample['emotion'].to(device)

                    out = model(imgs)
                    annot = {'person': gt_person_bbox, 'gender': gt_gender,
                             'face': gt_face_bbox, 'emotions': gt_emotions,
                             'face_landmarks': gt_face_landmarks,
                             'pose': gt_pose}

                    losses, lm_mask = criterion(out, annot, out['anchors'])
                    loss = torch.sum(torch.cat(list(losses.values())))

                    verb_loss = loss.detach().cpu().numpy()
                    Total_loss.append(verb_loss)
                    val_description = f'Validation| Total Loss:{verb_loss}|'
                    for k, v in losses.items():
                        valid_Losses[k].append(v.detach().cpu().numpy())
                        val_description += f'{k}:{round(np.mean(valid_Losses[k]), 1)}|'
                    progress_bar.set_description(val_description)

                writer.add_scalar('Validation/Total', round(np.mean(Total_loss), 2), epoch)
                for k in valid_Losses.keys():
                    writer.add_scalar(f"Validation/{k}", round(np.mean(valid_Losses[k]), 2), epoch)

                # Log a qualitative snapshot of the last validation batch.
                im = (imgs[0] + 1) / 2 * 255
                regressBoxes = BBoxTransform()
                clipBoxes = ClipBoxes()
                pp = postprocess(imgs, out['anchors'], out['person'], out['gender'],
                                 regressBoxes, clipBoxes, 0.4, 0.4)
                writer.add_image_with_boxes('Validation/Box_prediction', im, pp[0]['rois'], epoch)
                img2 = out['face_landmarks']
                if img2.shape[1] > 3:
                    img2 = img2.sum(axis=1).unsqueeze(1) * 255
                    lm_mask = lm_mask.sum(axis=1).unsqueeze(1) * 255
                writer.add_images('Validation/landmarks_prediction', img2, epoch)
                writer.add_images('Validation/landmark target', lm_mask, epoch)

            # verb_loss now holds the loss of the last validation batch; it drives checkpoint selection.
            if verb_loss < min_val_loss:
                print("The model improved and checkpoint is saved.")
                torch.save(model.state_dict(),
                           f'{logdir}/{opt.save_name.split(".pt")[0]}_best_epoch_{epoch}.pt')
                min_val_loss = verb_loss

        if epoch % 100 == 0:
            torch.save(model.state_dict(),
                       f'{logdir}/{opt.save_name.split(".pt")[0]}_epoch_{epoch}.pt')
        torch.save(model.state_dict(), f'{logdir}/{opt.save_name.split(".pt")[0]}_last.pt')

    writer.close()
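
# Example driver for train() (a minimal sketch, not part of the original file).
# The argument names below are assumptions inferred from the opt.* attributes
# read inside train(); defaults are placeholders and should be adjusted to the
# repository's actual CLI.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Multi-task EfficientDet training (assumed CLI)')
    parser.add_argument('--train_path', type=str, default='../data/')  # dataset root (assumed)
    parser.add_argument('--heads', nargs='+',
                        default=['person', 'gender', 'face', 'emotions', 'face_landmarks', 'pose'])
    parser.add_argument('--batch_size', type=int, default=8)
    parser.add_argument('--epochs', type=int, default=100)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--optim', type=str, default='Adam')
    parser.add_argument('--valid_step', type=int, default=5)
    parser.add_argument('--save_name', type=str, default='model.pt')
    opt = parser.parse_args()

    train(opt)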
def predict(sample_paths, args):
    model, start_epoch = build_network(snapshot=args.snapshot, backend='retinanet')
    model.eval()
    if not os.path.exists(cfg.result_path):
        os.makedirs(cfg.result_path)

    print("Begin to predict mask: ",
          time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

    for sample_path in sample_paths:
        filename = sample_path.split('/')[-1].split('.')[0]

        read = kfbReader.reader()
        read.ReadInfo(sample_path, 20, False)
        width = read.getWidth()
        height = read.getHeight()
        image_shape = (width, height)

        strides, x_num, y_num = calc_split_num((width, height))

        model.eval()
        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        transformed_all = []
        classification_all = []
        for i in range(x_num):
            for j in range(y_num):
                # Slide a patch window; the last row/column is snapped to the image border.
                x = strides[0] * i if i < x_num - 1 else image_shape[0] - cfg.patch_size[0]
                y = strides[1] * j if j < y_num - 1 else image_shape[1] - cfg.patch_size[1]

                img = read.ReadRoi(x, y, cfg.patch_size[0], cfg.patch_size[1], scale=20).copy()
                img = img.transpose((2, 0, 1))
                img = img[np.newaxis, :, :, :]
                img = img.astype(np.float32) / 255.0
                img = torch.from_numpy(img).float()

                with torch.no_grad():
                    classification, regression, anchors = model(img.cuda())
                    transformed_anchors = regressBoxes(anchors, regression)
                    transformed_anchors = clipBoxes(transformed_anchors)

                    scores = classification
                    scores_over_thresh = (scores > 0.05)[0, :, 0]
                    if scores_over_thresh.sum() == 0:
                        continue

                    classification = classification[0, scores_over_thresh, :]
                    transformed_anchors = transformed_anchors[0, scores_over_thresh, :]
                    # Shift patch-local boxes back into whole-slide coordinates.
                    transformed_anchors[:, 0] = transformed_anchors[:, 0] + x
                    transformed_anchors[:, 1] = transformed_anchors[:, 1] + y
                    transformed_anchors[:, 2] = transformed_anchors[:, 2] + x
                    transformed_anchors[:, 3] = transformed_anchors[:, 3] + y
                    scores = scores[0, scores_over_thresh, :]

                    transformed_all.append(torch.cat([transformed_anchors, scores], dim=1))
                    classification_all.append(classification)

        transformed_all = torch.cat(transformed_all, dim=0)
        classification_all = torch.cat(classification_all, dim=0)

        # Run NMS in chunks of at most roughly 200k boxes to keep memory bounded.
        num = int((transformed_all.size(0) + 200000) / 200000)
        pos_all = []
        trans = transformed_all.chunk(num, 0)
        classi = classification_all.chunk(num, 0)
        for i in range(num):
            pos_all = handle_nms(trans[i], classi[i], pos_all)

        with open(os.path.join(cfg.result_path, filename + ".json"), 'w') as f:
            json.dump(pos_all, f)

        print("Finish predict mask: ", filename,
              time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
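
# handle_nms is defined elsewhere in the repository. A plausible sketch is given
# below, assuming it wraps the same nms(boxes_with_scores, iou_threshold) helper
# used by the single-pass variant of predict() further down: each chunk is
# suppressed independently and the surviving boxes are appended to pos_all as
# x/y/w/h/p dictionaries.
def handle_nms(transformed, classification, pos_all):
    keep_idx = nms(transformed, 0.5)  # indices of boxes kept after NMS on this chunk
    scores = classification[keep_idx, :].detach().cpu().numpy()
    boxes = transformed[keep_idx, :].detach().cpu().numpy()
    for i in range(scores.shape[0]):
        x = int(boxes[i, 0])
        y = int(boxes[i, 1])
        w = max(int(boxes[i, 2] - boxes[i, 0]), 1)  # clamp degenerate boxes to 1 px
        h = max(int(boxes[i, 3] - boxes[i, 1]), 1)
        pos_all.append({'x': x, 'y': y, 'w': w, 'h': h, 'p': float(scores[i, 0])})
    return pos_all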
out = model(prim)
person_bbox = out['person']
face_landmarks = out['face_landmarks']
gender = out['gender']

# Keep only detections whose gender score exceeds 0.8; boxes are rescaled to the 512 px input.
gender = gender[0].detach().cpu().numpy()
positive_ind = np.where(gender > 0.8)[0]
gender = gender[positive_ind]
person_bbox = person_bbox[0, positive_ind].detach().cpu().numpy().astype(np.float32) * 512

# Normalize the landmark heatmaps to [0, 1] for visualization.
face_landmarks = face_landmarks.permute(0, 2, 3, 1)
face_landmarks = face_landmarks[0].detach().cpu().numpy().astype(np.float32)
face_landmarks = face_landmarks / np.max(face_landmarks)

regressBoxes = BBoxTransform()
clipBoxes = ClipBoxes()
pp = postprocess(prim, out['anchors'], out['person'], out['gender'],
                 regressBoxes, clipBoxes, 0.8, 0.2)

for box, gen in zip(pp[0]['rois'], pp[0]['class_ids']):
    cv2.rectangle(im_ov, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])),
                  (0, 0, 255), 2)
    cv2.putText(im_ov, str(gen), (int(box[0]), int(box[1]) + 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2, cv2.LINE_AA)

col_map = cv2.applyColorMap((255 * face_landmarks).astype(np.uint8), cv2.COLORMAP_JET)
im_show = cv2.addWeighted(im_ov, 0.5, col_map, 0.5, 0)

while True:
    cv2.imshow('test', im_show)  # show the blended overlay rather than the raw box image
    if cv2.waitKey(1) & 0xFF == ord('q'):  # waitKey is required for the window to refresh; press 'q' to exit
        break
cv2.destroyAllWindows()
def predict(sample_path, args):
    model, start_epoch = build_network(snapshot=args.snapshot, backend='retinanet')
    model.eval()
    if not os.path.exists(cfg.result_path):
        os.makedirs(cfg.result_path)

    test_data = CervicalDataset(sample_path, cfg.patch_size,
                                transform=transforms.Compose([Normalizer()]))
    test_loader = DataLoader(test_data, batch_size=1, shuffle=True, drop_last=False,
                             collate_fn=collater, num_workers=0)

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    with torch.no_grad():
        for idx, data in enumerate(test_loader):
            st = time.time()
            annotations = data['label'].cuda()
            classification, regression, anchors = model(data['img'].cuda())
            scores, transformed_anchors = transform_anchors(
                classification, regression, anchors, regressBoxes, clipBoxes)
            print('Elapsed time: {}'.format(time.time() - st))

            scores = scores.detach().cpu().numpy()
            transformed_anchors = transformed_anchors.detach().cpu().numpy()
            idxs = np.where(scores > 0.5)

            # Recover a displayable uint8 image from the normalized tensor.
            img = np.array(255 * data['img'][0, :, :, :]).copy()
            img[img < 0] = 0
            img[img > 255] = 255
            img = np.transpose(img, (1, 2, 0))
            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
            img_anno = img.copy()

            # Draw predicted boxes on one copy ...
            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                draw_caption(img, (x1, y1, x2, y2), str(scores[idxs[0][j]]))
                cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)

            # ... and ground-truth boxes on the other.
            for j in range(annotations.shape[1]):
                bbox = annotations[0, j, :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                draw_caption(img_anno, (x1, y1, x2, y2), 'pos')
                cv2.rectangle(img_anno, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)

            # Write prediction and annotation side by side for inspection.
            merge_img = np.hstack([img, img_anno])
            cv2.imwrite(os.path.join(cfg.result_path, "result" + str(idx) + ".jpg"), merge_img)
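
# Example invocation of the patch-level predict() above (a sketch, not part of the
# original file). The --snapshot flag name comes from args.snapshot consumed by
# build_network; the sample_path value is an assumption and should point at the
# saved test patches expected by CervicalDataset.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--snapshot', type=str, required=True, help='path to a trained checkpoint')
    args = parser.parse_args()

    predict('../data/test/', args)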
def predict(sample_paths, args):
    model, start_epoch = build_network(snapshot=args.snapshot, backend="retinanet")
    model.eval()
    if not os.path.exists(cfg.result_path):
        os.makedirs(cfg.result_path)

    print("Begin to predict mask: ",
          time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

    for sample_path in sample_paths:
        filename = sample_path.split("/")[-1].split(".")[0]

        read = kfbReader.reader()
        read.ReadInfo(sample_path, 20, False)
        width = read.getWidth()
        height = read.getHeight()
        image_shape = (width, height)

        strides, x_num, y_num = calc_split_num((width, height))

        model.eval()
        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        transformed_all = []
        classification_all = []
        for i in range(x_num // 2):
            for j in range(y_num // 2):
                x = strides[0] * i if i < x_num - 1 else image_shape[0] - cfg.patch_size[0]
                y = strides[1] * j if j < y_num - 1 else image_shape[1] - cfg.patch_size[1]

                img = read.ReadRoi(x, y, cfg.patch_size[0], cfg.patch_size[1], scale=20).copy()
                img = img.transpose((2, 0, 1))
                img = img[np.newaxis, :, :, :]
                img = img.astype(np.float32) / 255.0
                img = torch.from_numpy(img).float()

                with torch.no_grad():
                    classification, regression, anchors = model(img.cuda())
                    transformed_anchors = regressBoxes(anchors, regression)
                    transformed_anchors = clipBoxes(transformed_anchors)

                    scores = classification
                    scores_over_thresh = (scores > 0.05)[0, :, 0]
                    if scores_over_thresh.sum() == 0:
                        continue

                    classification = classification[0, scores_over_thresh, :]
                    transformed_anchors = transformed_anchors[0, scores_over_thresh, :]
                    transformed_anchors[:, 0] = transformed_anchors[:, 0] + x
                    transformed_anchors[:, 1] = transformed_anchors[:, 1] + y
                    transformed_anchors[:, 2] = transformed_anchors[:, 2] + x
                    transformed_anchors[:, 3] = transformed_anchors[:, 3] + y
                    scores = scores[0, scores_over_thresh, :]

                    transformed_all.append(torch.cat([transformed_anchors, scores], dim=1))
                    classification_all.append(classification)

        transformed_all = torch.cat(transformed_all, dim=0)
        classification_all = torch.cat(classification_all, dim=0)

        anchors_num_idx = nms(transformed_all, 0.5)
        nms_scores = classification_all[anchors_num_idx, :]
        nms_transformed = transformed_all[anchors_num_idx, :]
        scores = nms_scores.detach().cpu().numpy()
        transformed = nms_transformed.detach().cpu().numpy()

        pos_all = []
        for i in range(scores.shape[0]):
            x = int(transformed[i, 0])
            y = int(transformed[i, 1])
            w = max(int(transformed[i, 2] - transformed[i, 0]), 1)
            h = max(int(transformed[i, 3] - transformed[i, 1]), 1)
            p = float(scores[i, 0])
            pos = {"x": x, "y": y, "w": w, "h": h, "p": p}
            pos_all.append(pos)

        with open(os.path.join(cfg.result_path, filename + ".json"), "w") as f:
            json.dump(pos_all, f)

        print("Finish predict mask: ", filename,
              time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
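
# calc_split_num is another repository helper that is not shown here. A minimal
# sketch consistent with how its outputs are used above (strides no larger than
# cfg.patch_size, with the callers snapping the last row/column to the image
# border) might look like the following; the 50% overlap is an assumption.
import math

def calc_split_num(image_shape):
    strides = (cfg.patch_size[0] // 2, cfg.patch_size[1] // 2)  # assumed 50% overlap between patches
    x_num = int(math.ceil((image_shape[0] - cfg.patch_size[0]) / strides[0])) + 1
    y_num = int(math.ceil((image_shape[1] - cfg.patch_size[1]) / strides[1])) + 1
    return strides, x_num, y_num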