def _resolve_output_dir(output_path, model_cfg):
    """Return the directory checkpoints are written to.

    Any value other than the literal ``"automatic"`` is returned unchanged.
    For ``"automatic"`` the directory
    ``outputs/<month>-<year>-experiments/<cfg stem>`` is created if missing.
    """
    if output_path != "automatic":
        return output_path

    # Read the clock once so month and year always belong to the same instant.
    now = datetime.now()
    experiment_dir = now.strftime('%B').lower() + '-' + str(now.year) + '-experiments/'
    # Take the basename first and strip the extension second (the same order
    # used for the temporary cfg name during ONNX export); doing it the other
    # way round yields an empty stem for paths such as "./cfg/model.cfg".
    cfg_stem = model_cfg.split('/')[-1].split('.')[0]
    output_uri = os.path.join('outputs/', experiment_dir + cfg_stem)
    os.makedirs(output_uri, exist_ok=True)
    return output_uri


def _build_optimizer(optimizer_pick, model, *, lr, momentum, weight_decay):
    """Create the Adam or SGD optimizer over the parameters that require grad."""
    trainable = filter(lambda p: p.requires_grad, model.parameters())
    if optimizer_pick == "Adam":
        print("Using Adam Optimizer")
        return torch.optim.Adam(trainable, lr=lr, weight_decay=weight_decay)
    if optimizer_pick == "SGD":
        print("Using SGD Optimizer")
        return torch.optim.SGD(trainable, lr=lr, momentum=momentum, weight_decay=weight_decay)
    # ValueError is an Exception subclass, so existing handlers keep working.
    raise ValueError(f"Invalid optimizer name: {optimizer_pick}")


def _export_onnx(darknet, model_cfg, output_uri, onnx_name):
    """Dump the current weights to a temp file, run yolo2onnx.py, file the result.

    NOTE(review): as in the original code, the file moved to
    ``<output_uri>/<onnx_name>`` is the weights dump, and a file called
    ``onnx_name`` in the working directory is deleted afterwards. Whether the
    converter's own output was meant to be kept cannot be told from here.
    """
    import shutil  # local import: only needed on the early-stopping path

    cfg_name = os.path.join(tempfile.gettempdir(),
                            model_cfg.split('/')[-1].split('.')[0] + '.tmp')

    # delete=False: the file is moved below. Leaving deletion to the context
    # manager would make it unlink a path that no longer exists, which raises
    # FileNotFoundError on interpreters whose cleanup does not tolerate that.
    with tempfile.NamedTemporaryFile(delete=False) as tmpfile:
        weights_name = tmpfile.name
    try:
        darknet.save_weights(weights_name)
        exit_code = subprocess.call(['python3', 'yolo2onnx.py',
                                     '--cfg_name', cfg_name,
                                     '--weights_name', weights_name])
        if exit_code != 0:
            print("yolo2onnx.py exited with status " + str(exit_code))
        # shutil.move falls back to copy + delete when the temp directory and
        # the output directory are on different filesystems (os.rename fails).
        shutil.move(weights_name, os.path.join(output_uri, onnx_name))
    finally:
        # Only does anything when a step above failed before the move.
        try:
            os.remove(weights_name)
        except FileNotFoundError:
            pass

    # Best-effort removal of the working-directory ONNX file.
    try:
        os.remove(onnx_name)
    except OSError:
        pass
    os.remove(cfg_name)


def main(*, evaluate, batch_size, optimizer_pick, model_cfg, weights_path, output_path,
         dataset_path, num_epochs, num_steps, checkpoint_interval, augment_affine,
         augment_hsv, lr_flip, ud_flip, momentum, gamma, lr, weight_decay, vis_batch,
         data_aug, blur, salt, noise, contrast, sharpen, ts, debug_mode, upload_dataset,
         xy_loss, wh_loss, no_object_loss, object_loss, vanilla_anchor, val_tolerance,
         min_epochs):
    """Train a Darknet model (or only evaluate it) and return the tracked validation loss.

    Relies on the module-level names ``Darknet``, ``ImageLabelDataset``,
    ``run_epoch``, ``validate``, ``device``, ``cuda`` and ``num_cpu``.

    Args (all keyword-only):
        evaluate: When truthy the training loop is skipped and
            ``validate.validate`` is run once with ``step=-1``.
        batch_size: Loader batch size (forced to 1 when ``debug_mode``).
        optimizer_pick: ``"Adam"`` or ``"SGD"``.
        model_cfg: Config path handed to ``Darknet``; its file stem also names
            the automatic output directory and the temporary cfg file.
        weights_path: Passed to ``model.load_weights``.
        output_path: Checkpoint directory, or ``"automatic"`` for
            ``outputs/<month>-<year>-experiments/<cfg stem>``.
        dataset_path, upload_dataset, ts, vis_batch, debug_mode: Forwarded to
            both ``ImageLabelDataset`` instances. ``vis_batch`` additionally
            sets the loaders' ``num_workers`` to 0 and ``debug_mode`` disables
            shuffling of the training loader.
        num_epochs, num_steps: Training stops when either bound is reached.
        checkpoint_interval: Weights are saved and validation is run every
            this many epochs, and also on the final epoch/step.
        augment_affine, augment_hsv, lr_flip, ud_flip, data_aug, blur, salt,
        noise, contrast, sharpen: Forwarded to the training dataset only; the
            validation dataset receives ``False`` for each of them.
        momentum: SGD momentum (unused for Adam).
        gamma: Multiplicative factor of the ``StepLR`` scheduler (step size 1).
        lr, weight_decay: Optimizer hyper-parameters.
        xy_loss, wh_loss, no_object_loss, object_loss, vanilla_anchor:
            Forwarded to the ``Darknet`` constructor.
        val_tolerance: Number of consecutive non-improving validation checks
            that triggers early stopping and the ONNX export step.
        min_epochs: Non-improving checks are only counted once ``epoch``
            exceeds this value.

    Returns:
        The validation loss stored by the most recent accepted check, or 999
        when no validation check ran.

    Raises:
        ValueError: If ``optimizer_pick`` is neither ``"Adam"`` nor ``"SGD"``.
    """
    print("Initializing model")
    model = Darknet(config_path=model_cfg, xy_loss=xy_loss, wh_loss=wh_loss,
                    no_object_loss=no_object_loss, object_loss=object_loss,
                    vanilla_anchor=vanilla_anchor)
    img_width, img_height = model.img_size()
    bw = model.get_bw()
    validate_uri, train_uri = model.get_links()

    output_uri = _resolve_output_dir(output_path, model_cfg)

    num_validate_images, num_train_images = model.num_images()
    conf_thresh, _nms_thresh, _iou_thresh = model.get_threshs()
    num_classes = model.get_num_classes()
    loss_constant = model.get_loss_constant()
    conv_activation = model.get_conv_activation()
    anchors = model.get_anchors()
    onnx_name = model.get_onnx_name()

    loader_batch_size = 1 if debug_mode else batch_size
    loader_workers = 0 if vis_batch else num_cpu

    print("Initializing data loaders")
    train_dataset = ImageLabelDataset(
        train_uri, dataset_path=dataset_path, width=img_width, height=img_height,
        augment_hsv=augment_hsv, augment_affine=augment_affine,
        num_images=num_train_images, bw=bw, n_cpu=num_cpu, lr_flip=lr_flip,
        ud_flip=ud_flip, vis_batch=vis_batch, data_aug=data_aug, blur=blur, salt=salt,
        noise=noise, contrast=contrast, sharpen=sharpen, ts=ts, debug_mode=debug_mode,
        upload_dataset=upload_dataset)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=loader_batch_size,
        shuffle=not debug_mode,
        num_workers=loader_workers,
        pin_memory=cuda)
    print("Num train images: ", len(train_data_loader.dataset))

    # The validation set never gets augmentation.
    validate_dataset = ImageLabelDataset(
        validate_uri, dataset_path=dataset_path, width=img_width, height=img_height,
        augment_hsv=False, augment_affine=False, num_images=num_validate_images,
        bw=bw, n_cpu=num_cpu, lr_flip=False, ud_flip=False, vis_batch=vis_batch,
        data_aug=False, blur=False, salt=False, noise=False, contrast=False,
        sharpen=False, ts=ts, debug_mode=debug_mode, upload_dataset=upload_dataset)
    validate_data_loader = torch.utils.data.DataLoader(
        validate_dataset,
        batch_size=loader_batch_size,
        shuffle=False,
        num_workers=loader_workers,
        pin_memory=cuda)
    print("Num validate images: ", len(validate_data_loader.dataset))

    ##### additional configuration #####
    print("Training batch size: " + str(batch_size))
    print("Checkpoint interval: " + str(checkpoint_interval))
    print("Loss constants: " + str(loss_constant))
    print("Anchor boxes: " + str(anchors))
    print("Training image width: " + str(img_width))
    print("Training image height: " + str(img_height))
    print("Confidence Threshold: " + str(conf_thresh))
    print("Number of training classes: " + str(num_classes))
    print("Conv activation type: " + str(conv_activation))
    print("Starting learning rate: " + str(lr))
    print("Tile and scale mode [on]" if ts else "Tile and scale mode [off]")
    print("Data augmentation mode [on]" if data_aug else "Data augmentation mode [off]")
    ####################################

    optimizer = _build_optimizer(optimizer_pick, model, lr=lr, momentum=momentum,
                                 weight_decay=weight_decay)

    print("Loading weights")
    model.load_weights(weights_path, model.get_start_weight_dim())

    # Keep a handle on the bare network: nn.DataParallel does not forward
    # custom methods, so save_weights must be called on the wrapped module.
    darknet = model
    if torch.cuda.device_count() > 1:
        print('Using ', torch.cuda.device_count(), ' GPUs')
        model = nn.DataParallel(model)
    model = model.to(device, non_blocking=True)

    # Set scheduler
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=gamma)

    val_loss = 999  # using a high number for validation loss
    val_loss_counter = 0
    step = [0]  # wrapping in a list so run_epoch can update it in place
    epoch = 0
    while epoch < num_epochs and step[0] < num_steps and not evaluate:
        epoch += 1
        model.train()
        run_epoch(label_prefix="train", data_loader=train_data_loader, epoch=epoch,
                  step=step, model=model, num_epochs=num_epochs, num_steps=num_steps,
                  optimizer=optimizer)
        # Step the schedule after the epoch's optimizer updates; stepping it
        # first skips the initial learning rate on PyTorch >= 1.1.0.
        scheduler.step()
        print('Completed epoch: ', epoch)

        reached_end = epoch == num_epochs or step[0] >= num_steps
        if epoch % checkpoint_interval != 0 and not reached_end:
            continue

        # First, save the weights
        save_weights_uri = os.path.join(output_uri, "{epoch}.weights".format(epoch=epoch))
        darknet.save_weights(save_weights_uri)

        with torch.no_grad():
            print("Calculating loss on validate data")
            epoch_losses, _epoch_time_total, epoch_num_targets = run_epoch(
                label_prefix="validate", data_loader=validate_data_loader, epoch=epoch,
                model=model, num_epochs=num_epochs, num_steps=num_steps, optimizer=None,
                step=step)
            avg_epoch_loss = epoch_losses[0] / epoch_num_targets
            print('Average Validation Loss: {0:10.6f}'.format(avg_epoch_loss))

            if avg_epoch_loss > val_loss and epoch > min_epochs:
                val_loss_counter += 1
                print(f"Validation loss did not decrease for {val_loss_counter}"
                      f" consecutive check(s)")
            else:
                # Also taken while epoch <= min_epochs, whatever the loss did.
                print("Validation loss decreased. Yay!!")
                val_loss_counter = 0
                val_loss = avg_epoch_loss
                ##### updating best result for optuna study #####
                with open("logs/result.txt", "w") as result:
                    result.write(str(avg_epoch_loss))
                ###########################################

            validate.validate(dataloader=validate_data_loader, model=model, device=device,
                              step=step[0], bbox_all=False, debug_mode=debug_mode)

            if val_loss_counter == val_tolerance:
                print("Validation loss stopped decreasing over the last " + str(val_tolerance) + " checkpoints, creating onnx file")
                _export_onnx(darknet, model_cfg, output_uri, onnx_name)
                break

    if evaluate:
        validate.validate(dataloader=validate_data_loader, model=model, device=device,
                          step=-1, bbox_all=False, tensorboard_writer=None,
                          debug_mode=debug_mode)
    return val_loss
class BBDetection:
    """Loads a Darknet model once and runs single-image box detection with it.

    Relies on the module-level names ``Darknet``, ``cv2``, ``img_pil``,
    ``torchvision``, ``calculate_padding`` and ``nms``.
    """

    def __init__(self,
                 model_cfg="./src/akhenaten_dv/scripts/Perception/BBoxDetection/model_cfg/yolo_baseline_tiny.cfg",
                 weights_path='./src/akhenaten_dv/scripts/Perception/BBoxDetection/7.weights',
                 conf_thres=0.8,
                 nms_thres=0.25):
        """Build the model, load its weights and move it to the chosen device.

        Args:
            model_cfg: Config path handed to ``Darknet``.
            weights_path: Weights file handed to ``model.load_weights``.
            conf_thres: Detections whose column-4 score is not above this
                value are discarded.
            nms_thres: Threshold passed as the third argument to ``nms``.

        The defaults reproduce the previously hard-coded values, so
        ``BBDetection()`` behaves as before.
        """
        # Relative default paths are resolved against this directory.
        print(os.getcwd())
        self.model_cfg = model_cfg
        self.weights_path = weights_path
        self.conf_thres = conf_thres
        self.nms_thres = nms_thres
        self.vanilla_anchor = False
        self.xy_loss = 2
        self.wh_loss = 1.6
        self.no_object_loss = 25
        self.object_loss = 0.1

        cuda = torch.cuda.is_available()
        self.device = torch.device('cuda:0' if cuda else 'cpu')

        # Fixed seeds for Python's and torch's random number generators.
        random.seed(0)
        torch.manual_seed(0)
        if cuda:
            torch.cuda.manual_seed(0)
            torch.cuda.manual_seed_all(0)
            torch.backends.cudnn.benchmark = True
            torch.cuda.empty_cache()

        self.model = Darknet(config_path=self.model_cfg,
                             xy_loss=self.xy_loss,
                             wh_loss=self.wh_loss,
                             no_object_loss=self.no_object_loss,
                             object_loss=self.object_loss,
                             vanilla_anchor=self.vanilla_anchor)
        # Load weights
        self.model.load_weights(self.weights_path, self.model.get_start_weight_dim())
        self.model.to(self.device, non_blocking=True)

    def detect(self, cv_img):
        """Detect boxes in one image and return them as ``[x0, y0, x1, y1]`` lists.

        Args:
            cv_img: Image array in OpenCV BGR channel order (it is converted
                with ``cv2.COLOR_BGR2RGB`` before use).

        Returns:
            A list with one ``[x0, y0, x1, y1]`` list of floats per box kept
            by ``nms``; empty when nothing passes the confidence threshold.
            Coordinates are mapped back with ``value / ratio - pad``, which
            presumably yields original-image pixels; confirm against
            ``calculate_padding``.
        """
        transforms = torchvision.transforms.functional

        rgb = cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB)
        img = img_pil.fromarray(rgb)
        w, h = img.size
        new_width, new_height = self.model.img_size()
        pad_h, pad_w, ratio = calculate_padding(h, w, new_height, new_width)

        # Pad with mid-grey on all four sides, then resize to the model's size.
        img = transforms.pad(img, padding=(pad_w, pad_h, pad_w, pad_h),
                             fill=(127, 127, 127), padding_mode="constant")
        img = transforms.resize(img, (new_height, new_width))
        if self.model.get_bw():
            img = transforms.to_grayscale(img, num_output_channels=1)
        batch = transforms.to_tensor(img).unsqueeze(0)  # batch of one image

        # Bound before the loop so every path returns a list.
        bboxes = []
        with torch.no_grad():
            self.model.eval()
            batch = batch.to(self.device, non_blocking=True)
            output = self.model(batch)

            # The batch holds a single image, so this loop runs once in practice.
            for detections in output:
                detections = detections[detections[:, 4] > self.conf_thres]
                if detections.shape[0] == 0:
                    continue

                # Columns 0:2 are treated as the box centre and 2:4 as its
                # width/height; convert to corner form for NMS.
                box_corner = torch.zeros((detections.shape[0], 4), device=detections.device)
                centre = detections[:, 0:2]
                half_size = detections[:, 2:4] / 2
                box_corner[:, 0:2] = centre - half_size
                box_corner[:, 2:4] = centre + half_size

                keep = nms(box_corner, detections[:, 4], self.nms_thres)
                if keep.shape[0] == 0:
                    continue

                # One device-to-host transfer for all boxes instead of four
                # per box; tolist() gives the same Python floats as item().
                for x0, y0, x1, y1 in box_corner[keep].to('cpu').tolist():
                    bboxes.append([x0 / ratio - pad_w,
                                   y0 / ratio - pad_h,
                                   x1 / ratio - pad_w,
                                   y1 / ratio - pad_h])
        return bboxes