def peers():
    # check the available peers visible
    # improvement: create a mechanism so that only peers who approve can be seen
    if request.method == 'GET':
        try:
            # get request parameters
            req = {k: v for k, v in dict(request.args).items()}
            if all([isinstance(v, list) for k, v in req.items()]):
                req = {k: v[0] for k, v in dict(request.args).items()}
            # validate request parameters (skipped)
            # authentication / authorization
            # return active peers visible to the requesting client
            active_peers = Tracker.get_active_clients()
            try:
                # remove the requesting client from the list
                active_peers.remove(req['id'])
            except (KeyError, ValueError):
                pass
            msg = {'status': 'success', 'count': len(active_peers), 'peers': active_peers}
        except Exception as e:
            msg = {'status': 'failure', 'error': '{}:{}'.format(e.__class__.__name__, str(e))}
        return jsonify(msg)

    if request.method == 'POST':
        try:
            # get json parameters
            req = request.get_json()
            if req is None:
                raise Exception('No json found')
            # validate request parameters (skipped)
            # authentication / authorization
            # add client to active peers
            # later, clients should be allowed to choose who they are visible to
            Tracker.add_active_clients([req['id']])
            msg = {'status': 'success'}
        except Exception as e:
            msg = {'status': 'failure', 'error': '{}:{}'.format(e.__class__.__name__, str(e))}
        return jsonify(msg)

    return "Invalid Request"
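# Usage sketch (assumption): with the handler above registered on a Flask app, e.g. under a
# hypothetical '/peers' route on localhost:5000, a peer could register itself via POST and then
# list the other visible peers via GET. Only the 'id' field and the JSON response shape come
# from the handler itself; the route, host, and port are illustrative.
import requests

TRACKER_URL = 'http://localhost:5000/peers'  # hypothetical route and port

# register this client as an active peer
requests.post(TRACKER_URL, json={'id': 'client-42'})

# list the peers currently visible to this client
resp = requests.get(TRACKER_URL, params={'id': 'client-42'}).json()
if resp['status'] == 'success':
    print('{} peer(s) online: {}'.format(resp['count'], resp['peers']))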
def main(output_dir, n_attentions, image_shape, batch_size, learning_rate, gpu):
    """Perform model training"""
    # initialize the dataset
    train_set = TrainDataset(phase='train', shape=image_shape)
    val_set = TrainDataset(phase='val', shape=image_shape)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True,
                              num_workers=8, pin_memory=True)
    val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=True,
                            num_workers=8, pin_memory=True)

    # initialize the model
    model = Model(n_classes=196, input_size=image_shape,
                  n_attentions=n_attentions, gpu=gpu)
    if gpu:
        model = model.cuda()

    # initialize related optimization methods
    criterion = nn.CrossEntropyLoss()
    criterion_attention = nn.MSELoss()
    optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
    feature_center = torch.zeros(196, n_attentions * 2208)
    scheduler = SuperConvergence(optimizer, max_lr=learning_rate, stepsize=5000,
                                 better_as_larger=False, last_epoch=-1)
    if gpu:
        feature_center = feature_center.cuda()

    # initialize other hyperparameters
    crop_threshold = 0.5
    drop_threshold = 0.5
    focal_weight = 0.4

    # perform the training
    epoch = 0
    while True:
        print('Starting epoch {:03d}'.format(epoch))

        # statistic tracking
        train_loss_tracker = Tracker()
        train_accuracy_tracker = Tracker()
        model = model.train()
        for idx, (X, y) in enumerate(train_loader):
            if gpu:
                X = X.cuda()
                y = y.cuda()

            mini_batch = X.size(0)
            logits, feature_matrix, sampled_attentions = model(X)
            loss = (criterion(logits, y)
                    + criterion_attention(feature_matrix, feature_center[y]))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            feature_center[y] = feature_center[y] + (
                focal_weight * (feature_matrix.detach() - feature_center[y]))

            preds, _ = get_predictions(logits.squeeze().cpu().data.numpy())
            preds = np.array(preds) == y.cpu().squeeze().data.numpy()
            accuracy = np.mean(preds)
            train_loss_tracker.step(loss.item() * mini_batch, mini_batch)
            train_accuracy_tracker.step(accuracy * mini_batch, mini_batch)

            # perform data cropping
            with torch.no_grad():
                crop_attentions = F.interpolate(
                    sampled_attentions.unsqueeze(1), size=image_shape,
                    mode='bilinear', align_corners=False)
                crop_attentions = crop_attentions > crop_threshold
                cropped_images = []
                for _idx in range(crop_attentions.size(0)):
                    positive_indices = torch.nonzero(crop_attentions[_idx])
                    x_min = torch.min(positive_indices[:, 2])
                    y_min = torch.min(positive_indices[:, 1])
                    x_max = torch.max(positive_indices[:, 2])
                    y_max = torch.max(positive_indices[:, 1])
                    cropped_image = F.interpolate(
                        crop_attentions[_idx, :, y_min:y_max + 1, x_min:x_max + 1].float().unsqueeze(0)
                        * X[_idx, :, y_min:y_max + 1, x_min:x_max + 1].unsqueeze(0),
                        size=image_shape, mode='bilinear', align_corners=False)
                    cropped_images.append(cropped_image)
                cropped_images = torch.cat(cropped_images, dim=0)

            logits, _, _ = model(cropped_images)
            loss = criterion(logits, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # perform attention dropping
            with torch.no_grad():
                drop_attentions = F.interpolate(
                    sampled_attentions.unsqueeze(1), size=image_shape,
                    mode='bilinear', align_corners=False)
                drop_attentions = (drop_attentions < drop_threshold).float()
                dropped_images = drop_attentions * X

            logits, _, _ = model(dropped_images)
            loss = criterion(logits, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            stop = (epoch == 10)
            scheduler.step(epoch=None, metrics=train_loss_tracker.get_average(), stop=stop)

            if idx % 100 == 0:
                _temp_lr = optimizer.param_groups[0]['lr']
                print('Batch {}, average loss {} - average accuracy {}, lr {}'.format(
                    idx, train_loss_tracker.get_average(),
                    train_accuracy_tracker.get_average(), _temp_lr))

        # do validation pass
        val_loss_tracker = Tracker()
        val_accuracy_tracker = Tracker()
        model = model.eval()
        for X_val, y_val in val_loader:
            if gpu:
                X_val = X_val.cuda()
                y_val = y_val.cuda()

            mini_batch = X_val.size(0)
            with torch.no_grad():
                logits, _, _ = model(X_val)
                val_loss = criterion(logits, y_val)

            preds, _ = get_predictions(logits.squeeze().cpu().data.numpy())
            preds = np.array(preds) == y_val.cpu().squeeze().data.numpy()
            accuracy = np.mean(preds)
            val_loss_tracker.step(val_loss.item() * mini_batch, mini_batch)
            val_accuracy_tracker.step(accuracy * mini_batch, mini_batch)

        state_dict = {
            'n_classes': 196,
            'input_size': image_shape,
            'n_attentions': n_attentions,
            'state_dict': model.state_dict()
        }
        torch.save(state_dict, os.path.join(output_dir, '{:03d}.ckpt'.format(epoch)))
        print('Validation - loss {}, accuracy {}'.format(
            val_loss_tracker.get_average(), val_accuracy_tracker.get_average()))
        epoch += 1
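# Note: each epoch above saves a checkpoint that bundles the constructor arguments with the
# weights, so a later script can rebuild the model directly from the file. A minimal reload
# sketch; the checkpoint path is hypothetical and `Model` is assumed to be importable:
import torch

ckpt_path = 'output/010.ckpt'  # hypothetical checkpoint written by the loop above
state = torch.load(ckpt_path, map_location='cpu')

model = Model(n_classes=state['n_classes'],
              input_size=state['input_size'],
              n_attentions=state['n_attentions'],
              gpu=False)
model.load_state_dict(state['state_dict'])
model.eval()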
def main():
    # Paper: In the training phase, we set the batch size to 128,
    # base learning rate to 10^-3, weight decay to 5x10^-4, and momentum to 0.9
    parser = argparse.ArgumentParser(description='Train - Evaluate DeepCORAL model')
    parser.add_argument('--disable_cuda', action='store_true', help='Disable CUDA')
    parser.add_argument('--epochs', type=int, default=50, help='Number of total epochs to run')
    parser.add_argument('--backbone_network', type=str, default='alexnet', help='Backbone CNN')
    parser.add_argument('--batch_size', type=int, default=128, help='Batch size')
    parser.add_argument('--lr', type=float, default=1e-3, help='Learning rate')
    parser.add_argument('--decay', type=float, default=5e-4, help='Weight decay')
    parser.add_argument('--momentum', type=float, default=0.9, help="Optimizer's momentum")
    parser.add_argument('--lambda_coral', type=float, default=0.5,
                        help="Weight that trades off the adaptation with "
                             "classification accuracy on the source domain")
    parser.add_argument('--source', default='amazon', help="Source domain (dataset)")
    parser.add_argument('--target', default='webcam', help="Target domain (dataset)")
    args = parser.parse_args()

    args.device = None
    if not args.disable_cuda and torch.cuda.is_available():
        args.device = torch.device('cuda')
    else:
        args.device = torch.device('cpu')

    if args.backbone_network == 'alexnet' or args.backbone_network == 'resnet50':
        if args.source == 'ub':
            source_data_dir = '/home/alejandro/ub/journal_2019/split/domain_adaptation/ub/static/01/train'
        else:
            source_data_dir = None
        if args.target == 'thomaz':
            target_data_dir = '/home/alejandro/ub/journal_2019/split/domain_adaptation/thomaz/static/01/train'
        else:
            target_data_dir = None

        source_train_loader = get_loader(name_dataset=args.source, batch_size=args.batch_size,
                                         train=True, data_dir=source_data_dir)
        target_train_loader = get_loader(name_dataset=args.target, batch_size=args.batch_size,
                                         train=True, data_dir=target_data_dir)
        source_evaluate_loader = get_loader(name_dataset=args.source, batch_size=args.batch_size,
                                            train=False, data_dir=source_data_dir)
        target_evaluate_loader = get_loader(name_dataset=args.target, batch_size=args.batch_size,
                                            train=False, data_dir=target_data_dir)
        n_classes = len(source_train_loader.dataset.classes)
    else:
        source_train_dataset = FeaturesDataset(
            split_fpath='/home/alejandro/ub/journal_2019/split/domain_adaptation/ub/static/01/cached_fold-01_train.npz')
        target_train_dataset = FeaturesDataset(
            split_fpath='/home/alejandro/ub/journal_2019/split/domain_adaptation/thomaz/static/01/cached_fold-01_train.npz')
        source_train_loader = torch.utils.data.DataLoader(
            source_train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=4)
        target_train_loader = torch.utils.data.DataLoader(
            target_train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=4)

        source_evaluate_dataset = FeaturesDataset(
            split_fpath='/home/alejandro/ub/journal_2019/split/domain_adaptation/ub/static/01/cached_fold-01_train.npz')
        target_evaluate_dataset = FeaturesDataset(
            split_fpath='/home/alejandro/ub/journal_2019/split/domain_adaptation/thomaz/static/01/cached_fold-01_train.npz')
        source_evaluate_loader = torch.utils.data.DataLoader(
            source_evaluate_dataset, batch_size=args.batch_size, shuffle=False, num_workers=4)
        target_evaluate_loader = torch.utils.data.DataLoader(
            target_evaluate_dataset, batch_size=args.batch_size, shuffle=False, num_workers=4)
        n_classes = 17

    # ~ Paper : "We initialized the other layers with the parameters pre-trained on ImageNet"
    # check https://github.com/pytorch/vision/blob/master/torchvision/models/alexnet.py
    if args.backbone_network == 'alexnet':
        model = alexnet(pretrained=True)
        # ~ Paper : the dimension of the last fully connected layer (fc8) was set to the number of categories (31)
        model.classifier[6] = nn.Linear(4096, n_classes)
        # ~ Paper : and initialized with N(0, 0.005)
        torch.nn.init.normal_(model.classifier[6].weight, mean=0, std=5e-3)
        # Initialize bias to a small constant (http://cs231n.github.io/neural-networks-2/#init)
        model.classifier[6].bias.data.fill_(0.01)
        model = model.to(device=args.device)
        # ~ Paper : "The learning rate of fc8 is set to 10 times the other layers as it is trained from scratch."
        optimizer = torch.optim.SGD([
            {'params': model.features.parameters()},
            {'params': model.classifier[:6].parameters()},
            {'params': model.classifier[6].parameters(), 'lr': 10 * args.lr}
        ], lr=args.lr, momentum=args.momentum)  # if not specified, the default lr is used
    elif args.backbone_network == 'resnet50':
        model = resnet50(pretrained=True)
        # ~ Paper : the dimension of the last fully connected layer (fc8) was set to the number of categories (31)
        model.fc = nn.Linear(2048, n_classes)
        # ~ Paper : and initialized with N(0, 0.005)
        torch.nn.init.normal_(model.fc.weight, mean=0, std=5e-3)
        # Initialize bias to a small constant (http://cs231n.github.io/neural-networks-2/#init)
        model.fc.bias.data.fill_(0.01)
        model = model.to(device=args.device)
        # ~ Paper : "The learning rate of fc8 is set to 10 times the other layers as it is trained from scratch."
        optimizer = torch.optim.SGD([
            {'params': model.layer4.parameters()},
            {'params': model.fc.parameters(), 'lr': 10 * args.lr}
        ], lr=args.lr, momentum=args.momentum)  # if not specified, the default lr is used
    else:
        model = FrozenCNN()
        # ~ Paper : the dimension of the last fully connected layer (fc8) was set to the number of categories (31)
        model.classifier[0] = nn.Linear(2048, n_classes)
        # ~ Paper : and initialized with N(0, 0.005)
        torch.nn.init.normal_(model.classifier[0].weight, mean=0, std=5e-3)
        # Initialize bias to a small constant (http://cs231n.github.io/neural-networks-2/#init)
        model.classifier[0].bias.data.fill_(0.01)
        model = model.to(device=args.device)
        # ~ Paper : "The learning rate of fc8 is set to 10 times the other layers as it is trained from scratch."
        optimizer = torch.optim.SGD([
            {'params': model.classifier[0].parameters(), 'lr': 10 * args.lr}
        ], lr=args.lr, momentum=args.momentum)  # if not specified, the default lr is used

    tracker = Tracker()
    for i in range(args.epochs):
        train(model, optimizer, source_train_loader, target_train_loader, tracker, args, i)
        evaluate(model, source_evaluate_loader, 'source', tracker, args, i)
        evaluate(model, target_evaluate_loader, 'target', tracker, args, i)

    # Save logged classification loss, coral loss, source accuracy, target accuracy
    log_file = "{}_coral-loss:{}_{}-{}_log.pth".format(
        args.backbone_network, args.lambda_coral, args.source, args.target)
    torch.save(tracker.to_dict(), log_file)
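# The `--lambda_coral` argument suggests that `train()` (defined elsewhere in the repo) combines
# the classification loss with the CORAL loss of Sun & Saenko, i.e. the squared Frobenius distance
# between the source and target feature covariances scaled by 1/(4*d^2). A minimal sketch of that
# loss, written here only for reference (the repo's own implementation may differ):
import torch


def coral_loss(source, target):
    # source: (n_s, d) source-domain activations; target: (n_t, d) target-domain activations
    d = source.size(1)

    # covariance of the (centered) source features
    source_centered = source - source.mean(dim=0, keepdim=True)
    cov_source = source_centered.t() @ source_centered / (source.size(0) - 1)

    # covariance of the (centered) target features
    target_centered = target - target.mean(dim=0, keepdim=True)
    cov_target = target_centered.t() @ target_centered / (target.size(0) - 1)

    # squared Frobenius norm of the covariance difference, scaled by 1 / (4 d^2)
    return ((cov_source - cov_target) ** 2).sum() / (4 * d * d)

# In the paper, the total objective is classification_loss + lambda_coral * coral_loss.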
def train_model(self, max_iterations=1e6, loss_freq=50, eval_freq=2000,
                save_freq=1e5, max_gradient_norm=0.25, no_model_checkpoints=False):
    parameters_to_optimize = self.model.parameters()

    # Setup dictionary to save evaluation details in
    checkpoint_dict = self.load_recent_model()
    # Iteration to start from
    start_iter = get_param_val(checkpoint_dict, "iteration", 0, warning_if_default=False)
    # Dictionary containing validation performances over time
    evaluation_dict = get_param_val(checkpoint_dict, "evaluation_dict", dict(),
                                    warning_if_default=False)
    best_save_dict = get_param_val(checkpoint_dict, "best_save_dict", {
        "file": None, "metric": 1e6, "detailed_metrics": None, "test": None
    }, warning_if_default=False)
    best_save_iter = best_save_dict["file"]
    last_save = None if start_iter == 0 else self.get_checkpoint_filename(start_iter)
    if last_save is not None and not os.path.isfile(last_save):
        print("[!] WARNING: Could not find last checkpoint file specified as " + last_save)
        last_save = None
    test_NLL = None  # Possible test performance, determined at the end of training

    # Initialize tensorboard writer
    writer = SummaryWriter(self.checkpoint_path)

    # Function for saving the model. Add to this dictionary any parameters that should be saved
    def save_train_model(iteration, only_weights=True):
        if no_model_checkpoints:
            return
        checkpoint_dict = {
            "iteration": iteration,
            "best_save_dict": best_save_dict,
            "evaluation_dict": evaluation_dict
        }
        self.save_model(iteration, checkpoint_dict, save_optimizer=not only_weights)

    # Function to export the current results to a txt file
    def export_result_txt():
        if best_save_iter is not None:
            with open(os.path.join(self.checkpoint_path, "results.txt"), "w") as f:
                f.write("Best validation performance: %s\n" % (str(best_save_dict["metric"])))
                f.write("Best iteration: %i\n" %
                        int(str(best_save_iter).split("_")[-1].split(".")[0]))
                f.write("Best checkpoint: %s\n" % str(best_save_iter))
                f.write("Detailed metrics\n")
                for metric_name, metric_val in best_save_dict["detailed_metrics"].items():
                    f.write("-> %s: %s\n" % (metric_name, str(metric_val)))
                if "test" in best_save_dict and best_save_dict["test"] is not None:
                    f.write("Test - Detailed metrics\n")
                    for metric_name, metric_val in best_save_dict["test"].items():
                        f.write("[TEST] -> %s: %s\n" % (metric_name, str(metric_val)))
                f.write("\n")

    # "Trackers" are moving averages. We use them to log the loss and time needed per training iteration
    time_per_step = Tracker()
    train_losses = Tracker()

    # Try-catch in case the user terminates
    try:
        index_iter = -1
        self.model.eval()
        self.task.initialize()
        print("=" * 50 + "\nStarting training...\n" + "=" * 50)
        self.model.train()

        print("Performing initial evaluation...")
        self.model.eval()
        eval_NLL, detailed_scores = self.task.eval(initial_eval=True)
        self.model.train()
        write_dict_to_tensorboard(writer, detailed_scores, base_name="eval", iteration=start_iter)

        for index_iter in range(start_iter, int(max_iterations)):
            # Training step
            start_time = time.time()
            loss = self.task.train_step(iteration=index_iter)
            self.optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(parameters_to_optimize, max_gradient_norm)
            if self.model.model_name in ["DAF", "DBF"]:
                torch.nn.utils.clip_grad_norm_(self.model.base_log_probs, max_gradient_norm)
            self.optimizer.step()
            if self.optimizer.param_groups[0]['lr'] > self.lr_minimum:
                self.lr_scheduler.step()
            end_time = time.time()

            time_per_step.add(end_time - start_time)
            train_losses.add(loss.item())

            # Statement for detecting NaN values
            if torch.isnan(loss).item():
                print("[!] ERROR: Loss is NaN! " + str(loss.item()))
                for name, param in self.model.named_parameters():
                    if param.requires_grad:
                        if torch.isnan(param).sum() > 0:
                            print("[!] ERROR: Parameter %s has %s NaN values!\n" % (name, str(torch.isnan(param).sum())) +
                                  "Grad values NaN: %s.\n" % (str(torch.isnan(param.grad).sum()) if param.grad is not None else "no gradients") +
                                  "Grad values avg: %s.\n" % (str(param.grad.abs().mean()) if param.grad is not None else "no gradients") +
                                  "Last loss: %s" % (str(loss)))

            # Printing current loss etc. for debugging
            if (index_iter + 1) % loss_freq == 0:
                loss_avg = train_losses.get_mean(reset=True)
                bpd_avg = self.task.loss_to_bpd(loss_avg)
                train_time_avg = time_per_step.get_mean(reset=True)
                max_memory = torch.cuda.max_memory_allocated(device=get_device()) / 1.0e9 \
                    if torch.cuda.is_available() else -1
                print("Training iteration %i|%i (%4.2fs). Loss: %6.5f, Bpd: %6.4f [Mem: %4.2fGB]"
                      % (index_iter + 1, max_iterations, train_time_avg, loss_avg, bpd_avg, max_memory))
                writer.add_scalar("train/loss", loss_avg, index_iter + 1)
                writer.add_scalar("train/bpd", bpd_avg, index_iter + 1)
                writer.add_scalar("train/learning_rate", self.optimizer.param_groups[0]['lr'], index_iter + 1)
                writer.add_scalar("train/training_time", train_time_avg, index_iter + 1)
                self.task.add_summary(writer, index_iter + 1, checkpoint_path=self.checkpoint_path)

            # Performing evaluation every "eval_freq" steps
            if (index_iter + 1) % eval_freq == 0:
                self.model.eval()
                eval_NLL, detailed_scores = self.task.eval()
                self.model.train()
                write_dict_to_tensorboard(writer, detailed_scores, base_name="eval", iteration=index_iter + 1)

                # If the model performed better on validation than in any iteration so far,
                # save it and eventually replace the old model
                if eval_NLL < best_save_dict["metric"]:
                    best_save_iter = self.get_checkpoint_filename(index_iter + 1)
                    best_save_dict["metric"] = eval_NLL
                    best_save_dict["detailed_metrics"] = detailed_scores
                    if not os.path.isfile(best_save_iter):
                        print("Saving model at iteration " + str(index_iter + 1))
                        if best_save_dict["file"] is not None and os.path.isfile(best_save_dict["file"]):
                            print("Removing checkpoint %s..." % best_save_dict["file"])
                            os.remove(best_save_dict["file"])
                        if last_save is not None and os.path.isfile(last_save):
                            print("Removing checkpoint %s..." % last_save)
                            os.remove(last_save)
                        best_save_dict["file"] = best_save_iter
                        last_save = best_save_iter
                        save_train_model(index_iter + 1)
                    self.task.export_best_results(self.checkpoint_path, index_iter + 1)
                    export_result_txt()
                evaluation_dict[index_iter + 1] = best_save_dict["metric"]

            # Independent of evaluation, the model is saved every "save_freq" steps.
            # This prevents loss of information if the model does not improve for a while
            if (index_iter + 1) % save_freq == 0 and not os.path.isfile(self.get_checkpoint_filename(index_iter + 1)):
                save_train_model(index_iter + 1)
                if last_save is not None and os.path.isfile(last_save) and last_save != best_save_iter:
                    print("Removing checkpoint %s..." % last_save)
                    os.remove(last_save)
                last_save = self.get_checkpoint_filename(index_iter + 1)
        ## End training loop

        # Before testing, load the best model and check whether its validation performance
        # is in the right range (to prevent major loading issues)
        if not no_model_checkpoints and best_save_iter is not None:
            load_model(best_save_iter, model=self.model, optimizer=self.optimizer,
                       lr_scheduler=self.lr_scheduler)
            eval_NLL, detailed_scores = self.task.eval()
            if eval_NLL != best_save_dict["metric"]:
                if abs(eval_NLL - best_save_dict["metric"]) > 1e-1:
                    print("[!] WARNING: new evaluation significantly differs from saved one (%s vs %s)! "
                          "Probably a mistake in the saving/loading part..."
                          % (str(eval_NLL), str(best_save_dict["metric"])))
                else:
                    print("[!] WARNING: new evaluation slightly differs from saved one (%s vs %s)."
                          % (str(eval_NLL), str(best_save_dict["metric"])))
        else:
            print("Using last model as no models were saved...")

        # Testing the trained model
        test_NLL, detailed_scores = self.task.test()
        print("=" * 50 + "\nTest performance: %lf" % (test_NLL))
        detailed_scores["original_NLL"] = test_NLL
        best_save_dict["test"] = detailed_scores
        self.task.finalize_summary(writer, max_iterations, self.checkpoint_path)

    # If the user terminates training early, replace the last model saved per "save_freq" steps by the current one
    except KeyboardInterrupt:
        if index_iter > 0:
            print("User keyboard interrupt detected. Saving model at step %i..." % (index_iter))
            save_train_model(index_iter + 1)
        else:
            print("User keyboard interrupt detected before starting to train.")
        if last_save is not None and os.path.isfile(last_save) and not any(
                [val == last_save for _, val in best_save_dict.items()]):
            os.remove(last_save)

    export_result_txt()
    writer.close()
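# For reference: the `Tracker` objects used above only need `add()` and `get_mean(reset=True)`.
# A minimal stand-in with that interface could look as follows; this sketch is an assumption,
# the actual class in the repo may track more state:
class Tracker:
    """Running average over the values added since the last reset."""

    def __init__(self):
        self._sum = 0.0
        self._count = 0

    def add(self, value):
        self._sum += value
        self._count += 1

    def get_mean(self, reset=False):
        mean = self._sum / max(self._count, 1)
        if reset:
            self._sum, self._count = 0.0, 0
        return mean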
def init_model(args):
    import os
    print(os.getcwd())

    source_train_loader = get_loader(name_dataset=args.source, batch_size=args.batch_size,
                                     train=True, path="../../data/OfficeCaltech/images/")
    target_train_loader = get_loader(name_dataset=args.target, batch_size=args.batch_size,
                                     train=True, path="../../data/OfficeCaltech/images/")
    source_evaluate_loader = get_loader(name_dataset=args.source, batch_size=args.batch_size,
                                        train=False, path="../../data/OfficeCaltech/images/")
    target_evaluate_loader = get_loader(name_dataset=args.target, batch_size=args.batch_size,
                                        train=False, path="../../data/OfficeCaltech/images/")
    n_classes = len(source_train_loader.dataset.classes)

    # ~ Paper : "We initialized the other layers with the parameters pre-trained on ImageNet"
    # check https://github.com/pytorch/vision/blob/master/torchvision/models/alexnet.py
    model = alexnet(pretrained=True)
    # ~ Paper : the dimension of the last fully connected layer (fc8) was set to the number of categories (31)
    model.classifier[6] = nn.Linear(4096, n_classes)
    # ~ Paper : and initialized with N(0, 0.005)
    torch.nn.init.normal_(model.classifier[6].weight, mean=0, std=5e-3)
    # Initialize bias to a small constant (http://cs231n.github.io/neural-networks-2/#init)
    model.classifier[6].bias.data.fill_(0.01)
    model = model.to(device=args.device)

    # ~ Paper : "The learning rate of fc8 is set to 10 times the other layers as it is trained from scratch."
    optimizer = torch.optim.SGD(
        [
            {'params': model.features.parameters()},
            {'params': model.classifier[:6].parameters()},
            # fc8 -> 7th element (index 6) in the Sequential block
            {'params': model.classifier[6].parameters(), 'lr': 10 * args.lr}
        ],
        lr=args.lr, momentum=args.momentum,
        weight_decay=args.decay)  # if not specified, the default lr is used

    tracker = Tracker()
    for i in range(args.epochs):
        train(model, optimizer, source_train_loader, target_train_loader, tracker, args, i)
        evaluate(model, source_evaluate_loader, 'source', tracker, args, i)
        evaluate(model, target_evaluate_loader, 'target', tracker, args, i)

    # Save logged classification loss, coral loss, source accuracy, target accuracy
    torch.save(tracker.to_dict(), args.da_loss + "_log.pth")

    print("Final Evaluation\r")
    return evaluate(model, target_evaluate_loader, 'target', tracker, args, i)
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to the network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()
    # Set probability threshold for detections
    prob_threshold = args.prob_threshold

    # Load the model through `infer_network`
    infer_network.load_model(args.model, device=args.device)

    # Create a flag for single images
    image_flag = False
    # Check if the input is a webcam
    if args.input == 'CAM':
        args.input = 0
    elif args.input.endswith(('.jpg', '.bmp', '.png')):
        image_flag = True
    # If the input file is not a video, stop the program
    elif not args.input.endswith(('.mp4', '.avi')):
        sys.exit(f"The format of the input file '{args.input}' is not supported.")

    # Handle the input stream
    cap = cv2.VideoCapture(args.input)
    cap.open(args.input)

    # Grab the shape of the input and the frame rate
    width = int(cap.get(3))
    height = int(cap.get(4))
    fps = cap.get(cv2.CAP_PROP_FPS)

    if not image_flag:
        # Create a video writer for the output video
        # The second argument should be `cv2.VideoWriter_fourcc('M','J','P','G')`
        # on Mac, and `0x00000021` on Linux
        out = cv2.VideoWriter('out.mp4', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                              fps, (width, height))
        min_frame_count = 3  # minimum number of consecutive frames a pedestrian needs to be detected in
    else:
        out = None
        min_frame_count = 0  # minimum number of consecutive frames a pedestrian needs to be detected in

    # Initialize the lists of tracked pedestrians
    list_tracked_pedestrians = []
    list_trackers = []  # List of all trackers
    set_id_pedestrians = set()  # Set of all the pedestrians seen in total
    previous_count = 0

    # Loop until the stream is over
    while cap.isOpened():
        # Read from the video capture
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        # Pre-process the image as needed
        net_input_shape = infer_network.get_input_shape()
        p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        # Start asynchronous inference for the specified request
        infer_network.async_inference(p_frame)

        # Wait for the result
        if infer_network.wait() == 0:
            # Get the results of the inference request
            result = infer_network.get_output()

            # Detect the objects in the new frame
            list_detections = infer_network.postprocess_output(result, width, height, args.prob_threshold)

            # Update the position of the tracked pedestrians
            list_trackers, list_detections, list_trackers_removed = updateTrackers(list_trackers, list_detections)

            # Add the remaining detections as new tracked pedestrians
            for detection in list_detections:
                x, y, w, h = detection
                list_trackers.append(Tracker(x, y, w, h))

            # Get the list of detected pedestrians (trackers detected in at least min_frame_count frames)
            list_tracked_pedestrians = [
                tracker for tracker in list_trackers
                if len(tracker.list_centroids) >= min_frame_count
            ]

            # Draw all the tracked pedestrians in the current frame
            for pedestrian in list_tracked_pedestrians:
                pedestrian.drawOnFrame(frame)

            # --- Extract any desired stats from the results ---
            # Update the set of all pedestrians seen so far
            set_id_pedestrians = set_id_pedestrians.union(
                set([p.id for p in list_tracked_pedestrians]))

            # Number of pedestrians in the current frame
            current_count = len(list_tracked_pedestrians)
            # Total number of pedestrians detected since the beginning of the video
            total_count = len(set_id_pedestrians)

            # Publish the results on the "person" topic
            if current_count != previous_count:
                previous_count = current_count
                client.publish("person", json.dumps({"count": current_count, "total": total_count}))

            # Get the total duration a person stayed in the frame when he/she leaves the frame
            duration_min = 10  # minimum number of frames a tracker needs to exist for its duration to be counted
            if list_trackers_removed:
                list_duration = [
                    len(p.list_centroids) * 1 / fps for p in list_trackers_removed
                    if len(p.list_centroids) > duration_min
                ]
                if list_duration:
                    duration = mean(list_duration)
                    client.publish("person/duration", json.dumps({"duration": duration}))

        # Send frame to the ffmpeg server
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        # Write out the frame
        if image_flag:
            cv2.imwrite('output_image.jpg', frame)
        else:
            # cv2.putText(frame, f"{current_count} | {total_count}", (15, 15),
            #             cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1)
            out.write(frame)

        # Break if escape key pressed
        if key_pressed == 27:
            break

    # Release the out writer, capture, and destroy any OpenCV windows
    if not image_flag:
        out.release()
    cap.release()
    cv2.destroyAllWindows()
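# On the receiving side, any MQTT subscriber can consume the "person" and "person/duration"
# messages published above. A small sketch using paho-mqtt; the broker host and port are
# assumptions that depend on the deployment:
import json
import paho.mqtt.client as mqtt


def on_message(client, userdata, message):
    payload = json.loads(message.payload)
    if message.topic == "person":
        print("current: {count}, total: {total}".format(**payload))
    elif message.topic == "person/duration":
        print("average duration: {duration:.1f}s".format(**payload))


subscriber = mqtt.Client()  # paho-mqtt 1.x style; 2.x requires mqtt.Client(mqtt.CallbackAPIVersion.VERSION1)
subscriber.on_message = on_message
subscriber.connect("localhost", 1883)  # assumed broker address
subscriber.subscribe([("person", 0), ("person/duration", 0)])
subscriber.loop_forever()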
def clear():
    # empty the active peer list
    Tracker.clear_all_clients()
    msg = {'status': 'success'}
    return jsonify(msg)
def main():
    # Paper: In the training phase, we set the batch size to 128,
    # base learning rate to 10^-3, weight decay to 5x10^-4, and momentum to 0.9
    parser = argparse.ArgumentParser(description='Train - Evaluate DeepCORAL model')
    parser.add_argument('--disable_cuda', action='store_true', help='Disable CUDA')
    parser.add_argument('--epochs', type=int, default=50, help='Number of total epochs to run')
    parser.add_argument('--batch_size', type=int, default=128, help='Batch size')
    parser.add_argument('--lr', type=float, default=1e-3, help='Learning rate')
    parser.add_argument('--decay', type=float, default=5e-4, help='Weight decay')
    parser.add_argument('--momentum', type=float, default=0.9, help="Optimizer's momentum")
    parser.add_argument('--lambda_coral', type=float, default=0.5,
                        help="Weight that trades off the adaptation with "
                             "classification accuracy on the source domain")
    parser.add_argument('--source', default='amazon', help="Source domain (dataset)")
    parser.add_argument('--target', default='webcam', help="Target domain (dataset)")
    args = parser.parse_args()

    args.device = None
    if not args.disable_cuda and torch.cuda.is_available():
        args.device = torch.device('cuda')
    else:
        args.device = torch.device('cpu')

    source_train_loader = get_loader(name_dataset=args.source, batch_size=args.batch_size, train=True)
    target_train_loader = get_loader(name_dataset=args.target, batch_size=args.batch_size, train=True)
    source_evaluate_loader = get_loader(name_dataset=args.source, batch_size=args.batch_size, train=False)
    target_evaluate_loader = get_loader(name_dataset=args.target, batch_size=args.batch_size, train=False)
    n_classes = len(source_train_loader.dataset.classes)

    # ~ Paper : "We initialized the other layers with the parameters pre-trained on ImageNet"
    # check https://github.com/pytorch/vision/blob/master/torchvision/models/alexnet.py
    model = alexnet(pretrained=True)
    # ~ Paper : the dimension of the last fully connected layer (fc8) was set to the number of categories (31)
    model.classifier[6] = nn.Linear(4096, n_classes)
    # ~ Paper : and initialized with N(0, 0.005)
    torch.nn.init.normal_(model.classifier[6].weight, mean=0, std=5e-3)
    # Initialize bias to a small constant (http://cs231n.github.io/neural-networks-2/#init)
    model.classifier[6].bias.data.fill_(0.01)
    model = model.to(device=args.device)

    # ~ Paper : "The learning rate of fc8 is set to 10 times the other layers as it is trained from scratch."
    optimizer = torch.optim.SGD(
        [
            {'params': model.features.parameters()},
            {'params': model.classifier[:6].parameters()},
            # fc8 -> 7th element (index 6) in the Sequential block
            {'params': model.classifier[6].parameters(), 'lr': 10 * args.lr}
        ],
        lr=args.lr, momentum=args.momentum)  # if not specified, the default lr is used

    tracker = Tracker()
    for i in range(args.epochs):
        train(model, optimizer, source_train_loader, target_train_loader, tracker, args, i)
        evaluate(model, source_evaluate_loader, 'source', tracker, args, i)
        evaluate(model, target_evaluate_loader, 'target', tracker, args, i)

    # Save logged classification loss, coral loss, source accuracy, target accuracy
    torch.save(tracker.to_dict(), "log.pth")
def track_keypoints(args):
    print("Tracking SEKD keypoints with args: {0}.\n".format(args))

    print("Init feature extractor using SEKD.")
    feature_extractor = nets.get_sekd_model(
        args.model_name, weights_path=args.weights_path,
        confidence_threshold=args.conf_thresh, nms_radius=args.nms_radius,
        max_keypoints=args.max_keypoints, cuda=args.cuda,
        multi_scale=args.multi_scale, sub_pixel_location=args.sub_pixel_location)

    print("Init video stream from {0}.".format(args.input))
    video_stream = Video(args.input, args.camera_id, args.img_ext)

    print("Init tracker.")
    tracker = Tracker(args.max_length)

    # Create a window to display the result.
    if not args.no_display:
        window = 'SEKD Tracker'
        cv2.namedWindow(window)
    else:
        print('Do not show the results via GUI window.')

    # Create output directories if desired.
    if args.save_keypoints:
        print('Will save keypoints to {0}.'.format(args.keypoints_dir))
        if not os.path.exists(args.keypoints_dir):
            os.makedirs(args.keypoints_dir)
    if args.save_tracks:
        print('Will save tracks to {0}.'.format(args.tracks_dir))
        if not os.path.exists(args.tracks_dir):
            os.makedirs(args.tracks_dir)

    print('Processing each frame ...')
    while True:
        # Get a new image.
        img = video_stream.next_frame()
        if img is None:
            print('All frames have been processed.')
            if not args.no_display:
                print('Press any key to quit.')
                cv2.waitKey(0)
                cv2.destroyAllWindows()
            break

        # Resize img if it exceeds the maximum size.
        if img.shape[0] > args.max_height or img.shape[1] > args.max_width:
            resize_ratio = min(args.max_height / img.shape[0], args.max_width / img.shape[1])
            img = cv2.resize(img, (int(resize_ratio * img.shape[1]),
                                   int(resize_ratio * img.shape[0])))

        # Get keypoints and descriptors.
        keypoints, descriptors = feature_extractor.detectAndCompute(img)

        # Save keypoints and descriptors.
        if args.save_keypoints:
            img_name = video_stream.name_list[video_stream.i - 1]
            keypoints_filepath = os.path.join(args.keypoints_dir, img_name)
            print('Save keypoints to {0}'.format(keypoints_filepath))
            np.savez(keypoints_filepath, keypoints=keypoints, descriptors=descriptors)

        # Update tracks with the keypoints and descriptors.
        tracker.track(keypoints, descriptors)

        # Draw keypoint tracks on the input image.
        img_out = (img * 255.).astype('uint8')
        img_out = tracker.draw_tracks(img_out)

        # Save tracks.
        if args.save_tracks:
            img_name = str(video_stream.i - 1).zfill(5) + '.png'
            tracks_filepath = os.path.join(args.tracks_dir, img_name)
            print('Save tracks to {0}'.format(tracks_filepath))
            cv2.imwrite(tracks_filepath, img_out)
            tracks_filepath = os.path.join(args.tracks_dir, img_name[:-4])
            np.savez(tracks_filepath, tracks_backward=tracker.tracks_backward[-1])

        # Display visualization image to screen.
        if not args.no_display:
            cv2.imshow(window, img_out)
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                print('\'q\' has been pressed, quitting ...')
                cv2.destroyAllWindows()
                break

    print('Finished tracking keypoints.')
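# Since keypoints are stored with np.savez(..., keypoints=..., descriptors=...), they can be
# reloaded later, e.g. for offline matching. A short sketch with a hypothetical file name
# (np.savez appends '.npz' when the given name has no such extension):
import numpy as np

data = np.load('keypoints_dir/00000.png.npz')  # hypothetical file written by the loop above
keypoints = data['keypoints']
descriptors = data['descriptors']
print(keypoints.shape, descriptors.shape)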