def experiment_4(exp_name, model_file, method):
    print('Running Task B experiment', exp_name)
    SAVE_PATH = os.path.join('./results_week_6', exp_name + '_' + method)
    os.makedirs(SAVE_PATH, exist_ok=True)

    # Loading data
    print('Loading data')
    virtualoader = VirtualKitti()
    kittiloader = KittiMots()

    def rkitti_val():
        return kittiloader.get_dicts(flag='val')

    def rkitti_test():
        return kittiloader.get_dicts(flag='test')

    DatasetCatalog.register('KITTI_val', rkitti_val)
    MetadataCatalog.get('KITTI_val').set(thing_classes=list(KITTI_CATEGORIES.keys()))
    DatasetCatalog.register('KITTI_test', rkitti_test)
    MetadataCatalog.get('KITTI_test').set(thing_classes=list(KITTI_CATEGORIES.keys()))
    virtual = virtualoader.get_dicts()

    for per in [1.0, 0.8, 0.6, 0.4, 0.2, 0.1]:
        print('Iteration 100% Virtual & {0}% Real'.format(per * 100))
        if os.path.isfile(os.path.join(SAVE_PATH, 'metrics.json')):
            os.remove(os.path.join(SAVE_PATH, 'metrics.json'))

        def vkitti_train():
            real = kittiloader.get_dicts(flag='train', method=method, percentage=per)
            return virtual + real

        catalog_name = 'ALL_train_{0}'.format(int(per * 10))
        DatasetCatalog.register(catalog_name, vkitti_train)
        MetadataCatalog.get(catalog_name).set(thing_classes=list(KITTI_CATEGORIES.keys()))

        # Load model and configuration
        print('Loading Model')
        cfg = get_cfg()
        cfg.merge_from_file(model_zoo.get_config_file(model_file))
        cfg.DATASETS.TRAIN = (catalog_name, )
        cfg.DATASETS.TEST = ('KITTI_val', )
        cfg.DATALOADER.NUM_WORKERS = 4
        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
        cfg.OUTPUT_DIR = SAVE_PATH
        cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_file)
        cfg.SOLVER.IMS_PER_BATCH = 4
        cfg.SOLVER.BASE_LR = 0.00025
        cfg.SOLVER.MAX_ITER = 4000
        cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256
        cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
        cfg.TEST.SCORE_THRESH = 0.5

        # Training
        print('Training')
        trainer = DefaultTrainer(cfg)
        val_loss = ValidationLoss(cfg)
        trainer.register_hooks([val_loss])
        # Swap the last two hooks so the validation loss hook runs before the
        # default writers and its values get logged
        trainer._hooks = trainer._hooks[:-2] + trainer._hooks[-2:][::-1]
        trainer.resume_or_load(resume=False)
        trainer.train()

        # Evaluation
        print('Evaluating')
        cfg.DATASETS.TEST = ('KITTI_test', )
        evaluator = COCOEvaluator('KITTI_test', cfg, False, output_dir=SAVE_PATH)
        trainer.model.load_state_dict(val_loss.weights)
        trainer.test(cfg, trainer.model, evaluators=[evaluator])
        print('Plotting losses')
        filename = 'validation_loss_{0}.png'.format(int(per * 10))
        plot_validation_loss(cfg, cfg.SOLVER.MAX_ITER, exp_name, SAVE_PATH, filename)

        # Qualitative results: visualize some results
        print('Getting qualitative results')
        predictor = DefaultPredictor(cfg)
        predictor.model.load_state_dict(trainer.model.state_dict())
        inputs = rkitti_test()
        inputs = [inputs[i] for i in TEST_INFERENCE_VALUES]
        for i, sample in enumerate(inputs):
            file_name = sample['file_name']
            print('Prediction on image ' + file_name)
            img = cv2.imread(file_name)
            outputs = predictor(img)
            v = Visualizer(img[:, :, ::-1],
                           metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                           scale=0.8,
                           instance_mode=ColorMode.IMAGE)
            v = v.draw_instance_predictions(outputs['instances'].to('cpu'))
            os.makedirs(os.path.join(SAVE_PATH, str(int(per * 10))), exist_ok=True)
            cv2.imwrite(
                os.path.join(SAVE_PATH, str(int(per * 10)),
                             'Inference_' + exp_name + '_inf_' + str(i) + '.png'),
                v.get_image()[:, :, ::-1])
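# ValidationLoss is used above (here and in the other training functions) but
# is not defined in this snippet. A minimal sketch of such a hook, assuming it
# evaluates the loss on cfg.DATASETS.TEST during training and keeps the best
# weights in `self.weights` (an assumption inferred from
# trainer.model.load_state_dict(val_loss.weights)):
import copy

import torch
import detectron2.utils.comm as comm
from detectron2.data import build_detection_train_loader
from detectron2.engine import HookBase


class ValidationLoss(HookBase):
    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg.clone()
        self.cfg.DATASETS.TRAIN = cfg.DATASETS.TEST  # iterate over the val split
        self._loader = iter(build_detection_train_loader(self.cfg))
        self.best_loss = float('inf')
        self.weights = None

    def after_step(self):
        data = next(self._loader)
        with torch.no_grad():
            loss_dict = self.trainer.model(data)
            losses = sum(loss_dict.values())
            assert torch.isfinite(losses).all(), loss_dict
            loss_dict_reduced = {'val_' + k: v.item()
                                 for k, v in comm.reduce_dict(loss_dict).items()}
            losses_reduced = sum(loss_dict_reduced.values())
            if comm.is_main_process():
                self.trainer.storage.put_scalars(total_val_loss=losses_reduced,
                                                 **loss_dict_reduced)
            # Remember the weights with the lowest validation loss so far
            if losses_reduced < self.best_loss:
                self.best_loss = losses_reduced
                self.weights = copy.deepcopy(self.trainer.model.state_dict())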
def train(model_name, root_dir, dataset_mode, max_iter, resume=False):
    # output folder to save models
    output_dir = os.path.join('train_results', model_name + '_' + dataset_mode)
    os.makedirs(output_dir, exist_ok=True)

    # get folders depending on dataset_mode
    folders_train = []
    folders_test = []
    for curr_dir in os.listdir(root_dir):
        with open(os.path.join(root_dir, curr_dir, 'meta.json')) as f:
            meta = json.load(f)
        if meta["set"] == "train_good_weather":
            folders_train.append(curr_dir)
        elif (meta["set"] == "train_good_and_bad_weather"
              and dataset_mode == "good_and_bad_weather"):
            folders_train.append(curr_dir)
        elif meta["set"] == "test":
            folders_test.append(curr_dir)

    def gen_boundingbox(bbox, angle):
        # Rotate the four corners of the axis-aligned box around its centre
        # and return the enclosing axis-aligned box
        theta = np.deg2rad(-angle)
        R = np.array([[np.cos(theta), -np.sin(theta)],
                      [np.sin(theta), np.cos(theta)]])
        points = np.array([[bbox[0], bbox[1]],
                           [bbox[0] + bbox[2], bbox[1]],
                           [bbox[0] + bbox[2], bbox[1] + bbox[3]],
                           [bbox[0], bbox[1] + bbox[3]]]).T
        cx = bbox[0] + bbox[2] / 2
        cy = bbox[1] + bbox[3] / 2
        T = np.array([[cx], [cy]])
        points = points - T
        points = np.matmul(R, points) + T
        points = points.astype(int)
        min_x = np.min(points[0, :])
        min_y = np.min(points[1, :])
        max_x = np.max(points[0, :])
        max_y = np.max(points[1, :])
        return min_x, min_y, max_x, max_y

    def get_radar_dicts(folders):
        dataset_dicts = []
        idd = 0
        for folder in folders:
            radar_folder = os.path.join(root_dir, folder, 'Navtech_Cartesian')
            annotation_path = os.path.join(root_dir, folder,
                                           'annotations', 'annotations.json')
            with open(annotation_path, 'r') as f_annotation:
                annotation = json.load(f_annotation)
            radar_files = sorted(os.listdir(radar_folder))
            for frame_number in range(len(radar_files)):
                record = {}
                objs = []
                bb_created = False
                idd += 1
                filename = os.path.join(radar_folder, radar_files[frame_number])
                if not os.path.isfile(filename):
                    print(filename)
                    continue
                record["file_name"] = filename
                record["image_id"] = idd
                record["height"] = 1152
                record["width"] = 1152
                for ann_obj in annotation:
                    if not ann_obj['bboxes'][frame_number]:
                        continue
                    class_obj = ann_obj['class_name']
                    if class_obj in ('pedestrian', 'group_of_pedestrians'):
                        continue
                    bbox = ann_obj['bboxes'][frame_number]['position']
                    angle = ann_obj['bboxes'][frame_number]['rotation']
                    bb_created = True
                    # cfg is a closure variable assigned below; this function
                    # is only invoked after the config has been built
                    if cfg.MODEL.PROPOSAL_GENERATOR.NAME == "RRPN":
                        cx = bbox[0] + bbox[2] / 2
                        cy = bbox[1] + bbox[3] / 2
                        obj = {
                            "bbox": [cx, cy, bbox[2], bbox[3], angle],
                            "bbox_mode": BoxMode.XYWHA_ABS,
                            "category_id": 0,
                            "iscrowd": 0
                        }
                    else:
                        xmin, ymin, xmax, ymax = gen_boundingbox(bbox, angle)
                        obj = {
                            "bbox": [xmin, ymin, xmax, ymax],
                            "bbox_mode": BoxMode.XYXY_ABS,
                            "category_id": 0,
                            "iscrowd": 0
                        }
                    objs.append(obj)
                if bb_created:
                    record["annotations"] = objs
                    dataset_dicts.append(record)
        return dataset_dicts

    dataset_train_name = dataset_mode + '_train'
    dataset_test_name = dataset_mode + '_test'
    DatasetCatalog.register(dataset_train_name, lambda: get_radar_dicts(folders_train))
    MetadataCatalog.get(dataset_train_name).set(thing_classes=["vehicle"])
    DatasetCatalog.register(dataset_test_name, lambda: get_radar_dicts(folders_test))
    MetadataCatalog.get(dataset_test_name).set(thing_classes=["vehicle"])

    cfg_file = os.path.join('test', 'config', model_name + '.yaml')
    cfg = get_cfg()
    cfg.OUTPUT_DIR = output_dir
    cfg.merge_from_file(cfg_file)
    cfg.DATASETS.TRAIN = (dataset_train_name, )
    cfg.DATASETS.TEST = (dataset_test_name, )
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.SOLVER.IMS_PER_BATCH = 2
    # Was `cfg.SOLVER.STEPS: (25000, 35000)`, a no-op annotation statement
    cfg.SOLVER.STEPS = (25000, 35000)
    cfg.SOLVER.MAX_ITER = max_iter
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
    cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.2
    cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[8, 16, 32, 64, 128]]
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    if cfg.MODEL.PROPOSAL_GENERATOR.NAME == "RRPN":
        trainer = RotatedTrainer(cfg)
    else:
        trainer = Trainer(cfg)
    # `resume` was previously undefined; it is now a function parameter
    trainer.resume_or_load(resume=resume)
    trainer.train()
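# A hypothetical invocation of train() for reference; the config name, dataset
# root, and iteration count are illustrative, not taken from the original code:
if __name__ == '__main__':
    train(model_name='faster_rcnn_R_50_FPN_3x',
          root_dir='/path/to/radar_dataset',
          dataset_mode='good_and_bad_weather',
          max_iter=40000)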
def task_a_KITTI_training(model_name, model_file):
    # model_name = model_name + '_inference'
    print('Running task A for model', model_name)
    SAVE_PATH = os.path.join('./results_week_5_task_a', model_name)
    os.makedirs(SAVE_PATH, exist_ok=True)

    # Load model and configuration
    print('Loading Model')
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(model_file))
    cfg.DATASETS.TRAIN = ('KITTIMOTS_train', )
    cfg.DATASETS.TEST = ('MOTS_train', )
    cfg.DATALOADER.NUM_WORKERS = 0
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    cfg.OUTPUT_DIR = SAVE_PATH
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_file)
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.MAX_ITER = 1000
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
    cfg.TEST.SCORE_THRESH = 0.5

    # Training
    print('Training')
    trainer = DefaultTrainer(cfg)
    val_loss = ValidationLoss(cfg)
    trainer.register_hooks([val_loss])
    # Swap the last two hooks so the validation loss is logged by the writers
    trainer._hooks = trainer._hooks[:-2] + trainer._hooks[-2:][::-1]
    trainer.resume_or_load(resume=False)
    trainer.train()

    # Evaluation
    print('Evaluating')
    evaluator = COCOEvaluator('MOTS_train', cfg, False, output_dir=SAVE_PATH)
    trainer.model.load_state_dict(val_loss.weights)
    trainer.test(cfg, trainer.model, evaluators=[evaluator])
    print('Plotting losses')
    plot_validation_loss(cfg, cfg.SOLVER.MAX_ITER, model_name, SAVE_PATH)

    # Qualitative results: visualize some results
    print('Getting qualitative results')
    predictor = DefaultPredictor(cfg)
    predictor.model.load_state_dict(trainer.model.state_dict())
    inputs = mots_train()
    inputs = inputs[:20] + inputs[-20:]
    for i, sample in enumerate(inputs):
        file_name = sample['file_name']
        print('Prediction on image ' + file_name)
        img = cv2.imread(file_name)
        outputs = predictor(img)
        v = Visualizer(img[:, :, ::-1],
                       metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                       scale=0.8,
                       instance_mode=ColorMode.IMAGE)
        v = v.draw_instance_predictions(outputs['instances'].to('cpu'))
        cv2.imwrite(
            os.path.join(SAVE_PATH,
                         'Inference_' + model_name + '_inf_' + str(i) + '.png'),
            v.get_image()[:, :, ::-1])
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--port', type=int, default=20000)
    parser.add_argument('--cpu-only', action='store_true')
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument("strategy_file")
    args = parser.parse_args()

    # Schema for strategy file.
    # Each key has a value specifying a comment and a boolean as to whether
    # it is required.
    strategy_schema = {
        'capture_process':
        ('If present run this process after launching capture thread', False),
        'broadcast_process':
        ('If present run this process before starting capture', False),
        'detector': {
            'size': ('Array representing size of network [H,W]', True),
            'threshold': ('Keep threshold', True),
            'names': ('Array of class names', True)
        }
    }

    with open(args.strategy_file, 'r') as fp:
        strategy = yaml.safe_load(fp)

    dims = [*strategy['detector']['size'], 3]
    frame_interval = strategy['detector'].get('interval', 1)
    num_buffers = 64
    free_queue = multiprocessing.Queue(num_buffers)
    process_queue = multiprocessing.Queue(num_buffers)
    buffers = []
    for x in range(num_buffers):
        buffers.append(RawArray(ctypes.c_uint8, dims[0] * dims[1] * dims[2]))
        free_queue.put(x)

    backbone = strategy['detector'].get(
        'backbone', 'COCO-Detection/retinanet_R_50_FPN_3x.yaml')
    config = strategy['detector']['config']
    weights = strategy['detector']['weights']

    # Make a temporary work dir, or fetch from pipeline arguments
    temp_work_dir = os.getenv("TATOR_WORK_DIR")
    temp_gc = None
    if temp_work_dir is None:
        temp_gc = tempfile.TemporaryDirectory()
        temp_work_dir = temp_gc.name

    def handle_potential_fetch(path):
        # If config/weights are URLs, download them locally
        if path.startswith('http://') or path.startswith('https://'):
            local_name = path.split('/')[-1]
            local_path = os.path.join(temp_work_dir, local_name)
            print(f"Downloading '{path}' to '{local_path}'")
            download_file(path, local_path)
            return local_path
        else:
            return path

    config = handle_potential_fetch(config)
    weights = handle_potential_fetch(weights)

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(backbone))
    cfg.merge_from_file(config)
    cfg.MODEL.RETINANET.SCORE_THRESH_TEST = 0.3
    cfg.MODEL.WEIGHTS = weights  # path to the model file
    model = build_model(cfg)
    checkpointer = DetectionCheckpointer(model)
    checkpointer.load(cfg.MODEL.WEIGHTS)
    model = model.eval().cuda()

    # Separate NMS
    model_nms = torchvision.ops.nms
    nms_threshold = strategy['detector'].get('nms_threshold', 0.55)

    if strategy.get('broadcast_process', None):
        print("Broadcast mode enabled")
        publish_queue = multiprocessing.Queue(num_buffers)
        broadcast = multiprocessing.Process(
            None, broadcast_thread,
            args=(buffers, publish_queue, free_queue, strategy))
        broadcast.start()
    else:
        publish_queue = None

    if strategy.get('save', None):
        print("Save mode enabled")
        save_queue = multiprocessing.Queue(num_buffers)
        save = multiprocessing.Process(None, save_thread,
                                       args=(save_queue, strategy))
        save.start()
    else:
        save_queue = None

    server = multiprocessing.Process(
        None, server_thread,
        args=(buffers, free_queue, process_queue, dims, strategy))
    server.start()
    print("Loaded model")

    buffer_idx, media_id, frame_count = process_queue.get()
    names = strategy['detector']['names']
    current = {"boxes": [], "scores": [], "classes": []}
    begin = time.time()
    while buffer_idx is not None:
        if frame_count % frame_interval == 0:
            bgr = np.frombuffer(buffers[buffer_idx], dtype=np.uint8).reshape(dims)
            blob = torch.as_tensor(bgr.transpose(2, 0, 1)).cuda()
            results = model([{"image": blob}])
            # Process results on CPU
            cpu_results = results[0]
            keep = model_nms(cpu_results["instances"].pred_boxes.tensor,
                             cpu_results["instances"].scores,
                             nms_threshold).to("cpu").tolist()
            cpu_results["instances"] = cpu_results["instances"][keep]
            instance_dict = cpu_results["instances"].get_fields()
            pred_boxes = instance_dict["pred_boxes"]
            scores = instance_dict["scores"]
            pred_classes = instance_dict["pred_classes"]
            current = {"boxes": [], "scores": [], "classes": []}
            for box, score, cls in zip(pred_boxes, scores, pred_classes):
                if score > strategy['detector']['threshold']:
                    current['boxes'].append(np.array(box.tolist(), dtype=np.uint32))
                    current['scores'].append(score.tolist())
                    current['classes'].append(cls.tolist())
            if current['boxes']:
                if save_queue:
                    current['frame'] = frame_count
                    current['media'] = media_id
                    save_queue.put(current)
                if publish_queue:
                    for box, score, label_id in zip(current['boxes'],
                                                    current['scores'],
                                                    current['classes']):
                        drawBox(bgr, box, score, names[label_id])
                    frame_data = bgr.tobytes()
            if publish_queue:
                publish_queue.put(buffer_idx)
            else:
                free_queue.put(buffer_idx)
        else:
            # Frame skipped by the detector; re-draw the last detections
            if current['boxes'] and publish_queue:
                bgr = np.frombuffer(buffers[buffer_idx], dtype=np.uint8).reshape(dims)
                for box, score, label_id in zip(current['boxes'],
                                                current['scores'],
                                                current['classes']):
                    drawBox(bgr, box, score, names[label_id])
                frame_data = bgr.tobytes()
            if publish_queue:
                publish_queue.put(buffer_idx)
            else:
                free_queue.put(buffer_idx)

        buffer_idx, media_id, frame_count = process_queue.get()
        if args.verbose:
            if (frame_count + 1) % 100 == 0:
                duration = time.time() - begin
                time_per_frame = duration / 100
                fps = 1.0 / time_per_frame
                print(f"total_frames={frame_count}, graph time={time_per_frame}, "
                      f"graph fps={fps}, Depth={free_queue.qsize()}")
                begin = time.time()

    server.join()
    if save_queue:
        save_queue.put(None)
        save.join()
    if publish_queue:
        # Was save_queue.put(None): signal the broadcast process, not save
        publish_queue.put(None)
        broadcast.join()
def train_task(model_name, model_file):
    path = os.path.join(SAVE_PATH, 'train_task', model_name)
    os.makedirs(path, exist_ok=True)

    # Load Data
    print('Loading Data.')
    dataloader = KITTI_Dataloader()

    def kitti_train():
        return dataloader.get_dicts(train_flag=True)

    def kitti_test():
        return dataloader.get_dicts(train_flag=False)

    DatasetCatalog.register("KITTI_train", kitti_train)
    MetadataCatalog.get("KITTI_train").set(thing_classes=list(CATEGORIES.keys()))
    DatasetCatalog.register("KITTI_test", kitti_test)
    MetadataCatalog.get("KITTI_test").set(thing_classes=list(CATEGORIES.keys()))

    # Load MODEL and configure train hyperparameters
    print('Loading Model.')
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(model_file))
    cfg.DATASETS.TRAIN = ('KITTI_train',)
    cfg.DATASETS.TEST = ('KITTI_test',)
    cfg.DATALOADER.NUM_WORKERS = 0
    cfg.OUTPUT_DIR = SAVE_PATH
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_file)
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = 0.00025
    # Roughly one pass over the training images
    cfg.SOLVER.MAX_ITER = NUM_IMGS // cfg.SOLVER.IMS_PER_BATCH + 1
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 9

    # TRAIN!!
    print('Training.......')
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    trainer = DefaultTrainer(cfg)
    trainer.resume_or_load(resume=False)
    trainer.train()
    print('Training Done.')

    # EVAL
    print('Evaluating......')
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, 'model_final.pth')
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    predictor = DefaultPredictor(cfg)
    dataset_dicts = kitti_test()
    for i, d in enumerate(random.sample(dataset_dicts, 5)):
        im = cv2.imread(d['file_name'])
        outputs = predictor(im)
        v = Visualizer(im[:, :, ::-1],
                       metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                       scale=0.8,
                       instance_mode=ColorMode.IMAGE)
        v = v.draw_instance_predictions(outputs['instances'].to('cpu'))
        cv2.imwrite(
            os.path.join(path,
                         'Evaluation_' + model_name + '_trained_' + str(i) + '.png'),
            v.get_image()[:, :, ::-1])

    print('COCO EVALUATOR....')
    evaluator = COCOEvaluator('KITTI_test', cfg, False, output_dir="./output/")
    trainer.test(cfg, trainer.model, evaluators=[evaluator])

    # Loading training and test examples
    inference_dataloader = Inference_Dataloader(MIT_DATA_DIR)
    inference_dataset = inference_dataloader.load_data()

    # Qualitative results: visualize some prediction results on MIT_split dataset
    inside_city = [p for p in inference_dataset['test'] if 'inside_city' in p]
    for i, img_path in enumerate(inside_city[:20]):
        img = cv2.imread(img_path)
        outputs = predictor(img)
        v = Visualizer(img[:, :, ::-1],
                       metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                       scale=0.8,
                       instance_mode=ColorMode.IMAGE)
        v = v.draw_instance_predictions(outputs['instances'].to('cpu'))
        cv2.imwrite(
            os.path.join(path,
                         'Inference_' + model_name + '_trained_' + str(i) + '.png'),
            v.get_image()[:, :, ::-1])

    """
    val_loader = build_detection_test_loader(cfg, 'KITTI_test')
    inference_on_dataset(trainer.model, val_loader, evaluator)
    """
    print('DONE!!')
def process(self):
    self.percent.emit(0)
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(self.detector_name))
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(self.detector_name)
    predictor = DefaultPredictor(cfg)

    metric = NearestNeighborDistanceMetric("cosine", self.max_cosine_distance,
                                           self.nn_budget)
    tracker = Tracker(metric,
                      max_iou_distance=self.max_iou_distance,
                      max_age=self.max_age,
                      n_init=self.n_init)
    extractor = Extractor(self.tracker_path, use_cuda=True)
    self.percent.emit(5)

    out_file = open(self.tracks_path, 'w')
    self.video = cv2.VideoCapture(self.video_path)
    counter = 0
    frames_count = self.video.get(cv2.CAP_PROP_FRAME_COUNT)
    current_percent = 0
    det_time = 0
    trac_time = 0
    timeAll1 = time.time()
    success, frame = self.video.read()
    while success and not self.stopped:
        counter += 1
        if current_percent != int((counter / frames_count) * 95):
            current_percent = int((counter / frames_count) * 95)
            self.percent.emit(current_percent + 5)

        time1 = time.time()
        outputs = predictor(frame)
        time2 = time.time()

        preds = self.getBboxs(outputs["instances"].to("cpu"))
        features = self.get_features(preds[:, :4].astype(np.int32), frame, extractor)
        bbox_tlwh = self.xyxy_to_xywh(preds[:, :4])
        detections = [
            Detection(bbox_tlwh[i], conf, features[i])
            for i, conf in enumerate(preds[:, 4]) if conf > self.min_confidence
        ]
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = non_max_suppression(boxes, self.nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        tracker.predict()
        tracker.update(detections)
        time3 = time.time()
        det_time += (time2 - time1)
        trac_time += (time3 - time2)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr().astype(np.int32)
            # MOT challenge format: frame, id, x, y, w, h, conf, -1, -1, -1
            print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1' %
                  (counter, track.track_id, bbox[0], bbox[1],
                   bbox[2] - bbox[0], bbox[3] - bbox[1]),
                  file=out_file)
        success, frame = self.video.read()

    timeAll2 = time.time()
    print(det_time, trac_time, det_time + trac_time)
    print(timeAll2 - timeAll1)
    out_file.close()
    self.video.release()
    if self.stopped:
        os.remove(self.tracks_path)
    else:
        self.finished.emit()
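# getBboxs and xyxy_to_xywh are called above but not shown in this snippet.
# Minimal sketches of what these methods might look like, judging only from
# how they are used (assumptions: getBboxs returns person detections as rows
# of [x1, y1, x2, y2, score], and xyxy_to_xywh converts corner boxes to
# top-left/width/height boxes):
def getBboxs(self, instances):
    # Keep only the 'person' class (COCO id 0) and stack boxes with scores
    keep = instances.pred_classes == 0
    boxes = instances.pred_boxes.tensor[keep].numpy()
    scores = instances.scores[keep].numpy()
    return np.hstack([boxes, scores[:, None]])

def xyxy_to_xywh(self, boxes_xyxy):
    boxes = boxes_xyxy.copy()
    boxes[:, 2] = boxes_xyxy[:, 2] - boxes_xyxy[:, 0]  # width
    boxes[:, 3] = boxes_xyxy[:, 3] - boxes_xyxy[:, 1]  # height
    return boxes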
def setup_config(args, random_seed=None, is_testing=False):
    """
    Sets up config node with probabilistic detectron elements. Also sets up a
    fixed random seed for all scientific computing libraries, and sets up all
    supported datasets as instances of coco.

    Args:
        args (Namespace): args from argument parser
        random_seed (int): set a fixed random seed throughout torch, numpy,
            and python
        is_testing (bool): set to true if inference. If true, the function
            will raise an error if the checkpoint directory does not already
            exist.

    Returns:
        (CfgNode) detectron2 config object
    """
    # Get default detectron config file
    cfg = get_cfg()
    add_detr_config(cfg)
    add_probabilistic_config(cfg)

    # Update default config file with custom config file
    configs_dir = core.configs_dir()
    args.config_file = os.path.join(configs_dir, args.config_file)
    cfg.merge_from_file(args.config_file)

    # Add dropout rate for faster RCNN box head
    cfg.MODEL.ROI_BOX_HEAD.DROPOUT_RATE = cfg.MODEL.PROBABILISTIC_MODELING.DROPOUT_RATE

    # Update config with inference configurations. Only applicable when in
    # probabilistic inference mode.
    if args.inference_config != "":
        args.inference_config = os.path.join(configs_dir, args.inference_config)
        cfg.merge_from_file(args.inference_config)

    # Create output directory
    model_name = os.path.split(os.path.split(args.config_file)[0])[-1]
    dataset_name = os.path.split(
        os.path.split(os.path.split(args.config_file)[0])[0])[-1]
    cfg['OUTPUT_DIR'] = os.path.join(core.data_dir(), dataset_name, model_name,
                                     os.path.split(args.config_file)[-1][:-5],
                                     'random_seed_' + str(random_seed))
    if is_testing:
        if not os.path.isdir(cfg['OUTPUT_DIR']):
            raise NotADirectoryError(
                "Checkpoint directory {} does not exist.".format(cfg['OUTPUT_DIR']))

    os.makedirs(cfg['OUTPUT_DIR'], exist_ok=True)

    # Copy config file to output directory
    copyfile(args.config_file,
             os.path.join(cfg['OUTPUT_DIR'], os.path.split(args.config_file)[-1]))

    # Freeze config file
    cfg['SEED'] = random_seed
    cfg.freeze()

    # Initiate default setup
    default_setup(cfg, args)

    # Setup logger for probabilistic detectron module
    setup_logger(output=cfg.OUTPUT_DIR,
                 distributed_rank=comm.get_rank(),
                 name="Probabilistic Detectron")

    # Set a fixed random seed for all numerical libraries
    if random_seed is not None:
        torch.manual_seed(random_seed)
        np.random.seed(random_seed)
        random.seed(random_seed)

    # Setup datasets
    if args.image_corruption_level != 0:
        image_root_corruption_prefix = '_' + str(args.image_corruption_level)
    else:
        image_root_corruption_prefix = None

    dataset_dir = os.path.expanduser(args.dataset_dir)

    # Handle cases when this function has been called multiple times. In that
    # case the datasets are already registered, so skip registration entirely.
    # TODO: this is very bad practice, should fix.
    try:
        setup_all_datasets(
            dataset_dir,
            image_root_corruption_prefix=image_root_corruption_prefix)
        return cfg
    except AssertionError:
        return cfg
def evaluate(args):
    logger.info("Start recommendation task!")
    os.makedirs(args.output_path, exist_ok=True)
    with open(args.config_path, encoding="utf-8") as f:
        configs = yaml.load(f, Loader=yaml.FullLoader)
    # logger.info(configs)

    dataset = Dataset(args.input_path, args.data_name)
    d = args.test_folder_name
    DatasetCatalog.register(f"{args.data_name}_" + d,
                            lambda d=d: dataset.get_fashion_dicts(d))
    MetadataCatalog.get(f"{args.data_name}_" + d).set(
        thing_classes=configs['Detectron2']['LABEL_LIST'][args.data_name])

    experiment_folder = os.path.join(args.output_path,
                                     f"{args.data_name}_{args.model_name}")
    model_idx = get_best_checkpoint(experiment_folder)

    logger.info("Build model ...")
    cfg = get_cfg()
    cfg.OUTPUT_DIR = os.path.join(args.output_path,
                                  f"{args.data_name}_{args.model_name}")
    cfg.merge_from_file(model_zoo.get_config_file(args.model_path))
    cfg.DATASETS.TRAIN = ()
    cfg.DATASETS.TEST = ()
    # Number of CPU workers for the data loader
    cfg.DATALOADER.NUM_WORKERS = configs['Detectron2']['DATALOADER_NUM_WORKERS']
    cfg.SOLVER.IMS_PER_BATCH = configs['cgd']['SOLVER_IMS_PER_BATCH']
    # Number of ROIs per image used in a batch update
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = configs['Detectron2'][
        'MODEL_ROI_HEADS_BATCH_SIZE_PER_IMAGE']
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(
        configs['Detectron2']['LABEL_LIST'][args.data_name])
    cfg.MODEL.WEIGHTS = os.path.join(experiment_folder,
                                     f"model_{model_idx.zfill(7)}.pth")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set a custom testing threshold
    cfg.MODEL.DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    predictor = DefaultPredictor(cfg)
    logger.info("Completed detectron ...")

    logger.info("Attach CGD model ...")
    # cgd
    model = torch.load(os.path.join(args.output_path, 'cgd_model.pt'))

    logger.info("Load DataFiles ...")
    test_dataset_dicts = dataset.get_fashion_dicts(d)

    # detectron
    detectron = predictor.model
    # test resize func
    resizefunc = T.ResizeShortestEdge(
        [predictor.cfg.INPUT.MIN_SIZE_TEST, predictor.cfg.INPUT.MIN_SIZE_TEST],
        predictor.cfg.INPUT.MAX_SIZE_TEST)
    assert not detectron.training, "Current detectron is training mode"

    logger.info("Get embedded vectors ...")
    roi_pooler = ROIpool(detectron)
    total_dict = []
    total_dict = get_features(test_dataset_dicts, roi_pooler, model, configs,
                              'test', total_dict, resizefunc)
    os.makedirs('./dataset/feature_extraction', exist_ok=True)
    with open(f'./dataset/feature_extraction/cgd_{d}.pkl', 'wb') as f:
        pickle.dump(total_dict, f)

    # logger.info("Load PCA model ...")
    # with open(f'./model/pca_model.pkl', 'rb') as f:
    #     pca = pickle.load(f)
    # pca_dict = get_pca(total_dict, pca)
    # logger.info("Dimension reduction Applied...")
    # with open(f'./dataset/feature_extraction/cgd_pca_{d}.pkl', 'wb') as f:
    #     pickle.dump(pca_dict, f)
    logger.info("Saved embedded vectors ...")
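# ROIpool is used above but not defined in this snippet. One plausible reading,
# given that it wraps predictor.model, is a thin helper that extracts pooled
# per-box features from a detectron2 GeneralizedRCNN. A minimal sketch under
# that assumption (the class name, call signature, and the `box_in_features`
# attribute name of recent StandardROIHeads releases are all assumptions):
import torch
from detectron2.structures import Boxes


class ROIpool:
    def __init__(self, detectron):
        self.model = detectron

    @torch.no_grad()
    def __call__(self, images, boxes_per_image):
        # images: a preprocessed ImageList; boxes_per_image: list of Nx4 tensors
        features = self.model.backbone(images.tensor)
        box_features = self.model.roi_heads.box_pooler(
            [features[f] for f in self.model.roi_heads.box_in_features],
            [Boxes(b) for b in boxes_per_image])
        # One pooled feature map per box, e.g. (sum of N) x C x 7 x 7
        return box_features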
def run(self):
    # Core function of your process
    # Call beginTaskRun for initialization
    self.beginTaskRun()

    # We use a fixed seed to keep the same colors for our masks + boxes +
    # labels (same random values each time)
    random.seed(10)

    # Get input
    task_input = self.getInput(0)
    srcImage = task_input.getImage()

    # Get outputs
    mask_output = self.getOutput(0)
    graph_output = self.getOutput(2)

    # Get parameters
    param = self.getParam()

    # The config file and model file needed are in the output folder
    # generated by the train plugin
    if (self.cfg is None or param.update) and param.configFile != "":
        with open(param.configFile, 'r') as file:
            cfg_data = file.read()
        self.cfg = CfgNode.load_cfg(cfg_data)
        self.classes = self.cfg.CLASS_NAMES

    if self.model is None or param.update:
        if param.dataset == "Cityscapes":
            url = "https://dl.fbaipublicfiles.com/detectron2/DeepLab/Cityscapes-" \
                  "SemanticSegmentation/deeplab_v3_plus_R_103_os16_mg124_poly_90k_bs16/" \
                  "28054032/model_final_a8a355.pkl"
            self.cfg = get_cfg()
            cfg_file = os.path.join(
                os.path.dirname(__file__),
                os.path.join("configs",
                             "deeplab_v3_plus_R_103_os16_mg124_poly_90k_bs16.yaml"))
            add_deeplab_config(self.cfg)
            self.cfg.merge_from_file(cfg_file)
            self.cfg.MODEL.WEIGHTS = url
            self.classes = [
                'road', 'sidewalk', 'building', 'wall', 'fence', 'pole',
                'traffic light', 'traffic sign', 'vegetation', 'terrain',
                'sky', 'person', 'rider', 'car', 'truck', 'bus', 'train',
                'motorcycle', 'bicycle'
            ]
        elif self.cfg is not None:
            self.cfg.MODEL.WEIGHTS = param.modelFile

        if not torch.cuda.is_available():
            self.cfg.MODEL.DEVICE = "cpu"
            self.cfg.MODEL.RESNETS.NORM = "BN"
            self.cfg.MODEL.SEM_SEG_HEAD.NORM = "BN"
        self.model = build_model(self.cfg)
        DetectionCheckpointer(self.model).load(self.cfg.MODEL.WEIGHTS)
        self.model.eval()

    if self.model is not None and srcImage is not None:
        # Convert numpy image to detectron2 input format
        model_input = {}
        h, w, c = np.shape(srcImage)
        model_input["image"] = torch.tensor(srcImage).permute(2, 0, 1)
        if param.dataset == "Cityscapes":
            model_input["image"] = Resize([512, 1024])(model_input["image"])
        model_input["height"] = h
        model_input["width"] = w

        # Inference with pretrained model
        with torch.no_grad():
            pred = self.model([model_input])
            pred = pred[0]["sem_seg"].cpu().numpy()

        # Convert logits to labelled image
        dstImage = np.argmax(pred, axis=0).astype(dtype=np.uint8)

        # Set image of input/output (numpy array). No +1 offset on dstImage:
        # value 0 would normally denote background, but there is no background
        # class here
        mask_output.setImage(dstImage)

        # Create random color map
        if self.colors is None or param.update:
            n = len(self.classes)
            self.colors = []
            for i in range(n):
                self.colors.append([
                    random.randint(0, 255),
                    random.randint(0, 255),
                    random.randint(0, 255), 255
                ])

        # Apply color map on labelled image
        self.setOutputColorMap(1, 0, self.colors)
        self.forwardInputImage(0, 1)
        graph_output.setImage(self.draw_legend())

    param.update = False

    # Step progress bar
    self.emitStepProgress()

    # Call endTaskRun to finalize process
    self.endTaskRun()
def show_action(action_folder, rootpath):
    path = os.path.join(action_folder)
    cfg = get_cfg()
    # add project-specific config (e.g., TensorMask) here if you're not
    # running a model in detectron2's core library
    cfg.merge_from_file(
        model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.9  # set threshold for this model
    # Find a model from detectron2's model zoo. You can use the
    # https://dl.fbaipublicfiles... url as well
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
        "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
    predictor = DefaultPredictor(cfg)

    file_list = os.listdir(action_folder)
    skeleton_data = []
    # Use the middle frame of the clip
    frame = cv2.imread(os.path.join(path, file_list[int(len(file_list) / 2)]))
    # frame = cv2.imread(os.path.join(path, file_list[15]))
    outputs = predictor(frame)

    # whether to display the detected objects in a window
    # v = Visualizer(frame[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=2)
    # out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    # cv2.imshow('window', out.get_image()[:, :, ::-1])
    # cv2.imwrite("ss.jpg", out.get_image()[:, :, ::-1])

    boxes = outputs["instances"].pred_boxes.tensor.cpu().numpy()
    for i, newbox in enumerate(boxes):
        # np.array([item for item in data if item is not None])
        skeleton_data_ = get_skeleton(newbox, frame, i)
        if skeleton_data_ is not None:
            skeleton_data.append(np.array(skeleton_data_))
    frame_skeleton = np.array(skeleton_data)
    with open('./data/SkeletonData/{}.npy'.format(rootpath), 'wb') as fp:
        np.save(fp, frame_skeleton)

    # TODO: tracked multi-frame feature extraction
    """
    sortedfiles = sorted(file_list, key=lambda x: int(x.split("_")[1][:-4]))
    skeleton_data = {}
    object_missing = True
    frame_hog = []
    for index, name in enumerate(sortedfiles):
        hog = cv2.HOGDescriptor()
        frame = cv2.imread(os.path.join(path, name))
        h = hog.compute(frame)
        frame_hog.append(np.mean(h * 100))
        current = sum(frame_hog) / len(frame_hog)
        print(current, np.mean(h * 100), abs(current - np.mean(h * 100)))
        if index == 0 or abs(current - np.mean(h * 100)) > 1.5:
            frame_hog = []
            multiTracker = cv2.legacy.MultiTracker_create()
            outputs = predictor(frame)
            bbox = outputs["instances"].pred_boxes.tensor.cpu().numpy()
            for object in bbox:
                xmin, ymin = object[0], object[1]
                boxwidth, boxheight = object[2] - object[0], object[3] - object[1]
                object = (xmin, ymin, boxwidth, boxheight)
                multiTracker.add(inital_tracker("CSRT"), frame, object)
            # print(np.mean(h * 100), os.path.join(path, name))
        object_missing, boxes = multiTracker.update(frame)
        for i, newbox in enumerate(boxes):
            p1 = (int(newbox[0]), int(newbox[1]))
            p2 = (int(newbox[0] + newbox[2]), int(newbox[1] + newbox[3]))
            cv2.rectangle(frame, p1, p2,
                          (randint(0, 255), randint(0, 255), randint(0, 255)), 2, 1)
            # print(newbox.shape, frame.shape, i)
            skeleton_single = get_skeleton(newbox, frame, i, index, skeleton_data)
            if skeleton_single is not None:
                if i not in skeleton_data:
                    skeleton_data[i] = []
                skeleton_data[i].append(skeleton_single)
        cv2.imshow('MultiTracker', frame)
        # v = Visualizer(frame[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
        # out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
        # cv2.imshow('window', out.get_image()[:, :, ::-1])
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break
    with open('./data/SkeletonData/{}.json'.format(rootpath), 'w') as fp:
        json.dump(skeleton_data, fp)
    """
    return 0
def task_a(model_name, model_file, evaluate=True, visualize=True):
    print('Running task A for model', model_name)
    SAVE_PATH = os.path.join('./results_week_4_task_a', model_name)
    os.makedirs(SAVE_PATH, exist_ok=True)

    # Loading data
    print('Loading data')
    dataloader = KITTIMOTS_Dataloader()

    def kitti_train():
        return dataloader.get_dicts(train_flag=True)

    def kitti_val():
        return dataloader.get_dicts(train_flag=False)

    DatasetCatalog.register('KITTIMOTS_train', kitti_train)
    MetadataCatalog.get('KITTIMOTS_train').set(
        thing_classes=list(KITTI_CATEGORIES.keys()))
    DatasetCatalog.register('KITTIMOTS_val', kitti_val)
    MetadataCatalog.get('KITTIMOTS_val').set(
        thing_classes=list(KITTI_CATEGORIES.keys()))

    # Load model and configuration
    print('Loading Model')
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(model_file))
    # Store the metadata the model was originally trained with
    model_training_metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])
    cfg.DATASETS.TRAIN = ('KITTIMOTS_train', )
    cfg.DATASETS.TEST = ('KITTIMOTS_val', )
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    cfg.OUTPUT_DIR = SAVE_PATH
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_file)

    if evaluate:
        model = build_model(cfg)
        DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)

        # Evaluation
        print('Evaluating')
        evaluator = COCOEvaluator('KITTIMOTS_val', cfg, False, output_dir='./output')
        trainer = DefaultTrainer(cfg)
        trainer.test(cfg, model, evaluators=[evaluator])

    if visualize:
        # Qualitative results: visualize some results
        print('Getting qualitative results')
        predictor = DefaultPredictor(cfg)
        inputs = kitti_val()
        inputs = inputs[:20] + inputs[-20:]
        for i, sample in enumerate(inputs):
            img = cv2.imread(sample['file_name'])
            outputs = predictor(img)
            v = Visualizer(img[:, :, ::-1],
                           metadata=model_training_metadata,
                           scale=0.8,
                           instance_mode=ColorMode.IMAGE)
            v = v.draw_instance_predictions(outputs['instances'].to('cpu'))
            cv2.imwrite(
                os.path.join(SAVE_PATH,
                             'Inference_' + model_name + '_inf_' + str(i) + '.png'),
                v.get_image()[:, :, ::-1])
def __init__(self, img):
    self.img = img
    self.model_path = 'tasks/models/Detectron2/output/leaf_seg_final.pth'
    self.cfg = get_cfg()
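# The class continues beyond this fragment. A plausible (purely assumed)
# continuation that finishes the config and loads the weights above into a
# predictor, kept commented out since it is illustration only:
#     self.cfg.merge_from_file(model_zoo.get_config_file(
#         "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
#     self.cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # e.g. a single 'leaf' class
#     self.cfg.MODEL.WEIGHTS = self.model_path
#     self.predictor = DefaultPredictor(self.cfg)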
def main():
    data_name = "augmented_1"
    data_path = os.path.join("./data", data_name)
    csv_name = data_name + ".csv"
    train_df = pd.read_csv(os.path.join(data_path, csv_name))

    keypoint_names = list(map(lambda x: x[:-2], train_df.columns.to_list()[1::2]))
    keypoint_flip_map = [
        ("left_eye", "right_eye"),
        ("left_ear", "right_ear"),
        ("left_shoulder", "right_shoulder"),
        ("left_elbow", "right_elbow"),
        ("left_wrist", "right_wrist"),
        ("left_hip", "right_hip"),
        ("left_knee", "right_knee"),
        ("left_ankle", "right_ankle"),
        ("left_palm", "right_palm"),
        ("left_instep", "right_instep"),
    ]

    image_list = train_df.iloc[:, 0].to_numpy()
    keypoints_list = train_df.iloc[:, 1:].to_numpy()
    train_imgs, valid_imgs, train_keypoints, valid_keypoints = train_val_split(
        image_list, keypoints_list, random_state=42)
    image_set = {"train": train_imgs, "valid": valid_imgs}
    keypoints_set = {"train": train_keypoints, "valid": valid_keypoints}

    hyper_params = {
        "augmented_ver": data_name,
        "learning_rate": 0.001,
        "num_epochs": 10000,
        "batch_size": 256,
        "description": "Final training"
    }

    for phase in ["train", "valid"]:
        DatasetCatalog.register(
            "keypoints_" + phase,
            lambda phase=phase: get_data_dicts(data_path, image_set[phase],
                                               keypoints_set[phase]))
        MetadataCatalog.get("keypoints_" + phase).set(thing_classes=["human"])
        MetadataCatalog.get("keypoints_" + phase).set(keypoint_names=keypoint_names)
        MetadataCatalog.get("keypoints_" + phase).set(keypoint_flip_map=keypoint_flip_map)
        MetadataCatalog.get("keypoints_" + phase).set(evaluator_type="coco")

    cfg = get_cfg()
    cfg.merge_from_file(
        model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml"))
    cfg.DATASETS.TRAIN = ("keypoints_train", )
    cfg.DATASETS.TEST = ("keypoints_valid", )
    cfg.DATALOADER.NUM_WORKERS = 16  # On a Windows environment, this value must be 0.
    # The effective ROI minibatch is IMS_PER_BATCH * ROI_HEADS.BATCH_SIZE_PER_IMAGE.
    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.SOLVER.BASE_LR = hyper_params["learning_rate"]  # Learning rate.
    # Max iteration (despite the key name, this is an iteration count).
    cfg.SOLVER.MAX_ITER = hyper_params["num_epochs"]
    cfg.SOLVER.GAMMA = 0.8
    # The iteration numbers at which to decrease the learning rate by GAMMA.
    cfg.SOLVER.STEPS = [3000, 4000, 5000, 6000, 7000, 8000]
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
        "COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml")
    # Number of ROIs sampled per image for the ROI head loss.
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = hyper_params["batch_size"]
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
    cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 24
    cfg.TEST.KEYPOINT_OKS_SIGMAS = np.ones((24, 1), dtype=float).tolist()
    cfg.TEST.EVAL_PERIOD = 5000  # Evaluation occurs every EVAL_PERIOD iterations.
    cfg.OUTPUT_DIR = os.path.join("./output", data_name)
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

    trainer = Trainer(cfg)
    trainer.resume_or_load(resume=False)
    trainer.train()

    # Inference should use the config with the parameters used in training.
    # cfg already contains everything we've set previously; we change it a
    # little bit for inference:
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # the model we just trained
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7  # set a custom testing threshold
    predictor = DefaultPredictor(cfg)

    test_dir = os.path.join("data", "test_imgs")
    test_list = os.listdir(test_dir)
    test_list.sort()
    except_list = []
    files = []
    preds = []
    for file in tqdm(test_list):
        filepath = os.path.join(test_dir, file)
        im = cv2.imread(filepath)
        outputs = predictor(im)
        outputs = outputs["instances"].to("cpu").get("pred_keypoints").numpy()
        files.append(file)
        pred = []
        try:
            for out in outputs[0]:
                pred.extend([float(e) for e in out[:2]])
        except IndexError:
            # No person detected: fill the row with zeros
            pred.extend([0] * 48)
            except_list.append(filepath)
        preds.append(pred)

    df_sub = pd.read_csv("./data/sample_submission.csv")
    df = pd.DataFrame(columns=df_sub.columns)
    df["image"] = files
    df.iloc[:, 1:] = preds
    df.to_csv(os.path.join(cfg.OUTPUT_DIR, f"{data_name}_submission.csv"),
              index=False)

    if except_list:
        print("No keypoints were detected in the following images; the rows "
              "corresponding to these image names are filled with 0.")
        print(*except_list)

    save_samples(cfg.OUTPUT_DIR, test_dir,
                 os.path.join(cfg.OUTPUT_DIR, f"{data_name}_submission.csv"),
                 mode="random", size=5)
def setup(args):
    """
    Create configs and perform basic setups.
    """
    dataset_path = '/raid/cesar_workspace/cesar_workspace/Object_Detection/Detectron2/detectron2/detectron2/data/Datasets/'
    train = dataset_path + "up_trees_train_2021"
    val = dataset_path + "up_trees_val_2021"
    train_dataset = train  # cfg.DATASETS.TRAIN
    val_dataset = val  # cfg.DATASETS.TEST
    dic_marks_path = dataset_path + "up_trees_labels.json"
    datasets_dic = {'train': train_dataset, 'val': val_dataset}
    dic_marks = {'0': 'up_tree'}
    # with open(dic_marks_path, 'w') as out:
    #     json.dump(dic_marks, out)
    with open(dic_marks_path, 'r') as out:
        dic_marks = json.load(out)
    # cat_ids = {label: key for key, label in dic_marks.items()}
    classes = [label for key, label in dic_marks.items()]

    def get_board_dicts(imgdir):
        # Fetch the json file
        json_file = imgdir + '.json'
        print(json_file)
        with open(json_file) as f:
            dataset_dicts = json.load(f)
        for i in dataset_dicts:
            filename = i["file_name"]
            for j in i["annotations"]:
                # Setting the required box mode
                j["bbox_mode"] = BoxMode.XYWH_ABS
                j["category_id"] = int(j["category_id"])
        return dataset_dicts

    # Registering the dataset
    for d in ['val', 'train']:
        dataset_name = os.path.basename(datasets_dic[d])
        print(dataset_name)
        DatasetCatalog.register(dataset_name,
                                lambda d=d: get_board_dicts(datasets_dic[d]))
        MetadataCatalog.get(dataset_name).set(thing_classes=classes)

    train_name = os.path.basename(datasets_dic['train'])
    val_name = os.path.basename(datasets_dic['val'])
    print(train_name, val_name)
    board_metadata = MetadataCatalog.get(train_name)

    dataset_dicts = get_board_dicts(train_dataset)
    n_imgs = len(dataset_dicts)
    dataset_dicts = get_board_dicts(val_dataset)
    n_imgs_val = len(dataset_dicts)
    print('Number of images on training data is:', n_imgs)

    cfg = get_cfg()
    add_tridentnet_config(cfg)
    # cfg.DATALOADER.NUM_WORKERS = 2
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.DATASETS.TRAIN = (train_name, )
    cfg.DATASETS.TEST = (val_name, )
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
    # No. of iterations after which the validation set is evaluated
    cfg.TEST.EVAL_PERIOD = n_imgs // cfg.SOLVER.IMS_PER_BATCH
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(classes)

    path = '/raid/cesar_workspace/cesar_workspace/Object_Detection/Detectron/detectron2/ImageNetPretrained/MSRA/'
    os.makedirs(path, exist_ok=True)
    backbone = os.path.basename(cfg.MODEL.WEIGHTS)
    print('Number of images on training data is:', n_imgs, n_imgs_val)
    backbone += '.pkl' if '.pkl' not in backbone else ''
    weight = path + backbone
    print(weight)
    if not os.path.isfile(weight):
        print("Downloading ImageNet weights")
        url_weights = weights_catalog[backbone] + backbone
        urllib.request.urlretrieve(url_weights, weight)
    cfg.MODEL.WEIGHTS = weight
    print(weight)
    # cfg.OUTPUT_DIR = './output_%s_X-101_b/' % accr
    print(cfg, '~~ I dedicate this to Shadow Moon ~~')
    default_setup(cfg, args)
    cfg.freeze()
    return cfg
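# weights_catalog is used above but not defined in this snippet. It presumably
# maps a backbone checkpoint name to the URL prefix it can be fetched from; a
# sketch using detectron2's publicly documented ImageNet-pretrained location
# (the dictionary contents are an assumption):
weights_catalog = {
    'R-50.pkl': 'https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/',
    'R-101.pkl': 'https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/',
}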
def evaluate(args):
    set_seed(args.seed)
    os.makedirs(args.output_path, exist_ok=True)
    with open(args.config_path) as f:
        configs = yaml.load(f, Loader=yaml.FullLoader)
    logger.info(configs)

    dataset = Dataset(args.input_path, args.data_name)
    for d in ["train", "val", "test"]:
        DatasetCatalog.register(f"{args.data_name}_" + d,
                                lambda d=d: dataset.get_fashion_dicts(d))
        MetadataCatalog.get(f"{args.data_name}_" + d).set(
            thing_classes=configs['Detectron2']['LABEL_LIST'][args.data_name])

    experiment_folder = os.path.join(args.output_path,
                                     f"{args.data_name}_{args.model_name}")
    model_idx = get_best_checkpoint(experiment_folder)

    cfg = get_cfg()
    cfg.OUTPUT_DIR = os.path.join(args.output_path,
                                  f"{args.data_name}_{args.model_name}")
    cfg.merge_from_file(model_zoo.get_config_file(args.model_path))
    cfg.DATASETS.TRAIN = (f'{args.data_name}_train', )
    cfg.DATASETS.TEST = (f'{args.data_name}_val', )
    # Number of CPU workers for the data loader
    cfg.DATALOADER.NUM_WORKERS = configs['Detectron2']['DATALOADER_NUM_WORKERS']
    cfg.SOLVER.IMS_PER_BATCH = configs['cgd']['SOLVER_IMS_PER_BATCH']
    # Number of ROIs per image used in a batch update
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = configs['Detectron2'][
        'MODEL_ROI_HEADS_BATCH_SIZE_PER_IMAGE']
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(
        configs['Detectron2']['LABEL_LIST'][args.data_name])
    cfg.MODEL.WEIGHTS = os.path.join(experiment_folder,
                                     f"model_{model_idx.zfill(7)}.pth")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set a custom testing threshold
    predictor = DefaultPredictor(cfg)

    train_loader = build_detection_test_loader(cfg, f'{args.data_name}_train')
    val_loader = build_detection_test_loader(cfg, f'{args.data_name}_val')
    test_loader = build_detection_test_loader(cfg, f'{args.data_name}_test')

    # cgd
    model = torch.load(os.path.join(args.output_path, 'cgd_model.pt'))
    # detectron
    detectron = predictor.model
    assert not detectron.training, "Current detectron is training mode"
    roi_pooler = ROIpool(detectron)

    total_dict = []
    total_dict = get_features(train_loader, roi_pooler, model, configs,
                              'train', total_dict)
    total_dict = get_features(val_loader, roi_pooler, model, configs,
                              'val', total_dict)
    total_dict = get_features(test_loader, roi_pooler, model, configs,
                              'test', total_dict)

    os.makedirs('./dataset/feature_extraction', exist_ok=True)
    with open('./dataset/feature_extraction/cgd.pkl', 'wb') as f:
        pickle.dump(total_dict, f)

    pca_dict, pca = get_pca(total_dict)
    with open('./dataset/feature_extraction/cgd_pca.pkl', 'wb') as f:
        pickle.dump(pca_dict, f)
    with open('./model/pca_model.pkl', 'wb') as f:
        pickle.dump(pca, f)
def test_rrpn(self):
    torch.manual_seed(121)
    cfg = get_cfg()
    cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN"
    cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"
    cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]]
    cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1]]
    cfg.MODEL.ANCHOR_GENERATOR.ANGLES = [[0, 60]]
    cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1)
    cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead"
    backbone = build_backbone(cfg)
    proposal_generator = build_proposal_generator(cfg, backbone.output_shape())
    num_images = 2
    images_tensor = torch.rand(num_images, 20, 30)
    image_sizes = [(10, 10), (20, 30)]
    images = ImageList(images_tensor, image_sizes)
    image_shape = (15, 15)
    num_channels = 1024
    features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
    gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]],
                            dtype=torch.float32)
    gt_instances = Instances(image_shape)
    gt_instances.gt_boxes = RotatedBoxes(gt_boxes)
    with EventStorage():  # capture events in a new storage to discard them
        proposals, proposal_losses = proposal_generator(
            images, features, [gt_instances[0], gt_instances[1]])

    expected_losses = {
        "loss_rpn_cls": torch.tensor(0.043263837695121765),
        "loss_rpn_loc": torch.tensor(0.14432406425476074),
    }
    for name in expected_losses.keys():
        err_msg = "proposal_losses[{}] = {}, expected losses = {}".format(
            name, proposal_losses[name], expected_losses[name])
        self.assertTrue(
            torch.allclose(proposal_losses[name], expected_losses[name]),
            err_msg)

    expected_proposal_boxes = [
        RotatedBoxes(
            torch.tensor([
                [0.60189795, 1.24095452, 61.98131943, 18.03621292, -4.07244873],
                [15.64940453, 1.69624567, 59.59749603, 16.34339333, 2.62692475],
                [-3.02982378, -2.69752932, 67.90952301, 59.62455750, 59.97010040],
                [16.71863365, 1.98309708, 35.61507797, 32.81484985, 62.92267227],
                [0.49432933, -7.92979717, 67.77606201, 62.93098450, -1.85656738],
                [8.00880814, 1.36017394, 121.81007385, 32.74150467, 50.44297409],
                [16.44299889, -4.82221127, 63.39775848, 61.22503662, 54.12270737],
                [5.00000000, 5.00000000, 10.00000000, 10.00000000, -0.76943970],
                [17.64130402, -0.98095351, 61.40377808, 16.28918839, 55.53118134],
                [0.13016054, 4.60568953, 35.80157471, 32.30180359, 62.52872086],
                [-4.26460743, 0.39604485, 124.30079651, 31.84611320, -1.58203125],
                [7.52815342, -0.91636634, 62.39784622, 15.45565224, 60.79549789],
            ])),
        RotatedBoxes(
            torch.tensor([
                [0.07734215, 0.81635046, 65.33510590, 17.34688377, -1.51821899],
                [-3.41833067, -3.11320257, 64.17595673, 60.55617905, 58.27033234],
                [20.67383385, -6.16561556, 63.60531998, 62.52315903, 54.85546494],
                [15.00000000, 10.00000000, 30.00000000, 20.00000000, -0.18218994],
                [9.22646523, -6.84775209, 62.09895706, 65.46472931, -2.74307251],
                [15.00000000, 4.93451595, 30.00000000, 9.86903191, -0.60272217],
                [8.88342094, 2.65560246, 120.95362854, 32.45022202, 55.75970078],
                [16.39088631, 2.33887148, 34.78761292, 35.61492920, 60.81977463],
                [9.78298569, 10.00000000, 19.56597137, 20.00000000, -0.86660767],
                [1.28576660, 5.49873352, 34.93610382, 33.22600174, 60.51599884],
                [17.58912468, -1.63270092, 62.96052551, 16.45713997, 52.91245270],
                [5.64749718, -1.90428460, 62.37649155, 16.19474792, 61.09543991],
                [0.82255805, 2.34931135, 118.83985901, 32.83671188, 56.50753784],
                [-5.33874989, 1.64404404, 125.28501892, 33.35424042, -2.80731201],
            ])),
    ]

    expected_objectness_logits = [
        torch.tensor([
            0.10111768, 0.09112845, 0.08466332, 0.07589971, 0.06650183,
            0.06350251, 0.04299347, 0.01864817, 0.00986163, 0.00078543,
            -0.04573630, -0.04799230,
        ]),
        torch.tensor([
            0.11373727, 0.09377633, 0.05281663, 0.05143715, 0.04040275,
            0.03250912, 0.01307789, 0.01177734, 0.00038105, -0.00540255,
            -0.01194804, -0.01461012, -0.03061717, -0.03599222,
        ]),
    ]

    torch.set_printoptions(precision=8, sci_mode=False)

    for proposal, expected_proposal_box, im_size, expected_objectness_logit in zip(
            proposals, expected_proposal_boxes, image_sizes,
            expected_objectness_logits):
        self.assertEqual(len(proposal), len(expected_proposal_box))
        self.assertEqual(proposal.image_size, im_size)
        # It seems that there's some randomness in the result across different
        # machines: this test can be run on a local machine 100 times with
        # exactly the same result, yet a different machine might produce
        # slightly different results, thus the atol here.
        err_msg = "computed proposal boxes = {}, expected {}".format(
            proposal.proposal_boxes.tensor, expected_proposal_box.tensor)
        self.assertTrue(
            torch.allclose(proposal.proposal_boxes.tensor,
                           expected_proposal_box.tensor,
                           atol=1e-5), err_msg)
        err_msg = "computed objectness logits = {}, expected {}".format(
            proposal.objectness_logits, expected_objectness_logit)
        self.assertTrue(
            torch.allclose(proposal.objectness_logits,
                           expected_objectness_logit,
                           atol=1e-5), err_msg)
def train(args):
    set_seed(args.seed)
    os.makedirs(args.output_path, exist_ok=True)
    with open(args.config_path) as f:
        configs = yaml.load(f, Loader=yaml.FullLoader)
    logger.info(configs)

    dataset = Dataset(args.input_path, args.data_name)
    for d in ["train", "val"]:
        DatasetCatalog.register(f"{args.data_name}_" + d,
                                lambda d=d: dataset.get_fashion_dicts(d))
        MetadataCatalog.get(f"{args.data_name}_" + d).set(
            thing_classes=configs['Detectron2']['LABEL_LIST'][args.data_name])

    experiment_folder = os.path.join(args.output_path,
                                     f"{args.data_name}_{args.model_name}")
    model_idx = get_best_checkpoint(experiment_folder)

    cfg = get_cfg()
    cfg.OUTPUT_DIR = os.path.join(args.output_path,
                                  f"{args.data_name}_{args.model_name}")
    cfg.merge_from_file(model_zoo.get_config_file(args.model_path))
    cfg.DATASETS.TRAIN = (f'{args.data_name}_train', )
    cfg.DATASETS.TEST = (f'{args.data_name}_val', )
    # Number of CPU workers for the data loader
    cfg.DATALOADER.NUM_WORKERS = configs['Detectron2']['DATALOADER_NUM_WORKERS']
    cfg.SOLVER.IMS_PER_BATCH = configs['cgd']['SOLVER_IMS_PER_BATCH']
    # Number of ROIs per image used in a batch update
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = configs['Detectron2'][
        'MODEL_ROI_HEADS_BATCH_SIZE_PER_IMAGE']
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(
        configs['Detectron2']['LABEL_LIST'][args.data_name])
    cfg.MODEL.WEIGHTS = os.path.join(experiment_folder,
                                     f"model_{model_idx.zfill(7)}.pth")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set a custom testing threshold
    predictor = DefaultPredictor(cfg)

    train_loader = build_detection_train_loader(cfg)
    val_loader = build_detection_val_loader(cfg, cfg.DATASETS.TEST[0],
                                            DatasetMapper(cfg, True))

    device = predictor.model.device
    selector = BatchHardTripletSelector().to(device)
    triple_loss = TripletLoss(margin=configs['cgd']['TRIPLE_MARGIN']).to(device)
    ce_loss = LabelSmoothingCrossEntropy(
        smoothing=configs['cgd']['SMOOTHING'],
        temperature_scale=configs['cgd']['TEMP_SCALE']).to(device)

    # cgd = torch.load("./model/middle_cgd_model.pt")
    cgd = CGD(configs['cgd']['GD_CONFIG'], configs['cgd']['FEATURE_DIM'],
              configs['cgd']['BASE_DIM'], configs['cgd']['NUM_CLASS']).to(device)
    optimizer = Adam(cgd.parameters(), lr=configs['cgd']['LR'])
    lr_scheduler = MultiStepLR(
        optimizer,
        milestones=[
            int(0.6 * configs['cgd']['MAX_ITERS']),
            int(0.8 * configs['cgd']['MAX_ITERS'])
        ],
        gamma=0.1)

    detectron = predictor.model
    assert not detectron.training, "Current detectron is training mode"
    roi_pooler = ROIpool(detectron)

    trainer = Trainer(detectron, roi_pooler, selector, ce_loss, triple_loss,
                      optimizer=optimizer,
                      scheduler=lr_scheduler,
                      save_path=os.path.join(args.output_path, 'cgd_model.pt'))
    trainer.train(train_loader, val_loader, cgd, configs['cgd']['MAX_ITERS'],
                  eval_period=configs['cgd']['EVAL_PERIOD'])
def test_rpn(self):
    torch.manual_seed(121)
    cfg = get_cfg()
    backbone = build_backbone(cfg)
    proposal_generator = RPN(cfg, backbone.output_shape())
    num_images = 2
    images_tensor = torch.rand(num_images, 20, 30)
    image_sizes = [(10, 10), (20, 30)]
    images = ImageList(images_tensor, image_sizes)
    image_shape = (15, 15)
    num_channels = 1024
    features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
    gt_boxes = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32)
    gt_instances = Instances(image_shape)
    gt_instances.gt_boxes = Boxes(gt_boxes)
    with EventStorage():  # capture events in a new storage to discard them
        proposals, proposal_losses = proposal_generator(
            images, features, [gt_instances[0], gt_instances[1]])

    expected_losses = {
        "loss_rpn_cls": torch.tensor(0.0804563984),
        "loss_rpn_loc": torch.tensor(0.0990132466),
    }
    for name in expected_losses.keys():
        err_msg = "proposal_losses[{}] = {}, expected losses = {}".format(
            name, proposal_losses[name], expected_losses[name])
        self.assertTrue(
            torch.allclose(proposal_losses[name], expected_losses[name]),
            err_msg)

    expected_proposal_boxes = [
        Boxes(torch.tensor([[0, 0, 10, 10], [7.3365392685, 0, 10, 10]])),
        Boxes(
            torch.tensor([
                [0, 0, 30, 20],
                [0, 0, 16.7862777710, 13.1362524033],
                [0, 0, 30, 13.3173446655],
                [0, 0, 10.8602609634, 20],
                [7.7165775299, 0, 27.3875980377, 20],
            ])),
    ]

    expected_objectness_logits = [
        torch.tensor([0.1225359365, -0.0133192837]),
        torch.tensor([
            0.1415634006, 0.0989848152, 0.0565387346, -0.0072308783,
            -0.0428492837
        ]),
    ]

    for proposal, expected_proposal_box, im_size, expected_objectness_logit in zip(
            proposals, expected_proposal_boxes, image_sizes,
            expected_objectness_logits):
        self.assertEqual(len(proposal), len(expected_proposal_box))
        self.assertEqual(proposal.image_size, im_size)
        self.assertTrue(
            torch.allclose(proposal.proposal_boxes.tensor,
                           expected_proposal_box.tensor))
        self.assertTrue(
            torch.allclose(proposal.objectness_logits,
                           expected_objectness_logit))
def test_rrpn(self):
    torch.manual_seed(121)
    cfg = get_cfg()
    cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN"
    cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"
    cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]]
    cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1]]
    cfg.MODEL.ANCHOR_GENERATOR.ANGLES = [[0, 60]]
    cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1)
    cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead"
    backbone = build_backbone(cfg)
    proposal_generator = build_proposal_generator(cfg, backbone.output_shape())
    num_images = 2
    images_tensor = torch.rand(num_images, 20, 30)
    image_sizes = [(10, 10), (20, 30)]
    images = ImageList(images_tensor, image_sizes)
    image_shape = (15, 15)
    num_channels = 1024
    features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
    gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]],
                            dtype=torch.float32)
    gt_instances = Instances(image_shape)
    gt_instances.gt_boxes = RotatedBoxes(gt_boxes)
    with EventStorage():  # capture events in a new storage to discard them
        proposals, proposal_losses = proposal_generator(
            images, features, [gt_instances[0], gt_instances[1]])

    expected_losses = {
        "loss_rpn_cls": torch.tensor(0.04291602224),
        "loss_rpn_loc": torch.tensor(0.145077362),
    }
    for name in expected_losses.keys():
        err_msg = "proposal_losses[{}] = {}, expected losses = {}".format(
            name, proposal_losses[name], expected_losses[name])
        self.assertTrue(
            torch.allclose(proposal_losses[name], expected_losses[name]),
            err_msg)

    expected_proposal_box = torch.tensor([
        [-1.77999556, 0.78155339, 68.04367828, 14.78156471, 60.59333801],
        [13.82740974, -1.50282836, 34.67269897, 29.19676590, -3.81942749],
        [8.10392570, -0.99071521, 145.39100647, 32.13126373, 3.67242432],
        [5.00000000, 4.57370186, 10.00000000, 9.14740372, 0.89196777],
    ])
    expected_objectness_logit = torch.tensor(
        [0.10924313, 0.09881870, 0.07649877, 0.05858029])

    torch.set_printoptions(precision=8, sci_mode=False)
    self.assertEqual(len(proposals), len(image_sizes))
    proposal = proposals[0]
    # It seems that there's some randomness in the result across different
    # machines: this test can be run on a local machine 100 times with exactly
    # the same result, yet a different machine might produce slightly
    # different results, thus the atol here.
    err_msg = "computed proposal boxes = {}, expected {}".format(
        proposal.proposal_boxes.tensor, expected_proposal_box)
    self.assertTrue(
        torch.allclose(proposal.proposal_boxes.tensor[:4],
                       expected_proposal_box,
                       atol=1e-5), err_msg)
    err_msg = "computed objectness logits = {}, expected {}".format(
        proposal.objectness_logits, expected_objectness_logit)
    self.assertTrue(
        torch.allclose(proposal.objectness_logits[:4],
                       expected_objectness_logit,
                       atol=1e-5), err_msg)
def process(self):
    my_logger = MyBarLogger(self.message, self.progress)
    my_logger(message="Detectron2 - Initializing the predictor")

    # Default Detectron2 settings
    cfg = get_cfg()
    # PointRend settings
    add_pointrend_config(cfg)
    # Read the PointRend parameters from this file
    cfg.merge_from_file(
        "projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_3x_coco.yaml"
    )
    # Score threshold; if it is set too low, inference becomes very slow
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    # Load the pretrained model weights
    cfg.MODEL.WEIGHTS = "datasets/model_final_3c3198.pkl"
    if not torch.cuda.is_available():
        cfg.MODEL.DEVICE = 'cpu'
    predictor = DefaultPredictor(cfg)

    # Filter out unwanted classes frame by frame, in preparation for the
    # to_mask conversion
    def custom_frame(frame):
        _frame = frame.copy()
        output = predictor(_frame)
        instances = output['instances'].to('cpu')
        data = {
            'classes': instances.pred_classes.numpy(),
            'boxes': instances.pred_boxes.tensor.numpy(),
            'masks': instances.pred_masks.numpy(),
            'scores': instances.scores.numpy()
        }
        # Keep only detections of the 'person' class
        data = process(data, target_class=[class_names.index('person')])
        result = custom_show(_frame, data['masks'])
        return result

    # Resize all target clips to the final frame height so that, in
    # concatenate's compose mode, nothing is distorted by stretching
    for i in range(len(self.targets)):
        self.targets[i] = self.targets[i].fx(vfx.resize, height=self.height)
    for i in range(len(self.backgrounds)):
        self.backgrounds[i] = self.backgrounds[i].fx(vfx.resize,
                                                     (self.width, self.height))

    # Simple concatenation
    target = concatenate_videoclips(self.targets, method="compose").without_audio()
    background = concatenate_videoclips(self.backgrounds).without_audio()

    # Compute the total duration; if there is audio, concatenate it as well
    audio = None
    duration = min(target.duration, background.duration)
    if self.audios:
        audio = concatenate_audioclips(self.audios)
        duration = min(target.duration, background.duration, audio.duration)

    # Convert the target's detections -- an ndarray of size (n, w, h) -- into
    # a mask that marks which parts of the clip are visible over the background
    mask_clip = target.fl_image(custom_frame).to_mask()

    # Apply Gaussian blur to the target and/or the background
    if self.gauss_target:
        target = target.fl_image(blur)
    if self.gauss_background:
        background = background.fl_image(blur)

    # Add a TikTok-style effect on the target
    if self.tiktok:
        target = target.fl_image(tiktok_effect)

    # Clone ("triple") effect
    if self.triple:
        temp = self.triple_effect(target, mask_clip,
                                  width=self.width, height=self.height)
        temp.insert(0, background)
    else:
        # set_mask makes the parts detected as True visible over the background
        target = target.set_mask(mask_clip).set_position("center", "center")
        temp = [background, target]

    # Composite all target clips
    final_clip = CompositeVideoClip(temp).set_duration(duration)
    if audio:
        final_clip = final_clip.set_audio(audio)

    # Export to file
    final_clip.write_videofile(
        f'./output/{time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())}.mp4',
        fps=30,
        codec='mpeg4',
        bitrate="8000k",
        audio_codec="libmp3lame",
        threads=4,
        logger=my_logger)
    self.finish_process.emit()
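# The helpers `process` (the module-level one called inside custom_frame) and
# `custom_show` are not shown in this snippet. A minimal sketch of one
# plausible implementation, assuming `process` keeps only detections of the
# target classes and `custom_show` paints the union of the remaining masks
# white so that .to_mask() treats those pixels as visible:
import numpy as np

def process(data, target_class):
    keep = np.isin(data['classes'], target_class)
    return {key: value[keep] for key, value in data.items()}

def custom_show(frame, masks):
    out = np.zeros_like(frame)
    if len(masks) > 0:
        union = np.any(masks, axis=0)  # (n, h, w) -> (h, w) boolean mask
        out[union] = 255
    return out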
def task_b(model_name, model_file, percentage, augmentation=False):
    try:
        dataloader_train_v_r = Virtual_Real_KITTI()

        def virtual_real_kitti():
            return dataloader_train_v_r.get_dicts(percentage)

        DatasetCatalog.register('VirtualReal', virtual_real_kitti)
        MetadataCatalog.get('VirtualReal').set(
            thing_classes=list(KITTI_CATEGORIES.keys()))
    except Exception:
        print("VirtualReal already defined!")
    model_name = model_name + '_inference'
    print('Running task B for model', model_name)
    SAVE_PATH = os.path.join('./results_week_6_task_b', model_name)
    os.makedirs(SAVE_PATH, exist_ok=True)

    # Load model and configuration
    print('Loading Model')
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(model_file))
    cfg.DATASETS.TRAIN = ('VirtualReal', )
    cfg.DATASETS.TEST = ('KITTIMOTS_test', )
    cfg.DATALOADER.NUM_WORKERS = 0
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    cfg.OUTPUT_DIR = SAVE_PATH
    # Load a saved model (commented out)
    '''checkpoint = '/home/grupo04/jobs_w6/results_week_6_task_b/MaskRCNN_R_50_FPN_Cityscapes_2_inference/model_final.pth'
    last_checkpoint = torch.load(checkpoint)
    new_path = checkpoint.split('.')[0] + '_modified.pth'
    last_checkpoint['iteration'] = -1
    torch.save(last_checkpoint, new_path)
    cfg.MODEL.WEIGHTS = new_path'''
    # Load a model from the Detectron2 model zoo
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_file)
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.MAX_ITER = 1000
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
    cfg.TEST.SCORE_THRESH = 0.5
    print(cfg)

    # Training
    print('Training')
    if augmentation:
        print("data augmentation")
        trainer = OurTrainer(cfg)
    else:
        print("NO data augmentation")
        trainer = DefaultTrainer(cfg)
    val_loss = ValidationLoss(cfg)
    trainer.register_hooks([val_loss])
    # Swap the last two hooks so ValidationLoss runs before the periodic writer
    trainer._hooks = trainer._hooks[:-2] + trainer._hooks[-2:][::-1]
    trainer.resume_or_load(resume=False)
    trainer.train()

    # Evaluation
    print('Evaluating')
    evaluator = COCOEvaluator('KITTIMOTS_test', cfg, False, output_dir='./output')
    trainer.model.load_state_dict(val_loss.weights)
    trainer.test(cfg, trainer.model, evaluators=[evaluator])
    print('Plotting losses')
    draw_loss(cfg, cfg.SOLVER.MAX_ITER, model_name, SAVE_PATH)

    # Qualitative results: visualize some results
    print('Getting qualitative results')
    predictor = DefaultPredictor(cfg)
    predictor.model.load_state_dict(trainer.model.state_dict())
    inputs = kitti_val()
    inputs = inputs[:20] + inputs[-20:]
    for i, input in enumerate(inputs):
        file_name = input['file_name']
        print('Prediction on image ' + file_name)
        img = cv2.imread(file_name)
        outputs = predictor(img)
        v = Visualizer(img[:, :, ::-1],
                       metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                       scale=0.8,
                       instance_mode=ColorMode.IMAGE)
        v = v.draw_instance_predictions(outputs['instances'].to('cpu'))
        cv2.imwrite(
            os.path.join(SAVE_PATH,
                         'Inference_' + model_name + '_inf_' + str(i) + '.png'),
            v.get_image()[:, :, ::-1])
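# --- Note on the trainer._hooks swap above (sketch, not Detectron2 API) ---
# DefaultTrainer appends its PeriodicWriter as the last hook, so a hook
# registered afterwards (ValidationLoss here) would run after metrics are
# written. Reversing the last two entries lets the writer see the validation
# loss. The same one-liner appears in several functions in this file; a
# generic helper expressing it:
def swap_last_two_hooks(trainer):
    # trainer._hooks is a plain Python list of HookBase objects; reversing the
    # last two entries changes only their execution order, not their behavior.
    trainer._hooks = trainer._hooks[:-2] + trainer._hooks[-2:][::-1]
    return trainer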
# visualize
print("show global path & path_point")
# note: plt.show() does not display over an SSH login
plt.imshow(image_color)
plt.show()
origin = args.origin
resolution = 0.05
# object categories to detect, e.g. tv, person, cellphone, etc.
print("objects to find : ", args.object_category)

# Setup for detecting objects:
pipe = rs.pipeline()
cfg = rs.config()  # config for pyrealsense
cfg2 = get_cfg()  # config for detectron
cfg2.merge_from_file(
    model_zoo.get_config_file(
        "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg2.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
# Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
cfg2.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
predictor = DefaultPredictor(cfg2)
# Configure the RealSense streams: frame size 848x480 at 30 fps
cfg.enable_stream(rs.stream.depth, 848, 480, rs.format.z16, 30)
cfg.enable_stream(rs.stream.color, 848, 480, rs.format.bgr8, 30)
# set the local radius for local path planning
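# --- Hedged sketch: the capture/predict loop the configs above prepare for ---
# Minimal illustration only; aligning depth to color and deprojecting pixels
# to 3D are omitted and depend on the rest of this script.
import numpy as np

pipe.start(cfg)  # start streaming with the depth/color profiles configured above
try:
    frames = pipe.wait_for_frames()
    color_frame = frames.get_color_frame()
    if color_frame:
        color_image = np.asanyarray(color_frame.get_data())  # 848x480 BGR frame
        outputs = predictor(color_image)  # Detectron2 instance segmentation
        print(outputs["instances"].pred_classes)
finally:
    pipe.stop()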
def retrain_detector(settings): """ settings: properties to be used in the retraining process Splits the COCO-formatted data located in annotation_path, then trains and evaluates a Detectron2 model from scratch. The resulting model is saved in the model_path/ folder. Returns an object mapping different AP (average precision) metrics to the model's scores. """ if len(settings) == 0: settings["trainSplit"] = 0.7 settings["learningRate"] = 0.005 settings["maxIters"] = 100 base_path = "annotation_data/" coco_path = os.path.join(base_path, "coco") output_path = os.path.join(base_path, "output") annotation_path = os.path.join(coco_path, "coco_results.json") train_path = os.path.join(coco_path, "train.json") test_path = os.path.join(coco_path, "test.json") # 1) Split coco json file into train and test using cocosplit code # Adapted from https://github.com/akarazniewicz/cocosplit/blob/master/cocosplit.py with open(annotation_path, "rt", encoding="UTF-8") as annotations_file: # Extract info from json coco = json.load(annotations_file) info = coco["info"] licenses = coco["licenses"] images = coco["images"] annotations = coco["annotations"] categories = coco["categories"] # Remove images without annotations images_with_annotations = set( map(lambda a: int(a["image_id"]), annotations)) images = list( filter(lambda i: i["id"] in images_with_annotations, images)) # Split images and annotations x_images, y_images = train_test_split( images, train_size=settings["trainSplit"]) x_ids = list(map(lambda i: int(i["id"]), x_images)) x_annots = list( filter(lambda a: int(a["image_id"]) in x_ids, annotations)) y_ids = list(map(lambda i: int(i["id"]), y_images)) y_annots = list( filter(lambda a: int(a["image_id"]) in y_ids, annotations)) # Save to file def save_coco(file, info, licenses, images, annotations, categories): with open(file, 'wt', encoding="UTF-8") as coco: json.dump( { "info": info, "licenses": licenses, "images": images, "annotations": annotations, "categories": categories }, coco, indent=2, sort_keys=True) save_coco(train_path, info, licenses, x_images, x_annots, categories) save_coco(test_path, info, licenses, y_images, y_annots, categories) # 2) Use train/test files to retrain detector dataset_name = "annotation_coco" image_dir = base_path + "rgb/" train_data = dataset_name + "_train" test_data = dataset_name + "_test" DatasetCatalog.clear() MetadataCatalog.clear() register_coco_instances(train_data, {}, train_path, image_dir) register_coco_instances(test_data, {}, test_path, image_dir) MetadataCatalog.get(train_data) coco_yaml = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml" cfg = get_cfg() cfg.merge_from_file(model_zoo.get_config_file(coco_yaml)) cfg.DATASETS.TRAIN = (train_data, ) cfg.DATASETS.TEST = () cfg.DATALOADER.NUM_WORKERS = 2 cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(categories) cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128 cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url( coco_yaml) # Let training initialize from model zoo cfg.OUTPUT_DIR = output_path cfg.SOLVER.IMS_PER_BATCH = 2 cfg.SOLVER.BASE_LR = settings["learningRate"] # Make sure LR is good cfg.SOLVER.MAX_ITER = settings[ "maxIters"] # 300 is good for small datasets # Train os.makedirs(cfg.OUTPUT_DIR, exist_ok=True) trainer = DefaultTrainer(cfg) trainer.resume_or_load(resume=False) trainer.train() # Move model to most recent model folder model_dir = os.path.join(base_path, "model") model_names = os.listdir(model_dir) # Get highest x for model/vx model_dirs = list( filter(lambda n: os.path.isdir(os.path.join(model_dir, n)), 
model_names))
    model_nums = list(map(lambda x: int(x.split("v")[1]), model_dirs))
    last_model_num = max(model_nums)
    # Add model to new folder
    model_path = os.path.join(model_dir, "v" + str(last_model_num))
    new_model_path = os.path.join(model_path, "model_999.pth")
    old_model_path = os.path.join(output_path, "model_final.pth")
    os.replace(old_model_path, new_model_path)

    # Evaluate
    evaluator = COCOEvaluator(test_data, ("bbox", "segm"), False,
                              output_dir="../../annotation_data/output/")
    val_loader = build_detection_test_loader(cfg, test_data)
    inference = inference_on_dataset(trainer.model, val_loader, evaluator)
    # inference keys: bbox, segm
    # bbox and segm keys: AP, AP50, AP75, APs, APm, APl, AP-category1, ...
    inference_json = json.loads(json.dumps(inference).replace("NaN", "null"))
    return inference_json
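# --- Hedged usage sketch for retrain_detector (added example) ---
# The settings keys mirror the defaults filled in at the top of the function.
if __name__ == "__main__":
    metrics = retrain_detector({
        "trainSplit": 0.7,      # fraction of annotated images used for training
        "learningRate": 0.005,
        "maxIters": 100,        # ~300 is suggested for small datasets
    })
    # metrics maps "bbox"/"segm" to AP, AP50, AP75, APs, APm, APl, ...
    print(metrics)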
def __init__(self): self.visualize = False self.verbose = False self.save_imgs = True self.plot_loss = True # st() # these are all map names a = np.arange(1, 30) b = np.arange(201, 231) c = np.arange(301, 331) d = np.arange(401, 431) abcd = np.hstack((a,b,c,d)) mapnames = [] for i in list(abcd): mapname = 'FloorPlan' + str(i) mapnames.append(mapname) train_len = int(0.9 * len(mapnames)) random.shuffle(mapnames) self.mapnames_train = mapnames[:train_len] self.mapnames_val = mapnames[train_len:] # self.num_episodes = len(self.mapnames) self.ignore_classes = [] # classes to save self.include_classes = [ 'ShowerDoor', 'Cabinet', 'CounterTop', 'Sink', 'Towel', 'HandTowel', 'TowelHolder', 'SoapBar', 'ToiletPaper', 'ToiletPaperHanger', 'HandTowelHolder', 'SoapBottle', 'GarbageCan', 'Candle', 'ScrubBrush', 'Plunger', 'SinkBasin', 'Cloth', 'SprayBottle', 'Toilet', 'Faucet', 'ShowerHead', 'Box', 'Bed', 'Book', 'DeskLamp', 'BasketBall', 'Pen', 'Pillow', 'Pencil', 'CellPhone', 'KeyChain', 'Painting', 'CreditCard', 'AlarmClock', 'CD', 'Laptop', 'Drawer', 'SideTable', 'Chair', 'Blinds', 'Desk', 'Curtains', 'Dresser', 'Watch', 'Television', 'WateringCan', 'Newspaper', 'FloorLamp', 'RemoteControl', 'HousePlant', 'Statue', 'Ottoman', 'ArmChair', 'Sofa', 'DogBed', 'BaseballBat', 'TennisRacket', 'VacuumCleaner', 'Mug', 'ShelvingUnit', 'Shelf', 'StoveBurner', 'Apple', 'Lettuce', 'Bottle', 'Egg', 'Microwave', 'CoffeeMachine', 'Fork', 'Fridge', 'WineBottle', 'Spatula', 'Bread', 'Tomato', 'Pan', 'Cup', 'Pot', 'SaltShaker', 'Potato', 'PepperShaker', 'ButterKnife', 'StoveKnob', 'Toaster', 'DishSponge', 'Spoon', 'Plate', 'Knife', 'DiningTable', 'Bowl', 'LaundryHamper', 'Vase', 'Stool', 'CoffeeTable', 'Poster', 'Bathtub', 'TissueBox', 'Footstool', 'BathtubBasin', 'ShowerCurtain', 'TVStand', 'Boots', 'RoomDecor', 'PaperTowelRoll', 'Ladle', 'Kettle', 'Safe', 'GarbageBag', 'TeddyBear', 'TableTopDecor', 'Dumbbell', 'Desktop', 'AluminumFoil', 'Window'] self.include_classes_final = [ 'Sink', 'Toilet', 'Bed', 'Book', 'CellPhone', 'AlarmClock', 'Laptop', 'Chair', 'Television', 'RemoteControl', 'HousePlant', 'Ottoman', 'ArmChair', 'Sofa', 'BaseballBat', 'TennisRacket', 'Mug', 'Apple', 'Bottle', 'Microwave', 'Fork', 'Fridge', 'WineBottle', 'Cup', 'ButterKnife', 'Toaster', 'Spoon', 'Knife', 'DiningTable', 'Bowl', 'Vase', 'TeddyBear', 'StoveKnob', 'StoveBurner', ] # self.include_classes = [ # 'Sink', # 'Toilet', 'Bed', 'Book', # 'CellPhone', # 'AlarmClock', 'Laptop', 'Chair', # 'Television', 'RemoteControl', 'HousePlant', # 'Ottoman', 'ArmChair', 'Sofa', 'BaseballBat', 'TennisRacket', 'Mug', # 'Apple', 'Bottle', 'Microwave', 'Fork', 'Fridge', # 'WineBottle', 'Cup', # 'ButterKnife', 'Toaster', 'Spoon', 'Knife', 'DiningTable', 'Bowl', # 'Vase', # 'TeddyBear', # ] self.action_space = {0: "MoveLeft", 1: "MoveRight", 2: "MoveAhead", 3: "MoveBack", 4: "DoNothing"} self.num_actions = len(self.action_space) cfg_det = get_cfg() cfg_det.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")) cfg_det.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.1 # set threshold for this model cfg_det.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml") cfg_det.MODEL.DEVICE='cuda' self.cfg_det = cfg_det self.maskrcnn = DefaultPredictor(cfg_det) self.conf_thresh_detect = 0.7 # for initially detecting a low confident object self.conf_thresh_init = 0.8 # for after turning head toward object threshold self.conf_thresh_end = 0.9 # if reach this then stop getting obs 
self.BATCH_SIZE = 50 #50 # frames (not episodes) - this is approximate - it could be higher # self.percentile = 70 self.max_iters = 100000 self.max_frames = 10 self.val_interval = 10 #10 #10 self.save_interval = 50 # self.BATCH_SIZE = 2 # self.percentile = 70 # self.max_iters = 100000 # self.max_frames = 2 # self.val_interval = 1 # self.save_interval = 1 self.small_classes = [] self.rot_interval = 5.0 self.radius_max = 3.5 #3 #1.75 self.radius_min = 1.0 #1.25 self.num_flat_views = 3 self.num_any_views = 7 self.num_views = 25 self.center_from_mask = False # get object centroid from maskrcnn (True) or gt (False) self.obj_per_scene = 5 mod = 'conf05' # self.homepath = f'/home/nel/gsarch/aithor/data/test2' self.homepath = '/home/sirdome/katefgroup/gsarch/ithor/data/' + mod print(self.homepath) if not os.path.exists(self.homepath): os.mkdir(self.homepath) else: val = input("Delete homepath? [y/n]: ") if val == 'y': import shutil shutil.rmtree(self.homepath) os.mkdir(self.homepath) else: print("ENDING") assert(False) self.log_freq = 1 self.log_dir = self.homepath +'/..' + '/log_cem/' + mod if not os.path.exists(self.log_dir): os.mkdir(self.log_dir) MAX_QUEUE = 10 # flushes when this amount waiting self.writer = SummaryWriter(self.log_dir, max_queue=MAX_QUEUE, flush_secs=60) self.W = 256 self.H = 256 self.fov = 90 self.utils = Utils(self.fov, self.W, self.H) self.K = self.utils.get_habitat_pix_T_camX(self.fov) self.camera_matrix = self.utils.get_camera_matrix(self.W, self.H, self.fov) self.controller = Controller( scene='FloorPlan30', # will change gridSize=0.25, width=self.W, height=self.H, fieldOfView= self.fov, renderObjectImage=True, renderDepthImage=True, ) self.init_network() self.run_episodes()
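# --- Hedged sketch: dispatching an action index through the controller ---
# Minimal illustration of how the action_space dict above can drive AI2-THOR;
# 'env' stands for an instance of this class, and the helper name is an
# assumption (the original dispatch logic lives elsewhere in this project).
def step_action(env, action_idx):
    action = env.action_space[action_idx]
    if action == "DoNothing":
        return True  # no-op action: skip the simulator step
    event = env.controller.step(action=action)  # ai2thor Controller API
    return event.metadata["lastActionSuccess"]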
def test(path_to_input, path_to_output, network):
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/" + network + ".yaml"))
    cfg.OUTPUT_DIR = "./code_workspace/output"
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_0009999.pth")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.01
    cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.2
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 4
    cfg.MODEL.MASK_ON = False
    cfg.TEST.EVAL_PERIOD = 5000
    # cfg.INPUT.MIN_SIZE_TEST = 0
    # cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.33, 0.5, 1.0, 2, 3]]
    # cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[16, 32], [48, 64], [96, 128], [192, 256], [512, 640]]
    predictor = DefaultPredictor(cfg)
    video_cap = cv2.VideoCapture(path_to_input)
    video_cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
    ret, i_frame = video_cap.read()  # read one frame to get the output dimensions
    video_cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video_out = cv2.VideoWriter(
        path_to_output, fourcc, 30.0,
        (int(i_frame.shape[1]), int(i_frame.shape[0])), True)
    total_frames = video_cap.get(cv2.CAP_PROP_FRAME_COUNT)
    i = 0
    # timing
    t0 = time.time()
    # Avoid saving between iterations: collect results in lists, then do all saving at the end
    out_list = []
    frames = []
    predict_time = 0
    t2 = time.time()
    while True:
        t3 = time.time()
        ret, frame = video_cap.read()
        if not ret:
            break
        # Console output to show progress
        progress = "\r %progress: " + str(int((i / total_frames) * 100)) + " " + \
            "fps: " + str(int(i / (t3 - t0)))
        i += 1
        sys.stdout.write(progress)
        sys.stdout.flush()
        t4 = time.time()
        outputs = predictor(frame)
        t5 = time.time()
        predict_time += t5 - t4
        out_list.append(outputs["instances"].to("cpu"))
        frames.append(frame)
    t22 = time.time()
    inference_time = t22 - t2
    print()
    print("Inference complete, creating video")
    t10 = time.time()
    for output, frame in zip(out_list, frames):
        v = Visualizer(
            frame,
            MetadataCatalog.get("traffic"),
            scale=1,
            instance_mode=ColorMode.SEGMENTATION)
        # output.remove("scores")
        v = v.draw_instance_predictions(output)
        video_out.write(v.get_image())
    t11 = time.time()
    print("Time to create video: ", t11 - t10)
    # timing
    t1 = time.time()
    print("average fps: ", total_frames / inference_time)
    print("total time: ", t1 - t0)
    print("%total predict: ", predict_time / (t1 - t0))
    print("Video produced on path: ", path_to_output)
    video_out.release()
    video_cap.release()
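# --- Hedged usage sketch for test() (added example) ---
# Paths are placeholders; the network name must match a config file under
# detectron2's COCO-Detection model zoo directory, and the "traffic" metadata
# used above is assumed to be registered elsewhere in this project.
if __name__ == "__main__":
    test("input.mp4", "annotated_output.mp4", "faster_rcnn_R_50_FPN_3x")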
def process_score_image_request():
    if request.method == "POST":
        file = request.files['file']
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
            # create config
            cfg = get_cfg()
            # below path applies to current installation location of Detectron2
            cfgFile = "DLA_mask_rcnn_X_101_32x8d_FPN_3x.yaml"
            cfg.merge_from_file(cfgFile)
            cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
            cfg.MODEL.WEIGHTS = "model_final_trimmed.pth"
            cfg.MODEL.DEVICE = "cpu"  # we use a CPU Detectron copy
            # boxes = outputs['instances'].pred_boxes.tensor.cpu().numpy()[0]
            classes = ['text', 'title', 'list', 'table', 'figure']
            default_predictor = detectron2.engine.defaults.DefaultPredictor(cfg)
            pages = convert_from_path("/uploads/" + filename, dpi=200, fmt='jpeg')
            for idx, p in enumerate(pages):
                print(idx)  # page index
                im = np.array(p)[:, :, ::-1]
                predictions = default_predictor(im)
                instances = predictions["instances"].to('cpu')
                MetadataCatalog.get(cfg.DATASETS.TEST[0]).thing_classes = [
                    'text', 'title', 'list', 'table', 'figure'
                ]
                pred_classes = instances.pred_classes
                labels = [classes[i] for i in pred_classes]
                label_count = [{i: labels.count(i)} for i in labels]
                label_count = [
                    dict(y) for y in set(tuple(x.items()) for x in label_count)
                ]
                label_count = [{
                    k: [v, []]
                } for label in label_count for k, v in label.items()]
                print(label_count)
                page_label_count = {f"page {idx}": label_count}

                def add_content(content):
                    # Append extracted content to the entry matching the current label
                    for i in label_count:
                        for k, v in i.items():
                            if k == label:
                                v[1].append(content)
                    return True

                boxes = instances.pred_boxes
                if isinstance(boxes, detectron2.structures.boxes.Boxes):
                    boxes = boxes.tensor.numpy()
                else:
                    boxes = np.asarray(boxes)
                import math
                table = []
                list_ = []
                text = []
                title = []
                # content = [table]
                for label, bbox in zip(labels, boxes):
                    # bbox is (x_min, y_min, x_max, y_max) in absolute pixels
                    x_min = math.ceil(bbox[0])
                    y_min = math.ceil(bbox[1])
                    x_max = math.ceil(bbox[2])
                    y_max = math.ceil(bbox[3])
                    crop_img = im[y_min:y_max, x_min:x_max]
                    if len(crop_img) <= 8:
                        continue
                    if label == "table":
                        print(label)
                        # add_content(img_(crop_img[:, :, -1]))
                    elif label == "list":
                        add_content(extract_from_images(crop_img))
                    elif label == "title":
                        add_content(extract_from_images(crop_img))
                    elif label != "figure":
                        add_content(extract_from_images(crop_img))
                # print(page_label_count)
                for k, v in page_label_count.items():
                    # sendToNeo4j("MERGE (d:Document)-[:Page]->(p: Page {page_num: $k})", k=k)
                    for i in v:
                        for l, m in i.items():
                            if l == 'figure':
                                sendToNeo4j(
                                    "MERGE (d:Document) MERGE(d)-[:Page]->(p: Page {page_num: $page}) MERGE(p)-[:Figure_count {figure: $m}]->(f:Figure {figure: 'figure'})",
                                    m=m[0], page=k)
                            if l == 'text':
                                sendToNeo4j(
                                    "UNWIND $text as text MERGE (d:Document) MERGE(d)-[:Page]->(p: Page {page_num: $page}) MERGE(p)-[:Paragraph_count {text: $m}]->(pa:Paragraph {text: text})",
                                    m=m[0], page=k, text=m[1])
                            if l == 'title':
                                sendToNeo4j(
                                    "UNWIND $title as title MERGE (d:Document) MERGE(d)-[:Page]->(p: Page {page_num: $page}) MERGE(p)-[:Title_count {title: $m}]->(t:Title {title: title})",
                                    m=m[0], page=k, title=m[1])
                            if l == 'table':
                                sendToNeo4j(
                                    "MERGE (d:Document) MERGE(d)-[:Page]->(p: Page {page_num: $page}) MERGE(p)-[:Table_count {table: $m}]->(ta:Table {table: $table})",
                                    m=m[0], page=k, table=m[1])
                            if l == 'form':
                                sendToNeo4j(
                                    "MERGE (d:Document) MERGE(d)-[:Page]->(p: Page {page_num: $page}) MERGE(p)-[:Form_count {form: $m}]->(fo:Form {form: $form})",
                                    m=m[0], page=k, form=m[1])
    # sendToNeo4j('MERGE(p:Page{page:$page_label_count.keys()[0]', keys=page_label_count.keys()[0])
    return render_template('index.html')
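# --- Hedged sketch: a minimal sendToNeo4j (assumption) ---
# sendToNeo4j is defined elsewhere in this app; this stand-in shows one way it
# could execute the parameterized Cypher above with the official neo4j driver.
# The connection URI and credentials are placeholders.
from neo4j import GraphDatabase

_driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))

def sendToNeo4j(query, **params):
    with _driver.session() as session:
        session.run(query, **params)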
def task_b_MOTS_and_KITTI_training(model_name, model_file):
    # model_name = model_name + '_inference'
    print('Running task B for model', model_name)
    SAVE_PATH = os.path.join('./results_week_5_task_c', model_name)
    os.makedirs(SAVE_PATH, exist_ok=True)

    # Load model and configuration
    print('Loading Model')
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(model_file))
    cfg.DATASETS.TRAIN = ('MOTS_KITTI_train', )
    cfg.DATASETS.TEST = ('KITTIMOTS_val', )
    cfg.DATALOADER.NUM_WORKERS = 0
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    cfg.OUTPUT_DIR = SAVE_PATH
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_file)
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.LR_SCHEDULER_NAME = "WarmupCosineLR"
    # Hyperparameter experiments (commented out):
    # cfg.SOLVER.LR_POLICY = 'steps_with_decay'
    # cfg.SOLVER.STEPS = [0, 1000, 2000]
    # cfg.SOLVER.GAMMA = 0.1
    # cfg.DATASETS.TRAIN.USE_FLIPPED = True  # this one does not work
    # cfg.MODEL.RPN.IOU_THRESHOLDS = [0.1, 0.9]  # defaults: 0.3 and 0.7
    # cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]]  # default: [[32, 64, 128, 256, 512]]
    # cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.5, 1.0, 2.0]]
    # End of hyperparameter experiments
    cfg.SOLVER.MAX_ITER = 1000
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
    cfg.TEST.SCORE_THRESH = 0.5
    print(cfg)

    # Training
    print('Training')
    trainer = DefaultTrainer(cfg)
    val_loss = ValidationLoss(cfg)
    trainer.register_hooks([val_loss])
    # Swap the last two hooks so ValidationLoss runs before the periodic writer
    trainer._hooks = trainer._hooks[:-2] + trainer._hooks[-2:][::-1]
    trainer.resume_or_load(resume=False)
    trainer.train()

    # Evaluation
    print('Evaluating')
    evaluator = COCOEvaluator('KITTIMOTS_val', cfg, False, output_dir=SAVE_PATH)
    trainer.model.load_state_dict(val_loss.weights)
    trainer.test(cfg, trainer.model, evaluators=[evaluator])
    print('Plotting losses')
    plot_validation_loss(cfg, cfg.SOLVER.MAX_ITER, model_name, SAVE_PATH)

    # Qualitative results: visualize some results
    print('Getting qualitative results')
    predictor = DefaultPredictor(cfg)
    predictor.model.load_state_dict(trainer.model.state_dict())
    inputs = kitti_val()
    # inputs = inputs[:20] + inputs[-20:]
    inputs = inputs[220:233] + inputs[1995:2100]
    for i, input in enumerate(inputs):
        file_name = input['file_name']
        print('Prediction on image ' + file_name)
        img = cv2.imread(file_name)
        outputs = predictor(img)
        v = Visualizer(img[:, :, ::-1],
                       metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                       scale=0.8,
                       instance_mode=ColorMode.IMAGE)
        v = v.draw_instance_predictions(outputs['instances'].to('cpu'))
        cv2.imwrite(
            os.path.join(SAVE_PATH,
                         'Inference_' + model_name + '_inf_' + str(i) + '.png'),
            v.get_image()[:, :, ::-1])
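# --- Hedged sketch: the step-decay alternative from the comments above ---
# In current Detectron2 the commented-out 'steps_with_decay' policy is spelled
# as the WarmupMultiStepLR scheduler; the step/gamma values are taken from the
# commented-out experiment (a step at iteration 0 is dropped as a no-op).
def use_step_decay(cfg):
    cfg.SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR"
    cfg.SOLVER.STEPS = (1000, 2000)  # iterations at which LR is multiplied by GAMMA
    cfg.SOLVER.GAMMA = 0.1
    return cfg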
def initialize(self, opt): BaseModel.initialize(self, opt) self.long_term = [0, 1, 10, 20, 40] self.alpha1 = opt.alpha1 self.alpha2 = opt.alpha2 self.alpha = opt.alpha # load/define networks self.netG_A_encoder = networks.define_G_encoder( opt.input_nc, opt.output_nc, opt.ngf, opt, opt.norm, not opt.no_dropout, opt.init_type, self.gpu_ids, opt.saliency, opt.multisa) self.netG_A_decoder = networks.define_G_decoder( opt.input_nc, opt.output_nc, opt.ngf, opt, opt.norm, not opt.no_dropout, opt.init_type, self.gpu_ids, opt.multisa) self.netM = networks.define_convs(self.netG_A_encoder.channel_size() * 2, 1, opt.M_layers, opt.M_size, gpu_ids=self.gpu_ids) self.netM2 = networks.define_convs(self.netG_A_encoder.channel_size() * 2, 1, opt.M_layers, opt.M_size, gpu_ids=self.gpu_ids) # ~~~~~~ if opt.saliency: cfg = get_cfg() point_rend.add_pointrend_config(cfg) cfg.merge_from_file( "/home/linchpin/Documents/ink_stylize/ChipGAN_release/models/detectron2_repo/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_3x_coco.yaml" ) cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5 cfg.MODEL.WEIGHTS = "/home/linchpin/Documents/ink_stylize/ChipGAN_release/pretrained_models/model_final_3c3198.pkl" self.NetIS = build_model(cfg) checkpointer = DetectionCheckpointer(self.NetIS) checkpointer.load(cfg.MODEL.WEIGHTS) self.NetIS.eval() if len(self.opt.gpu_ids) == 0: self.NetIS.cpu() for param in self.NetIS.parameters(): param.requires_grad = False self.pwc_model = PWCnet.PWCNet().eval() if len(self.opt.gpu_ids) != 0: self.pwc_model.cuda() model_path = './pretrained_models/network-default.pytorch' self.pwc_model.load_state_dict(torch.load(model_path)) for param in self.pwc_model.parameters(): param.requires_grad = False # ~~~~~~ kw = 3 g_kernel = self.gauss_kernel(kw, 3, 1).transpose((3, 2, 1, 0)) self.gauss_conv_kw = nn.Conv2d(1, 1, kernel_size=kw, stride=1, padding=(kw - 1) // 2, bias=False) self.gauss_conv_kw.weight.data.copy_(torch.from_numpy(g_kernel)) self.gauss_conv_kw.weight.requires_grad = False if len(self.opt.gpu_ids) != 0: self.gauss_conv_kw.cuda() which_epoch = opt.which_epoch self.load_network(self.netG_A_encoder, 'G_A_encoder', which_epoch) self.load_network(self.netG_A_decoder, 'G_A_decoder', which_epoch) self.load_network(self.netM, "M", which_epoch) self.load_network(self.netM2, "M2", which_epoch) # self.netG_A_decoder.eval() # self.netG_A_encoder.eval() # self.netM.eval() # self.netM2.eval() self.pwc_model.eval() print('---------- Networks initialized -------------') networks.print_network(self.netG_A_encoder) networks.print_network(self.netG_A_decoder) networks.print_network(self.netM) print('-----------------------------------------------')
type=str, default='0', help='which configuration of cross validation to use') return parser.parse_args(args) if __name__ == "__main__": args = parse_args() model = 'COCO-Detection/' + args.model + '.yaml' print('[INFO] Using model: ', model) ###-------TRAIN----------------------------- cfg = get_cfg() cfg.merge_from_file(model_zoo.get_config_file(model)) cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5 # set threshold for this model cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model) cfg.OUTPUT_DIR = '/home/group02/week3/results/' + args.model + '/lr_' + str( args.lr).replace('.', '_') + '_iter_' + str( args.iter) + '_batch_' + str( args.batch) + '/' + args.set_config + '/' os.makedirs(cfg.OUTPUT_DIR, exist_ok=True) thing_classes = ['Car', 'Pedestrian'] map_classes = {1: 0, 2: 1} dataset = 'KITTI-MOTS'
def doit(self): data_path = 'data/genome/' vg_classes = [] with open(os.path.join(data_path, 'objects_vocab.txt')) as f: for object in f.readlines(): vg_classes.append(object.split(',')[0].lower().strip()) vg_attrs = [] with open(os.path.join(data_path, 'attributes_vocab.txt')) as f: for object in f.readlines(): vg_attrs.append(object.split(',')[0].lower().strip()) MetadataCatalog.get("vg").thing_classes = vg_classes MetadataCatalog.get("vg").attr_classes = vg_attrs cfg = get_cfg() cfg.merge_from_file( "./configs/VG-Detection/faster_rcnn_R_101_C4_attr_caffemaxpool.yaml" ) cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 300 cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.6 cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.2 # VG Weight cfg.MODEL.WEIGHTS = "http://nlp.cs.unc.edu/models/faster_rcnn_from_caffe_attr_original.pkl" predictor = DefaultPredictor(cfg) with torch.no_grad(): raw_height, raw_width = self.raw_image.shape[:2] print("Original image size: ", (raw_height, raw_width)) # Preprocessing image = predictor.transform_gen.get_transform( self.raw_image).apply_image(self.raw_image) print("Transformed image size: ", image.shape[:2]) image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)) inputs = [{ "image": image, "height": raw_height, "width": raw_width }] images = predictor.model.preprocess_image(inputs) # Run Backbone Res1-Res4 features = predictor.model.backbone(images.tensor) # Generate proposals with RPN proposals, _ = predictor.model.proposal_generator( images, features, None) proposal = proposals[0] print('Proposal Boxes size:', proposal.proposal_boxes.tensor.shape) # Run RoI head for each proposal (RoI Pooling + Res5) proposal_boxes = [x.proposal_boxes for x in proposals] features = [ features[f] for f in predictor.model.roi_heads.in_features ] box_features = predictor.model.roi_heads._shared_roi_transform( features, proposal_boxes) feature_pooled = box_features.mean(dim=[2, 3]) # pooled to 1x1 print('Pooled features size:', feature_pooled.shape) # Predict classes and boxes for each proposal. pred_class_logits, pred_attr_logits, pred_proposal_deltas = predictor.model.roi_heads.box_predictor( feature_pooled) outputs = FastRCNNOutputs( predictor.model.roi_heads.box2box_transform, pred_class_logits, pred_proposal_deltas, proposals, predictor.model.roi_heads.smooth_l1_beta, ) probs = outputs.predict_probs()[0] boxes = outputs.predict_boxes()[0] attr_prob = pred_attr_logits[..., :-1].softmax(-1) max_attr_prob, max_attr_label = attr_prob.max(-1) # Note: BUTD uses raw RoI predictions, # we use the predicted boxes instead. # boxes = proposal_boxes[0].tensor # NMS for nms_thresh in np.arange(0.5, 1.0, 0.1): instances, ids = fast_rcnn_inference_single_image( boxes, probs, image.shape[1:], score_thresh=0.2, nms_thresh=nms_thresh, topk_per_image=self.NUM_OBJECTS) if len(ids) >= self.NUM_OBJECTS: break instances = detector_postprocess(instances, raw_height, raw_width) roi_features = feature_pooled[ids].detach() max_attr_prob = max_attr_prob[ids].detach() max_attr_label = max_attr_label[ids].detach() instances.attr_scores = max_attr_prob instances.attr_classes = max_attr_label print(instances) print((roi_features).size()) return roi_features
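# --- Hedged usage sketch: consuming the features doit() returns ---
# roi_features holds the pooled Res5 features for the kept proposals (2048-d
# per box for this R101-C4 backbone); a typical BUTD-style pipeline simply
# serializes them for a downstream captioning/VQA model. 'extractor' is a
# placeholder for whatever object defines doit().
import torch

feats = extractor.doit()
torch.save(feats.cpu(), "roi_features.pt")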