Example #1
def train_task(model_name, model_file):
    path = os.path.join(SAVE_PATH, 'train_task', model_name)
    if not os.path.exists(path):
        os.makedirs(path)
    # Load Data
    print('Loading Data.')
    dataloader = KITTI_Dataloader()
    def kitti_train(): return dataloader.get_dicts(train_flag=True)
    def kitti_test(): return dataloader.get_dicts(train_flag=False)
    DatasetCatalog.register("KITTI_train", kitti_train)
    MetadataCatalog.get("KITTI_train").set(thing_classes=[k for k,_ in CATEGORIES.items()])
    DatasetCatalog.register("KITTI_test", kitti_test)
    MetadataCatalog.get("KITTI_test").set(thing_classes=[k for k,_ in CATEGORIES.items()])

    # Load MODEL and configure train hyperparameters
    print('Loading Model.')
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(model_file))
    cfg.DATASETS.TRAIN = ('KITTI_train',)
    cfg.DATASETS.TEST = ('KITTI_test',)
    cfg.DATALOADER.NUM_WORKERS = 0
    cfg.OUTPUT_DIR = SAVE_PATH
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_file)
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.MAX_ITER = NUM_IMGS // cfg.SOLVER.IMS_PER_BATCH + 1 
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 9

    # TRAIN!!
    print('Training.......')
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    trainer = DefaultTrainer(cfg) 
    trainer.resume_or_load(resume=False)
    trainer.train()
    print('Training Done.')

    # EVAL
    print('Evaluating......')
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, 'model_final.pth')
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    predictor = DefaultPredictor(cfg)
    dataset_dicts = kitti_test()
    for i,d in enumerate(random.sample(dataset_dicts, 5)):    
        im = cv2.imread(d['file_name'])
        outputs = predictor(im)
        v = Visualizer(im[:, :, ::-1],
                   metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                   scale=0.8, 
                   instance_mode=ColorMode.IMAGE)
        v = v.draw_instance_predictions(outputs['instances'].to('cpu'))
        cv2.imwrite(os.path.join(path, 'Evaluation_' + model_name + '_trained_' + str(i) + '.png'), v.get_image()[:, :, ::-1])
    print('COCO EVALUATOR....')
    evaluator = COCOEvaluator('KITTI_test', cfg, False, output_dir="./output/")
    trainer.test(cfg, trainer.model, evaluators=[evaluator])

    # Loading training and test examples
    inference_dataloader = Inference_Dataloader(MIT_DATA_DIR)
    inference_dataset = inference_dataloader.load_data()

    # Qualitative results: visualize some prediction results on MIT_split dataset
    for i, img_path in enumerate([i for i in inference_dataset['test'] if 'inside_city' in i][:20]):
        img = cv2.imread(img_path)
        outputs = predictor(img)
        v = Visualizer(
            img[:, :, ::-1],
            metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
            scale=0.8,
            instance_mode=ColorMode.IMAGE)
        v = v.draw_instance_predictions(outputs['instances'].to('cpu'))
        cv2.imwrite(os.path.join(path, 'Inference_' + model_name + '_trained_' + str(i) + '.png'), v.get_image()[:, :, ::-1])
    
    """
    val_loader = build_detection_test_loader(cfg, 'KITTI_test')
    inference_on_dataset(trainer.model, val_loader, evaluator)
    """
    print('DONE!!')
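# Hypothetical invocation of the training task above (the config path mirrors the
# model-zoo paths used in the other examples; SAVE_PATH, NUM_IMGS, CATEGORIES and the
# dataloader classes are assumed to be defined at module level):
if __name__ == '__main__':
    train_task('faster_rcnn_R_50_FPN_3x',
               'COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml')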
Example #2
if __name__ == "__main__":
    args = parser.parse_args()

    output_dir = args.output_dir

    d = args.path_to_pkl
    dataset_name = "mtsd"
    DatasetCatalog.register(dataset_name, lambda d=d: load_obj(d))
    MetadataCatalog.get(dataset_name).set(thing_classes=CATEGORIES)
    Meta_data = MetadataCatalog.get(dataset_name)
    #print(Meta_data)
    dataset_dicts = load_obj(d)

    cfg = get_cfg()
    cfg.merge_from_file(
        model_zoo.get_config_file(
            "COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"))
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.9  # set threshold for this model
    cfg.MODEL.WEIGHTS = args.path_to_model
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(CATEGORIES)
    predictor = DefaultPredictor(cfg)
    cfg.OUTPUT_DIR = output_dir

    #path_to_argo_dir = os.path.join('argoverse-tacking', 'train1')

    path_to_argo_dir = 'ring_front_center'

    validate_on_argoverse_single_seq(cfg, Meta_data, path_to_argo_dir)

    #validate_on_argoverse_multiple_seqs(cfg, Meta_data, path_to_argo_dir)
Example #3
    # Visualizing datasets
    # train_dicts = get_train_dicts()
    # for d in random.sample(train_dicts, 30):
    #     print(d)
    #     img = cv2.imread(d["file_name"])
    #     visualizer = Visualizer(img[:,:,::-1], metadata=openimages_train_metadata, scale=0.5)
    #     vis = visualizer.draw_dataset_dict(d)
    #     cv2.imshow("image", vis.get_image()[:,:,::-1])
    #     cv2.waitKey()


    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file('COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml'))
    cfg.DATASETS.TRAIN = ("openimages_train",)
    cfg.DATASETS.TEST = ()
    cfg.MODEL.WEIGHTS = 'output/model_0054999_wo_solver_states.pth'
    # alternative checkpoint: 'projects/CenterMask2/configs/model_0199999.pth'
    cfg.DATALOADER.NUM_WORKERS = 2
    # alternative: cfg.DATALOADER.NUM_WORKERS = 0
    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.SOLVER.BASE_LR = 0.0007
    cfg.SOLVER.GAMMA = 0.2
    cfg.SOLVER.STEPS = (40000,)
    cfg.SOLVER.MAX_ITER = 100000
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 8
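    # Hypothetical continuation (a sketch following the DefaultTrainer pattern used in
    # the other examples; it assumes `os` and `DefaultTrainer` are imported and keeps
    # detectron2's default OUTPUT_DIR of "./output"):
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    trainer = DefaultTrainer(cfg)
    trainer.resume_or_load(resume=False)  # start from the weights set in cfg.MODEL.WEIGHTS
    trainer.train()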
Example #4
if len(sys.argv) < 2:
    print("usage:python detectron.py [input_Image]")
    sys.exit(0)

im = cv2.imread(sys.argv[1])

if im is None:
    print("file open fail")
    sys.exit(0)

cfg = get_cfg()

# add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
# start path in configs [dir]
fileName = "COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml"
model = model_zoo.get_config_file(fileName)

cfg.merge_from_file(model)
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
cfg.merge_from_list(['MODEL.DEVICE', 'cpu'])

# Find a model from detectron2's model zoo. You can either use the https://dl.fbaipublicfiles.... url, or use the detectron2:// shorthand
cfg.MODEL.WEIGHTS = "detectron2://" + model_zoo.get_weight_suffix(fileName)
predictor = DefaultPredictor(cfg)
outputs = predictor(im)

# We can use `Visualizer` to draw the predictions on the image.
v = Visualizer(im[:, :, ::-1],
               MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
               scale=1.0)
v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
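# The drawn predictions can then be written out with OpenCV (the output filename here
# is illustrative, not part of the original script):
cv2.imwrite("keypoint_result.png", v.get_image()[:, :, ::-1])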
Example #5
# %%
# Create our Detectron2 model
# ----------------------------
#
# Next, we create a detectron2 config and a detectron2 `DefaultPredictor` to
# run predictions on the new images.
#
# - We use a pre-trained Faster R-CNN with a ResNet-50 backbone
# - We use an MS COCO pre-trained model from detectron2

cfg = get_cfg()
# add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
###cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.merge_from_file(
    model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
# Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
###cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
predictor = DefaultPredictor(cfg)


# %%
# We use this little helper method to overlay the model predictions on a
# given image.
def predict_and_overlay(model, filename):
    # helper method to run the model on an image and overlay the predictions
    im = cv2.imread(filename)
    out = model(im)
    # derive the channel count from the image itself rather than relying on an
    # undefined module-level variable
    channels = im.shape[2] if im.ndim == 3 else 1
    if channels == 3:
        im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
    else:
        im = cv2.cvtColor(im, cv2.COLOR_RGBA2BGR)
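# %%
# A fuller sketch of the overlay step described above (an assumption: it reuses the
# Visualizer/MetadataCatalog pattern from the other examples and the module-level
# `cfg`; the output path is purely illustrative):
def predict_and_overlay_sketch(model, filename, output_path="overlay.png"):
    im = cv2.imread(filename)
    out = model(im)
    v = Visualizer(im[:, :, ::-1],
                   metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                   scale=1.0)
    v = v.draw_instance_predictions(out["instances"].to("cpu"))
    cv2.imwrite(output_path, v.get_image()[:, :, ::-1])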

im = cv2.imread(opt.file, cv2.IMREAD_COLOR)
height, width, channels = im.shape

print('image W:%d H:%d' % (width, height))

network_model = 'COCO-InstanceSegmentation/' + opt.model + '.yaml'

cfg = get_cfg()
# add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
cfg.merge_from_file(model_zoo.get_config_file(network_model))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
# Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(network_model)
predictor = DefaultPredictor(cfg)

fps_time = time.perf_counter()
outputs = predictor(im)
fps = 1.0 / (time.perf_counter() - fps_time)
print('===== pred_boxes =====')
print(outputs["instances"].pred_boxes)

print('===== scores =====')
print(outputs["instances"].scores)

print('===== pred_classes =====')
print(outputs["instances"].pred_classes)
Example #7
from detectron2.utils.visualizer import Visualizer
from detectron2.checkpoint import DetectionCheckpointer
import cv2
import random
import torch

from kitti_mots_dataset import get_kiti_mots_dicts, register_kitti_mots_dataset

# Task predict from pretrained model (uses COCO classes)
if __name__ == '__main__':

    # cfg_file = "COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"
    cfg_file = "COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml"

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(cfg_file))
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(cfg_file)
    model = build_model(cfg)
    DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)

    register_kitti_mots_dataset("datasets/KITTI-MOTS/training/image_02",
                                "datasets/KITTI-MOTS/instances_txt",
                                ("kitti_mots_train", "kitti_mots_test"),
                                image_extension="png")

    cfg.DATASETS.TRAIN = ("kitti_mots_train", )
    cfg.DATASETS.TEST = ("kitti_mots_test", )

    evaluator = COCOEvaluator("kitti_mots_test",
                              cfg,
                              False,
                              output_dir=cfg.OUTPUT_DIR)

DatasetCatalog.register(
    "pedestrain_train",
    lambda: get_pedestrain_dict(train_people_imagelist, train_people_jsonlist))
MetadataCatalog.get("pedestrain_train").set(thing_classes=["person"])
DatasetCatalog.register(
    "pedestrain_test",
    lambda: get_pedestrain_dict(test_people_imagelist, test_people_jsonlist))
MetadataCatalog.get("pedestrain_test").set(thing_classes=["person"])

pedestrain_metadata = MetadataCatalog.get("pedestrain_train")

# Training

from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("Base-RCNN-FPN.yaml"))
cfg.DATASETS.TRAIN = ("pedestrain_train", )
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 3
#cfg.MODEL.WEIGHTS = "detectron2://COCO-Detection/faster_rcnn_R_50_FPN_3x/137849458/model_final_280758.pkl"  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
cfg.SOLVER.MAX_ITER = num_of_iter  # 300 iterations seems good enough for this toy dataset; you may need to train longer for a practical dataset
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512  # faster, and good enough for this toy dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
cfg.OUTPUT_DIR = out_path

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=True)
trainer.train()
models = ["Cityscapes/mask_rcnn_R_50_FPN.yaml"]

for to_evaluate in models:
    print('* ' * 30, to_evaluate, '* ' * 30)

    # Train
    lr = 0.0025
    EXPERIMENT_NAME = f"{to_evaluate[:-5]}_trained"
    OUTPUT_DIR = f"/home/group00/working/week4/model_evaluation/{EXPERIMENT_NAME}"

    print('Loading pre-trained models...')
    cfg = get_cfg()

    #Select model
    # cfg.merge_from_file(model_zoo.get_config_file(f"COCO-InstanceSegmentation/{to_evaluate}"))
    cfg.merge_from_file(model_zoo.get_config_file(f"{to_evaluate}"))
    cfg.MODEL.RETINANET.SCORE_THRESH_TEST = 0.5  # set threshold for this model
    # cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(f"COCO-InstanceSegmentation/{to_evaluate}")
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(f"{to_evaluate}")

    #configure parameters
    cfg.INPUT.MASK_FORMAT = 'bitmask'
    cfg.OUTPUT_DIR = OUTPUT_DIR
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    cfg.DATASETS.TRAIN = ("train_kitti-mots", )
    cfg.DATASETS.TEST = ("val_kitti-mots", )
    cfg.DATALOADER.NUM_WORKERS = 1
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.SOLVER.BASE_LR = lr
    cfg.SOLVER.MAX_ITER = 1500
Example #10
def task_b_MOTS_and_KITTI_training(model_name, model_file):
    # model_name = model_name + '_inference'
    print('Running task B for model', model_name)

    SAVE_PATH = os.path.join('./results_week_5_task_c', model_name)
    os.makedirs(SAVE_PATH, exist_ok=True)

    # Load model and configuration
    print('Loading Model')
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(model_file))
    cfg.DATASETS.TRAIN = ('MOTS_KITTI_train', )
    cfg.DATASETS.TEST = ('KITTIMOTS_val', )
    cfg.DATALOADER.NUM_WORKERS = 0
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    cfg.OUTPUT_DIR = SAVE_PATH
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_file)
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.LR_SCHEDULER_NAME = "WarmupCosineLR"
    #hyperparameters
    #cfg.SOLVER.LR_POLICY = 'steps_with_decay'
    #cfg.SOLVER.STEPS = [0, 1000, 2000]
    #cfg.SOLVER.GAMMA = 0.1
    #cfg.DATASETS.TRAIN.USE_FLIPPED = True  # this one doesn't work
    #cfg.MODEL.RPN.IOU_THRESHOLDS = [0.1, 0.9]  # defaults are 0.3 and 0.7
    #cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]]#default: [[32, 64, 128, 256, 512]]
    #cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.5, 1.0, 2.0]]
    #End of hyperparameters playing
    cfg.SOLVER.MAX_ITER = 1000
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
    cfg.TEST.SCORE_THRESH = 0.5
    print(cfg)
    # Training
    print('Training')
    trainer = DefaultTrainer(cfg)
    val_loss = ValidationLoss(cfg)
    trainer.register_hooks([val_loss])
    trainer._hooks = trainer._hooks[:-2] + trainer._hooks[-2:][::-1]
    trainer.resume_or_load(resume=False)
    trainer.train()

    # Evaluation
    print('Evaluating')
    evaluator = COCOEvaluator('KITTIMOTS_val',
                              cfg,
                              False,
                              output_dir=SAVE_PATH)
    trainer.model.load_state_dict(val_loss.weights)
    trainer.test(cfg, trainer.model, evaluators=[evaluator])
    print('Plotting losses')
    plot_validation_loss(cfg, cfg.SOLVER.MAX_ITER, model_name, SAVE_PATH)

    # Qualitative results: visualize some results
    print('Getting qualitative results')
    predictor = DefaultPredictor(cfg)
    predictor.model.load_state_dict(trainer.model.state_dict())
    inputs = kitti_val()
    # inputs = inputs[:20] + inputs[-20:]
    inputs = inputs[220:233] + inputs[1995:2100]
    for i, input in enumerate(inputs):
        file_name = input['file_name']
        print('Prediction on image ' + file_name)
        img = cv2.imread(file_name)
        outputs = predictor(img)
        v = Visualizer(img[:, :, ::-1],
                       metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                       scale=0.8,
                       instance_mode=ColorMode.IMAGE)
        v = v.draw_instance_predictions(outputs['instances'].to('cpu'))
        cv2.imwrite(
            os.path.join(SAVE_PATH, 'Inference_' + model_name + '_inf_' +
                         str(i) + '.png'),
            v.get_image()[:, :, ::-1])
Example #11
def task_a_KITTI_training(model_name, model_file):
    #model_name = model_name + '_inference'
    print('Running task A for model', model_name)

    SAVE_PATH = os.path.join('./results_week_5_task_a', model_name)
    os.makedirs(SAVE_PATH, exist_ok=True)

    # Load model and configuration
    print('Loading Model')
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(model_file))
    cfg.DATASETS.TRAIN = ('KITTIMOTS_train', )
    cfg.DATASETS.TEST = ('MOTS_train', )
    cfg.DATALOADER.NUM_WORKERS = 0
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    cfg.OUTPUT_DIR = SAVE_PATH
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_file)
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.MAX_ITER = 1000
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
    cfg.TEST.SCORE_THRESH = 0.5

    # Training
    print('Training')
    trainer = DefaultTrainer(cfg)
    val_loss = ValidationLoss(cfg)
    trainer.register_hooks([val_loss])
    trainer._hooks = trainer._hooks[:-2] + trainer._hooks[-2:][::-1]
    trainer.resume_or_load(resume=False)
    trainer.train()

    # Evaluation
    print('Evaluating')
    evaluator = COCOEvaluator('MOTS_train', cfg, False, output_dir=SAVE_PATH)
    trainer.model.load_state_dict(val_loss.weights)
    trainer.test(cfg, trainer.model, evaluators=[evaluator])
    print('Plotting losses')
    plot_validation_loss(cfg, cfg.SOLVER.MAX_ITER, model_name, SAVE_PATH)

    # Qualitative results: visualize some results
    print('Getting qualitative results')
    predictor = DefaultPredictor(cfg)
    predictor.model.load_state_dict(trainer.model.state_dict())
    inputs = mots_train()
    inputs = inputs[:20] + inputs[-20:]
    for i, input in enumerate(inputs):
        file_name = input['file_name']
        print('Prediction on image ' + file_name)
        img = cv2.imread(file_name)
        outputs = predictor(img)
        v = Visualizer(img[:, :, ::-1],
                       metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                       scale=0.8,
                       instance_mode=ColorMode.IMAGE)
        v = v.draw_instance_predictions(outputs['instances'].to('cpu'))
        cv2.imwrite(
            os.path.join(SAVE_PATH, 'Inference_' + model_name + '_inf_' +
                         str(i) + '.png'),
            v.get_image()[:, :, ::-1])
import os, json, cv2, random
#from google.colab.patches import cv2_imshow
def cv2_imshow(img):
    # local stand-in for Colab's cv2_imshow (cv2.imshow needs an explicit window name)
    cv2.imshow("image", img)
    cv2.waitKey(0)
# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

cfg = get_cfg()
# add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
#cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.merge_from_file(
    model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_101_C4_3x.yaml"))

cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
# Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
#cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-Detection/faster_rcnn_R_101_C4_3x.yaml")
predictor = DefaultPredictor(cfg)

#v = Visualizer(im[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
#out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
#im2 = out.get_image()[:, :, ::-1]
#b,g,r = cv2.split(im2)
#image_rgb2 = cv2.merge([r,g,b])
#plt.figure()
#plt.imshow(image_rgb2)
register_coco_instances(
    "marker",
    {},
    json_file=json_path,
    image_root=img_path,
)

MetadataCatalog.get("marker").thing_classes = ['bolt-roi']

box_metadata = MetadataCatalog.get("marker")
model_config_path = "COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml"
#model_config_path = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
###############################################
cfg = get_cfg()

cfg.merge_from_file(get_config_file(model_config_path))

cfg.DATALOADER.NUM_WORKERS = 2

cfg.MODEL.WEIGHTS = get_checkpoint_url(model_config_path)  # Let training initialize from model zoo

cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # one class (markedbolt=1)

cfg.INPUT.MIN_SIZE_TEST = 1024
cfg.INPUT.MAX_SIZE_TEST = 1024
#cfg.INPUT.MIN_SIZE_TEST = 0 #Size of the smallest side of the image during testing. Set to zero to disable resize in testing

#cfg.INPUT.MAX_SIZE_TEST = 1333  # Maximum size of the side of the image during testing by deafult 1333
# Size of the smallest side of the image during testing. Set to zero to disable resize in testing.
#cfg.INPUT.MIN_SIZE_TEST = 800
# Maximum size of the side of the image during testing
Example #14
def test(path_to_input, path_to_output, network):
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/"+network+".yaml"))
    cfg.OUTPUT_DIR = "./code_workspace/output"
    cfg.MODEL.WEIGHTS = os.path.join(
            cfg.OUTPUT_DIR, 
            "model_0009999.pth"
            )
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.01
    cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST   = 0.2
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 4
    cfg.MODEL.MASK_ON = False
    cfg.TEST.EVAL_PERIOD = 5000

    #cfg.INPUT.MIN_SIZE_TEST = 0
    # cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.33, 0.5, 1.0, 2, 3]]
    # cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[16, 32], [48, 64], [96, 128], [192, 256], [512, 640]]

    predictor = DefaultPredictor(cfg)


    video_cap = cv2.VideoCapture(path_to_input)
    video_cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
    ret, i_frame = video_cap.read()
    video_cap.set(cv2.CAP_PROP_POS_FRAMES, 0)


    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video_out = cv2.VideoWriter(
            path_to_output, 
            fourcc, 
            30.0, 
            (int(i_frame.shape[1]), int(i_frame.shape[0])),
            True
            )
    total_frames = video_cap.get(cv2.CAP_PROP_FRAME_COUNT)
    
    i=0
    
    #timing
    t0 = time.time()

    #try to not do saves in between iterations, instead save all in list, then do all saving at the end
    out_list = []
    frames = []
    predict_time = 0

    t2 = time.time()
    while True:
        t3 = time.time()
        ret, frame = video_cap.read()
        if (not ret):
            break
        # Produce some nice console output to show progress
        progress = "\r progress: " + str(int((i/total_frames)*100)) + "%    " + "fps: " + str(int(i/(t3-t0)))
        i+=1
        sys.stdout.write(progress)
        sys.stdout.flush()
        
        t4 = time.time()
        outputs = predictor(frame)
        t5 = time.time()
        predict_time += t5-t4
        out_list.append(outputs["instances"].to("cpu"))
        frames.append(frame)
    t22 = time.time()
    inference_time = t22-t2
    print()
    print("Inference complete, creating video") 
    t10 = time.time()
    for output, frame in zip(out_list, frames):
        v = Visualizer(
                frame,
                MetadataCatalog.get("traffic"),
                scale=1,
                instance_mode=ColorMode.SEGMENTATION)
        #output.remove("scores")
        v = v.draw_instance_predictions(output)
        video_out.write(v.get_image())
    t11 = time.time()
    print("Time to create video: ", t11-t10)

    #timing
    t1 = time.time()
    print("average fps: ", total_frames/inference_time)
    print("total time: ", t1-t0)
    print("%total predict: ", predict_time/(t1-t0))
    print("Video produced on path: ", path_to_output)
    video_out.release()
    video_cap.release()
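# Hypothetical invocation (a sketch: it assumes the "traffic" dataset metadata has been
# registered and that the checkpoint referenced inside test() exists; the paths and the
# network name are illustrative):
if __name__ == "__main__":
    test("input_video.mp4", "annotated_output.mp4", "faster_rcnn_R_50_FPN_3x")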
def _config_training(args: argparse.Namespace) -> CfgNode:
    r"""Create a configuration node from the script arguments.

    In this application we consider object detection use case only. We finetune object detection
    networks trained on COCO dataset to a custom use case

    Parameters
    ----------
    args : argparse.Namespace
        training script arguments, see :py:meth:`_parse_args()`

    Returns
    -------
    CfgNode
        configuration that is used by Detectron2 to train a model

    Raises
    ------
    RuntimeError
        If the combination of `model_type`, `backbone`, `lr_schedule` is not valid.
        Please refer to the Detectron2 model zoo for valid options.
    """
    cfg = get_cfg()
    pretrained_model = (
        f"COCO-Detection/{args.model_type}_{args.backbone}_{args.lr_schedule}x.yaml"
    )
    LOGGER.info(f"Loooking for the pretrained model {pretrained_model}...")
    try:
        cfg.merge_from_file(model_zoo.get_config_file(pretrained_model))
    except RuntimeError as err:
        LOGGER.error(f"{err}: check model backbone and lr schedule combination")
        raise
    cfg.DATASETS.TRAIN = (f"{args.dataset_name}_training",)
    cfg.DATASETS.TEST = (f"{args.dataset_name}_validation",)
    cfg.DATALOADER.NUM_WORKERS = args.num_workers
    # Let training initialize from model zoo
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(pretrained_model)
    LOGGER.info(f"{pretrained_model} correctly loaded")

    cfg.SOLVER.CHECKPOINT_PERIOD = 20000
    cfg.SOLVER.BASE_LR = args.lr
    cfg.SOLVER.MAX_ITER = args.num_iter
    cfg.SOLVER.IMS_PER_BATCH = args.batch_size
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = args.num_rpn
    if args.model_type == "faster_rcnn":
        cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(args.classes)
        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.pred_thr
        cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = args.nms_thr
        cfg.MODEL.RPN.BBOX_REG_LOSS_TYPE = args.reg_loss_type
        cfg.MODEL.RPN.BBOX_REG_LOSS_WEIGHT = args.bbox_reg_loss_weight
        cfg.MODEL.RPN.POSITIVE_FRACTION = args.bbox_rpn_pos_fraction
        cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION = args.bbox_head_pos_fraction
    elif args.model_type == "retinanet":
        cfg.MODEL.RETINANET.SCORE_THRESH_TEST = args.pred_thr
        cfg.MODEL.RETINANET.NMS_THRESH_TEST = args.nms_thr
        cfg.MODEL.RETINANET.NUM_CLASSES = len(args.classes)
        cfg.MODEL.RETINANET.BBOX_REG_LOSS_TYPE = args.reg_loss_type
        cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA = args.focal_loss_gamma
        cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA = args.focal_loss_alpha
    else:
        assert False, f"Add implementation for model {args.model_type}"
    cfg.MODEL.DEVICE = "cuda" if args.num_gpus else "cpu"

    cfg.TEST.DETECTIONS_PER_IMAGE = args.det_per_img

    cfg.OUTPUT_DIR = args.model_dir
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    return cfg
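# Illustrative usage sketch: build the config from the parsed CLI arguments and run a
# standard DefaultTrainer on it (`_parse_args()` is the parser referenced in the
# docstring above; the trainer pattern mirrors the other examples in this file).
if __name__ == "__main__":
    from detectron2.engine import DefaultTrainer

    args = _parse_args()
    cfg = _config_training(args)
    trainer = DefaultTrainer(cfg)
    trainer.resume_or_load(resume=False)
    trainer.train()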
    train_annos = join(data_dir, 'grini_nc_merged_no_masks_train.json')
    val_annos = join(data_dir, 'grini_nc_merged_no_masks_val.json')
    test_annos = join(data_dir, 'grini_nc_merged_no_masks_test.json')

    # Register dataset configs
    register_coco_instances('grini_nc_merged_bbox_only_train', {}, train_annos,
                            train_imgs)
    register_coco_instances('grini_nc_merged_bbox_only_val', {}, val_annos,
                            val_imgs)
    register_coco_instances('grini_nc_merged_bbox_only_test', {}, test_annos,
                            test_imgs)


cfg = get_cfg()
cfg.merge_from_file(
    model_zoo.get_config_file(
        'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml'))

register_datasets()
cfg.DATASETS.TRAIN = ('grini_nc_merged_bbox_only_train', )
cfg.DATASETS.TEST = ('grini_nc_merged_bbox_only_val', )

cfg.MODEL.WEIGHTS = get_checkpoint_url(
    'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml')
cfg.MODEL.DEVICE = "cpu"  # cpu or cuda
cfg.MODEL.MASK_ON = False

# todo find out how rescale images and annotations first...
# Parameters fixed
cfg.SOLVER.IMS_PER_BATCH = 4
cfg.SOLVER.BASE_LR = 0.001
cfg.SOLVER.WARMUP_ITERS = 1000
Example #17
def main():
    """ Mask RCNN Object Detection with Detectron2 """
    rospy.init_node("mask_rcnn", anonymous=True)
    bridge = CvBridge()
    start_time = time.time()
    image_counter = 0

    register_coco_instances(
        "train_set", {},
        "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/train/annotations.json",
        "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/train")
    register_coco_instances(
        "test_set", {},
        "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/test/annotations.json",
        "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/test")

    train_metadata = MetadataCatalog.get("train_set")
    print(train_metadata)
    dataset_dicts_train = DatasetCatalog.get("train_set")

    test_metadata = MetadataCatalog.get("test_set")
    print(test_metadata)
    dataset_dicts_test = DatasetCatalog.get("test_set")

    cfg = get_cfg()
    cfg.merge_from_file(
        model_zoo.get_config_file(
            "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    cfg.DATASETS.TRAIN = ("train_set")
    cfg.DATASETS.TEST = ()  # no metrics implemented for this dataset
    cfg.DATALOADER.NUM_WORKERS = 4
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
        "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
    )  # initialize from model zoo
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = 0.01
    cfg.SOLVER.MAX_ITER = 1000  # 300 iterations seems good enough, but you can certainly train longer
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = (
        128)  # faster, and good enough for this toy dataset
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 5  # 5 classes (Plate, Carrot, Celery, Pretzel, Gripper)

    # Temporary Solution. If I train again I think I can use the dynamically set path again
    cfg.MODEL.WEIGHTS = os.path.join(
        cfg.OUTPUT_DIR,
        "/home/labuser/ros_ws/src/odhe_ros/arm_camera_dataset/output/model_final.pth"
    )
    # cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.4  # set the testing threshold for this model
    cfg.DATASETS.TEST = ("test_set")
    predictor = DefaultPredictor(cfg)

    class_names = MetadataCatalog.get("train_set").thing_classes

    # Set up custom cv2 visualization parameters
    # Classes: [name, id]
    #               -
    #          [Plate,   0]
    #          [Carrot,  1]
    #          [Celery,  2]
    #          [Pretzel, 3]
    #          [Gripper, 4]

    # Colors = [blue, green, red]
    color_plate = [0, 255, 0]  # green
    color_carrot = [255, 200, 0]  # blue
    color_celery = [0, 0, 255]  # red
    color_pretzel = [0, 220, 255]  # yellow
    color_gripper = [204, 0, 150]  # purple
    colors = list([
        color_plate, color_carrot, color_celery, color_pretzel, color_gripper
    ])

    alpha = .4

    run = maskRCNN()
    while not rospy.is_shutdown():
        # Get images
        img = run.get_img()

        if img is not None:
            outputs = predictor(img)
            predictions = outputs["instances"].to("cpu")

            # Get results
            unsorted = run.getResult(predictions, class_names)

            # Sort detections by x and y
            detections = run.sort_detections(unsorted)

            result = Result()
            for i in range(len(detections)):
                result.class_ids.append(detections[i][0])
                result.class_names.append(detections[i][1])
                result.scores.append(detections[i][2])
                result.boxes.append(detections[i][3])
                result.masks.append(detections[i][4])

            # Visualize using detectron2 built in visualizer
            # v = Visualizer(im[:, :, ::-1],
            #             metadata=train_metadata,
            #             scale=1.0
            #             # instance_mode=ColorMode.IMAGE_BW   # remove the colors of unsegmented pixels
            # )
            # v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
            # im = v.get_image()[:, :, ::-1]
            # im_msg = bridge.cv2_to_imgmsg(im, encoding="bgr8")

            # Visualize using custom cv2 code
            if result is not None:
                result_cls = result.class_names
                result_clsId = result.class_ids
                result_scores = result.scores
                result_masks = result.masks

                # Create copies of the original image
                im = img.copy()
                output = img.copy()

                # Initialize lists
                masks = []
                masks_indices = []
                for i in range(len(result_clsId)):
                    # Obtain current object mask as a numpy array (black and white mask of single object)
                    current_mask = bridge.imgmsg_to_cv2(result_masks[i])

                    # Find current mask indices
                    mask_indices = np.where(current_mask == 255)

                    # Add to mask indices list
                    if len(masks_indices) > len(result_clsId):
                        masks_indices = []
                    else:
                        masks_indices.append(mask_indices)

                    # Add to mask list
                    if len(masks) > len(result_clsId):
                        masks = []
                    else:
                        masks.append(current_mask)

                if len(masks) > 0:
                    # Create composite mask
                    composite_mask = sum(masks)

                    # Clip composite mask between 0 and 255
                    composite_mask = composite_mask.clip(0, 255)

                for i in range(len(result_clsId)):
                    # Select correct object color
                    color = colors[result_clsId[i]]

                    # Change the color of the current mask object
                    im[masks_indices[i][0], masks_indices[i][1], :] = color

                # Apply alpha scaling to image to adjust opacity
                cv2.addWeighted(im, alpha, output, 1 - alpha, 0, output)

                for i in range(len(result_clsId)):
                    # Draw Bounding boxes
                    start_point = (result.boxes[i].x_offset,
                                   result.boxes[i].y_offset)
                    end_point = (result.boxes[i].x_offset +
                                 result.boxes[i].width,
                                 result.boxes[i].y_offset +
                                 result.boxes[i].height)
                    start_point2 = (result.boxes[i].x_offset + 2,
                                    result.boxes[i].y_offset + 2)
                    end_point2 = (result.boxes[i].x_offset +
                                  result.boxes[i].width - 2,
                                  result.boxes[i].y_offset + 12)
                    color = colors[result_clsId[i]]
                    box_thickness = 1

                    name = result_cls[i]
                    score = result_scores[i]
                    conf = round(score.item() * 100, 1)
                    string = str(name) + ":" + str(conf) + "%"
                    font = cv2.FONT_HERSHEY_SIMPLEX
                    org = (result.boxes[i].x_offset + 2,
                           result.boxes[i].y_offset + 10)
                    fontScale = .3
                    text_thickness = 1
                    output = cv2.rectangle(output, start_point, end_point,
                                           color, box_thickness)
                    output = cv2.rectangle(output, start_point2, end_point2,
                                           color, -1)  # Text box
                    output = cv2.putText(output, string, org, font, fontScale,
                                         [0, 0, 0], text_thickness,
                                         cv2.LINE_AA, False)

                im_rgb = cv2.cvtColor(output, cv2.COLOR_BGR2RGB)
                im_msg = bridge.cv2_to_imgmsg(im_rgb, encoding="rgb8")

                ##### The entire goal of the below code is to get N random points on the mask in 3D
                ##### and publish on cloud samples topic for GPD
                item_ids = result_clsId
                idx = [i for i, e in enumerate(item_ids) if e > 0 and e < 4]
                numFoodItems = len(idx)

                mask = bridge.imgmsg_to_cv2(result_masks[idx[0]])
                coord = cv2.findNonZero(
                    mask)  # Coordinates of the mask that are on the food item

                # Pick 3 random points on the object mask
                sample_list = list()
                for ii in range(3):
                    point = Point()
                    x = random.choice(
                        coord[:, 0, 1])  # x and y reversed for some reason
                    y = random.choice(
                        coord[:, 0, 0])  # x and y reversed for some reason
                    depth = (run.depth_array[y, x]) / 1000
                    # Deproject pixels and depth to 3D coordinates (camera frame)
                    X, Y, Z = run.convert_depth_to_phys_coord_using_realsense(
                        y, x, depth, run.cam_info)
                    # print("(x,y,z) to convert: ("+str(y)+", "+str(x)+", "+str(depth)+")")
                    # print("(X,Y,Z) converted: ("+str(X)+", "+str(Y)+", "+str(Z)+")")
                    point.x = X
                    point.y = Y
                    point.z = Z
                    sample_list.append(point)

                # print(sample_list)

                cam_source = Int64()
                cam_source.data = 0

                cloud_source = CloudSources()
                cloud_source.cloud = run.pointCloud
                cloud_source.camera_source = [cam_source]
                view_point = Point()
                view_point.x = 0.640
                view_point.y = 0.828
                view_point.z = 0.505
                # view_point.x = 0; view_point.y = 0; view_point.z = 0
                cloud_source.view_points = [view_point]

                cloud_samples = CloudSamples()
                cloud_samples.cloud_sources = cloud_source
                cloud_samples.samples = sample_list

                # Print publish info
                # print(type(cloud_source.cloud))
                # print(cloud_source.camera_source)
                # print(cloud_source.view_points)
                # print("")
                # print(type(cloud_samples.cloud_sources))
                # print(cloud_samples.samples)
                # print("-------------------------\n")

            # Display Image Counter
            # image_counter = image_counter + 1
            # if (image_counter % 11) == 10:
            #     rospy.loginfo("Images detected per second=%.2f", float(image_counter) / (time.time() - start_time))

            run.publish(im_msg, result, cloud_samples)

    return 0
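# Typical ROS-node entry point (an assumption; not shown in the original snippet):
if __name__ == "__main__":
    main()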
Example #18
def task12_B():
    lr = 0.0025
    batch_size = 256
    n_iter = 300
    EXPERIMENT_NAME = 'K1_' + str(lr) + 'lr_' + str(
        batch_size) + 'bsize_' + str(n_iter) + 'iter'

    def get_aicity_dataset(frame_idx_list):
        path = '/home/group09/code/week6/datasets/AICity_data/train/S03/c010/ai_challenge_s03_c010-full_annotation.xml'
        video_path = '/home/group09/code/week6/datasets/AICity_data/train/S03/c010/vdo.avi'

        reader = ReadData(path)
        gt, num_iter = reader.getGTfromXML()

        sortedFrames, sortedBBOX, numBBOX = reader.bboxInFrame(gt, 0, 2141)
        gtInfo = reader.joinBBOXfromFrame(sortedFrames, sortedBBOX, isGT=True)

        dataset_dicts = []
        directory = '/home/group09/code/week6/datasets/AICity_data/AICity_frames'
        for frame_idx in tqdm(frame_idx_list):
            filename = str(frame_idx).zfill(4) + '.png'
            record = {}
            im_path = os.path.join(directory, filename)
            im = cv2.imread(im_path)
            print(filename)
            height, width = im.shape[:2]

            record["file_name"] = im_path
            record["image_id"] = str(frame_idx).zfill(4)
            record["height"] = height
            record["width"] = width

            classes = ['Car']

            objs = []
            for [
                    x1, y1, x2, y2
            ] in gtInfo[frame_idx]['bbox']:  # for every bbox in a frame's gt
                class_id = 0
                obj = {
                    "type": 'Car',
                    "bbox": [x1, y1, x2, y2],
                    "bbox_mode": BoxMode.XYXY_ABS,
                    "category_id": 0
                }

                objs.append(obj)

            record["annotations"] = objs
            dataset_dicts.append(record)

        return dataset_dicts

    # K-FOLD SPLITS
    k_train = 0  # take 0th k-fold for train
    k_step = int(np.floor(2141 * 0.25))
    frame_idx = [i for i in range(2141)]
    ini_frame_train = k_step * k_train

    # train dataset
    train_frame_idx = frame_idx[ini_frame_train:(ini_frame_train + k_step)]
    print("==== TRAIN SPLIT")
    print("")
    for d in ['train']:
        DatasetCatalog.register(
            'train_retina', lambda d=d: get_aicity_dataset(train_frame_idx))
        MetadataCatalog.get('train_retina').set(thing_classes=['Car'])

    # val dataset
    val_frame_idx = [x for x in frame_idx if x not in train_frame_idx]
    print("==== VALIDATION SPLIT")
    print("")
    for d in ['val']:
        DatasetCatalog.register('val_retina',
                                lambda d=d: get_aicity_dataset(val_frame_idx))
        MetadataCatalog.get('val_retina').set(thing_classes=['Car'])

    train_metadata = MetadataCatalog.get("train_retina")
    dataset_dicts = get_aicity_dataset(train_frame_idx)

    OUTPUT_DIR = '/home/group09/code/week6/models_retina/' + EXPERIMENT_NAME

    if not os.path.exists(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)

    cfg = get_cfg()
    cfg.OUTPUT_DIR = OUTPUT_DIR
    cfg.merge_from_file(
        model_zoo.get_config_file("COCO-Detection/retinanet_R_50_FPN_3x.yaml"))
    cfg.DATASETS.TRAIN = ("train_retina", )
    cfg.DATASETS.TEST = ("val_retina", )
    # cfg.DATASETS.TEST = ()
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
        "COCO-Detection/retinanet_R_50_FPN_3x.yaml"
    )  # Let training initialize from model zoo
    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.SOLVER.BASE_LR = lr  # pick a good LR
    cfg.SOLVER.MAX_ITER = n_iter  # 300 iterations seems good enough for this toy dataset; you will need to train longer for a practical dataset
    cfg.SOLVER.STEPS = []  # do not decay learning rate
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = batch_size  # faster, and good enough for this toy dataset (default: 512)
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # only has one class (car)

    cfg.TEST.EVAL_PERIOD = 100

    class MyTrainer(DefaultTrainer):
        @classmethod
        def build_evaluator(cls, cfg, dataset_name, output_folder=None):
            if output_folder is None:
                output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
            return COCOEvaluator(dataset_name, cfg, True, output_folder)

        def build_hooks(self):
            hooks = super().build_hooks()
            hooks.insert(
                -1,
                LossEvalHook(
                    cfg.TEST.EVAL_PERIOD, self.model,
                    build_detection_test_loader(self.cfg,
                                                self.cfg.DATASETS.TEST[0],
                                                DatasetMapper(self.cfg,
                                                              True))))
            return hooks

    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    trainer = MyTrainer(cfg)
    trainer.resume_or_load(resume=False)
    trainer.train()
    sys.exit(0)

im = cv2.imread(sys.argv[1])

if im is None:
    print("file open fail")
    sys.exit(0)

#cv2.imshow('image',im)
#cv2.waitKey(0)
#cv2.destroyAllWindows()
cfg = get_cfg()
# add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
cfg.merge_from_file(
    model_zoo.get_config_file(
        "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
cfg.merge_from_list(['MODEL.DEVICE', 'cpu'])
# Find a model from detectron2's model zoo. You can either use the https://dl.fbaipublicfiles.... url, or use the detectron2:// shorthand
cfg.MODEL.WEIGHTS = "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
predictor = DefaultPredictor(cfg)
outputs = predictor(im)
size = outputs["instances"].scores.shape[0]

poslist = []
boxes = outputs["instances"].pred_boxes.tensor
pred = outputs['instances'].pred_classes
masks = outputs["instances"].pred_masks

for i in range(size):
                mask_encode = dets_frame_dict['pred_masks'][objid]
                det_str = "%d %s %.4f %.2f %.2f %.2f %.2f %d %d %s\n" % \
                          (frameid, CITYSCAPES_THINGS[dets_frame_dict['pred_classes'][objid]],
                           dets_frame_dict['scores'][objid],
                           dets_frame_dict['pred_boxes'][objid][0], dets_frame_dict['pred_boxes'][objid][1],
                           dets_frame_dict['pred_boxes'][objid][2], dets_frame_dict['pred_boxes'][objid][3],
                           mask_encode['size'][0], mask_encode['size'][1], mask_encode['counts'].decode('UTF-8'))
                f.write(det_str)


if __name__ == '__main__':

    cfg = get_cfg()
    # add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
    cfg.merge_from_file(
        model_zoo.get_config_file("Cityscapes/mask_rcnn_R_50_FPN.yaml"))
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
    # Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
    # cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("Cityscapes/mask_rcnn_R_50_FPN.yaml")
    cfg.MODEL.WEIGHTS = 'model_final_af9cf5.pkl'
    predictor = DefaultPredictor(cfg)

    # seq_names = sorted(os.listdir(cruw_data_root))
    seq_names = ['12', '13']
    for seq in seq_names:
        output_dict = {
            'IMAGES_0': [],
            'IMAGES_1': [],
        }
        seq_path = os.path.join(cruw_data_root, seq)
                    help="Root directory to store the outputs.")
parser.add_argument("--dataset_root",
                    type=str,
                    required=True,
                    help="Root directory of the dataset")
args = parser.parse_args()

# ------------------------ MODEL SELECTION AND CONFIGURATION ----------------------------------

# MODEL_PATH = "COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml"
MODEL_PATH = args.model
MODEL = MODEL_PATH.split('/')[1].split('.')[0]

cfg = get_cfg()

cfg.merge_from_file(model_zoo.get_config_file(MODEL_PATH))

# Does not affect output bbox coordinates
cfg.INPUT.MAX_SIZE_TRAIN = 1333

# Set below the default of 0.7 because a very low objectness threshold is used
cfg.MODEL.RPN.NMS_THRESH = 0.5

# Objectness threshold
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.01  # set threshold for this model
cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.5  # Setting to lower than 0.7 because using very low objectness

# Cap the number of detections per image
cfg.TEST.DETECTIONS_PER_IMAGE = 150

# Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
def demo(cfg):
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Run demo with config:")
    logger.info(cfg)
    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg)
    model.eval()
    misc.log_model_info(model)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        ckpt = cfg.TEST.CHECKPOINT_FILE_PATH
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        ckpt = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint found in TEST.CHECKPOINT_FILE_PATH or in the current
        # checkpoint folder, try to load checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        ckpt = cfg.TRAIN.CHECKPOINT_FILE_PATH
    else:
        raise NotImplementedError("Unknown way to load checkpoint.")

    cu.load_checkpoint(
        ckpt,
        model,
        cfg.NUM_GPUS > 1,
        None,
        inflation=False,
        convert_from_caffe2= "caffe2" in [cfg.TEST.CHECKPOINT_TYPE, cfg.TRAIN.CHECKPOINT_TYPE],
    )

    if cfg.DETECTION.ENABLE:
        # Load object detector from detectron2
        dtron2_cfg_file = cfg.DEMO.DETECTRON2_OBJECT_DETECTION_MODEL_CFG
        dtron2_cfg = get_cfg()
        dtron2_cfg.merge_from_file(model_zoo.get_config_file(dtron2_cfg_file))
        dtron2_cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .5
        dtron2_cfg.MODEL.WEIGHTS = cfg.DEMO.DETECTRON2_OBJECT_DETECTION_MODEL_WEIGHTS
        object_predictor = DefaultPredictor(dtron2_cfg)
        # Load the labels of AVA dataset
        with open(cfg.DEMO.LABEL_FILE_PATH) as f:
            labels = f.read().split('\n')[:-1]
        palette = np.random.randint(64, 128, (len(labels), 3)).tolist()
        boxes = []
    else:
        # Load the labels of Kinectics-400 dataset
        labels_df = pd.read_csv(cfg.DEMO.LABEL_FILE_PATH)
        labels = labels_df['name'].values

    frame_provider = VideoReader(cfg)
    seq_len = cfg.DATA.NUM_FRAMES*cfg.DATA.SAMPLING_RATE
    frames = []
    pred_labels = []
    s = 0.
    videoOut = cv2.VideoWriter(cfg.DEMO.OUTPUT_FILE, cv2.VideoWriter_fourcc(*'MP4V'), 30.0, (cfg.DEMO.DISPLAY_WIDTH,cfg.DEMO.DISPLAY_HEIGHT))
    for able_to_read, frame in frame_provider:
        if not able_to_read:
            # when reaches the end frame, clear the buffer and continue to the next one.
            frames = []
            continue

        if len(frames) != seq_len:
            frame_processed = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_processed = scale(cfg.DATA.TEST_CROP_SIZE, frame_processed)
            frames.append(frame_processed)
            if cfg.DETECTION.ENABLE and len(frames) == seq_len//2 - 1:
                mid_frame = frame
            
        if len(frames) == seq_len:
            start = time()
            if cfg.DETECTION.ENABLE:
                outputs = object_predictor(mid_frame)
                fields = outputs["instances"]._fields
                pred_classes = fields["pred_classes"]
                selection_mask = pred_classes == 0
                # acquire person boxes
                pred_classes = pred_classes[selection_mask]
                pred_boxes = fields["pred_boxes"].tensor[selection_mask]
                scores = fields["scores"][selection_mask]
                boxes = cv2_transform.scale_boxes(cfg.DATA.TEST_CROP_SIZE,
                                                    pred_boxes,
                                                    frame_provider.display_height,
                                                    frame_provider.display_width)
                boxes = torch.cat(
                    [torch.full((boxes.shape[0], 1), float(0)).cuda(), boxes], axis=1
                )

            inputs = torch.as_tensor(frames).float()
            inputs = inputs / 255.0
            # Perform color normalization.
            inputs = inputs - torch.tensor(cfg.DATA.MEAN)
            inputs = inputs / torch.tensor(cfg.DATA.STD)
            # T H W C -> C T H W.
            inputs = inputs.permute(3, 0, 1, 2)

            # 1 C T H W.
            inputs = inputs.unsqueeze(0)

            # Sample frames for the fast pathway.
            index = torch.linspace(0, inputs.shape[2] - 1, cfg.DATA.NUM_FRAMES).long()
            fast_pathway = torch.index_select(inputs, 2, index)
            # logger.info('fast_pathway.shape={}'.format(fast_pathway.shape))

            # Sample frames for the slow pathway.
            index = torch.linspace(0, fast_pathway.shape[2] - 1, 
                                    fast_pathway.shape[2]//cfg.SLOWFAST.ALPHA).long()
            slow_pathway = torch.index_select(fast_pathway, 2, index)
            # logger.info('slow_pathway.shape={}'.format(slow_pathway.shape))
            inputs = [slow_pathway, fast_pathway]

            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list,)):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)

            # Perform the forward pass.
            if cfg.DETECTION.ENABLE:
                # When there is nothing in the scene, 
                #   use a dummy variable to disable all computations below.
                if not len(boxes):
                    preds = torch.tensor([])
                else:
                    preds = model(inputs, boxes)
            else:
                preds = model(inputs)

            # Gather all the predictions across all the devices to perform ensemble.
            if cfg.NUM_GPUS > 1:
                preds = du.all_gather(preds)[0]
                
            if cfg.DETECTION.ENABLE:
                # This post-processing is intentionally done on the CPU since my laptop GPU
                #   (RTX 2080) runs out of memory; if your GPU is more powerful, I'd recommend
                #   changing this section so that CUDA does the processing.
                preds = preds.cpu().detach().numpy()
                pred_masks = preds > .1
                label_ids = [np.nonzero(pred_mask)[0] for pred_mask in pred_masks]
                pred_labels = [
                    [labels[label_id] for label_id in perbox_label_ids]
                    for perbox_label_ids in label_ids
                ]
                # I'm unsure how detectron2 rescales boxes to the original image size, so I use
                #   the input boxes of SlowFast and rescale them back instead; it's safer, and it
                #   still works even if the boxes were not rescaled by cv2_transform.rescale_boxes.
                boxes = boxes.cpu().detach().numpy()
                ratio = np.min(
                    [frame_provider.display_height, frame_provider.display_width]
                ) / cfg.DATA.TEST_CROP_SIZE
                boxes = boxes[:, 1:] * ratio
            else:
                ## Option 1: single label inference selected from the highest probability entry.
                # label_id = preds.argmax(-1).cpu()
                # pred_label = labels[label_id]
                # Option 2: multi-label inferencing selected from probability entries > threshold
                label_ids = torch.nonzero(preds.squeeze() > .1).reshape(-1).cpu().detach().numpy()
                pred_labels = labels[label_ids]
                logger.info(pred_labels)
                if not list(pred_labels):
                    pred_labels = ['Unknown']

            # # option 1: remove the oldest frame in the buffer to make place for the new one.
            # frames.pop(0)
            # option 2: empty the buffer
            frames = []
            s = time() - start
        
        if cfg.DETECTION.ENABLE and pred_labels and boxes.any():
            for box, box_labels in zip(boxes.astype(int), pred_labels):
                cv2.rectangle(frame, tuple(box[:2]), tuple(box[2:]), (0, 255, 0), thickness=2)
                label_origin = box[:2]
                for label in box_labels:
                    label_origin[-1] -= 5
                    (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, .5, 2)
                    cv2.rectangle(
                        frame, 
                        (label_origin[0], label_origin[1] + 5), 
                        (label_origin[0] + label_width, label_origin[1] - label_height - 5),
                        palette[labels.index(label)], -1
                    )
                    cv2.putText(
                        frame, label, tuple(label_origin), 
                        cv2.FONT_HERSHEY_SIMPLEX, .5, (255, 255, 255), 1
                    )
                    label_origin[-1] -= label_height + 5
        if not cfg.DETECTION.ENABLE:
            # Display predicted labels to frame.
            y_offset = 50
            cv2.putText(frame, 'Action:', (10, y_offset), 
                                fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                                fontScale=.65, color=(0, 235, 0), thickness=2)        
            for pred_label in pred_labels:
                y_offset += 30
                cv2.putText(frame, '{}'.format(pred_label), (20, y_offset), 
                            fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                            fontScale=.65, color=(0, 235, 0), thickness=2)

        # Display prediction speed
        cv2.putText(frame, 'Speed: {:.2f}s'.format(s), (10, 25), 
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=.65, color=(0, 235, 0), thickness=2)
        # Display the frame
        cv2.imshow('SlowFast', frame)   
        videoOut.write(frame)
        # hit Esc to quit the demo.
        key = cv2.waitKey(1)
        if key == 27:
            break
 
    frame_provider.clean()
    videoOut.release()
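
# Not part of the original demo: a hedged sketch that factors the label-drawing logic
# above into a reusable helper. The function name and the `labels`/`palette` arguments
# are assumptions mirroring the variables used in the loop above.
import cv2

def draw_box_with_labels(frame, box, box_labels, labels, palette):
    """Draw one detection box and stack its predicted action labels above it."""
    cv2.rectangle(frame, tuple(box[:2]), tuple(box[2:]), (0, 255, 0), thickness=2)
    label_origin = list(box[:2])
    for label in box_labels:
        label_origin[1] -= 5
        (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, .5, 2)
        cv2.rectangle(
            frame,
            (label_origin[0], label_origin[1] + 5),
            (label_origin[0] + label_width, label_origin[1] - label_height - 5),
            palette[labels.index(label)], -1
        )
        cv2.putText(
            frame, label, tuple(label_origin),
            cv2.FONT_HERSHEY_SIMPLEX, .5, (255, 255, 255), 1
        )
        label_origin[1] -= label_height + 5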
Example #23
0
        dataset_dicts.append(record)
    return dataset_dicts


DatasetCatalog.register("aicity_train", partial(get_datasect_dicts, 1080,
                                                1620))
MetadataCatalog.get("aicity_train").set(thing_classes=["car"])
aicity_metadata = MetadataCatalog.get("aicity_train")

dataset_dicts = get_datasect_dicts(0, 540)

from detectron2.engine import DefaultTrainer

cfg = get_cfg()
cfg.merge_from_file(
    model_zoo.get_config_file("COCO-Detection/retinanet_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("aicity_train", )
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-Detection/retinanet_R_50_FPN_3x.yaml"
)  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.001  # pick a good LR

cfg.SOLVER.WARMUP_ITERS = 300
cfg.SOLVER.MAX_ITER = 600
cfg.SOLVER.STEPS = (350, 500)  #decay learning rate
cfg.SOLVER.GAMMA = 0.1

cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128  # faster, and good enough for this toy dataset (default: 512)
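
# Hedged addition (not in the original script): RetinaNet ignores the ROI_HEADS options
# above, so the single "car" class would normally be declared on the RetinaNet head:
cfg.MODEL.RETINANET.NUM_CLASSES = 1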
Example #24
0
    def __init__(self):
        self.visualize = False
        self.verbose = False
        self.save_imgs = True

        self.plot_loss = True
        # st()

        # these are all map names
        a = np.arange(1, 30)
        b = np.arange(201, 231)
        c = np.arange(301, 331)
        d = np.arange(401, 431)
        abcd = np.hstack((a, b, c, d))
        mapnames = []
        for i in list(abcd):
            mapname = 'FloorPlan' + str(i)
            mapnames.append(mapname)

        train_len = int(0.9 * len(mapnames))

        np.random.seed(1)
        np.random.shuffle(mapnames)  # shuffle with the seeded NumPy RNG so the train/val split is reproducible
        self.mapnames_train = mapnames[:train_len]
        self.mapnames_val = mapnames[train_len:]
        # self.num_episodes = len(self.mapnames)

        self.ignore_classes = []
        # classes to save
        self.include_classes = [
            'ShowerDoor', 'Cabinet', 'CounterTop', 'Sink', 'Towel',
            'HandTowel', 'TowelHolder', 'SoapBar', 'ToiletPaper',
            'ToiletPaperHanger', 'HandTowelHolder', 'SoapBottle', 'GarbageCan',
            'Candle', 'ScrubBrush', 'Plunger', 'SinkBasin', 'Cloth',
            'SprayBottle', 'Toilet', 'Faucet', 'ShowerHead', 'Box', 'Bed',
            'Book', 'DeskLamp', 'BasketBall', 'Pen', 'Pillow', 'Pencil',
            'CellPhone', 'KeyChain', 'Painting', 'CreditCard', 'AlarmClock',
            'CD', 'Laptop', 'Drawer', 'SideTable', 'Chair', 'Blinds', 'Desk',
            'Curtains', 'Dresser', 'Watch', 'Television', 'WateringCan',
            'Newspaper', 'FloorLamp', 'RemoteControl', 'HousePlant', 'Statue',
            'Ottoman', 'ArmChair', 'Sofa', 'DogBed', 'BaseballBat',
            'TennisRacket', 'VacuumCleaner', 'Mug', 'ShelvingUnit', 'Shelf',
            'StoveBurner', 'Apple', 'Lettuce', 'Bottle', 'Egg', 'Microwave',
            'CoffeeMachine', 'Fork', 'Fridge', 'WineBottle', 'Spatula',
            'Bread', 'Tomato', 'Pan', 'Cup', 'Pot', 'SaltShaker', 'Potato',
            'PepperShaker', 'ButterKnife', 'StoveKnob', 'Toaster',
            'DishSponge', 'Spoon', 'Plate', 'Knife', 'DiningTable', 'Bowl',
            'LaundryHamper', 'Vase', 'Stool', 'CoffeeTable', 'Poster',
            'Bathtub', 'TissueBox', 'Footstool', 'BathtubBasin',
            'ShowerCurtain', 'TVStand', 'Boots', 'RoomDecor', 'PaperTowelRoll',
            'Ladle', 'Kettle', 'Safe', 'GarbageBag', 'TeddyBear',
            'TableTopDecor', 'Dumbbell', 'Desktop', 'AluminumFoil', 'Window'
        ]

        self.action_space = {
            0: "MoveLeft",
            1: "MoveRight",
            2: "MoveAhead",
            3: "MoveBack"
        }
        self.num_actions = len(self.action_space)

        cfg_det = get_cfg()
        cfg_det.merge_from_file(
            model_zoo.get_config_file(
                "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
        cfg_det.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.2  # set threshold for this model
        cfg_det.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
            "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
        cfg_det.MODEL.DEVICE = 'cpu'
        self.cfg_det = cfg_det
        self.maskrcnn = DefaultPredictor(cfg_det)

        self.conf_thresh_detect = 0.7  # threshold for initially detecting a low-confidence object
        self.conf_thresh_init = 0.8  # threshold used after turning the head toward the object
        self.conf_thresh_end = 0.9  # stop collecting observations once this confidence is reached

        self.BATCH_SIZE = 12
        self.percentile = 70
        self.max_iters = 100000
        self.max_frames = 10
        self.val_interval = 15
        self.save_interval = 50

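        # NOTE: the smaller values below override the settings above (presumably quick-debug values).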
        self.BATCH_SIZE = 1
        self.percentile = 70
        self.max_iters = 100000
        self.max_frames = 1
        self.val_interval = 1
        self.save_interval = 1

        self.small_classes = []
        self.rot_interval = 5.0
        self.radius_max = 3.5  #3 #1.75
        self.radius_min = 1.0  #1.25
        self.num_flat_views = 3
        self.num_any_views = 7
        self.num_views = 25
        self.center_from_mask = False  # get object centroid from maskrcnn (True) or gt (False)

        self.obj_per_scene = 5

        # self.origin_quaternion = np.quaternion(1, 0, 0, 0)
        # self.origin_rot_vector = quaternion.as_rotation_vector(self.origin_quaternion)

        # self.homepath = f'/home/nel/gsarch/aithor/data/test2'
        self.homepath = '/home/sirdome/katefgroup/gsarch/ithor/data/test'
        if not os.path.exists(self.homepath):
            os.mkdir(self.homepath)
        else:
            val = input("Delete homepath? [y/n]: ")
            if val == 'y':
                import shutil
                shutil.rmtree(self.homepath)
                os.mkdir(self.homepath)
            else:
                print("ENDING")
                assert (False)

        self.log_freq = 1
        self.log_dir = self.homepath + '/..' + '/log_cem' + '/aa'
        if not os.path.exists(self.log_dir):
            os.mkdir(self.log_dir)
        MAX_QUEUE = 10  # flush when this many events are waiting
        self.writer = SummaryWriter(self.log_dir,
                                    max_queue=MAX_QUEUE,
                                    flush_secs=60)

        self.W = 256
        self.H = 256

        # self.fov = 90
        # hfov = float(self.fov) * np.pi / 180.
        # self.pix_T_camX = np.array([
        #     [(self.W/2.)*1 / np.tan(hfov / 2.), 0., 0., 0.],
        #     [0., (self.H/2.)*1 / np.tan(hfov / 2.), 0., 0.],
        #     [0., 0.,  1, 0],
        #     [0., 0., 0, 1]])
        # self.pix_T_camX[0,2] = self.W/2.
        # self.pix_T_camX[1,2] = self.H/2.

        self.fov = 90
        self.camera_matrix = self.get_camera_matrix(self.W, self.H, self.fov)
        self.K = self.get_habitat_pix_T_camX(self.fov)

        self.init_network()

        self.run_episodes()
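
    # Hedged sketch (not part of the original class): how the Mask R-CNN predictor and
    # the confidence thresholds above might be applied to a single frame. The method
    # name and the `rgb` argument are assumptions.
    def detect_confident_objects(self, rgb):
        outputs = self.maskrcnn(rgb)  # rgb: HxWx3 BGR uint8 image
        instances = outputs["instances"].to("cpu")
        keep = instances.scores > self.conf_thresh_detect
        return instances[keep]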
Example #25
0
register_coco_instances("dataset_train0", {}, "train_patch_0.json", "/content/thre_whole_patches")
register_coco_instances("dataset_val0", {}, "val_patch_0.json", "/content/thre_whole_patches")

register_coco_instances("dataset_train1", {}, "train_patch_1.json", "/content/thre_whole_patches")
register_coco_instances("dataset_val1", {}, "val_patch_1.json", "/content/thre_whole_patches")

register_coco_instances("dataset_train2", {}, "train_patch_2.json", "/content/thre_whole_patches")
register_coco_instances("dataset_val2", {}, "val_patch_2.json", "/content/thre_whole_patches")

print("done")

"""### first"""

cfg = get_cfg()
# cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"))
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/retinanet_R_101_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("dataset_val0",)
cfg.DATASETS.TEST = ("dataset_train0",)
cfg.DATALOADER.NUM_WORKERS = 1
# cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml")  # Let training initialize from model zoo
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/retinanet_R_101_FPN_3x.yaml")
cfg.SOLVER.IMS_PER_BATCH = 4
cfg.SOLVER.BASE_LR = 0.00025
cfg.MODEL.RETINANET.NUM_CLASSES = 2

cfg.SOLVER.MAX_ITER = 1000 #adjust up if val mAP is still rising, adjust down if overfit
cfg.SOLVER.GAMMA = 0.05

cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 64
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2
Example #26
0
                                                    image_root="./data/20210204_Digi_generated/images/")
DatasetCatalog.register("val", lambda: load_coco_json("./data/20210204_Digi_val.json", "./data/20210204_Digi_generated/valid_images/", "val"))
MetadataCatalog.get("val").set(thing_classes=["trash"],
                                                json_file="./data/20210204_Digi_val.json",
                                                image_root="./data/20210204_Digi_generated/valid_images/")





# # training
from detectron2.engine import DefaultTrainer

cfg = get_cfg()
# cfg.merge_from_file(model_zoo.get_config_file("./detectron2/model_zoo/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"))
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("train",)
cfg.DATASETS.TEST = ("val",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml")  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 4
cfg.SOLVER.BASE_LR = 0.005  # pick a good LR
cfg.SOLVER.MAX_ITER = 6000    # the detectron2 tutorial used 300 iterations for its toy dataset; train longer for a practical dataset
cfg.SOLVER.STEPS = (2000,4000)
cfg.SOLVER.MOMENTUM = 0.9
cfg.SOLVER.WEIGHT_DECAY = 0.0001

cfg.TEST.EVAL_PERIOD = 500
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128   # faster, and good enough for this toy dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # this dataset only has one class (trash) (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)
# NOTE: this config means the number of classes, but a few popular unofficial tutorials incorrectly use num_classes+1 here.
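
# Hedged addition (not from the original script): TEST.EVAL_PERIOD above only triggers
# periodic evaluation if the trainer can build an evaluator, which plain DefaultTrainer
# cannot; a small subclass (the same pattern as Example #27 below) is typically used:
import os
from detectron2.evaluation import COCOEvaluator

class TrainerWithEval(DefaultTrainer):
    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "eval")
            os.makedirs(output_folder, exist_ok=True)
        return COCOEvaluator(dataset_name, cfg, False, output_folder)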
Example #27
0
  MetadataCatalog.get("faces_" + d).set(thing_classes=classes)

statement_metadata = MetadataCatalog.get("faces_train")

class CocoTrainer(DefaultTrainer):
  @classmethod
  def build_evaluator(cls, cfg, dataset_name, output_folder=None):
    if output_folder is None:
        os.makedirs("coco_eval", exist_ok=True)
        output_folder = "coco_eval"
    return COCOEvaluator(dataset_name, cfg, False, output_folder)

cfg = get_cfg()
cfg.merge_from_file(
  model_zoo.get_config_file(
    "COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml"
  )
)
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
  "COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml"
)
cfg.DATASETS.TRAIN = ("faces_train",)
cfg.DATASETS.TEST = ("faces_val",)
cfg.DATALOADER.NUM_WORKERS = 4
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.001
cfg.SOLVER.WARMUP_ITERS = 1000
cfg.SOLVER.MAX_ITER = 1500
cfg.SOLVER.STEPS = (1000, 1500)
cfg.SOLVER.GAMMA = 0.05
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 64
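
# Hedged continuation (assumption, not from the original notebook): the CocoTrainer
# defined above would typically be launched like this once the config is complete.
import os
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = CocoTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()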
Example #28
0
def main():
    parser = argparse.ArgumentParser("person falldown trainer")
    parser.add_argument("--datapath", type=str, default="./data")
    parser.add_argument("--res_dir", type=str, default="./res-2")
    parser.add_argument("--cfg_dir", type=str, default="COCO-Detection/")
    parser.add_argument("--cfg",
                        type=str,
                        default="faster_rcnn_R_50_FPN_3x.yaml")
    parser.add_argument("--model_url", type=str, default="./output")
    parser.add_argument("--tta", action='store_true', default=False)
    parser.add_argument("--min_size", type=int, default=800)
    parser.add_argument("--save_bbox", action="store_true", default=False)
    parser.add_argument("--bbox_dir", type=str, default="./bbox_out/")
    parser.add_argument("--model2_url", type=str, default=None)
    parser.add_argument("--cfg2_dir", type=str, default="COCO-Detection/")
    parser.add_argument("--cfg2",
                        type=str,
                        default="faster_rcnn_X_101_32x8d_FPN_3x.yaml")
    args = parser.parse_args()
    if args.save_bbox:
        os.makedirs(args.bbox_dir, exist_ok=True)
    if args.cfg2 is not None and args.cfg2_dir is not None and args.model2_url is not None:
        has_model2 = True
    else:
        has_model2 = False
    DatasetCatalog.register("pfallcnt_" + args.datapath,
                            lambda d=args.datapath: get_person_dict(d))
    MetadataCatalog.get("pfallcnt_" + args.datapath).set(
        thing_classes=["0", "1"], thing_colors=[(0, 255, 0), (255, 0, 0)])
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(args.cfg_dir + args.cfg))
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.MODEL.MASK_ON = False
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2  # only has two classes
    cfg.MODEL.WEIGHTS = args.model_url
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.895  # set the testing threshold for this model
    cfg.DATASETS.TEST = ("pfallcnt_" + args.datapath,)
    cfg.INPUT.MIN_SIZE_TEST = args.min_size
    cfg.TEST.AUG.ENABLED = args.tta

    if args.tta:
        #cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.993
        predictor = PredictorWithTTA(cfg)
    else:
        predictor = DefaultPredictor(cfg)

    data_dicts = get_person_dict(args.datapath)
    person_metadata = MetadataCatalog.get("pfallcnt_" + args.datapath)
    os.makedirs(os.path.join(args.res_dir, "vis"), exist_ok=True)
    name_list = []
    fallcnts = []
    for d in data_dicts:
        im = cv2.imread(d["file_name"])
        height, width = im.shape[:2]
        outputs = predictor(im)
        field_dict = outputs["instances"].to("cpu").get_fields()
        pred_cls = field_dict["pred_classes"].numpy()
        name_list.append(d["image_id"])
        fallcnts.append(pred_cls.sum())
        print(name_list[-1], fallcnts[-1])
        if args.save_bbox:
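            # Hedged note: this dict appears to follow the labelme JSON format, so the
            # saved files can presumably be opened in labelme for manual correction.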
            anno_dict = {
                "version": "4.2.7",
                "flag": {},
                "shapes": [],
                "imagePath": "../A/" + d["image_id"],
                "imageData": None,
                "imageHeight": height,
                "imageWidth": width
            }
            bboxes = field_dict["pred_boxes"].tensor.numpy().astype(np.float64)
            for i in range(bboxes.shape[0]):
                box_dict = {
                    "label": str(int(pred_cls[i])),
                    "gound_id": None,
                    "shape_type": "rectangle",
                    "flags": {}
                }
                box_dict["points"] = [[bboxes[i][0], bboxes[i][1]],
                                      [bboxes[i][2], bboxes[i][3]]]
                anno_dict["shapes"].append(box_dict)
            with open(os.path.join(args.bbox_dir, d["image_id"][:-3] + "json"),
                      'w') as f:
                json.dump(anno_dict, f, indent=1)
        v = Visualizer(im[:, :, ::-1],
                       metadata=person_metadata,
                       scale=1.0,
                       instance_mode=ColorMode.SEGMENTATION)
        v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
        save_file = os.path.join(os.path.join(args.res_dir, "vis"),
                                 d["image_id"])
        cv2.imwrite(save_file, v.get_image()[:, :, ::-1])

    del predictor

    if has_model2:
        print("has model2, building model2 config")
        cfg2 = get_cfg()
        cfg2.merge_from_file(
            model_zoo.get_config_file(args.cfg2_dir + args.cfg2))
        cfg2.DATALOADER.NUM_WORKERS = 2
        cfg2.SOLVER.IMS_PER_BATCH = 2
        cfg2.MODEL.MASK_ON = False
        cfg2.MODEL.ROI_HEADS.NUM_CLASSES = 2  # only has two classes
        cfg2.MODEL.WEIGHTS = args.model2_url
        cfg2.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.92  # set the testing threshold for this model
        cfg2.DATASETS.TEST = ("pfallcnt_" + args.datapath,)
        cfg2.INPUT.MIN_SIZE_TEST = args.min_size
        cfg2.TEST.AUG.ENABLED = args.tta

        if args.tta:
            predictor2 = PredictorWithTTA(cfg2)
        else:
            predictor2 = DefaultPredictor(cfg2)
        idx = -1
        modifies = []
        for d in data_dicts:
            idx += 1
            im = cv2.imread(d["file_name"])
            outputs2 = predictor2(im)
            field_dict2 = outputs2["instances"].to("cpu").get_fields()
            pred_cls2 = field_dict2["pred_classes"].numpy()
            fallcnt2 = pred_cls2.sum()
            print("model 2: ", d["file_name"], fallcnt2)
            if fallcnt2 > 5 and fallcnts[idx] < fallcnt2 and name_list[
                    idx] == d["image_id"]:
                fallcnts[idx] = fallcnt2
                modifies.append(name_list[idx])
            elif name_list[idx] != d["image_id"]:
                print("file name is not the same")

    csv_dict = {"file": name_list, "fall_count": fallcnts}
    csv_df = pd.DataFrame(csv_dict)
    csv_df.to_csv(os.path.join(args.res_dir, "fallcnt_submit.csv"),
                  sep=",",
                  index=False)
    print(len(name_list), len(fallcnts))
    if has_model2:
        print(modifies)
        print("modify %d results" % len(modifies))
Example #29
0
import detectron2
from detectron2.utils.logger import setup_logger
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
import torch
import torchvision
from PIL import Image
from PIL import ImageFile

ImageFile.LOAD_TRUNCATED_IMAGES = True

#Setup Detectron2
setup_logger()

cfg = get_cfg()
cfg.merge_from_file(
    model_zoo.get_config_file(
        "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
if not torch.cuda.is_available():
    cfg.MODEL.DEVICE = "cpu"

predictor = DefaultPredictor(cfg)

#VGG-16
vgg16 = torchvision.models.vgg16(pretrained=True)
vgg16.eval()

normalize = torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
preprocess = torchvision.transforms.Compose([
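
# Hedged sketch (an assumption, not the original code): a typical VGG-16 preprocessing
# pipeline using the `normalize` transform defined above would look like this:
preprocess_example = torchvision.transforms.Compose([
    torchvision.transforms.Resize(256),
    torchvision.transforms.CenterCrop(224),
    torchvision.transforms.ToTensor(),
    normalize,
])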
Example #30
0
def retrain_detector(settings):
    """
    settings: properties to be used in the retraining process

    Splits the COCO-formatted data located in annotation_path, then fine-tunes and
    evaluates a Detectron2 model initialized from the model zoo. The resulting model
    is saved in the model_path/ folder.

    Returns an object mapping different AP (average precision) metrics to the 
    model's scores. 
    """

    if len(settings) == 0:
        settings["trainSplit"] = 0.7
        settings["learningRate"] = 0.005
        settings["maxIters"] = 100

    base_path = "annotation_data/"
    coco_path = os.path.join(base_path, "coco")
    output_path = os.path.join(base_path, "output")
    annotation_path = os.path.join(coco_path, "coco_results.json")
    train_path = os.path.join(coco_path, "train.json")
    test_path = os.path.join(coco_path, "test.json")

    # 1) Split coco json file into train and test using cocosplit code
    # Adapted from https://github.com/akarazniewicz/cocosplit/blob/master/cocosplit.py
    with open(annotation_path, "rt", encoding="UTF-8") as annotations_file:

        # Extract info from json
        coco = json.load(annotations_file)
        info = coco["info"]
        licenses = coco["licenses"]
        images = coco["images"]
        annotations = coco["annotations"]
        categories = coco["categories"]

        # Remove images without annotations
        images_with_annotations = set(
            map(lambda a: int(a["image_id"]), annotations))
        images = list(
            filter(lambda i: i["id"] in images_with_annotations, images))

        # Split images and annotations
        x_images, y_images = train_test_split(
            images, train_size=settings["trainSplit"])
        x_ids = list(map(lambda i: int(i["id"]), x_images))
        x_annots = list(
            filter(lambda a: int(a["image_id"]) in x_ids, annotations))
        y_ids = list(map(lambda i: int(i["id"]), y_images))
        y_annots = list(
            filter(lambda a: int(a["image_id"]) in y_ids, annotations))

        # Save to file
        def save_coco(file, info, licenses, images, annotations, categories):
            with open(file, 'wt', encoding="UTF-8") as coco:
                json.dump(
                    {
                        "info": info,
                        "licenses": licenses,
                        "images": images,
                        "annotations": annotations,
                        "categories": categories
                    },
                    coco,
                    indent=2,
                    sort_keys=True)

        save_coco(train_path, info, licenses, x_images, x_annots, categories)
        save_coco(test_path, info, licenses, y_images, y_annots, categories)

    # 2) Use train/test files to retrain detector
    dataset_name = "annotation_coco"
    image_dir = base_path + "rgb/"
    train_data = dataset_name + "_train"
    test_data = dataset_name + "_test"

    DatasetCatalog.clear()
    MetadataCatalog.clear()
    register_coco_instances(train_data, {}, train_path, image_dir)
    register_coco_instances(test_data, {}, test_path, image_dir)

    MetadataCatalog.get(train_data)
    coco_yaml = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(coco_yaml))
    cfg.DATASETS.TRAIN = (train_data, )
    cfg.DATASETS.TEST = ()
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(categories)
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
        coco_yaml)  # Let training initialize from model zoo
    cfg.OUTPUT_DIR = output_path
    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.SOLVER.BASE_LR = settings["learningRate"]  # Make sure LR is good
    cfg.SOLVER.MAX_ITER = settings[
        "maxIters"]  # 300 is good for small datasets

    # Train
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    trainer = DefaultTrainer(cfg)
    trainer.resume_or_load(resume=False)
    trainer.train()

    # Move model to most recent model folder
    model_dir = os.path.join(base_path, "model")
    model_names = os.listdir(model_dir)
    # Get highest x for model/vx
    model_dirs = list(
        filter(lambda n: os.path.isdir(os.path.join(model_dir, n)),
               model_names))
    model_nums = list(map(lambda x: int(x.split("v")[1]), model_dirs))
    last_model_num = max(model_nums)
    # Add model to new folder
    model_path = os.path.join(model_dir, "v" + str(last_model_num))
    new_model_path = os.path.join(model_path, "model_999.pth")
    old_model_path = os.path.join(output_path, "model_final.pth")
    os.replace(old_model_path, new_model_path)

    # Evaluate
    evaluator = COCOEvaluator(test_data, ("bbox", "segm"),
                              False,
                              output_dir="../../annotation_data/output/")
    val_loader = build_detection_test_loader(cfg, test_data)
    inference = inference_on_dataset(trainer.model, val_loader, evaluator)

    # inference keys: bbox, segm
    # bbox and segm keys: AP, AP50, AP75, APs, APm, APl, AP-category1, ...
    inference_json = json.loads(json.dumps(inference).replace("NaN", "null"))
    return inference_json
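
# Hedged usage sketch (assumption, not from the original file): calling the function
# with explicit settings and reading a couple of the returned COCO metrics.
metrics = retrain_detector({"trainSplit": 0.8, "learningRate": 0.0025, "maxIters": 300})
print(metrics["bbox"]["AP"], metrics["segm"]["AP"])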