Example #1
images = args.images
batch_size = int(args.bs)
confidence = float(args.confidence)
nms_thesh = float(args.nms_thresh)
start = 0
CUDA = torch.cuda.is_available()

num_classes = 4
classes = load_classes("data/scattered_coins/train/classes.txt")

# set up the neural network
print("Loading network...")
cfgfile = os.path.abspath(
    "cfg/yolov3_mod.cfg"
)  # "/home/jovyan/work/YOLO_v3_tutorial_from_scratch/cfg/yolov3_mod.cfg"
model = Darknet(cfgfile)
print("Network successfully loaded")

# swap out the layers before YOLO and the classes in the YOLO layers
det_layers = [82, 94, 106]
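# each detection head needs 3 anchors * (5 box/objectness attrs + 4 classes) = 27 output channels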
for i in det_layers:
    in_channels = model.module_list[i - 1][0].in_channels
    model.module_list[i - 1] = nn.Sequential(nn.Conv2d(in_channels, 27, 1))
    model.blocks[i + 1]["classes"] = 4
print("Layers have been swapped out")

# load state_dict
checkpoint = torch.load("checkpoint.pkl", map_location=torch.device("cpu"))
model.load_state_dict(checkpoint["model_state_dict"])
prev_epoch = checkpoint["epoch"] + 1
loss = checkpoint["loss"]
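
# Hypothetical continuation (not in the original snippet): restore the optimizer
# state as well, assuming the checkpoint also saved "optimizer_state_dict";
# the learning rate here is an illustrative placeholder.
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
if "optimizer_state_dict" in checkpoint:
    optimizer.load_state_dict(checkpoint["optimizer_state_dict"])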
Example #2
    nms_thresh = 0.4
    iou_thresh = 0.5
    im_width = 640
    im_height = 480
    if Data_type == SCANNET:
        im_width = 1296
        im_height = 968

    # Specify which gpus to use
    torch.manual_seed(seed)
    if use_cuda:
        os.environ['CUDA_VISIBLE_DEVICES'] = gpus
        torch.cuda.manual_seed(seed)

    # Specify the model and the loss
    model = Darknet(cfgfile)
    region_loss = model.loss

    # Model settings
    # model.load_weights(weightfile)
    model.load_weights_until_last(weightfile)
    model.print_network()
    model.seen = 0
    region_loss.iter = model.iter
    region_loss.seen = model.seen
    processed_batches = model.seen // batch_size
    init_width = model.width
    init_height = model.height
    test_width = 672
    test_height = 672
    init_epoch = model.seen // nsamples
Example #3
def main(camera_id, shelf_id):
    rospy.init_node('MultiProcessingNode', anonymous=True)
    ip = '192.168.0.' + str(camera_id)
    name = 'admin'
    pw = 'a1234567'
    camera = HKCamera(ip, name, pw)

    threadPubMsg_shelfID_1 = pubmsg.MsgPublishClass(cameraID=camera_id,
                                                    shelfID=shelf_id[0])
    threadPubMsg_shelfID_1.setDaemon(True)
    threadPubMsg_shelfID_1.start()

    shelf1 = 'shelfID_' + str(shelf_id[0])
    threadPubMsg_dict = {shelf1: threadPubMsg_shelfID_1}

    model = loadDataset()

    cfg = Darknet('cfg/yolov3.cfg')
    cfg.load_weights('yolov3.weights')
    cfg.cuda()
    # global frame_number
    frame_number2 = [0]
    flag = [0]
    bridge = CvBridge()

    dic_change = {}
    pre_res = {}
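    # pinyin glossary for the identifiers below: 'huojia' = shelf, 'shangpin' = product, 'xuanze' = select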
    huojia1_id = shelf_id[0]
    print("huojia1_id: {}".format(huojia1_id))
    tmp = 0
    while not rospy.is_shutdown():
        frame_origin = camera.getFrame()

        frame_origin = np.array(frame_origin)
        frame_origin = cv2.resize(frame_origin,
                                  None,
                                  fx=0.75,
                                  fy=0.75,
                                  interpolation=cv2.INTER_AREA)
        frame_trans = copy.deepcopy(frame_origin)

        # draw the product (shangpin) area
        # left_x, top_y, right_m, bottom_n = shangpin_area(huojia1_id)
        # cv2.rectangle(frame_origin, (left_x, top_y), (right_m, bottom_n), (0, 255, 0), 2)

        res, camera_id, dict_res = callback(
            (None, cfg, model, frame_number2, bridge, camera_id, flag,
             frame_origin, huojia1_id, pre_res))

        if res == []:
            if tmp > 30:
                threadPubMsg = threadPubMsg_dict['shelfID_' + str(huojia1_id)]
                threadPubMsg.set_commodity_recognition_trigger_with_image(
                    camera_id=camera_id,
                    person_id=-1,
                    shelf_id=huojia1_id,
                    flag=0,
                    flag1=0,
                    flag2=0,
                    flag_list=[],
                    frame=None)

                tmp = 0

            else:
                tmp += 1
            continue
        else:
            tmp = 0

        dic = xuanze_original(res, frame_origin, model, cfg, camera_id,
                              dic_change, huojia1_id)

        if compare_dic(dic, dic_change):
            dic = xuanze(res, frame_origin, model, cfg, threadPubMsg_dict,
                         camera_id, dic, dic_change, huojia1_id, frame_trans)

        #print("**********************")
        #print("dic_change_shelf_{}: {}".format(shelf_id[0], dic))
        #print("")
        dic_change = dic
        pre_res = dict_res

    HKIPcamera.release()
Example #4
if __name__ == '__main__':
    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0

    CUDA = torch.cuda.is_available()

    device = torch.device("cuda:0" if CUDA else "cpu")

    num_classes = 80

    bbox_attrs = 5 + num_classes

    print("Loading network.....")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
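    # YOLO downsamples by a factor of 32, so the input resolution must be a
    # multiple of 32 and larger than a single grid cell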
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    model = model.to(device)

    model.eval()

    videofile = args.video

    cap = cv2.VideoCapture(videofile)
Example #5
D = computeDistortionCoefficients(K)
print(D)

# ---------------------Extrinsic Calibration-------------------------------------------------------------------------- #
extrinsics = args.extrinsics
# Model Initialization

if extrinsics:
    confidence = float(args.confidence)
    nms_thresh = float(args.nms_thresh)
    CUDA = torch.cuda.is_available()
    num_classes = 80
    colors = pkl.load(open("pallete", "rb"))
    print("Loading network.....")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")
    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32
    if CUDA:
        model.cuda()

    mapx, mapy = undistortMap(frame, min_factor)
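    # note: on OpenCV >= 4.4, SIFT lives in the main module as cv2.SIFT_create()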
    sift = cv2.xfeatures2d.SIFT_create()
    count = 0
    kp1 = []
    lines = []
    cap = cv2.VideoCapture("sample_video/" + video_file_name)
Example #6
import sys
import time
import os
from PIL import Image, ImageDraw
from utils import *
from darknet import Darknet

if __name__ == '__main__':
    if len(sys.argv) == 4:
        cfgfile = sys.argv[1]
        weightfile = sys.argv[2]
        imgdir = sys.argv[3]
    else:
        # without this branch cfgfile/weightfile/imgdir would be undefined below
        print('Usage: %s cfgfile weightfile imgdir' % sys.argv[0])
        sys.exit(1)

    use_cuda = True
    darknet_model = Darknet(cfgfile)
    darknet_model.load_weights(weightfile)
    if use_cuda:
        darknet_model = darknet_model.cuda()

    # read in the label names associated with the darknet model
    if darknet_model.num_classes == 20:
        namesfile = 'data/voc.names'
    elif darknet_model.num_classes == 80:
        namesfile = 'data/coco.names'
    else:
        namesfile = 'data/names'
    for imgfile in os.listdir(imgdir):
        if imgfile.endswith('.jpg') or imgfile.endswith('.png'):
            name = os.path.splitext(imgfile)[0]  #image name w/o extension
            txtname = name + '.txt'
            txtpath = os.path.abspath(
Example #7
use_cuda = True

datacfg = {'hands':   'cfg/hands.data'}

cfgfile = {'hands':   'cfg/yolo-hands.cfg',
           'cautery': 'cfg/my_config_realsense.yaml'}

weightfile = {'hands':   'backup/hands/000500.weights'}

namesfile = {'hands': 'data/hands.names'}


#######################################################
# Setting up YOLO-hand
#######################################################
model_hand = Darknet(cfgfile['hands'])
model_hand.load_weights(weightfile['hands'])
print('Loading weights from %s... Done!' % (weightfile['hands']))

if use_cuda:
    model_hand.cuda()

class_names = uyolo.load_class_names(namesfile['hands'])

#######################################################
# Setting up DOPE
#######################################################
yaml_path = cfgfile['cautery']
with open(yaml_path, 'r') as stream:
    try:
        print("Loading DOPE parameters from '{}'...".format(yaml_path))
Example #8
# Test parameters
conf_thresh = 0.25
nms_thresh = 0.4
iou_thresh = 0.5

if not os.path.exists(backupdir):
    os.mkdir(backupdir)

###############
torch.manual_seed(seed)
if use_cuda:
    os.environ['CUDA_VISIBLE_DEVICES'] = gpus
    torch.cuda.manual_seed(seed)

model = Darknet(cfgfile)
region_loss = model.loss

model.load_weights(weightfile)
model.print_network()

region_loss.seen = model.seen
processed_batches = model.seen // batch_size

init_width = model.width
init_height = model.height
init_epoch = model.seen // nsamples

kwargs = {'num_workers': num_workers, 'pin_memory': True} if use_cuda else {}
test_loader = torch.utils.data.DataLoader(dataset.listDataset(
    testlist,
Example #9
def main():
    global loss_layers
    global test_loader
    global model
    data_options = read_data_file(FLAGS.data)
    net_options = parse_cfg(FLAGS.config)[0]

    train_dir = data_options['train']
    test_dir = data_options['valid']
    names = data_options['names']

    batch_size = int(net_options['batch'])
    learning_rate = float(net_options['learning_rate'])
    hue = float(net_options['hue'])
    exposure = float(net_options['exposure'])
    saturation = float(net_options['saturation'])
    momentum = float(net_options['momentum'])

    epochs = 100

    model = Darknet(FLAGS.config)
    torch.manual_seed(0)
    if use_cuda:
        os.environ['CUDA_VISIBLE_DEVICES'] = data_options['gpus']
        torch.cuda.manual_seed(0)

    model = model.to(device)
    model.load_weights(weightfile="data/yolov3.weights")
    loss_layers = model.loss_layers
    optimizer = optim.SGD(model.parameters(),
                          lr=learning_rate,
                          momentum=momentum)

    train_data = dataset.YoloDataset(train_dir, (model.width, model.height),
                                     transform=transforms.ToTensor(),
                                     train=True)
    test_data = dataset.YoloDataset(test_dir, (model.width, model.height),
                                    transform=transforms.ToTensor(),
                                    train=False)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)
    for epoch in range(epochs):
        for idx, (images, labels) in enumerate(train_loader):
            # print(idx, images.shape, labels.shape)
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            output = model(images)
            org_loss = []
            for i, l in enumerate(loss_layers):
                l.seen += labels.data.size(0)
                ol = l(output[i]['output'], labels)
                org_loss.append(ol)
            sum(org_loss).backward()
            optimizer.step()
            # if (idx + 1) % 250 == 0:
            #     model.save_weights('models/batch_{}.weights'.format(idx))
            #     print('Model saved.')
            #     # test(idx)

        model.save_weights('models_scratch/epoch_{}.weights'.format(epoch + 1))
        print('Epoch_{:d} model saved.'.format(epoch + 1))
Example #10
class Detector(torch.nn.Module):

    def __init__(self, save_net):
        super(Detector, self).__init__()
        self.net = Darknet(80)

        # self.net.load_state_dict(torch.load("model/yolov3.pth"))
        self.net.load_weights(save_net)

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.net.to(self.device)
        self.net.eval()  # switch to evaluation mode

    def forward(self, input, thresh, anchors):  # inputs: image batch, confidence threshold, anchor boxes
        input_ = input.to(self.device)

        output_13, output_26, output_52 = self.net(input_)  # the three feature-map outputs (13x13, 26x26, 52x52 grids)

        # output_13 = output_13.cpu()
        # output_26 = output_26.cpu()
        # output_52 = output_52.cpu()

        idxs_13, vecs_13 = self._filter(output_13, thresh)  # indices and raw vectors above the confidence threshold
        boxes_13 = self._parse(idxs_13, vecs_13, 32, anchors[13])

        idxs_26, vecs_26 = self._filter(output_26, thresh)
        boxes_26 = self._parse(idxs_26, vecs_26, 16, anchors[26])

        idxs_52, vecs_52 = self._filter(output_52, thresh)
        boxes_52 = self._parse(idxs_52, vecs_52, 8, anchors[52])

        box = torch.cat([boxes_13, boxes_26, boxes_52], dim=0)
        box = nms(box.cpu())

        return box

    def _filter(self, output, thresh):

        output = output.permute(0, 2, 3, 1)
        output = output.reshape(output.size(0), output.size(1), output.size(2), 3, -1)

        mask = torch.sigmoid(output[..., 4]) > thresh  # mask of cells whose confidence exceeds the threshold

        idxs = mask.nonzero()  # grid indices selected by the mask
        vecs = output[mask]    # raw output vectors selected by the mask

        return idxs, vecs

    def _parse(self, idxs, vecs, t, anchors):
        anchors = torch.Tensor(anchors).to(self.device)  # anchor priors as a tensor on the right device

        n = idxs[:, 0]  # index of the source image in the batch
        a = idxs[:, 3]  # anchor index; output layout is [N, 13, 13, 3, 15]

        cy = (idxs[:, 1].float() + torch.sigmoid(vecs[:, 1])) * t  # (cell y + sigmoid offset) * stride = center y in the original image
        cx = (idxs[:, 2].float() + torch.sigmoid(vecs[:, 0])) * t  # (cell x + sigmoid offset) * stride = center x in the original image
        w = anchors[a, 0] * torch.exp(vecs[:, 2])  # box width from the anchor prior
        h = anchors[a, 1] * torch.exp(vecs[:, 3])  # box height from the anchor prior

        cls = torch.sigmoid(vecs[:, 4])  # objectness confidence


        if len(vecs[:, 5:85]) > 0:
            _, pred = torch.max(vecs[:, 5:85], dim=1)  # predicted class index
            box = torch.stack([n.float(), cx, cy, w, h, pred.float(), cls], dim=1)
        else:
            # no class scores: keep the [n, cx, cy, w, h, class, conf] layout with a
            # zero placeholder (the original stacked h twice here, a copy-paste bug)
            box = torch.stack([n.float(), cx, cy, w, h, torch.zeros_like(h), cls], dim=1)
        return box
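
# Hypothetical usage (the weights path, threshold, and anchor dict are
# illustrative assumptions, not part of the original snippet):
#   detector = Detector("yolov3.weights")
#   boxes = detector(img_tensor, thresh=0.5, anchors={13: [...], 26: [...], 52: [...]})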
Example #11
reso = 64  # it should be a multiple of 32 and greater than 32
confidence = 0.1
nms_thesh = 0.4
batch_size = 6

weightsfile = '/Users/reo911gt3/Desktop/mspenny/modules/yolo/yolov3.weights'
cfgfile = '/Users/reo911gt3/Desktop/mspenny/modules/yolo/cfg/yolov3.cfg'

# yolov3 load
num_classes = 80
classes = load_classes(
    '/Users/reo911gt3/Desktop/mspenny/modules/yolo/data/coco.names')

#Set up the neural network
print("Loading network.....")
model = Darknet(cfgfile)
model.load_weights(weightsfile)
print("Network successfully loaded")

model.net_info["height"] = reso
inp_dim = int(model.net_info["height"])
assert inp_dim % 32 == 0
assert inp_dim > 32

#If there's a GPU available, put the model on GPU
CUDA = torch.cuda.is_available()
if CUDA:
    model.cuda()

#Set the model in evaluation mode
model.eval()
Example #12
def main():
    datacfg    = FLAGS.data
    cfgfile    = FLAGS.config
    weightfile = FLAGS.weights
    no_eval    = FLAGS.no_eval

    data_options  = read_data_cfg(datacfg)
    net_options   = parse_cfg(cfgfile)[0]

    global use_cuda
    use_cuda = torch.cuda.is_available() and (True if use_cuda is None else use_cuda)

    globals()["trainlist"]     = data_options['train']
    globals()["testlist"]      = data_options['valid']
    globals()["backupdir"]     = data_options['backup']
    globals()["gpus"]          = data_options['gpus']  # e.g. 0,1,2,3
    globals()["ngpus"]         = len(gpus.split(','))
    globals()["num_workers"]   = int(data_options['num_workers'])

    globals()["batch_size"]    = int(net_options['batch'])
    globals()["max_batches"]   = 10*int(net_options['max_batches'])
    globals()["learning_rate"] = float(net_options['learning_rate'])
    globals()["momentum"]      = float(net_options['momentum'])
    globals()["decay"]         = float(net_options['decay'])
    globals()["steps"]         = [float(step) for step in net_options['steps'].split(',')]
    globals()["scales"]        = [float(scale) for scale in net_options['scales'].split(',')]

    #Train parameters
    global max_epochs
    try:
        max_epochs = int(net_options['max_epochs'])
    except KeyError:
        nsamples = file_lines(trainlist)
        max_epochs = (max_batches*batch_size)//nsamples+1

    seed = int(time.time())
    torch.manual_seed(seed)
    if use_cuda:
        os.environ['CUDA_VISIBLE_DEVICES'] = gpus
        torch.cuda.manual_seed(seed)
    global device
    device = torch.device("cuda" if use_cuda else "cpu")

    global model
    model = Darknet(cfgfile, use_cuda=use_cuda)
    model.load_weights(weightfile)
    #model.print_network()

    nsamples = file_lines(trainlist)
    #initialize the model
    if FLAGS.reset:
        model.seen = 0
        init_epoch = 0
    else:
        init_epoch = model.seen//nsamples

    global loss_layers
    loss_layers = model.loss_layers
    for l in loss_layers:
        l.seen = model.seen

    globals()["test_loader"] = load_testlist(testlist)
    if use_cuda:
        if ngpus > 1:
            model = torch.nn.DataParallel(model).to(device)
        else:
            model = model.to(device)

    params_dict = dict(model.named_parameters())
    params = []
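    # exempt biases and batch-norm parameters from weight decay; scale decay for the rest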
    for key, value in params_dict.items():
        if key.find('.bn') >= 0 or key.find('.bias') >= 0:
            params += [{'params': [value], 'weight_decay': 0.0}]
        else:
            params += [{'params': [value], 'weight_decay': decay*batch_size}]
    global optimizer
    # pass the per-parameter groups built above instead of model.parameters(),
    # otherwise the params loop is dead code
    optimizer = optim.SGD(params,
                        lr=learning_rate/batch_size, momentum=momentum,
                        dampening=0, weight_decay=decay*batch_size)

    if evaluate:
        logging('evaluating ...')
        test(0)
    else:
        try:
            print("Training for ({:d},{:d})".format(init_epoch, max_epochs))
            fscore = 0
            mfscore = 0.5  # set unconditionally: the test() call below is commented out
            if not no_eval and init_epoch > test_interval:
                print('>> initial evaluating ...')
                # mfscore = test(init_epoch)
                print('>> done evaluation.')
            for epoch in range(init_epoch+1, max_epochs):
                nsamples = train(epoch)
                if not no_eval and epoch > test_interval and (epoch%test_interval) == 0:
                    print('>> intermittent evaluating ...')
                    # fscore = test(epoch)
                    print('>> done evaluation.')
                if epoch % save_interval == 0:
                    savemodel(epoch, nsamples)
                    pass
                if FLAGS.localmax and fscore > mfscore:
                    mfscore = fscore
                    savemodel(epoch, nsamples, True)
                print('-'*90)
        except KeyboardInterrupt:
            print('='*80)
            print('Exiting from training by interrupt')
Example #13
args = arg_parse()
images = args.images
batch_size = int(args.bs)
confidence = float(args.confidence)
nms_thesh = float(args.nms_thresh)
start = 0
CUDA = torch.cuda.is_available()

num_classes = 80
classes = load_classes(
    "/home/ripo/project/python/workspace/cv/yolo/my/data/coco.names")

#Set up the neural network
print("Loading network.....")
model = Darknet(args.cfgfile)
model.load_weights(args.weightsfile)
print("Network successfully loaded")

model.net_info["height"] = args.reso
inp_dim = int(model.net_info["height"])
# assertions: the input size must be a multiple of 32 and greater than 32
assert inp_dim % 32 == 0
assert inp_dim > 32

#If there's a GPU available, put the model on GPU
if CUDA:
    model.cuda()

#Set the model in evaluation mode
"""
Example #14
def valid(datacfg, cfgfile, weightfile, save_path, use_cuda = False, size = 416):
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    name_list = options['names']
    if not os.path.exists(save_path):
        os.mkdir(save_path)
    prefix = save_path
    names = load_class_names(name_list)

    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]
    
    m = Darknet(cfgfile)

    m.load_weights(weightfile)
    num_classes = len(names)

    if use_cuda:
        m.cuda()
    m.eval()

    valid_dataset = MyDataset(valid_images, shape=(size, size),
                       is_train = False,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                       ]))
    valid_batchsize = 10
    assert valid_batchsize > 1
    
    if use_cuda:
        kwargs = {'num_workers': 4, 'pin_memory': True}
    else:
        kwargs = {}
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=valid_batchsize, shuffle=False, **kwargs) 

    fps = [0]*num_classes
    if not os.path.exists('results'):
        os.mkdir('results')
    for i in range(num_classes):
        buf = '%s/%s.txt' % (prefix, names[i])
        fps[i] = open(buf, 'w')
   
    lineId = -1
    
    conf_thresh = 0.01
    nms_thresh = 0.5
    for batch_id, (data, target) in enumerate(valid_loader):
        if use_cuda:
            data = data.cuda()
        print('start processing batch{}'.format(batch_id))
        start1 = time.time()
        output = m(data)
        batch_boxes = get_all_boxes(output, conf_thresh, num_classes, only_objectness=0, validation=True, use_cuda = use_cuda)
        for i in range(data.size(0)):
            lineId = lineId + 1
            fileId = os.path.basename(valid_files[lineId]).split('.')[0]
            width, height = get_image_size(valid_files[lineId])
            boxes = batch_boxes[i]
            if boxes.numel() == 0:
                continue
            for cls_id in range(num_classes):
                cls_ind = (boxes[:, 6] == cls_id)
                cls_boxes = nms(boxes[cls_ind],nms_thresh)
                if cls_boxes.numel() == 0:  # numel is a method; without the call this test was always False
                    continue
                for box in cls_boxes:
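                    # convert center-format (cx, cy, w, h) fractions to corner pixel coordinates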
                    x1 = (box[0] - box[2]/2.0) * width
                    y1 = (box[1] - box[3]/2.0) * height
                    x2 = (box[0] + box[2]/2.0) * width
                    y2 = (box[1] + box[3]/2.0) * height 
                    fps[cls_id].write('%s %f %f %f %f %f\n' %(fileId, box[4] * box[5], x1, y1, x2, y2))
        end1 = time.time()
        print('average time {}s'.format((end1 - start1) / len(data)))
        del data,target
    for i in range(num_classes):
        fps[i].close()
Example #15
class Car_DC():
    def __init__(self,
                 src_dir,
                 dst_dir,
                 car_cfg_path=local_car_cfg_path,
                 car_det_weights_path=local_car_det_weights_path,
                 inp_dim=768,
                 prob_th=0.2,
                 nms_th=0.4,
                 num_classes=1):
        """
        model initialization
        """
        # super parameters
        self.inp_dim = inp_dim
        self.prob_th = prob_th
        self.nms_th = nms_th
        self.num_classes = num_classes
        self.dst_dir = dst_dir

        # clear dst_dir
        if os.path.exists(self.dst_dir):
            for x in os.listdir(self.dst_dir):
                if x.endswith('.jpg'):
                    os.remove(self.dst_dir + '/' + x)
        else:
            os.makedirs(self.dst_dir)

        # initialize vehicle detection model
        self.detector = Darknet(car_cfg_path)
        self.detector.load_weights(car_det_weights_path)
        # set input dimension of image
        self.detector.net_info['height'] = self.inp_dim
        self.detector.to(device)
        self.detector.eval()  # evaluation mode
        print('=> car detection model initiated.')

        # initiate multilabel classifier
        self.classifier = Car_Classifier(num_cls=19,
                                         model_path=local_model_path)

        # initiate imgs_path
        self.imgs_path = [
            os.path.join(src_dir, x) for x in os.listdir(src_dir)
            if x.endswith('.jpg') or x.endswith('.png')
        ]

        # MODIFIED!
        self.imgs_path = [
            os.path.join(src_dir, x) for x in os.listdir(src_dir)
        ]

    def cls_draw_bbox(self, output, orig_img):
        """
        1. predict vehicle's attributes based on bbox of vehicle
        2. draw bbox to orig_img
        """
        labels = []
        pt_1s = []
        pt_2s = []

        # 1
        for det in output:
            if len(det) == 7:
                continue

            # rectangle points
            pt_1 = tuple(det[1:3].int())  # top-left corner
            pt_2 = tuple(det[3:5].int())  # bottom-right corner
            pt_1s.append(pt_1)
            pt_2s.append(pt_2)

            # turn BGR back to RGB
            ROI = Image.fromarray(orig_img[pt_1[1]:pt_2[1],
                                           pt_1[0]:pt_2[0]][:, :, ::-1])
            # # ROI.show()

            # # call classifier to predict
            car_color, car_direction, car_type = self.classifier.predict(ROI)
            label = str(car_color + ' ' + car_direction + ' ' + car_type)
            labels.append(label)
            print('=> predicted label: ', label)

        # 2
        color = (0, 215, 255)
        for i, det in enumerate(output):
            if len(det) == 7:
                continue

            pt_1 = pt_1s[i]
            pt_2 = pt_2s[i]

            # draw bounding box
            cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=2)

            # get str text size
            txt_size = cv2.getTextSize(labels[i], cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]  # size of this detection's label text
            # pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] + txt_size[1] + 5
            pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] - txt_size[1] - 5

            # # draw text background rect
            cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=-1)  # text

            # draw text
            cv2.putText(
                orig_img,
                labels[i],
                (pt_1[0], pt_1[1]),  # pt_1[1] + txt_size[1] + 4
                cv2.FONT_HERSHEY_PLAIN,
                2,
                [225, 255, 255],
                2)

    def process_predict(self, prediction, prob_th, num_cls, nms_th, inp_dim,
                        orig_img_size):
        """
        processing detections
        """
        scaling_factor = min([inp_dim / float(x)
                              for x in orig_img_size])  # W, H scaling factor

        output = post_process(prediction,
                              prob_th,
                              num_cls,
                              nms=True,
                              nms_conf=nms_th,
                              CUDA=True)  # post-process such as nms

        print('\n', output, '\n')

        if type(output) != int:
            output[:,
                   [1, 3]] -= (inp_dim -
                               scaling_factor * orig_img_size[0]) / 2.0  # x, w
            output[:,
                   [2, 4]] -= (inp_dim -
                               scaling_factor * orig_img_size[1]) / 2.0  # y, h
            output[:, 1:5] /= scaling_factor
            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0,
                                                orig_img_size[0])
                output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0,
                                                orig_img_size[1])

        print('\n', output, '\n')
        return output

    def detect_classify(self):
        """
        detect and classify
        """
        for x in self.imgs_path:
            # read image data
            img = cv2.imread(x)
            img = cv2.copyMakeBorder(img,
                                     BORDER,
                                     BORDER,
                                     BORDER,
                                     BORDER,
                                     cv2.BORDER_CONSTANT,
                                     value=(100, 100, 100))
            img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_RGB2BGR))

            img2det = process_img(img, self.inp_dim)
            img2det = img2det.to(device)  # put image data to device

            # vehicle detection
            prediction = self.detector.forward(img2det, CUDA=True)

            # calculating scaling factor
            orig_img_size = list(img.size)
            output = self.process_predict(prediction, self.prob_th,
                                          self.num_classes, self.nms_th,
                                          self.inp_dim, orig_img_size)

            orig_img = cv2.cvtColor(np.asarray(img),
                                    cv2.COLOR_RGB2BGR)  # RGB => BGR
            if type(output) != int:
                print('\n', x)
                self.cls_draw_bbox(output, orig_img)
                dst_path = self.dst_dir + '/' + os.path.split(x)[1]
                if not os.path.exists(dst_path):
                    cv2.imwrite(dst_path, orig_img)

    # MODIFIED!
    def detect_classify_modified(self):
        """
        detect and classify
        """
        # print(self.imgs_path)
        for tracklet in self.imgs_path:
            tracklet_camera_path = [
                os.path.join(tracklet, x) for x in os.listdir(tracklet)
            ]

            for tracklet_camera in tracklet_camera_path:
                the_imgs_path = [
                    os.path.join(tracklet_camera, x)
                    for x in os.listdir(tracklet_camera) if x.endswith('.jpg')
                ]
                # print(the_imgs_path)

                for the_img in the_imgs_path:
                    # print(the_img)
                    # read image data
                    img = cv2.imread(the_img)
                    img = cv2.copyMakeBorder(img,
                                             BORDER,
                                             BORDER,
                                             BORDER,
                                             BORDER,
                                             cv2.BORDER_CONSTANT,
                                             value=(100, 100, 100))
                    img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_RGB2BGR))

                    img2det = process_img(img, self.inp_dim)
                    img2det = img2det.to(device)  # put image data to device

                    # vehicle detection
                    prediction = self.detector.forward(img2det, CUDA=True)

                    # calculating scaling factor
                    orig_img_size = list(img.size)
                    output = self.process_predict(prediction, self.prob_th,
                                                  self.num_classes,
                                                  self.nms_th, self.inp_dim,
                                                  orig_img_size)

                    orig_img = cv2.cvtColor(np.asarray(img),
                                            cv2.COLOR_RGB2BGR)  # RGB => BGR

                    print(the_img)
                    try:
                        if type(output) != int:
                            self.cls_draw_bbox(output, orig_img)
                            print('\n', os.path.split(the_img)[0])
                            dst_path = self.dst_dir + '/' + os.path.split(
                                the_img)[0] + '/' + os.path.split(the_img)[1]
                            print(dst_path)
                            if not os.path.exists(dst_path):
                                cv2.imwrite(dst_path, orig_img)
                    except Exception as inst:
                        img.show()
                        print(inst)
                        exit(2)
Example #16
def demo():

    params = {
        "video": "video.avi",  # Video to run detection upon
        "dataset": "pasacal",  # Dataset on which the network has been trained
        "confidence": 0.5,  # Object Confidence to filter predictions
        "nms_thresh": 0.4,  # NMS Threshold
        "cfgfile": "cfg/yolov3.cfg",  # Config file
        "weightsfile": "yolov3.weights",  # Weightsfile
        "repo":
        416  # Input resolution of the network.  Increase to increase accuracy.  Decrease to increase speed
    }

    confidence = float(params["confidence"])
    nms_thesh = float(params["nms_thresh"])
    start = 0

    CUDA = torch.cuda.is_available()

    num_classes = 80

    bbox_attrs = 5 + num_classes

    bboxes = []
    xywh = []

    print("Loading network.....")
    model = Darknet(params["cfgfile"])
    model.load_weights(params["weightsfile"])
    print("Network successfully loaded")

    model.net_info["height"] = params["repo"]
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()

    model.eval()

    videofile = params["video"]

    # set 0 for debug
    cap = cv2.VideoCapture(0)

    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    start = time.time()
    while cap.isOpened():

        ret, frame = cap.read()
        # (the debug prints that were here crashed on frame.shape when ret was False)
        if ret:

            img, orig_im, dim = prep_image(frame, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            with torch.no_grad():
                output = model(Variable(img), CUDA)
            output = write_results(output,
                                   confidence,
                                   num_classes,
                                   nms=True,
                                   nms_conf=nms_thesh)

            if type(output) == int:
                frames += 1
                print("no detections in this frame")
                print("FPS of the video is {:5.2f}".format(
                    frames / (time.time() - start)))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            im_dim = im_dim.repeat(output.size(0), 1)
            scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)

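            # undo the letterbox padding added by prep_image, then rescale boxes to the original frame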
            output[:,
                   [1, 3]] -= (inp_dim -
                               scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
            output[:,
                   [2, 4]] -= (inp_dim -
                               scaling_factor * im_dim[:, 1].view(-1, 1)) / 2

            output[:, 1:5] /= scaling_factor

            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0,
                                                im_dim[i, 0])
                output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0,
                                                im_dim[i, 1])

            print("output: ", output)
            print("output: ", output.shape)

            for i in output:
                x0 = i[1].int()
                y0 = i[2].int()
                x1 = i[3].int()
                y1 = i[4].int()
                bbox = (x0, y0, x1, y1)
                bboxes.append(bbox)
                print(bbox)
                w = x1 - x0
                h = y1 - y0
                xywh.append((x0, y0, w, h))
                print(x0, y0, w, h)

            #return bboxes

            classes = load_classes('data/coco.names')
            colors = pkl.load(open("pallete", "rb"))

            # write bbox
            list(map(lambda x: write(x, orig_im, classes, colors), output))

            cv2.imshow("frame", orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            print("FPS of the video is {:5.2f}g7".format(
                frames / (time.time() - start)))
            #return xywh

        else:
            break
Example #17
def main():

    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0

    CUDA = torch.cuda.is_available()

    num_classes = 80
    bbox_attrs = 5 + num_classes

    print("Loading network.....")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda().half()

    model.eval()
    # Use external camera for detection
    # rsh.initialize_camera(args.width, args.height)
    # Use the webcam for detection
    cap = cv2.VideoCapture(0)
    assert cap.isOpened(), 'Cannot capture source'
    frames = 0
    # width = 640; height = 480;
    start = time.time()
    # pipe = rs.pipeline()
    # config = rs.config()
    # config.enable_stream(rs.stream.depth, width, height, rs.format.z16, 30)
    # config.enable_stream(rs.stream.color, width, height, rs.format.rgb8, 30)
    # profile = pipe.start(config)
    # align_to = rs.stream.color
    # align = rs.align(align_to)
    print('################| INITIALIZATION SEQUENCE COMPLETE |#############')

    while True:

        # rgb[1,:,:,:,], depth = rsh.get_rgbd()
        # temp = pipe.wait_for_frames()
        # aligned_frames = align.process(temp)
        # aligned_depth_frame = aligned_frames.get_depth_frame() # aligned_depth_frame is a 640x480 depth image
        # color_frame = aligned_frames.get_color_frame()
        #
        # if not aligned_depth_frame or not color_frame:
        #     pass
        #
        # rgb = np.asanyarray(color_frame.get_data(),dtype=np.uint8)
        # depth = np.asanyarray(aligned_depth_frame.get_data(),dtype=np.uint8)
        # # rgb = rgb#.transpose(2,0,1)#, depth.tranpose(1,0)
        ret, rgb = cap.read()
        img, orig_im, dim = util.prep_image(rgb, inp_dim)
        im_dim = torch.FloatTensor(dim).repeat(1, 2)

        if CUDA:
            img = img.cuda().half()
            im_dim = im_dim.half().cuda()
            # write_results = write_results_half
            predict_transform = predict_transform_half

        with torch.no_grad():  # volatile=True is deprecated
            output = model(Variable(img), CUDA)
        output = util.write_results(output,
                                    confidence,
                                    num_classes,
                                    nms=True,
                                    nms_conf=nms_thesh)

        if type(output) == int:
            frames += 1
            print("FPS of the video is {:5.2f}".format(frames /
                                                       (time.time() - start)))
            cv2.imshow("frame", orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            continue

        im_dim = im_dim.repeat(output.size(0), 1)
        scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)

        output[:, [1, 3]] -= (inp_dim -
                              scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
        output[:, [2, 4]] -= (inp_dim -
                              scaling_factor * im_dim[:, 1].view(-1, 1)) / 2

        output[:, 1:5] /= scaling_factor

        for i in range(output.shape[0]):
            output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim[i,
                                                                           0])
            output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim[i,
                                                                           1])

        colors = pkl.load(open("pallete", "rb"))

        list(map(lambda x: util.write(x, orig_im), output))

        cv2.imshow("frame", orig_im)
        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
            print('################| QUIT |#############')
            break
        frames += 1

        print("FPS of the video is {:5.2f}".format(frames /
                                                   (time.time() - start)))
Example #18

def center_to_corner_2d(boxes):
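    # in-place conversion from (cx, cy, w, h) center format to (x1, y1, x2, y2) corners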
    boxes[:, 0] = (boxes[:, 0] - boxes[:, 2] / 2)
    boxes[:, 1] = (boxes[:, 1] - boxes[:, 3] / 2)
    boxes[:, 2] = (boxes[:, 2] + boxes[:, 0])
    boxes[:, 3] = (boxes[:, 3] + boxes[:, 1])

    return boxes


if __name__ == "__main__":
    args = arg_parse()

    # Instantiate a model
    model = Darknet(args.cfgfile, train=False)

    # Get model specs
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32
    num_classes = int(model.net_info["classes"])
    bbox_attrs = 5 + num_classes

    # Load weights PyTorch style
    model.load_state_dict(torch.load(args.weightsfile))

    # Set to evaluation mode (affects dropout and batch-norm, not gradient tracking)
    model.eval()

    model = model.to(device)  ## Really? You're gonna eval on the CPU? :)
Example #19
def run_video_demo(input_data, UI):
    args = {
        'confidence': CONFIDENCE_THRESH,
        'cfgfile': CFG_FILE,
        'nms_thres': NMS_THRESH,
        'reso': RESO,
        'weights': WEIGHTS_FILE,
        'video': input_data['video'],
        'object': input_data['object'],
        'feature': input_data['feature'],
        'color': input_data['color'],
        'feature_flag': None,
        'color_flag': None
    }

    # Setting up parameter flags
    if args['feature'] == '':
        # No feature is provided by the user
        args['feature_flag'] = False
    else:
        args['feature_flag'] = True
    if args['color'] == '':
        # Color detection is not to be performed
        args['color_flag'] = False
    else:
        args['color_flag'] = True

    confidence = float(args['confidence'])
    nms_thesh = float(args['nms_thres'])
    start = 0

    CUDA = torch.cuda.is_available()

    num_classes = NUM_CLASSES

    bbox_attrs = 5 + num_classes

    print("Loading network.....")
    model = Darknet(args['cfgfile'])
    model.load_weights(args['weights'])
    print("Network successfully loaded")

    model.net_info["height"] = args['reso']
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()

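    # warm-up forward pass on a dummy input before the timed loop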
    model(get_test_input(inp_dim, CUDA), CUDA)

    model.eval()

    output_for_ui = {}
    object_no = {}
    timestamp = {}

    videofile = args['video']

    cap = cv2.VideoCapture(videofile)
    fps = cap.get(cv2.CAP_PROP_FPS)
    print('FPS of original video:', fps)

    assert cap.isOpened(), 'Cannot capture source'

    FPS_STORE = []
    frames = 0
    start = time.time()
    while cap.isOpened():

        ret, frame = cap.read()
        if ret:

            img, orig_im, dim = prep_image(frame, inp_dim)

            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            with torch.no_grad():
                output = model(Variable(img), CUDA)
            output = write_results(output,
                                   confidence,
                                   num_classes,
                                   nms=True,
                                   nms_conf=nms_thesh)

            if type(output) == int:
                frames += 1
                print("FPS of the video is {:5.2f}  Frame no: {}".format(
                    frames / (time.time() - start), frames))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            im_dim = im_dim.repeat(output.size(0), 1)
            scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)

            output[:,
                   [1, 3]] -= (inp_dim -
                               scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
            output[:,
                   [2, 4]] -= (inp_dim -
                               scaling_factor * im_dim[:, 1].view(-1, 1)) / 2

            output[:, 1:5] /= scaling_factor

            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0,
                                                im_dim[i, 0])
                output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0,
                                                im_dim[i, 1])

            classes = load_classes('data/obj.names')
            print(classes)
            colors = pkl.load(open("pallete", "rb"))

            # Routine to find if current frame has object of interest.

            list(
                map(
                    lambda x: write(x, orig_im, classes, colors, frames, fps,
                                    timestamp, output_for_ui, args, cap),
                    output))

            cv2.imshow("frame", orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            print("FPS of the video is {}  Frame no: {}".format(
                round(frames / (time.time() - start), 2), frames))
            FPS_STORE.append(frames / (time.time() - start))
        else:
            break

    cap.release()

    output_for_ui['AVG_FPS'] = 'Average FPS of the program: {}'.format(
        sum(FPS_STORE) / len(FPS_STORE))

    print(output_for_ui)
    # Color extraction routine
    if args['color_flag']:
        desired_color = args['color']
        for file in listdir('{}'.format(OUTPUT_IMAGES_PATH)):
            kmc = km.KMeansColours(img=file,
                                   clusters=5,
                                   desired_color=desired_color,
                                   file_dir='{}\\'.format(OUTPUT_IMAGES_PATH),
                                   file_dest='.\\crop_thumbnails\\')
            output_for_ui[file] = output_for_ui[
                file] + 'Desired color {} is present: {}\n\n'.format(
                    desired_color, kmc.driver())

    elif not args['color_flag']:
        for file in listdir('{}'.format(OUTPUT_IMAGES_PATH)):
            output_for_ui[file] = output_for_ui[file] + '\n\n'

    list(
        map(os.unlink, (os.path.join(OUTPUT_IMAGES_PATH, f)
                        for f in os.listdir(OUTPUT_IMAGES_PATH))))
    list(
        map(os.unlink, (os.path.join('.\\crop_thumbnails\\', f)
                        for f in os.listdir('.\\crop_thumbnails'))))
    UI.write_output_data(output_for_ui)
Example #20
def run():

    logger = logging.getLogger()

    # Parse command window input
    parser = argparse.ArgumentParser(description='SingleShotPose')
    parser.add_argument('--datacfg', type=str,
                        default='cfg/ape.data')  # data config
    parser.add_argument('--modelcfg', type=str,
                        default='cfg/yolo-pose.cfg')  # network config
    parser.add_argument(
        '--initweightfile', type=str,
        default='backup/init.weights')  # initialization weights
    parser.add_argument('--pretrain_num_epochs', type=int,
                        default=0)  # how many epoch to pretrain
    args = parser.parse_args()
    datacfg = args.datacfg
    modelcfg = args.modelcfg
    initweightfile = args.initweightfile
    pretrain_num_epochs = args.pretrain_num_epochs

    print("ARGS: ", args)

    # Parse data configuration file
    data_options = read_data_cfg(datacfg)
    trainlist = data_options['valid']
    gpus = data_options['gpus']
    num_workers = int(data_options['num_workers'])
    backupdir = data_options['backup']
    im_width = int(data_options['width'])
    im_height = int(data_options['height'])
    fx = float(data_options['fx'])
    fy = float(data_options['fy'])
    u0 = float(data_options['u0'])
    v0 = float(data_options['v0'])

    print("DATA OPTIONS: ", data_options)

    # Parse network and training configuration parameters
    net_options = parse_cfg(modelcfg)[0]
    loss_options = parse_cfg(modelcfg)[-1]
    batch_size = int(net_options['batch'])
    max_batches = int(net_options['max_batches'])
    max_epochs = int(net_options['max_epochs'])
    learning_rate = float(net_options['learning_rate'])
    momentum = float(net_options['momentum'])
    decay = float(net_options['decay'])
    conf_thresh = float(net_options['conf_thresh'])
    num_keypoints = int(net_options['num_keypoints'])
    num_classes = int(loss_options['classes'])
    num_anchors = int(loss_options['num'])
    steps = [float(step) for step in net_options['steps'].split(',')]
    scales = [float(scale) for scale in net_options['scales'].split(',')]
    # anchors       = [float(anchor) for anchor in loss_options['anchors'].split(',')]

    print("NET OPTIONS: ", net_options)
    print("LOSS OPTIONS: ", loss_options)

    # Specify the model and the loss
    model = Darknet(modelcfg)

    # # Model settings
    model.load_weights(initweightfile)
    model.print_network()
    # model.seen        = 0
    # processed_batches = model.seen/batch_size
    init_width = 416  # model.width
    init_height = 416  # model.height
    batch_size = 1
    num_workers = 0

    # print("Size: ", init_width, init_height)

    bg_file_names = get_all_files('../VOCdevkit/VOC2012/JPEGImages')
    # Specify the number of workers
    use_cuda = True
    kwargs = {
        'num_workers': num_workers,
        'pin_memory': True
    } if use_cuda else {}

    logger.info("Loading data")

    # valid_dataset = dataset_multi.listDataset("../LINEMOD/duck/test_occlusion.txt", shape=(init_width, init_height),
    #                                             shuffle=False,
    #                                             objclass="duck",
    #                                             transform=transforms.Compose([
    #                                                 transforms.ToTensor(),
    #                                             ]))

    # Get the dataloader for training dataset

    dataloader = torch.utils.data.DataLoader(dataset.listDataset(
        trainlist,
        shape=(init_width, init_height),
        shuffle=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
        ]),
        train=False,
        seen=0,
        batch_size=batch_size,
        num_workers=num_workers,
        bg_file_names=bg_file_names),
                                             batch_size=batch_size,
                                             shuffle=False,
                                             **kwargs)

    model.cuda()
    model.eval()

    delay = {True: 0, False: 1}
    paused = True

    # print("Classes in dataset ", num_classes)
    print("Batches in dataloader: ", len(dataloader))
    tbar = tqdm(dataloader, ascii=True, dynamic_ncols=True)
    for ii, s in enumerate(tbar):
        images, targets = s
        # print(ii, "IMAGES:" , images.shape)
        # print(ii, "TARGET\n", targets.shape)
        bs = images.shape[0]
        t = targets.cpu().numpy().reshape(bs, 50, -1)
        # print("TARGET [0, 0:1] \n", t[0, :1])
        # print("CLASSES ", t[0, :, 0])

        images_gpu = images.cuda()

        model_out = model(images_gpu).detach()
        all_boxes = np.array(
            get_region_boxes(model_out,
                             num_classes,
                             num_keypoints,
                             anchor_dim=num_anchors)).reshape(
                                 batch_size, 1, -1)

        # print("Model OUT", all_boxes.shape)

        pred = np.zeros_like(all_boxes)
        pred[:, 0, 0] = all_boxes[:, 0, -1]
        pred[:, 0, 1:-2] = all_boxes[:, 0, :-3]

        viz = visualize_results(images, t, pred, img_size=416, show_3d=True)

        cv2.imshow("Res ", viz)

        k = cv2.waitKey(delay[paused])
        if k & 0xFF == ord('q'):
            break
        if k & 0xFF == ord('p'):
            paused = not paused
Example #21
# Test parameters
conf_thresh   = 0.25
nms_thresh    = 0.4
iou_thresh    = 0.5

if not os.path.exists(backupdir):
    os.mkdir(backupdir)
    
###############
torch.manual_seed(seed)
if use_cuda:
    os.environ['CUDA_VISIBLE_DEVICES'] = gpus
    torch.cuda.manual_seed(seed)

# Define the model
model       = Darknet(cfgfile)
region_loss = model.loss

model.load_weights(weightfile)
model.print_network()
model.seen = 0
region_loss.seen  = model.seen
processed_batches = model.seen // batch_size

init_width        = model.width
init_height       = model.height
init_epoch        = model.seen // nsamples
print('init_width, init_height:', init_width, init_height)
kwargs = {'num_workers': num_workers, 'pin_memory': True} if use_cuda else {}
'''
test_loader = torch.utils.data.DataLoader(
Example #22
if __name__ == '__main__':
    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0

    CUDA = torch.cuda.is_available()

    num_classes = 2

    bbox_attrs = 5 + num_classes

    print("Loading network.....")
    model = Darknet(args.cfgfile)
    if args.weights_path.endswith(".weights"):
        # Load darknet weights
        model.load_darknet_weights(args.weights_path)
    else:
        # Load checkpoint weights
        model.load_state_dict(torch.load(args.weights_path))

    model.eval()  # Set in evaluation mode
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32
Example #23
batch_size    = int(net_options['batch'])
max_batches   = int(net_options['max_batches'])
learning_rate = float(net_options['learning_rate'])
momentum      = float(net_options['momentum'])

max_epochs    = max_batches*batch_size//nsamples+1
use_cuda      = True
seed          = 22222
eps           = 1e-5

###############
torch.manual_seed(seed)
if use_cuda:
    torch.cuda.manual_seed(seed)

model       = Darknet(cfgfile)
region_loss = model.loss

model.load_weights(weightfile)
model.print_network()
init_epoch = model.seen // nsamples

kwargs = {'num_workers': 8, 'pin_memory': True} if use_cuda else {}
test_loader = torch.utils.data.DataLoader(
    lmdb_utils.lmdbDataset(testdb, shape=(160, 160),
                   shuffle=False,
                   transform=None,
                   train=False),
    batch_size=batch_size, shuffle=False, **kwargs)

if use_cuda:
Example #24
def main():
    # Parsing arguments
    arguments_parser = ArgumentsParser()
    args = arguments_parser.parse_arguments()
    images = args.images
    batch_size = int(args.bs)
    confidence = float(args.confidence)
    nms_thresh = float(args.nms_thresh)

    # Set up the neural network
    print("Loading network.....")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    # If there's a GPU available, put the model on GPU
    if CUDA:
        model.cuda()

    # Set the model in evaluation mode
    model.eval()

    read_dir = time.time()

    # Detection phase
    load_batch = time.time()
    image_manager = Cv2ImageManager()
    loaded_images, list_of_images = image_manager.read_images(images)
    im_batches = list(
        map(prep_image, loaded_images,
            [inp_dim for x in range(len(list_of_images))]))
    im_dim_list = [(x.shape[1], x.shape[0]) for x in loaded_images]
    im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)

    leftover = 0
    if (len(im_dim_list) % batch_size):
        leftover = 1

    if batch_size != 1:
        num_batches = len(list_of_images) // batch_size + leftover
        im_batches = [
            torch.cat(
                (im_batches[i * batch_size:min((i + 1) *
                                               batch_size, len(im_batches))]))
            for i in range(num_batches)
        ]

    if CUDA:
        im_dim_list = im_dim_list.cuda()

    start_det_loop = time.time()
    detector = Detector(model, im_batches, batch_size, inp_dim, confidence,
                        nms_thresh, CLASSES, NUMBER_OF_CLASSES, CUDA)
    output = detector.detect(list_of_images, im_dim_list)

    output_recast = time.time()
    class_load = time.time()

    draw = time.time()

    det_images = list(
        map(
            lambda x: image_manager.draw_bounding_boxes(
                x, loaded_images, CLASSES), output))
    det_names = list(
        map(lambda x: "{det}/{x}".format(det=args.det, x=x),
            [osp.basename(image_name) for image_name in list_of_images]))
    image_manager.write_images(det_names, det_images)

    end = time.time()

    print("SUMMARY")
    print("----------------------------------------------------------")
    print("{:25s}: {}".format("Task", "Time Taken (in seconds)"))
    print()
    print("{:25s}: {:2.3f}".format("Reading addresses", load_batch - read_dir))
    print("{:25s}: {:2.3f}".format("Loading batch",
                                   start_det_loop - load_batch))
    print("{:25s}: {:2.3f}".format(
        "Detection (" + str(len(list_of_images)) + " images)",
        output_recast - start_det_loop))
    print("{:25s}: {:2.3f}".format("Output Processing",
                                   class_load - output_recast))
    print("{:25s}: {:2.3f}".format("Drawing Boxes", end - draw))
    print("{:25s}: {:2.3f}".format("Average time_per_img",
                                   (end - load_batch) / len(list_of_images)))
    print("----------------------------------------------------------")

    torch.cuda.empty_cache()
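
# The snippet defines main() but its entry-point guard is not shown; a typical
# invocation (an assumption, not part of the original) would be:
if __name__ == "__main__":
    main()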
Example #25
        u0          = float(data_options['u0'])
        v0          = float(data_options['v0'])
        test_width  = int(net_options['test_width'])
        test_height = int(net_options['test_height'])


        # Specify which GPUs to use
        use_cuda      = False
        seed          = int(time.time())
        torch.manual_seed(seed)
        if use_cuda:
            os.environ['CUDA_VISIBLE_DEVICES'] = gpus
            torch.cuda.manual_seed(seed)

        # Specify the model and the loss function
        model       = Darknet(modelcfg)
        region_loss = RegionLoss(num_keypoints=9, num_classes=1, anchors=[], num_anchors=1, pretrain_num_epochs=15, use_cuda=use_cuda)
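        # num_keypoints=9 corresponds to the 8 corners of the object's 3D
        # bounding box plus its centroid, each regressed as a 2D image point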

        # Load the weights
        model.load_weights_until_last(initweightfile)

        #exportToOnnx(model)


        model.print_network()
        model.seen = 0
        region_loss.iter  = model.iter
        region_loss.seen  = model.seen
        processed_batches = model.seen//batch_size
        init_width        = model.width
        init_height       = model.height
Example #26
def demo(cfgfile, weightfile):
    # This vector decides on which device each layer is computed: 0 for CPU, 1 for GPU
    if args.gpu:
        het_part = np.array([
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1
        ])
    else:
        het_part = np.zeros(32, dtype=int)
    if args.demo:
        het_part = np.ones(32, dtype=int)
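    # A mixed split is also possible, e.g. (hypothetical) running the first
    # 16 layers on GPU and the remaining 16 on CPU:
    # het_part = np.concatenate([np.ones(16, dtype=int), np.zeros(16, dtype=int)])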
    m = Darknet(cfgfile, het_part)
    m.print_network()
    if len(m.models) != len(het_part):
        print('Number of model layers and partition vector mismatch')
        exit(-1)
    m.load_weights(weightfile, het_part)
    print('Loading weights from %s... Done!' % (weightfile))

    if m.num_classes == 20:
        namesfile = 'data/voc.names'
    elif m.num_classes == 80:
        namesfile = 'data/coco.names'
    else:
        namesfile = 'data/names'
    class_names = load_class_names(namesfile)

    use_cuda = args.gpu
    #if use_cuda:
    #m.cuda()

    #cap = cv2.VideoCapture("nvcamerasrc ! video/x-raw(memory:NVMM), width=(int)640, height=(int)480, format=(string)I420, framerate=(fraction)60/1 ! nvvidconv ! video/x-raw, format=(string)BGRx ! videoconvert ! video/x-raw, format=(string)BGR ! appsink")
    cap = cv2.VideoCapture(
        "nvarguscamerasrc ! video/x-raw(memory:NVMM), width=(int)1920, height=(int)1080,format=(string)NV12, framerate=(fraction)30/1 ! nvvidconv ! video/x-raw, format=(string)BGRx ! videoconvert ! video/x-raw, format=(string)BGR ! appsink drop=1"
    )
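    # Pipeline stages: nvarguscamerasrc captures from the CSI camera, nvvidconv
    # converts the NVMM NV12 frames to BGRx, videoconvert produces the BGR
    # layout OpenCV expects, and appsink (drop=1) discards stale frames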
    if cap.isOpened():
        # Window creation and specifications
        windowName = cfgfile
        cv2.namedWindow(windowName, cv2.WINDOW_NORMAL)
        cv2.moveWindow(windowName, 1920 - 1280, 0)
        cv2.resizeWindow(windowName, 1280, 1080)
        cv2.setWindowTitle(windowName, "YOLOv2 Object Detection")
        font = cv2.FONT_HERSHEY_PLAIN
        helpText = "'Esc' to Quit"
        showFullScreen = False
        showHelp = True
        start = 0.0
        end = 0.0
    else:
        print("Unable to open camera")
        exit(-1)

    while True:
        res, img = cap.read()
        if res:
            sized = cv2.resize(img, (m.width, m.height))
            bboxes = do_detect(m, sized, 0.5, 0.4, use_cuda, het_part)
            print('------')
            draw_img = plot_boxes_cv2(img, bboxes, None, class_names)
            if showHelp:
                cv2.putText(img, helpText, (11, 20), font, 1.0, (32, 32, 32),
                            4, cv2.LINE_AA)
                cv2.putText(img, helpText, (10, 20), font, 1.0,
                            (240, 240, 240), 1, cv2.LINE_AA)
            end = time.time()
            cv2.putText(img, "{0:.0f}fps".format(1 / (end - start)), (531, 50),
                        font, 3.0, (32, 32, 32), 8, cv2.LINE_AA)
            cv2.putText(img, "{0:.0f}fps".format(1 / (end - start)), (530, 50),
                        font, 3.0, (240, 240, 240), 2, cv2.LINE_AA)
            cv2.imshow(windowName, draw_img)
            start = time.time()
            key = cv2.waitKey(1)
            if key == 27:  # Check for ESC key
                cv2.destroyAllWindows()
                break
            elif key == 74:  # Toggle fullscreen; this is the F3 key on this particular keyboard
                if not showFullScreen:
                    cv2.setWindowProperty(windowName, cv2.WND_PROP_FULLSCREEN,
                                          cv2.WINDOW_FULLSCREEN)
                else:
                    cv2.setWindowProperty(windowName, cv2.WND_PROP_FULLSCREEN,
                                          cv2.WINDOW_NORMAL)
                showFullScreen = not showFullScreen  # track the state so the toggle works both ways
        else:
            print("Unable to read image")
            exit(-1)
Example #27
def test(datacfg, cfgfile, weightfile, imgfile):

    # ******************************************#
    #			PARAMETERS PREPARATION			#
    # ******************************************#

    #parse configuration files
    options = read_data_cfg(datacfg)
    meshname = options['mesh']
    name = options['name']

    #Parameters for the network
    seed = int(time.time())
    gpus = '0'  # define gpus to use
    test_width = 544  # define test image size
    test_height = 544
    torch.manual_seed(seed)  # seed torch random
    use_cuda = True
    if use_cuda:
        os.environ['CUDA_VISIBLE_DEVICES'] = gpus
        torch.cuda.manual_seed(seed)  # seed cuda random
    conf_thresh = 0.1
    num_classes = 1

    # Read object 3D model, get 3D Bounding box corners
    mesh = MeshPly(meshname)
    vertices = np.c_[np.array(mesh.vertices),
                     np.ones((len(mesh.vertices), 1))].transpose()
    corners3D = get_3D_corners(vertices)
    diam = float(options['diam'])

    # now configure camera intrinsics
    internal_calibration = get_camera_intrinsic()

    # ******************************************#
    #	NETWORK CREATION						#
    # ******************************************#

    # Create the network based on cfg file
    model = Darknet(cfgfile)
    model.print_network()
    model.load_weights(weightfile)
    model.cuda()
    model.eval()

    # ******************************************#
    #	INPUT IMAGE PREPARATION FOR NN 			#
    # ******************************************#

    # Now prepare image: convert to RGB, resize, transform to Tensor
    # use cuda,
    img = Image.open(imgfile).convert('RGB')
    ori_size = img.size  # store original size
    img = img.resize((test_width, test_height))
    t1 = time.time()
    img = transforms.Compose([
        transforms.ToTensor(),
    ])(img)  #.float()
    img = Variable(img, requires_grad=True)
    img = img.unsqueeze(0)  # add a fake batch dimension
    img = img.cuda()

    # ******************************************#
    #	PASS IT TO NETWORK AND GET PREDICTION	#
    # ******************************************#

    # Forward pass
    output = model(img).data
    #print("Output Size: {}".format(output.size(0)))
    t2 = time.time()

    # ******************************************#
    #		EXTRACT PREDICTIONS 				#
    # ******************************************#

    # Using confidence threshold, eliminate low-confidence predictions
    # and get only boxes over the confidence threshold
    all_boxes = get_region_boxes(output, conf_thresh, num_classes)

    boxes = all_boxes[0]

    # iterate through boxes to find the one with highest confidence
    best_conf_est = -1
    best_box_index = -1
    for j in range(len(boxes)):
        # the confidence is in index = 18
        if (boxes[j][18] > best_conf_est):
            box_pr = boxes[j]  # get bounding box
            best_conf_est = boxes[j][18]
            best_box_index = j
    #print("Best box is: {} and 2D prediction is {}".format(best_box_index,box_pr))

    # Denormalize the corner predictions
    # These are the predicted 2D points from which a bounding cube can be drawn
    corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]), dtype='float32')
    corners2D_pr[:, 0] = corners2D_pr[:, 0] * ori_size[0]  # Width
    corners2D_pr[:, 1] = corners2D_pr[:, 1] * ori_size[1]  # Height
    t3 = time.time()

    # **********************************************#
    #	GET OBJECT POSE ESTIMATION					#
    #  Remember the problem in 6D Pose estimation 	#
    #  is exactly to estimate the pose - position 	#
    #  and orientation of the object of interest 	#
    #  with reference to a camera frame. That is 	#
    #  why although the 2D projection of the 3D 	#
    #  bounding cube are ready, we still need to  	#
    #  compute the rotation matrix -orientation- 	#
    #  and a translation vector -position- for the  #
    #  object 										#
    #											 	#
    # **********************************************#

    # get rotation matrix and transform
    R_pr, t_pr = pnp(
        np.array(np.transpose(
            np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)),
                 dtype='float32'), corners2D_pr,
        np.array(internal_calibration, dtype='float32'))
    t4 = time.time()

    # ******************************************#
    #	DISPLAY IMAGE WITH BOUNDING CUBE		#
    # ******************************************#

    # Reload Original img
    img = cv2.imread(imgfile)

    # create a window to display image
    wname = "Prediction"
    cv2.namedWindow(wname)
    # draw each predicted 2D point (cv2 expects integer pixel coordinates)
    for i, (x, y) in enumerate(corners2D_pr):
        # get colors to draw the lines
        col1 = 28 * i
        col2 = 255 - (28 * i)
        col3 = int(np.random.randint(0, 256))
        cv2.circle(img, (int(x), int(y)), 3, (col1, col2, col3), -1)
        cv2.putText(img, str(i), (int(x) + 5, int(y) + 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (col1, col2, col3), 1)

    # Get each predicted point and the centroid as integer pixel tuples,
    # since cv2.line also expects integer coordinates
    pts = [tuple(int(v) for v in pt) for pt in corners2D_pr]
    center = pts[0]
    p1, p2, p3, p4, p5, p6, p7, p8 = pts[1:9]

    # Draw cube lines around detected object
    # draw front face
    line_point = 3
    cv2.line(img, (p1[0], p1[1]), (p2[0], p2[1]), (0, 255, 0), line_point)
    cv2.line(img, (p2[0], p2[1]), (p4[0], p4[1]), (0, 255, 0), line_point)
    cv2.line(img, (p4[0], p4[1]), (p3[0], p3[1]), (0, 255, 0), line_point)
    cv2.line(img, (p3[0], p3[1]), (p1[0], p1[1]), (0, 255, 0), line_point)

    # draw back face
    cv2.line(img, (p5[0], p5[1]), (p6[0], p6[1]), (0, 255, 0), line_point)
    cv2.line(img, (p7[0], p7[1]), (p8[0], p8[1]), (0, 255, 0), line_point)
    cv2.line(img, (p6[0], p6[1]), (p8[0], p8[1]), (0, 255, 0), line_point)
    cv2.line(img, (p5[0], p5[1]), (p7[0], p7[1]), (0, 255, 0), line_point)

    # draw right face
    cv2.line(img, (p2[0], p2[1]), (p6[0], p6[1]), (0, 255, 0), line_point)
    cv2.line(img, (p1[0], p1[1]), (p5[0], p5[1]), (0, 255, 0), line_point)

    # draw left face
    cv2.line(img, (p3[0], p3[1]), (p7[0], p7[1]), (0, 255, 0), line_point)
    cv2.line(img, (p4[0], p4[1]), (p8[0], p8[1]), (0, 255, 0), line_point)

    # Show the image and wait key press
    cv2.imshow(wname, img)
    cv2.waitKey()

    print("Rotation: {}".format(R_pr))
    print("Translation: {}".format(t_pr))
    print(" Predict time: {}".format(t2 - t1))
    print(" 2D Points extraction time: {}".format(t3 - t2))
    print(" Pose calculation time: {}:".format(t4 - t3))
    print(" Total time: {}".format(t4 - t1))
    print("Press any key to close.")
Example #28
def valid(datacfg, cfgfile, weightfile, outfile):
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    name_list = options['names']
    prefix = 'results'
    names = load_class_names(name_list)

    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    m = Darknet(cfgfile)
    m.print_network()
    m.load_weights(weightfile)
    m.cuda()
    m.eval()

    valid_dataset = dataset.listDataset(valid_images,
                                        shape=(m.width, m.height),
                                        shuffle=False,
                                        transform=transforms.Compose([
                                            transforms.ToTensor(),
                                        ]))
    valid_batchsize = 2
    assert (valid_batchsize > 1)

    kwargs = {'num_workers': 4, 'pin_memory': True}
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=valid_batchsize,
                                               shuffle=False,
                                               **kwargs)

    fps = [0] * m.num_classes
    if not os.path.exists('results'):
        os.mkdir('results')
    for i in range(m.num_classes):
        buf = '%s/%s%s.txt' % (prefix, outfile, names[i])
        fps[i] = open(buf, 'w')

    lineId = -1

    conf_thresh = 0.005
    nms_thresh = 0.45
    for batch_idx, (data, target) in enumerate(valid_loader):
        data = data.cuda()
        data = Variable(data, volatile=True)
        output = m(data).data
        batch_boxes = get_region_boxes(output, conf_thresh, m.num_classes,
                                       m.anchors, m.num_anchors, 0, 1)
        for i in range(output.size(0)):
            lineId = lineId + 1
            fileId = os.path.basename(valid_files[lineId]).split('.')[0]
            width, height = get_image_size(valid_files[lineId])
            print(valid_files[lineId])
            boxes = batch_boxes[i]
            boxes = nms(boxes, nms_thresh)
            for box in boxes:
                x1 = (box[0] - box[2] / 2.0) * width
                y1 = (box[1] - box[3] / 2.0) * height
                x2 = (box[0] + box[2] / 2.0) * width
                y2 = (box[1] + box[3] / 2.0) * height

                det_conf = box[4]
                for j in range((len(box) - 5) // 2):  # integer division for Python 3
                    cls_conf = box[5 + 2 * j]
                    cls_id = box[6 + 2 * j]
                    prob = det_conf * cls_conf
                    fps[cls_id].write('%s %f %f %f %f %f\n' %
                                      (fileId, prob, x1, y1, x2, y2))

    for i in range(m.num_classes):
        fps[i].close()
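
The coordinate arithmetic in the inner loop converts YOLO's normalized
center-format boxes (cx, cy, w, h) to absolute corner coordinates; as a
standalone helper, a sketch of the same math:

def center_to_corners(box, width, height):
    cx, cy, w, h = box[0], box[1], box[2], box[3]
    x1 = (cx - w / 2.0) * width   # left edge in pixels
    y1 = (cy - h / 2.0) * height  # top edge in pixels
    x2 = (cx + w / 2.0) * width   # right edge in pixels
    y2 = (cy + h / 2.0) * height  # bottom edge in pixels
    return x1, y1, x2, y2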
Example #29
def valid(datacfg, cfgfile, weightfile, outfile):
    def truths_length(truths):
        for i in range(50):
            if truths[i][1] == 0:
                return i
        return 50  # all 50 slots are filled, so every row is a ground truth

    # Parse configuration files
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    meshname = options['mesh']
    backupdir = options['backup']
    name = options['name']
    if not os.path.exists(backupdir):
        makedirs(backupdir)

    # Parameters
    prefix = 'results'
    seed = int(time.time())
    gpus = '0'  # Specify which gpus to use
    test_width = 416  #originally 544
    test_height = 416  #originally 544
    torch.manual_seed(seed)
    use_cuda = True
    if use_cuda:
        os.environ['CUDA_VISIBLE_DEVICES'] = gpus
        torch.cuda.manual_seed(seed)
    save = True
    testtime = True
    num_classes = 1
    testing_samples = 0.0
    eps = 1e-5
    notpredicted = 0
    conf_thresh = 0.1
    nms_thresh = 0.4
    match_thresh = 0.5
    if save:
        makedirs(backupdir + '/test')
        makedirs(backupdir + '/test/gt')
        makedirs(backupdir + '/test/pr')

    # To save
    testing_error_trans = 0.0
    testing_error_angle = 0.0
    testing_error_pixel = 0.0
    errs_2d = []
    errs_3d = []
    errs_trans = []
    errs_angle = []
    errs_corner2D = []
    preds_trans = []
    preds_rot = []
    preds_corners2D = []
    gts_trans = []
    gts_rot = []
    gts_corners2D = []

    # Read object model information, get 3D bounding box corners
    mesh = MeshPly(meshname)
    vertices = np.c_[np.array(mesh.vertices),
                     np.ones((len(mesh.vertices), 1))].transpose()
    corners3D = get_3D_corners(vertices)
    # diam          = calc_pts_diameter(np.array(mesh.vertices))
    diam = float(options['diam'])

    # Read intrinsic camera parameters
    internal_calibration = get_camera_intrinsic()

    # Get validation file names
    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    # Specify model, load pretrained weights, pass to GPU and set the module in evaluation mode
    model = Darknet(cfgfile)
    model.print_network()
    model.load_weights(weightfile)
    model.cuda()
    model.eval()

    # Get the parser for the test dataset
    valid_dataset = dataset.listDataset(valid_images,
                                        shape=(test_width, test_height),
                                        shuffle=False,
                                        transform=transforms.Compose([
                                            transforms.ToTensor(),
                                        ]))
    valid_batchsize = 1

    # Specify the number of workers for multiple processing, get the dataloader for the test dataset
    kwargs = {'num_workers': 4, 'pin_memory': True}
    test_loader = torch.utils.data.DataLoader(valid_dataset,
                                              batch_size=valid_batchsize,
                                              shuffle=False,
                                              **kwargs)

    logging("   Testing {}...".format(name))
    logging("   Number of test samples: %d" % len(test_loader.dataset))
    # Iterate through test batches (Batch size for test data is 1)
    count = 0
    z = np.zeros((3, 1))
    for batch_idx, (data, target) in enumerate(test_loader):

        t1 = time.time()
        # Pass data to GPU
        if use_cuda:
            data = data.cuda()
            target = target.cuda()

        # Wrap tensors in Variable class, set volatile=True for inference mode and to use minimal memory during inference
        data = Variable(data, volatile=True)
        t2 = time.time()

        # Forward pass
        output = model(data).data
        t3 = time.time()

        # Using confidence threshold, eliminate low-confidence predictions
        all_boxes = get_region_boxes(output, conf_thresh, num_classes)
        t4 = time.time()

        # Iterate through all images in the batch
        for i in range(output.size(0)):

            # For each image, get all the predictions
            boxes = all_boxes[i]

            # For each image, get all the targets (for multiple object pose estimation, there might be more than 1 target per image)
            truths = target[i].view(-1, 21)

            # Get how many objects are present in the scene
            num_gts = truths_length(truths)

            # Iterate through each ground-truth object
            for k in range(num_gts):
                box_gt = [
                    truths[k][1], truths[k][2], truths[k][3], truths[k][4],
                    truths[k][5], truths[k][6], truths[k][7], truths[k][8],
                    truths[k][9], truths[k][10], truths[k][11], truths[k][12],
                    truths[k][13], truths[k][14], truths[k][15], truths[k][16],
                    truths[k][17], truths[k][18], 1.0, 1.0, truths[k][0]
                ]
                best_conf_est = -1

                # If the prediction has the highest confidence, choose it as our prediction for single object pose estimation
                for j in range(len(boxes)):
                    if (boxes[j][18] > best_conf_est):
                        match = corner_confidence9(
                            box_gt[:18], torch.FloatTensor(boxes[j][:18]))
                        box_pr = boxes[j]
                        best_conf_est = boxes[j][18]

                # Denormalize the corner predictions
                corners2D_gt = np.array(np.reshape(box_gt[:18], [9, 2]),
                                        dtype='float32')
                corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]),
                                        dtype='float32')
                corners2D_gt[:, 0] = corners2D_gt[:, 0] * 416
                corners2D_gt[:, 1] = corners2D_gt[:, 1] * 416
                corners2D_pr[:, 0] = corners2D_pr[:, 0] * 416
                corners2D_pr[:, 1] = corners2D_pr[:, 1] * 416
                preds_corners2D.append(corners2D_pr)
                gts_corners2D.append(corners2D_gt)

                # Compute corner prediction error
                corner_norm = np.linalg.norm(corners2D_gt - corners2D_pr,
                                             axis=1)
                corner_dist = np.mean(corner_norm)
                errs_corner2D.append(corner_dist)

                # Compute [R|t] by pnp
                R_gt, t_gt = pnp(
                    np.array(np.transpose(
                        np.concatenate((np.zeros((3, 1)), corners3D[:3, :]),
                                       axis=1)),
                             dtype='float32'), corners2D_gt,
                    np.array(internal_calibration, dtype='float32'))
                R_pr, t_pr = pnp(
                    np.array(np.transpose(
                        np.concatenate((np.zeros((3, 1)), corners3D[:3, :]),
                                       axis=1)),
                             dtype='float32'), corners2D_pr,
                    np.array(internal_calibration, dtype='float32'))

                demo_path = 'test/{}/demo/demo_'.format(
                    weightfile) + valid_files[count][-8:-3] + 'png'
                result_path = 'test/{}/result/result_'.format(
                    weightfile) + valid_files[count][-8:-3] + 'png'
                img_path = valid_files[count]
                print(img_path, os.path.exists(img_path), demo_path, 'saved.')

                img = cv2.imread(img_path)

                img = draw_demo_img(img, corners2D_pr, (0, 255, 0))
                cv2.imwrite(result_path, img)

                img = draw_demo_img(img, corners2D_gt, (0, 0, 255))
                cv2.imwrite(demo_path, img)

                if save:
                    preds_trans.append(t_pr)
                    gts_trans.append(t_gt)
                    preds_rot.append(R_pr)
                    gts_rot.append(R_gt)

                    np.savetxt(
                        backupdir + '/test/gt/R_' + valid_files[count][-8:-3] +
                        'txt', np.array(R_gt, dtype='float32'))
                    np.savetxt(
                        backupdir + '/test/gt/t_' + valid_files[count][-8:-3] +
                        'txt', np.array(t_gt, dtype='float32'))
                    np.savetxt(
                        backupdir + '/test/pr/R_' + valid_files[count][-8:-3] +
                        'txt', np.array(R_pr, dtype='float32'))
                    np.savetxt(
                        backupdir + '/test/pr/t_' + valid_files[count][-8:-3] +
                        'txt', np.array(t_pr, dtype='float32'))
                    np.savetxt(
                        backupdir + '/test/gt/corners_' +
                        valid_files[count][-8:-3] + 'txt',
                        np.array(corners2D_gt, dtype='float32'))
                    np.savetxt(
                        backupdir + '/test/pr/corners_' +
                        valid_files[count][-8:-3] + 'txt',
                        np.array(corners2D_pr, dtype='float32'))

                # Compute translation error
                trans_dist = np.sqrt(np.sum(np.square(t_gt - t_pr)))
                errs_trans.append(trans_dist)

                # Compute angle error
                angle_dist = calcAngularDistance(R_gt, R_pr)
                errs_angle.append(angle_dist)

                # Compute pixel error
                Rt_gt = np.concatenate((R_gt, t_gt), axis=1)
                Rt_pr = np.concatenate((R_pr, t_pr), axis=1)
                proj_2d_gt = compute_projection(vertices, Rt_gt,
                                                internal_calibration)
                proj_2d_pred = compute_projection(vertices, Rt_pr,
                                                  internal_calibration)
                norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)
                pixel_dist = np.mean(norm)
                errs_2d.append(pixel_dist)

                # Compute 3D distances
                transform_3d_gt = compute_transformation(vertices, Rt_gt)
                transform_3d_pred = compute_transformation(vertices, Rt_pr)
                norm3d = np.linalg.norm(transform_3d_gt - transform_3d_pred,
                                        axis=0)
                vertex_dist = np.mean(norm3d)
                errs_3d.append(vertex_dist)

                # Sum errors
                testing_error_trans += trans_dist
                testing_error_angle += angle_dist
                testing_error_pixel += pixel_dist
                testing_samples += 1
                count = count + 1

        t5 = time.time()

    # Compute 2D projection error, 6D pose error, 5cm5degree error
    px_threshold = 5
    acc = len(np.where(
        np.array(errs_2d) <= px_threshold)[0]) * 100. / (len(errs_2d) + eps)
    acc5cm5deg = len(
        np.where((np.array(errs_trans) <= 0.05)
                 & (np.array(errs_angle) <= 5))[0]) * 100. / (len(errs_trans) +
                                                              eps)
    acc3d10 = len(np.where(
        np.array(errs_3d) <= diam * 0.1)[0]) * 100. / (len(errs_3d) + eps)
    corner_acc = len(np.where(np.array(errs_corner2D) <= px_threshold)
                     [0]) * 100. / (len(errs_corner2D) + eps)
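    # e.g. with errs_2d = [3.2, 7.8, 4.9] and px_threshold = 5, two of the
    # three errors fall under the threshold, so acc ~= 66.67%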
    mean_err_2d = np.mean(errs_2d)
    mean_corner_err_2d = np.mean(errs_corner2D)
    nts = float(testing_samples)

    if testtime:
        print('-----------------------------------')
        print('  tensor to cuda : %f' % (t2 - t1))
        print('         predict : %f' % (t3 - t2))
        print('get_region_boxes : %f' % (t4 - t3))
        print('            eval : %f' % (t5 - t4))
        print('           total : %f' % (t5 - t1))
        print('-----------------------------------')

    # Print test statistics
    logging('Results of {}'.format(name))
    logging('   Acc using {} px 2D Projection = {:.2f}%'.format(
        px_threshold, acc))
    logging('   Acc using 10% threshold - {} vx 3D Transformation = {:.2f}%'.
            format(diam * 0.1, acc3d10))
    logging('   Acc using 5 cm 5 degree metric = {:.2f}%'.format(acc5cm5deg))
    logging(
        "   Mean 2D pixel error is %f, Mean vertex error is %f, mean corner error is %f"
        % (mean_err_2d, np.mean(errs_3d), mean_corner_err_2d))
    logging(
        '   Translation error: %f m, angle error: %f degree, pixel error: %f pix'
        % (testing_error_trans / nts, testing_error_angle / nts,
           testing_error_pixel / nts))

    if save:
        predfile = backupdir + '/predictions_linemod_' + name + '.mat'
        scipy.io.savemat(
            predfile, {
                'R_gts': gts_rot,
                't_gts': gts_trans,
                'corner_gts': gts_corners2D,
                'R_prs': preds_rot,
                't_prs': preds_trans,
                'corner_prs': preds_corners2D
            })

    with open('test/{}/test_report.txt'.format(weightfile), 'a') as f:
        f.write('Results of {}\n'.format(name))
        f.write('-----------------------------------\n')
        f.write('  tensor to cuda : %f\n' % (t2 - t1))
        f.write('         predict : %f\n' % (t3 - t2))
        f.write('get_region_boxes : %f\n' % (t4 - t3))
        f.write('            eval : %f\n' % (t5 - t4))
        f.write('           total : %f\n' % (t5 - t1))
        f.write('-----------------------------------\n')
        f.write('[{:.2f}%] Acc using {} px 2D Projection\n'.format(
            acc, px_threshold))
        f.write('[{:.2f}%] Acc using 10% threshold - {} vx 3D Transformation\n'.
                format(acc3d10, diam * 0.1))
        f.write('[{:.2f}%] Acc using 5 cm 5 degree metric\n'.format(acc5cm5deg))
        f.write(
            "Mean 2D pixel error is %f, Mean vertex error is %f, mean corner error is %f\n"
            % (mean_err_2d, np.mean(errs_3d), mean_corner_err_2d))
        f.write(
            'Translation error: %f m, angle error: %f degree, pixel error: %f pix\n'
            % (testing_error_trans / nts, testing_error_angle / nts,
               testing_error_pixel / nts))
Example #31

args = arg_parse()
images = args.images
batch_size = int(args.bs)
confidence = float(args.confidence)
nms_thresh = float(args.nms_thresh)
start = 0
CUDA = torch.cuda.is_available()

num_classes = 80
classes = load_classes("data/coco.names")

#Set up the neural network
print("Loading network.....")
model = Darknet(args.cfgfile)
model.load_weights(args.weightsfile)
print("Network successfully loaded")

model.net_info["height"] = args.reso
inp_dim = int(model.net_info["height"])
assert inp_dim % 32 == 0
assert inp_dim > 32

#If there's a GPU available, put the model on GPU
if CUDA:
    model.cuda()

#Set the model in evaluation mode
model.eval()
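
# During inference it is also common to wrap the forward pass in
# torch.no_grad() to avoid tracking gradients; a hedged continuation
# (not part of the original snippet, assuming this tutorial's
# Darknet.forward(x, CUDA) signature):
# with torch.no_grad():
#     prediction = model(batch, CUDA)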