Exemple #1
import cv2
import matplotlib.pyplot as plt
from utils import *
from darknet import Darknet

# Thresholds handed to detect_objects() below.
# NOTE(review): the original comment called the first value the "NMS
# threshold" although the variable is named score_threshold — confirm which
# meaning detect_objects() expects for its fourth argument.
score_threshold = 0.6
# IoU threshold (third argument of detect_objects()).
iou_threshold = 0.4

# Input files: network definition, pretrained weights, class-name list, image.
cfg_file = "cfg/yolov3.cfg"
weight_file = "weights/yolov3.weights"
namesfile = "data/coco.names"
image_file = "images/city_scene.jpg"

# Build the network from its configuration file.
# NOTE(review): weight_file is defined but never used in this excerpt, so no
# pretrained weights are loaded here — confirm whether a weight-loading call
# on `m` was lost.
m = Darknet(cfg_file)
class_names = load_class_names(namesfile)
# m.print_network()

# cv2.imread() returns None instead of raising when the file is missing or
# unreadable; fail early with a clear message rather than inside cvtColor().
original_image = cv2.imread(image_file)
if original_image is None:
    raise FileNotFoundError("Could not read image: {}".format(image_file))

# OpenCV loads images as BGR; convert to RGB before resizing and plotting.
original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
# Resize to the input size stored on the network object.
img = cv2.resize(original_image, (m.width, m.height))
# detect the objects
boxes = detect_objects(m, img, iou_threshold, score_threshold)
# plot the image with the bounding boxes and corresponding object class labels
plot_boxes(original_image, boxes, class_names, plot_labels=True)
Exemple #2
# Read the command-line options and convert them to the types used below.
args = arg_parse()
images = args.images
batch_size = int(args.bs)
confidence = float(args.confidence)
nms_thresh = float(args.nms_thresh)
start = 0
CUDA = torch.cuda.is_available()

# Load the class-name file
num_classes = 80 # for COCO
classes = load_classes('data/coco.names')

# Set up the neural network
# NOTE(review): no weight-loading call is visible in this excerpt — confirm
# whether one was lost between building the model and using it.
print('Loading network...')
model = Darknet(args.cfgfile)
print('Network Successfully loaded')

# Override the network input height with the requested resolution; the
# asserts require it to be a multiple of 32 and larger than 32.
model.net_info['height'] = args.reso
inp_dim = int(model.net_info['height'])
assert inp_dim % 32 == 0
assert inp_dim > 32

# If a GPU is available, put the model on the GPU
# NOTE(review): the body of this `if` is missing from the excerpt.
if CUDA:

# Set the model in evaluation mode
# NOTE(review): the statement this comment describes is missing from the excerpt.
Exemple #3
def valid(datacfg, cfgfile, weightfile, outfile):
    """Run detection over the validation list and write per-class result files.

    Reads the 'valid' and 'names' entries of the data config, builds a
    Darknet model from ``cfgfile`` and, for every box kept after NMS, writes
    a line ``<fileId> <prob> <x1> <y1> <x2> <y2>`` to the file opened for
    that box's class id (``results/<outfile><class name>.txt``).

    Args:
        datacfg: passed to read_data_cfg().
        cfgfile: passed to Darknet().
        weightfile: not referenced in the visible lines.
        outfile: prefix of the per-class result file names.

    NOTE(review): this excerpt is incomplete. The listDataset() and
    DataLoader() calls are cut off, the `if not os.path.exists('results')`
    check and the final loop have no body, and one line inside the batch
    loop is a dangling fragment. Restore the missing lines before use.
    """
    cudnn.enabled = True
    cudnn.benchmark = True

    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    name_list = options['names']
    prefix = 'results'
    names = load_class_names(name_list)

    # One image path per line; rstrip() drops the trailing newline.
    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    m = Darknet(cfgfile)
    print('shape:', m.width, 'x', m.height)
    # NOTE(review): the call below is never closed in this excerpt.
    valid_dataset = dataset.listDataset(valid_images,
                                        shape=(m.width, m.height),
    valid_batchsize = 2
    assert (valid_batchsize > 1)

    kwargs = {'num_workers': 16, 'pin_memory': True}
    # NOTE(review): the remaining DataLoader arguments are missing here.
    valid_loader = torch.utils.data.DataLoader(valid_dataset,

    # One output file per class, indexed by class id.
    fps = []
    # NOTE(review): the body of this `if` is missing from the excerpt.
    if not os.path.exists('results'):
    for i in range(m.num_classes):
        buf = '%s/%s%s.txt' % (prefix, outfile, names[i])
        fps.append(open(buf, 'w'))

    # Running index into valid_files, advanced once per image.
    lineId = -1

    conf_thresh = 0.005
    nms_thresh = 0.45
    for batch_idx, (data, target) in enumerate(valid_loader):
        data = data.cuda()
        output = m(data)
        batch_boxes = get_region_boxes(output, conf_thresh, m.num_classes,
                                       m.anchors, m.num_anchors, 0, 1)
        for i in range(output.size(0)):
            lineId = lineId + 1
            # File name without directory, cut at the first '.'.
            fileId = os.path.basename(valid_files[lineId]).split('.')[0]
            width, height = get_image_size(valid_files[lineId])
            # NOTE(review): the next line is a dangling fragment of a
            # statement whose start and end are missing.
                '[{}/{}]: '.format(batch_idx * output.size(0) + i,
            boxes = batch_boxes[i]
            boxes = nms(boxes, nms_thresh)
            for box in boxes:
                # box[0], box[1] are used as the centre and box[2], box[3]
                # as the size; scale by the image size to get the corners.
                x1 = (box[0] - box[2] / 2.0) * width
                y1 = (box[1] - box[3] / 2.0) * height
                x2 = (box[0] + box[2] / 2.0) * width
                y2 = (box[1] + box[3] / 2.0) * height

                det_conf = box[4]
                # From index 5 on, entries are read as (cls_conf, cls_id) pairs.
                for j in range((len(box) - 5) // 2):
                    cls_conf = box[5 + 2 * j]
                    cls_id = box[6 + 2 * j]
                    prob = det_conf * cls_conf
                    fps[cls_id].write('%s %f %f %f %f %f\n' %
                                      (fileId, prob, x1, y1, x2, y2))

    # NOTE(review): the body of this loop is missing from the excerpt; the
    # files opened above are not closed in the visible code.
    for i in range(m.num_classes):
Exemple #4
def valid(datacfg, cfgfile, weightfile, outfile):
    """Evaluate a pose-estimation network on the validation list and log errors.

    For every ground-truth object the highest-confidence predicted box is
    selected, 2D corners are scaled to pixels, [R|t] is recovered with pnp()
    for both ground truth and prediction, and translation, angle, pixel and
    vertex distances are computed. Summary statistics are logged at the end.

    Args:
        datacfg: passed to read_data_cfg(); the 'valid', 'mesh', 'backup',
            'name' and 'diam' entries are read.
        cfgfile: passed to Darknet().
        weightfile: not referenced in the visible lines.
        outfile: not referenced in the visible lines.

    NOTE(review): this excerpt is incomplete. The `if not
    os.path.exists(backupdir)` check has no body, the listDataset() call is
    cut off, and nothing in the visible code appends to the errs_*, preds_*
    or gts_* lists. Restore the missing lines before use.
    """
    def truths_length(truths):
        # Index of the first of the first 50 rows whose column 1 equals 0;
        # falls through (returns None) if no such row exists.
        for i in range(50):
            if truths[i][1] == 0:
                return i

    # Parse configuration files
    options      = read_data_cfg(datacfg)
    valid_images = options['valid']
    meshname     = options['mesh']
    backupdir    = options['backup']
    name         = options['name']
    # NOTE(review): the body of this `if` is missing from the excerpt.
    if not os.path.exists(backupdir):

    # Parameters
    prefix       = 'results'
    seed         = int(time.time())
    gpus         = '0'     # Specify which gpus to use
    test_width   = 544
    test_height  = 544
    use_cuda = True
    if use_cuda:
        os.environ['CUDA_VISIBLE_DEVICES'] = gpus
    save            = False
    testtime        = True
    use_cuda        = True
    num_classes     = 1
    testing_samples = 0.0
    eps             = 1e-5
    notpredicted    = 0 
    conf_thresh     = 0.1
    nms_thresh      = 0.4
    match_thresh    = 0.5
    if save:
        makedirs(backupdir + '/test')
        makedirs(backupdir + '/test/gt')
        makedirs(backupdir + '/test/pr')

    # To save
    # NOTE(review): none of the lists below is appended to in the visible code.
    testing_error_trans = 0.0
    testing_error_angle = 0.0
    testing_error_pixel = 0.0
    errs_2d             = []
    errs_3d             = []
    errs_trans          = []
    errs_angle          = []
    errs_corner2D       = []
    preds_trans         = []
    preds_rot           = []
    preds_corners2D     = []
    gts_trans           = []
    gts_rot             = []
    gts_corners2D       = []

    # Read object model information, get 3D bounding box corners
    mesh          = MeshPly(meshname)
    # Append a column of ones to the vertices, then transpose.
    vertices      = np.c_[np.array(mesh.vertices), np.ones((len(mesh.vertices), 1))].transpose()
    corners3D     = get_3D_corners(vertices)
    # diam          = calc_pts_diameter(np.array(mesh.vertices))
    diam          = float(options['diam'])

    # Read intrinsic camera parameters
    internal_calibration = get_camera_intrinsic()

    # Get validation file names
    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]
    # Specify model, load pretrained weights, pass to GPU and set the module in evaluation mode
    # NOTE(review): only the constructor call is visible; the weight-loading,
    # GPU and eval-mode steps named above are not in this excerpt.
    model = Darknet(cfgfile)

    # Get the parser for the test dataset
    # NOTE(review): the call below is never closed in this excerpt.
    valid_dataset = dataset.listDataset(valid_images, shape=(test_width, test_height),
    valid_batchsize = 1

    # Specify the number of workers for multiple processing, get the dataloader for the test dataset
    kwargs = {'num_workers': 4, 'pin_memory': True}
    test_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=valid_batchsize, shuffle=False, **kwargs) 

    logging("   Testing {}...".format(name))
    logging("   Number of test samples: %d" % len(test_loader.dataset))
    # Iterate through test batches (Batch size for test data is 1)
    count = 0
    z = np.zeros((3, 1))
    for batch_idx, (data, target) in enumerate(test_loader):
        t1 = time.time()
        # Pass data to GPU
        if use_cuda:
            data = data.cuda()
            target = target.cuda()
        # Wrap tensors in Variable class, set volatile=True for inference mode and to use minimal memory during inference
        data = Variable(data, volatile=True)
        t2 = time.time()
        # Forward pass
        output = model(data).data  
        t3 = time.time()
        # Using confidence threshold, eliminate low-confidence predictions
        all_boxes = get_region_boxes(output, conf_thresh, num_classes)        
        t4 = time.time()

        # Iterate through all images in the batch
        for i in range(output.size(0)):
            # For each image, get all the predictions
            boxes   = all_boxes[i]
            # For each image, get all the targets (for multiple object pose estimation, there might be more than 1 target per image)
            truths  = target[i].view(-1, 21)
            # Get how many objects are present in the scene
            num_gts = truths_length(truths)

             # Iterate through each ground-truth object
            for k in range(num_gts):
                # Columns 1..18 of the truth row, two constant 1.0 entries,
                # then column 0 last.
                box_gt        = [truths[k][1], truths[k][2], truths[k][3], truths[k][4], truths[k][5], truths[k][6], 
                                truths[k][7], truths[k][8], truths[k][9], truths[k][10], truths[k][11], truths[k][12], 
                                truths[k][13], truths[k][14], truths[k][15], truths[k][16], truths[k][17], truths[k][18], 1.0, 1.0, truths[k][0]]
                best_conf_est = -1

                # If the prediction has the highest confidence, choose it as our prediction for single object pose estimation
                # NOTE(review): box_pr stays unbound if `boxes` is empty, and
                # `match` is not read anywhere in the visible code.
                for j in range(len(boxes)):
                    if (boxes[j][18] > best_conf_est):
                        match         = corner_confidence9(box_gt[:18], torch.FloatTensor(boxes[j][:18]))
                        box_pr        = boxes[j]
                        best_conf_est = boxes[j][18]

                # Denormalize the corner predictions 
                # The 18 values are reshaped to 9 (x, y) rows and scaled by the
                # hard-coded 640 and 480 — presumably the image width and
                # height; confirm against the dataset.
                corners2D_gt = np.array(np.reshape(box_gt[:18], [9, 2]), dtype='float32')
                corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]), dtype='float32')
                corners2D_gt[:, 0] = corners2D_gt[:, 0] * 640
                corners2D_gt[:, 1] = corners2D_gt[:, 1] * 480               
                corners2D_pr[:, 0] = corners2D_pr[:, 0] * 640
                corners2D_pr[:, 1] = corners2D_pr[:, 1] * 480

                # Compute corner prediction error
                corner_norm = np.linalg.norm(corners2D_gt - corners2D_pr, axis=1)
                corner_dist = np.mean(corner_norm)
                # Compute [R|t] by pnp
                # The 3D point set is a zero column followed by the first
                # three rows of corners3D, transposed to one point per row.
                R_gt, t_gt = pnp(np.array(np.transpose(np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)), dtype='float32'),  corners2D_gt, np.array(internal_calibration, dtype='float32'))
                R_pr, t_pr = pnp(np.array(np.transpose(np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)), dtype='float32'),  corners2D_pr, np.array(internal_calibration, dtype='float32'))

                if save:

                    # File names use characters [-8:-3] of the image path
                    # followed by 'txt'.
                    np.savetxt(backupdir + '/test/gt/R_' + valid_files[count][-8:-3] + 'txt', np.array(R_gt, dtype='float32'))
                    np.savetxt(backupdir + '/test/gt/t_' + valid_files[count][-8:-3] + 'txt', np.array(t_gt, dtype='float32'))
                    np.savetxt(backupdir + '/test/pr/R_' + valid_files[count][-8:-3] + 'txt', np.array(R_pr, dtype='float32'))
                    np.savetxt(backupdir + '/test/pr/t_' + valid_files[count][-8:-3] + 'txt', np.array(t_pr, dtype='float32'))
                    np.savetxt(backupdir + '/test/gt/corners_' + valid_files[count][-8:-3] + 'txt', np.array(corners2D_gt, dtype='float32'))
                    np.savetxt(backupdir + '/test/pr/corners_' + valid_files[count][-8:-3] + 'txt', np.array(corners2D_pr, dtype='float32'))
                # Compute translation error
                trans_dist   = np.sqrt(np.sum(np.square(t_gt - t_pr)))
                # Compute angle error
                angle_dist   = calcAngularDistance(R_gt, R_pr)
                # Compute pixel error
                Rt_gt        = np.concatenate((R_gt, t_gt), axis=1)
                Rt_pr        = np.concatenate((R_pr, t_pr), axis=1)
                proj_2d_gt   = compute_projection(vertices, Rt_gt, internal_calibration)
                proj_2d_pred = compute_projection(vertices, Rt_pr, internal_calibration) 
                norm         = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)
                pixel_dist   = np.mean(norm)

                # Compute 3D distances
                transform_3d_gt   = compute_transformation(vertices, Rt_gt) 
                transform_3d_pred = compute_transformation(vertices, Rt_pr)  
                norm3d            = np.linalg.norm(transform_3d_gt - transform_3d_pred, axis=0)
                vertex_dist       = np.mean(norm3d)    

                # Sum errors
                # NOTE(review): corner_dist and vertex_dist are computed above
                # but not accumulated or stored in the visible code.
                testing_error_trans  += trans_dist
                testing_error_angle  += angle_dist
                testing_error_pixel  += pixel_dist
                testing_samples      += 1
                count = count + 1

        t5 = time.time()

    # Compute 2D projection error, 6D pose error, 5cm5degree error
    # eps in the denominators avoids division by zero for empty lists.
    # NOTE(review): acc5cm5deg is computed twice with the same expression.
    px_threshold = 5
    acc         = len(np.where(np.array(errs_2d) <= px_threshold)[0]) * 100. / (len(errs_2d)+eps)
    acc5cm5deg  = len(np.where((np.array(errs_trans) <= 0.05) & (np.array(errs_angle) <= 5))[0]) * 100. / (len(errs_trans)+eps)
    acc3d10     = len(np.where(np.array(errs_3d) <= diam * 0.1)[0]) * 100. / (len(errs_3d)+eps)
    acc5cm5deg  = len(np.where((np.array(errs_trans) <= 0.05) & (np.array(errs_angle) <= 5))[0]) * 100. / (len(errs_trans)+eps)
    corner_acc  = len(np.where(np.array(errs_corner2D) <= px_threshold)[0]) * 100. / (len(errs_corner2D)+eps)
    mean_err_2d = np.mean(errs_2d)
    mean_corner_err_2d = np.mean(errs_corner2D)
    nts = float(testing_samples)

    # Timings come from the last batch only (t1..t5 are overwritten on every
    # iteration) and are undefined if the loader yielded no batch.
    if testtime:
        print('  tensor to cuda : %f' % (t2 - t1))
        print('         predict : %f' % (t3 - t2))
        print('get_region_boxes : %f' % (t4 - t3))
        print('            eval : %f' % (t5 - t4))
        print('           total : %f' % (t5 - t1))

    # Print test statistics
    # NOTE(review): nts is 0.0 when no sample was processed, which makes the
    # divisions in the last logging call fail.
    logging('Results of {}'.format(name))
    logging('   Acc using {} px 2D Projection = {:.2f}%'.format(px_threshold, acc))
    logging('   Acc using 10% threshold - {} vx 3D Transformation = {:.2f}%'.format(diam * 0.1, acc3d10))
    logging('   Acc using 5 cm 5 degree metric = {:.2f}%'.format(acc5cm5deg))
    logging("   Mean 2D pixel error is %f, Mean vertex error is %f, mean corner error is %f" % (mean_err_2d, np.mean(errs_3d), mean_corner_err_2d))
    logging('   Translation error: %f m, angle error: %f degree, pixel error: % f pix' % (testing_error_trans/nts, testing_error_angle/nts, testing_error_pixel/nts) )

    if save:
        predfile = backupdir + '/predictions_linemod_' + name +  '.mat'
        scipy.io.savemat(predfile, {'R_gts': gts_rot, 't_gts':gts_trans, 'corner_gts': gts_corners2D, 'R_prs': preds_rot, 't_prs':preds_trans, 'corner_prs': preds_corners2D})
Exemple #5

if __name__ == '__main__':
    # Runtime options
    args = arg_parse()
    print("\n--- running options ---")

    # Use the GPU only when one is available AND it was requested.
    use_cuda = torch.cuda.is_available() and args.cuda
    device = torch.device("cuda" if use_cuda else "cpu")
    Tensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor

    # Set up the neural network
    print("Loading network......")
    model = Darknet(args.cfgfile).to(device)
    print("Network successfully loaded")
    inp_dim = int(model.net_info["height"])

    # Set the model to evaluation mode
    # NOTE(review): no statement follows this comment in the excerpt, and no
    # weight-loading call is visible either — confirm whether lines were lost.

    # Set up the capture and the writer
    video = cv2.VideoCapture(args.videopath)
    # VideoCapture does not raise on a bad path; without this check the
    # properties below read as 0 and the writer gets a 0x0 frame size.
    if not video.isOpened():
        raise IOError("Could not open video: {}".format(args.videopath))
    # The output keeps the frame rate and frame size of the input.
    fps = video.get(cv2.CAP_PROP_FPS)
    w = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter("output.avi", fourcc, fps, (w, h))
Exemple #6
# NOTE(review): this excerpt of main() is incomplete and does not parse as
# shown: the opening docstring is never closed, several expressions are cut
# off, and some `else:` / `try:` lines appear to be missing. The comments
# below mark each spot; restore the missing lines before use.
def main():
    """main method, containing training logic such as hardware, optimizer, weight loader, dataloaders etc.
       we make a number of variables available globally, as we will use them within the train method later on
    # Paths taken from the command-line flags.
    datacfg = FLAGS.data
    cfgfile = FLAGS.config
    weightfile = FLAGS.weights

    data_options = read_data_cfg(datacfg)
    # Only the first entry returned by parse_cfg() is used for the settings below.
    net_options = parse_cfg(cfgfile)[0]

    global use_cuda
    # NOTE(review): the conditional expression below is cut off after `else`.
    use_cuda = torch.cuda.is_available() and (True if use_cuda is None else

    # Publish the data-config entries as module-level globals.
    globals()["trainlist"] = data_options['train']
    globals()["testlist"] = data_options['valid']
    globals()["backupdir"] = data_options['backup']
    globals()["gpus"] = data_options['gpus']  # e.g. 0,1,2,3
    globals()["ngpus"] = len(gpus.split(','))
    globals()["num_workers"] = int(data_options['num_workers'])

    # Publish the network-config entries as module-level globals.
    globals()["batch_size"] = int(net_options['batch'])
    globals()["max_batches"] = int(net_options['max_batches'])
    globals()["learning_rate"] = float(net_options['learning_rate'])
    globals()["momentum"] = float(net_options['momentum'])
    globals()["decay"] = float(net_options['decay'])
    # NOTE(review): the two list comprehensions below are missing their
    # closing brackets.
    globals()["steps"] = [
        float(step) for step in net_options['steps'].split(',')
    globals()["scales"] = [
        float(scale) for scale in net_options['scales'].split(',')

    # Train parameters
    global max_epochs
    global batch_size
    global num_workers
    global max_batches
    # NOTE(review): an `else:` appears to be missing between the two
    # max_epochs assignments; as shown, the second always overrides the first.
    if 'max_epochs' in net_options:
        max_epochs = int(net_options['max_epochs'])
        nsamples = file_lines(trainlist)
        max_epochs = (max_batches * batch_size) // nsamples + 1

    seed = int(time.time())
    if use_cuda:
        os.environ['CUDA_VISIBLE_DEVICES'] = gpus
    global device
    device = torch.device("cuda" if use_cuda else "cpu")

    global model
    model = Darknet(cfgfile, use_cuda=use_cuda)
    # NOTE(review): weightfile is read above but not used in the visible lines.
    # model.print_network()

    nsamples = file_lines(trainlist)

    # Epoch to resume from: images already seen divided by the dataset size.
    init_epoch = model.seen // nsamples

    global loss_layers
    loss_layers = model.loss_layers
    # Copy the model's seen counter onto every loss layer.
    for l in loss_layers:
        l.seen = model.seen

    globals()["test_loader"] = load_testlist(testlist)
    # NOTE(review): an `else:` appears to be missing before the second
    # assignment to `model` below.
    if use_cuda:
        if ngpus > 1:
            model = torch.nn.DataParallel(model).to(device)
            model = model.to(device)

    # Build per-parameter option groups.
    # NOTE(review): an `else:` appears to be missing between the two
    # `params +=` lines, and `params` is not passed to the optimizer below,
    # which receives model.parameters() instead.
    params_dict = dict(model.named_parameters())
    params = []
    for key, value in params_dict.items():
        if key.find('.bn') >= 0 or key.find('.bias') >= 0:
            params += [{'params': [value], 'weight_decay': 0.0}]
            params += [{'params': [value], 'weight_decay': decay * batch_size}]
    global optimizer
    # Learning rate is divided by, and weight decay multiplied by, batch_size.
    optimizer = optim.SGD(model.parameters(),
                          lr=learning_rate / batch_size,
                          weight_decay=decay * batch_size)

    # NOTE(review): `evaluate` is not defined in the visible lines, and the
    # `else:` / `try:` that should introduce the training loop (matching the
    # `except KeyboardInterrupt` below) are missing.
    if evaluate:
        logging('evaluating ...')
            """here, the magic happens.
               we call train() every epoch and test() / savemodel() every few epochs
            print("Training for ({:d}) epochs.".format(max_epochs))
            fscore = 0
            # NOTE(review): an `else:` appears to be missing between the two
            # mfscore assignments.
            if init_epoch > save_interval:
                mfscore = test(init_epoch - 1)
                mfscore = 0.5
            for epoch in range(init_epoch, max_epochs):
                nsamples = train(epoch)
                if epoch > save_interval:
                    fscore = test(epoch)
                # Periodic checkpoint every save_interval epochs.
                if (epoch + 1) % save_interval == 0:
                    savemodel(epoch, nsamples)
                # With FLAGS.localmax set, also save whenever the score improves.
                if FLAGS.localmax and fscore > mfscore:
                    mfscore = fscore
                    savemodel(epoch, nsamples, True)
                print('-' * 90)
        except KeyboardInterrupt:
            print('=' * 80)
            print('Exiting from training by interrupt')
    # NOTE(review): the lines from here on configure a detector from
    # command-line arguments and look unrelated to the training logic above —
    # possibly a separate excerpt merged in. Confirm where they belong.
    weightsfile = "weights/yolov3.weights"
    num_classes = 80

    args = arg_parse()
    confidence = float(args.confidence)
    # NOTE(review): the variable is spelled `nms_thesh` while the option is
    # `nms_thresh`; check how later code refers to it.
    nms_thesh = float(args.nms_thresh)
    start = 0
    CUDA = torch.cuda.is_available()

    num_classes = 80
    # Attributes per predicted box: 5 plus one entry per class.
    bbox_attrs = 5 + num_classes
    # NOTE(review): weightsfile is not used in the visible lines.
    model = Darknet(cfgfile)
    # Override the network input height with the requested resolution; the
    # asserts require it to be a multiple of 32 and larger than 32.
    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0 
    assert inp_dim > 32

    # NOTE(review): the body of this `if` is missing from the excerpt.
    if CUDA:
