Example #1
def valid(datacfg, cfgfile, weightfile, outfile):
    cudnn.enabled = True
    cudnn.benchmark = True

    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    name_list = options['names']
    prefix = 'results'
    names = load_class_names(name_list)

    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    m = Darknet(cfgfile)
    m.print_network()
    m.load_weights(weightfile)
    m.cuda()
    m.eval()
    print('shape:', m.width, 'x', m.height)

    fps = []
    if not os.path.exists(prefix):
        os.mkdir(prefix)
    for i in range(m.num_classes):
        buf = '%s/%s%s.txt' % (prefix, outfile, names[i])
        fps.append(open(buf, 'w'))

    conf_thresh = 0.005
    nms_thresh = 0.45
    for batch_idx, valid_file in enumerate(valid_files):
        image = cv2.imread(valid_file)
        assert image is not None
        image2 = letterbox_image(image, m.width, m.height)
        if batch_idx == 0:
            cv2.imwrite('letterbox_image.jpg', image2.astype(np.uint8))
        image_tensor = image_to_tensor(image2)

        data = image_tensor.cuda()
        with torch.no_grad():
            output = m(data)
        # if batch_idx == 0:
        #     outputs[-1] = data
        #     save_outputs('./outputs.npz', outputs)
        batch_boxes = get_region_boxes2(output, image.shape[1], image.shape[0],
                                        m.width, m.height, conf_thresh,
                                        m.num_classes, m.anchors,
                                        m.num_anchors, 1)

        fileId = os.path.basename(valid_file).split('.')[0]
        height, width = image.shape[:2]
        print('[{}/{}]: '.format(batch_idx, len(valid_files)), valid_file, ' ',
              len(batch_boxes[0]))
        boxes = batch_boxes[0]
        boxes = nms_class(boxes, nms_thresh, m.num_classes)
        for box in boxes:
            x1 = (box[0] - box[2] / 2.0) * width
            y1 = (box[1] - box[3] / 2.0) * height
            x2 = (box[0] + box[2] / 2.0) * width
            y2 = (box[1] + box[3] / 2.0) * height

            if x1 < 0:
                x1 = 0
            if y1 < 0:
                y1 = 0
            if x2 >= width:
                x2 = width - 1
            if y2 >= height:
                y2 = height - 1

            for j in range(m.num_classes):
                prob = box[5 + j]
                if prob >= conf_thresh:
                    fps[j].write('%s %f %f %f %f %f\n' %
                                 (fileId, prob, x1, y1, x2, y2))

    for i in range(m.num_classes):
        fps[i].close()
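Note: letterbox_image() and image_to_tensor() used above are project helpers not shown in this snippet. A minimal sketch of what the call sites assume (the real implementations may differ):

import cv2
import numpy as np
import torch

def letterbox_image(img, net_w, net_h):
    # resize with unchanged aspect ratio, padding the borders with gray
    h, w = img.shape[:2]
    scale = min(net_w / w, net_h / h)
    resized = cv2.resize(img, (int(w * scale), int(h * scale)))
    canvas = np.full((net_h, net_w, 3), 127.5, dtype=np.float32)
    top = (net_h - resized.shape[0]) // 2
    left = (net_w - resized.shape[1]) // 2
    canvas[top:top + resized.shape[0], left:left + resized.shape[1]] = resized
    return canvas

def image_to_tensor(img):
    # HWC BGR image -> 1x3xHxW float tensor scaled to [0, 1]
    return torch.from_numpy(img.transpose(2, 0, 1) / 255.0).float().unsqueeze(0)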
Example #2
def predict():    
    target = os.path.join(APP_ROOT, 'static/')
    print(target)
    if not os.path.isdir(target):
        os.mkdir(target)
    print(request.files.getlist("file"))
    for upload in request.files.getlist("file"):
        print(upload)
        print("{} is the file name".format(upload.filename))
        filename = upload.filename
        destination = "/".join([target, filename])
        print ("Accept incoming file:", filename)
        print ("Save it to:", destination)
        upload.save(destination)

    scales = "1,2,3"
    print(filename)

    images = "static/" + str(filename)
    batch_size = 1
    confidence = 0.5
    nms_thresh = 0.4
    start = 0

    CUDA = torch.cuda.is_available()

    num_classes = 80
    classes = load_classes('data/coco.names') 
    print("Loading network.....")
    model = Darknet("cfg/yolov3.cfg")
    model.load_weights("yolov3.weights")
    print("Network successfully loaded")
    model.net_info["height"] = "416"
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0 
    assert inp_dim > 32
    if CUDA:
        model.cuda()
    model.eval()
    read_dir = time.time()
    try:
        imlist = [osp.join(osp.realpath('.'), images, img)
                  for img in os.listdir(images)
                  if os.path.splitext(img)[1] in ('.png', '.jpeg', '.jpg')]
    except NotADirectoryError:
        imlist = []
        imlist.append(osp.join(osp.realpath('.'), images))
    except FileNotFoundError:
        print ("No file or directory with the name {}".format(images))
        exit()
    load_batch = time.time()
    batches = list(map(prep_image, imlist, [inp_dim for x in range(len(imlist))]))
    im_batches = [x[0] for x in batches]
    orig_ims = [x[1] for x in batches]
    im_dim_list = [x[2] for x in batches]
    im_dim_list = torch.FloatTensor(im_dim_list).repeat(1,2)
    if CUDA:
        im_dim_list = im_dim_list.cuda()
    leftover = 0
    if (len(im_dim_list) % batch_size):
        leftover = 1
    if batch_size != 1:
        num_batches = len(imlist) // batch_size + leftover            
        im_batches = [torch.cat((im_batches[i*batch_size : min((i +  1)*batch_size,
                            len(im_batches))]))  for i in range(num_batches)]        
    i = 0
    write = False
    model(get_test_input(inp_dim, CUDA), CUDA)
    start_det_loop = time.time()
    objs = {}
    for batch in im_batches:
        start = time.time()
        if CUDA:
            batch = batch.cuda()
        with torch.no_grad():
            prediction = model(Variable(batch), CUDA)
        prediction = write_results(prediction, confidence, num_classes, nms = True, nms_conf = nms_thresh)
        if type(prediction) == int:
            i += 1
            continue
        end = time.time()
        prediction[:,0] += i*batch_size
        if not write:
            output = prediction
            write = True
        else:
            output = torch.cat((output,prediction))
        for im_num, image in enumerate(imlist[i*batch_size: min((i +  1)*batch_size, len(imlist))]):
            im_id = i*batch_size + im_num
            objs = [classes[int(x[-1])] for x in output if int(x[0]) == im_id]
            print("{0:20s} predicted in {1:6.3f} seconds".format(image.split("/")[-1], (end - start)/batch_size))
            print("{0:20s} {1:s}".format("Objects Detected:", " ".join(objs)))
            print("----------------------------------------------------------")
        i += 1
        if CUDA:
            torch.cuda.synchronize()
    try:
        output
    except NameError:
        print("No detections were made")
        exit()
        
    im_dim_list = torch.index_select(im_dim_list, 0, output[:,0].long())
    scaling_factor = torch.min(inp_dim/im_dim_list,1)[0].view(-1,1)
    output[:,[1,3]] -= (inp_dim - scaling_factor*im_dim_list[:,0].view(-1,1))/2
    output[:,[2,4]] -= (inp_dim - scaling_factor*im_dim_list[:,1].view(-1,1))/2
    output[:,1:5] /= scaling_factor
    for i in range(output.shape[0]):
        output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim_list[i,0])
        output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim_list[i,1])
    output_recast = time.time()
    class_load = time.time()
    colors = pkl.load(open("pallete", "rb"))
    draw = time.time()
    def write(x, batches, results):
        c1 = tuple(x[1:3].int())
        c2 = tuple(x[3:5].int())
        img = results[int(x[0])]
        cls = int(x[-1])
        label = "{0}".format(classes[cls])
        color = random.choice(colors)
        cv2.rectangle(img, c1, c2,color, 2)
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0]
        c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
        cv2.rectangle(img, c1, c2,color, -1)
        cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,255,255], 1)
        return img
    list(map(lambda x: write(x, im_batches, orig_ims), output))
    det_names = pd.Series(imlist).apply(lambda x: "{}/{}".format("static",x.split("/")[-1]))
    list(map(cv2.imwrite, det_names, orig_ims))
    end = time.time()
    torch.cuda.empty_cache()
    return render_template("results.html",image_name=filename)
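The predict() handler above runs inside a Flask request context. A minimal sketch of the wiring it assumes (the route name, template, and APP_ROOT definition are assumptions, not shown in the snippet):

from flask import Flask, request, render_template
import os

APP_ROOT = os.path.dirname(os.path.abspath(__file__))  # assumed project root
app = Flask(__name__)
# register predict() as a POST endpoint (hypothetical route name)
app.add_url_rule('/predict', 'predict', predict, methods=['POST'])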
Example #3
class Yolo6dDetector:
    def __init__(self):
        print('initialization........')
        self.cfgfile = 'cfg/yolo-pose.cfg'
        self.outfile = 'output_file'
        self.object_weight = 'backup/small_duck/model.weights'
        self.ply_model = './LINEMOD/small_duck/small_duck.ply'
        self.model = Darknet(self.cfgfile)
        self.num_classes = 1
        self.conf_thresh = 0.1
        self.test_width = 544
        self.test_height = 544
        self.R_pr = None
        self.t_pr = None
        self.Rt_pr = None
        self.visualize = True
        self.data = None

    def loadData(self,empty_one,empty_two):
        print("in python detector loadData\n")
        print("%s\n"%empty_one)
        print("%s\n"%empty_two)
        self.model.load_weights(self.object_weight)
        self.model.eval()
        self.mesh = MeshPly(self.ply_model)
        self.vertices = np.c_[np.array(self.mesh.vertices),
                              np.ones((len(self.mesh.vertices), 1))].transpose()
        self.corners3D = get_3D_corners(self.vertices)
        self.internal_calibration = get_camera_intrinsic() # Read intrinsic camera parameters
        self.edges_corners = [[0, 1], [0, 2], [0, 4], [1, 3], [1, 5], [2, 3],
                         [2, 6], [3, 7], [4, 5], [4, 6], [5, 7], [6, 7]]


    def setColorImg(self,img):
        print("in python detector setColorImg\n")
        img = Image.fromarray(cv2.cvtColor(img,cv2.COLOR_BGR2RGB))

        img = img.resize((self.test_width, self.test_height))
        transform = transforms.Compose([transforms.ToTensor(), ])
        self.data = Variable(transform(img).view(1, 3, self.test_height, self.test_width))

        # Images
        img_show = self.data[0, :, :, :]
        img_show = img_show.numpy().squeeze()
        self.img_show = np.transpose(img_show, (1, 2, 0))
        # print('successful')
        # print(img)

    def detection(self):
        print("in python detector detection\n")

        #forward pass
        output = self.model(self.data).cuda()

        # Using confidence threshold, eliminate low-confidence predictions
        all_boxes = get_region_boxes(output, self.conf_thresh, self.num_classes)

        # Iterate through all images in the batch
        for i in range(output.size(0)):
            # For each image, get all the predictions
            boxes = all_boxes[i]
            best_conf_est = -1

            # If the prediction has the highest confidence, choose it as our prediction for single object pose estimation
            for j in range(len(boxes)):
                if (boxes[j][18] > best_conf_est):
                    # match = corner_confidence9(box_gt[:18], torch.FloatTensor(boxes[j][:18]))
                    box_pr = boxes[j]
                    best_conf_est = boxes[j][18]

            # Denormalize the corner predictions
            corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]), dtype='float32')
            corners2D_pr[:, 0] = corners2D_pr[:, 0] * 1920
            corners2D_pr[:, 1] = corners2D_pr[:, 1] * 1080

            # Compute [R|t] by pnp

            self.R_pr, self.t_pr = pnp(np.array(np.transpose(np.concatenate((np.zeros((3, 1)), self.corners3D[:3, :]), axis=1)),
                                          dtype='float32'), corners2D_pr,
                                 np.array(self.internal_calibration, dtype='float32'))
            # self.R_pr.append(R_pr)
            # self.r_pr.append(t_pr)

            # # Compute pixel error
            self.Rt_pr = np.concatenate((self.R_pr, self.t_pr), axis=1)

            # proj_2d_pred = compute_projection(vertices, Rt_pr, internal_calibration)
            proj_corners_pr = np.transpose(compute_projection(self.corners3D, self.Rt_pr, self.internal_calibration))

            if self.visualize:
                # Visualize
                plt.xlim((0, 1920))
                plt.ylim((0, 1080))
                plt.imshow(scipy.misc.imresize(self.img_show, (1080, 1920)))
                # Projections
                for edge in self.edges_corners:
                    # plt.plot(proj_corners_gt[edge, 0], proj_corners_gt[edge, 1], color='g', linewidth=3.0)
                    plt.plot(proj_corners_pr[edge, 0], proj_corners_pr[edge, 1], color='b', linewidth=3.0)
                plt.gca().invert_yaxis()
                plt.show()


    def getResult(self):
        print("in python detector getResult\n")
        print(self.R_pr)
        print(self.t_pr)



    def setDepthImg(self, img):


        print("in python detector setDepthImg\n")
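The pnp() call in detection() is a helper that is not shown here. A minimal sketch of such a wrapper around OpenCV's solvePnP (the exact signature is an assumption):

import cv2
import numpy as np

def pnp(points_3D, points_2D, camera_matrix):
    # solve for the rotation and translation mapping 3D model points to 2D pixels
    dist_coeffs = np.zeros((4, 1))  # assume no lens distortion
    _, rvec, tvec = cv2.solvePnP(points_3D, points_2D, camera_matrix, dist_coeffs)
    R, _ = cv2.Rodrigues(rvec)  # rotation vector -> 3x3 rotation matrix
    return R, tvec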
Example #4
class Car_DC():
    def __init__(self,
                 src_dir,
                 dst_dir,
                 car_cfg_path=local_car_cfg_path,
                 car_det_weights_path=local_car_det_weights_path,
                 inp_dim=768,
                 prob_th=0.2,
                 nms_th=0.4,
                 num_classes=1):
        """
        model initialization
        """
        # super parameters
        self.inp_dim = inp_dim
        self.prob_th = prob_th
        self.nms_th = nms_th
        self.num_classes = num_classes
        self.dst_dir = dst_dir

        # clear dst_dir
        if os.path.exists(self.dst_dir):
            for x in os.listdir(self.dst_dir):
                if x.endswith('.jpg'):
                    os.remove(self.dst_dir + '/' + x)
        else:
            os.makedirs(self.dst_dir)

        # initialize vehicle detection model
        self.detector = Darknet(car_cfg_path)
        self.detector.load_weights(car_det_weights_path)
        # set input dimension of image
        self.detector.net_info['height'] = self.inp_dim
        self.detector.to(device)
        self.detector.eval()  # evaluation mode
        print('=> car detection model initiated.')

        # initiate multilabel classifier
        self.classifier = Car_Classifier(num_cls=19,
                                         model_path=local_model_path)

        # initiate imgs_path
        # self.imgs_path = [os.path.join(src_dir, x) for x in os.listdir(src_dir) if x.endswith('.jpg') or x.endswith('.png')]

        # MODIFIED!
        self.imgs_path = [
            os.path.join(src_dir, x) for x in os.listdir(src_dir)
            if x.startswith('set') and x.endswith('_image')
        ]
        self.imgs_path = [
            os.path.join(x, y) for x in self.imgs_path for y in os.listdir(x)
        ]
        self.imgs_path.sort()
        self.imgs_path = [
            os.path.join(x, y) for x in self.imgs_path for y in os.listdir(x)
        ]
        self.imgs_path = [
            os.path.join(x, y) for x in self.imgs_path for y in os.listdir(x)
            if y.endswith('.jpg') or y.endswith('.png')
        ]

    def cls_draw_bbox(self, output, orig_img):
        """
        1. predict vehicle's attributes based on bbox of vehicle
        2. draw bbox to orig_img
        """
        labels = []
        pt_1s = []
        pt_2s = []

        car_color, car_direction, car_type = None, None, None

        # 1
        for det in output:
            if len(det) == 7:
                continue

            # rectangle points
            pt_1 = tuple(det[1:3].int())  # the left-up point
            pt_2 = tuple(det[3:5].int())  # the right down point
            pt_1s.append(pt_1)
            pt_2s.append(pt_2)

            # turn BGR back to RGB
            ROI = Image.fromarray(orig_img[pt_1[1]:pt_2[1],
                                           pt_1[0]:pt_2[0]][:, :, ::-1])
            # # ROI.show()

            # # call classifier to predict
            car_color, car_direction, car_type = self.classifier.predict(ROI)
            label = str(car_color + ' ' + car_direction + ' ' + car_type)
            labels.append(label)
            print('=> predicted label: ', label)
            break

        # 2
        color = (0, 215, 255)
        for i, det in enumerate(output):
            if len(det) == 7:
                continue

            pt_1 = pt_1s[i]
            pt_2 = pt_2s[i]

            # draw bounding box
            cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=2)

            # get str text size
            txt_size = cv2.getTextSize(labels[i], cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
            # pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] + txt_size[1] + 5
            pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] - txt_size[1] - 5

            # # draw text background rect
            cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=-1)  # text

            # draw text
            cv2.putText(
                orig_img,
                labels[i],
                (pt_1[0], pt_1[1]),  # pt_1[1] + txt_size[1] + 4
                cv2.FONT_HERSHEY_PLAIN,
                2,
                [225, 255, 255],
                2)
            break

        return car_color, car_direction, car_type

    def process_predict(self, prediction, prob_th, num_cls, nms_th, inp_dim,
                        orig_img_size):
        """
        processing detections
        """
        scaling_factor = min([inp_dim / float(x)
                              for x in orig_img_size])  # W, H scaling factor

        output = post_process(prediction,
                              prob_th,
                              num_cls,
                              nms=True,
                              nms_conf=nms_th,
                              CUDA=True)  # post-process such as nms

        if type(output) != int:
            output[:,
                   [1, 3]] -= (inp_dim -
                               scaling_factor * orig_img_size[0]) / 2.0  # x, w
            output[:,
                   [2, 4]] -= (inp_dim -
                               scaling_factor * orig_img_size[1]) / 2.0  # y, h
            output[:, 1:5] /= scaling_factor
            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0,
                                                orig_img_size[0])
                output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0,
                                                orig_img_size[1])

        return output

    def detect_classify(self, query_pair):
        pre_path = ''
        color_dict = {}
        type_dict = {}
        # cars = []
        # all_cars_per_camera = {}
        index_list_all = []
        index_list_per_camera = []

        pre_camera_id = self.imgs_path[0].split('/')[3]

        stream_i = 0
        print("\n\nProcessing stream %d...\n" % stream_i)

        tracklet_i = 0
        """
        detect and classify
        """
        car_color, car_direction, car_type = None, None, None
        for x in self.imgs_path:
            curr_path = os.path.split(x)[0]

            # read image data
            img = cv2.imread(x)
            img = cv2.copyMakeBorder(img,
                                     BORDER,
                                     BORDER,
                                     BORDER,
                                     BORDER,
                                     cv2.BORDER_CONSTANT,
                                     value=(100, 100, 100))
            img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))  # BGR => RGB

            img2det = process_img(img, self.inp_dim)
            img2det = img2det.to(device)  # put image data to device

            # vehicle detection
            prediction = self.detector.forward(img2det, CUDA=True)

            # calculating scaling factor
            orig_img_size = list(img.size)
            output = self.process_predict(prediction, self.prob_th,
                                          self.num_classes, self.nms_th,
                                          self.inp_dim, orig_img_size)

            orig_img = cv2.cvtColor(np.asarray(img),
                                    cv2.COLOR_RGB2BGR)  # RGB => BGR
            if type(output) != int:
                # print('\n', x)
                car_color, car_direction, car_type = self.cls_draw_bbox(
                    output, orig_img)
                dst_path = self.dst_dir + '/' + os.path.split(x)[1]
                # if not os.path.exists(dst_path):
                # cv2.imwrite(dst_path, orig_img)

            if curr_path != pre_path and pre_path != '':
                start_length = os.path.split(os.path.split(pre_path)[0])[1]
                detect_color = max(color_dict, key=color_dict.get)
                detect_type = max(type_dict, key=type_dict.get)
                print("Tracklet %d detects " % tracklet_i, detect_color,
                      detect_type)
                # add_to_all(all_cars_per_camera, detect_color, detect_type)
                compare_query_append(query_pair, detect_color, detect_type,
                                     index_list_per_camera, tracklet_i,
                                     start_length)
                tracklet_i += 1

                color_dict.clear()
                type_dict.clear()

                curr_camera_id = x.split('/')[3]
                if curr_camera_id != pre_camera_id:
                    print("The query result on stream %d:" % stream_i,
                          index_list_per_camera)
                    index_list_all.append(deepcopy(index_list_per_camera))
                    index_list_per_camera.clear()

                    pre_camera_id = curr_camera_id

                    stream_i += 1
                    tracklet_i = 0
                    print("\n\nProcessing stream %d...\n" % stream_i)

            if car_color is not None:
                if car_color not in color_dict:
                    color_dict[car_color] = 0
                color_dict[car_color] += 1

            if car_type is not None:
                if car_type not in type_dict:
                    type_dict[car_type] = 0
                type_dict[car_type] += 1

            pre_path = curr_path

        # add the last one
        if pre_path != '':
            start_length = os.path.split(os.path.split(pre_path)[0])[1]
            detect_color = max(color_dict, key=color_dict.get)
            detect_type = max(type_dict, key=type_dict.get)
            print("Tracklet %d detects " % tracklet_i, detect_color,
                  detect_type)
            compare_query_append(query_pair, detect_color, detect_type,
                                 index_list_per_camera, tracklet_i,
                                 start_length)
            # print(all_cars_per_camera)
            color_dict.clear()
            type_dict.clear()

            print("The query result on stream %d:" % stream_i,
                  index_list_per_camera)
            index_list_all.append(deepcopy(index_list_per_camera))

        return index_list_all
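process_img() above is assumed to letterbox a PIL image to inp_dim x inp_dim and return a normalized NCHW tensor. A minimal sketch consistent with that call site (the real helper may differ):

import numpy as np
import torch
from PIL import Image

def process_img(img, inp_dim):
    # letterbox: scale with fixed aspect ratio, pad the rest with gray
    w, h = img.size
    scale = min(inp_dim / w, inp_dim / h)
    resized = img.resize((int(w * scale), int(h * scale)))
    canvas = Image.new('RGB', (inp_dim, inp_dim), (128, 128, 128))
    canvas.paste(resized, ((inp_dim - resized.width) // 2,
                           (inp_dim - resized.height) // 2))
    arr = np.asarray(canvas, dtype=np.float32) / 255.0
    return torch.from_numpy(arr).permute(2, 0, 1).unsqueeze(0)  # 1x3xHxW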
Example #5
class ObjectDetection:
    def __init__(self, id):
        # self.cap = cv2.VideoCapture(id)
        self.cap = WebcamVideoStream(src=id).start()
        self.cfgfile = "cfg/yolov3.cfg"
        # self.cfgfile = 'cfg/yolov3-tiny.cfg'
        self.weightsfile = "yolov3.weights"
        # self.weightsfile = 'yolov3-tiny.weights'
        self.confidence = 0.25
        self.nms_thresh = 0.4
        self.num_classes = 80
        self.classes = load_classes('data/coco.names')
        self.colors = pkl.load(open("pallete", "rb"))
        self.model = Darknet(self.cfgfile)
        self.CUDA = torch.cuda.is_available()
        self.model.load_weights(self.weightsfile)
        self.model.net_info["height"] = 160
        self.inp_dim = int(self.model.net_info["height"])
        self.width = 640  #640#
        self.height = 480  #360#
        print("Loading network.....")
        if self.CUDA:
            self.model.cuda()
        print("Network successfully loaded")
        assert self.inp_dim % 32 == 0
        assert self.inp_dim > 32
        self.model.eval()

    def main(self):
        q = queue.Queue()
        while True:

            def frame_render(queue_from_cam):
                frame = self.cap.read()  # if you capture the stream with cv2.VideoCapture() instead, use the commented line below
                # ret, frame = self.cap.read()
                frame = cv2.resize(frame, (self.width, self.height))
                queue_from_cam.put(frame)

            cam = threading.Thread(target=frame_render, args=(q, ))
            cam.start()
            cam.join()
            frame = q.get()
            q.task_done()
            fps = FPS().start()
            try:
                img, orig_im, dim = prep_image(frame, self.inp_dim)
                im_dim = torch.FloatTensor(dim).repeat(1, 2)
                if self.CUDA:  #### If you have a gpu properly installed then it will run on the gpu
                    im_dim = im_dim.cuda()
                    img = img.cuda()
                with torch.no_grad():  #### Disable gradient tracking for inference
                    output = self.model(Variable(img), self.CUDA)
                output = write_results(output,
                                       self.confidence,
                                       self.num_classes,
                                       nms=True,
                                       nms_conf=self.nms_thresh
                                       )  #### Localize the objects in a frame
                output = output.type(torch.half)
                if list(output.size()) == [1, 86]:
                    pass
                else:
                    output[:,
                           1:5] = torch.clamp(output[:, 1:5], 0.0,
                                              float(
                                                  self.inp_dim)) / self.inp_dim

                    #            im_dim = im_dim.repeat(output.size(0), 1)
                    output[:, [1, 3]] *= frame.shape[1]
                    output[:, [2, 4]] *= frame.shape[0]
                    list(
                        map(
                            lambda x: write(x, frame, self.classes, self.colors
                                            ), output))
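                    # NOTE: b_boxes and labels are assumed to be module-level
                    # dicts populated by the write() helper (not shown here)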
                    x, y, w, h = b_boxes["bbox"][0], b_boxes["bbox"][
                        1], b_boxes["bbox"][2], b_boxes["bbox"][3]
                    distance = (2 * 3.14 * 180) / (
                        w + h * 360) * 1000 + 3  ### Distance measuring in Inch
                    feedback = ("{}".format(labels["Current Object"]) + " " +
                                "is" + " at {} ".format(round(distance)) +
                                "Inches")
                    # speak.Speak(feedback)     # If you are running this on a Linux-based OS, use espeak instead; this speech library on Windows adds unnecessary latency
                    print(feedback)
            except Exception:
                pass  # skip frames where detection or post-processing fails
            fps.update()
            fps.stop()
            print("[INFO] elasped time: {:.2f}".format(fps.elapsed()))
            print("[INFO] approx. FPS: {:.1f}".format(fps.fps()))
            frame = cv2.putText(frame, str("{:.2f} Inches".format(distance)),
                                (x, y), cv2.FONT_HERSHEY_DUPLEX, 0.6,
                                (0, 0, 255), 1, cv2.LINE_AA)
            cv2.imshow("Object Detection Window", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            continue
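The module-level write() used in main() above is not shown in this snippet. A minimal sketch of a drawing helper consistent with how it is called (signature and behavior are assumptions):

import random
import cv2

def write(x, img, classes, colors):
    # x is one detection row: [batch_idx, x1, y1, x2, y2, ..., class_id]
    c1 = tuple(int(v) for v in x[1:3])
    c2 = tuple(int(v) for v in x[3:5])
    label = classes[int(x[-1])]
    color = random.choice(colors)
    cv2.rectangle(img, c1, c2, color, 2)
    cv2.putText(img, label, (c1[0], c1[1] - 4),
                cv2.FONT_HERSHEY_PLAIN, 1, color, 1)
    return img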
Example #6
def valid(datacfg, modelcfg, weightfile):
    def truths_length(truths, max_num_gt=50):
        for i in range(max_num_gt):
            if truths[i][1] == 0:
                return i

    # Parse configuration files
    data_options = read_data_cfg(datacfg)
    valid_images = data_options['valid']
    meshname     = data_options['mesh']
    backupdir    = data_options['backup']
    name         = data_options['name']
    gpus         = data_options['gpus'] 
    fx           = float(data_options['fx'])
    fy           = float(data_options['fy'])
    u0           = float(data_options['u0'])
    v0           = float(data_options['v0'])
    im_width     = int(data_options['width'])
    im_height    = int(data_options['height'])
    if not os.path.exists(backupdir):
        makedirs(backupdir)

    # Parameters
    seed = int(time.time())
    os.environ['CUDA_VISIBLE_DEVICES'] = gpus
    torch.cuda.manual_seed(seed)
    save            = False
    testtime        = True
    num_classes     = 1
    testing_samples = 0.0
    if save:
        makedirs(backupdir + '/test')
        makedirs(backupdir + '/test/gt')
        makedirs(backupdir + '/test/pr')
    # To save
    testing_error_trans = 0.0
    testing_error_angle = 0.0
    testing_error_pixel = 0.0
    errs_2d             = []
    errs_3d             = []
    errs_trans          = []
    errs_angle          = []
    errs_corner2D       = []
    preds_trans         = []
    preds_rot           = []
    preds_corners2D     = []
    gts_trans           = []
    gts_rot             = []
    gts_corners2D       = []

    # Read object model information, get 3D bounding box corners
    mesh      = MeshPly(meshname)
    vertices  = np.c_[np.array(mesh.vertices), np.ones((len(mesh.vertices), 1))].transpose()
    corners3D = get_3D_corners(vertices)
    try:
        diam  = float(data_options['diam'])
    except KeyError:
        diam  = calc_pts_diameter(np.array(mesh.vertices))
        
    # Read intrinsic camera parameters
    intrinsic_calibration = get_camera_intrinsic(u0, v0, fx, fy)

    # Get validation file names
    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]
    
    # Specify the model, load pretrained weights, pass to GPU and set the module in evaluation mode
    model = Darknet(modelcfg)
    model.print_network()
    model.load_weights(weightfile)
    model.cuda()
    model.eval()
    test_width    = model.test_width
    test_height   = model.test_height
    num_keypoints = model.num_keypoints 
    num_labels    = num_keypoints * 2 + 3

    # Get the parser for the test dataset
    valid_dataset = dataset.listDataset(valid_images, 
                                        shape=(test_width, test_height),
                                        shuffle=False,
                                        transform=transforms.Compose([transforms.ToTensor(),]))

    # Specify the number of workers for multiple processing, get the dataloader for the test dataset
    kwargs = {'num_workers': 4, 'pin_memory': True}
    test_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=1, shuffle=False, **kwargs) 

    logging("   Testing {}...".format(name))
    logging("   Number of test samples: %d" % len(test_loader.dataset))
    # Iterate through test batches (Batch size for test data is 1)
    count = 0
    for batch_idx, (data, target) in enumerate(test_loader):
        t1 = time.time()
        # Pass data to GPU
        data = data.cuda()
        target = target.cuda()
        # Wrap tensors in Variable class, set volatile=True for inference mode and to use minimal memory during inference
        data = Variable(data, volatile=True)
        t2 = time.time()
        # Forward pass
        output = model(data).data  
        t3 = time.time()
        # Using confidence threshold, eliminate low-confidence predictions
        all_boxes = get_region_boxes(output, num_classes, num_keypoints)        
        t4 = time.time()
        # Evaluation
        # Iterate through all batch elements
        for box_pr, target in zip([all_boxes], [target[0]]):
            # For each image, get all the targets (for multiple object pose estimation, there might be more than 1 target per image)
            truths = target.view(-1, num_keypoints*2+3)
            # Get how many objects are present in the scene
            num_gts    = truths_length(truths)
            # Iterate through each ground-truth object
            for k in range(num_gts):
                box_gt = list()
                for j in range(1, 2*num_keypoints+1):
                    box_gt.append(truths[k][j])
                box_gt.extend([1.0, 1.0])
                box_gt.append(truths[k][0])

                # Denormalize the corner predictions 
                corners2D_gt = np.array(np.reshape(box_gt[:18], [9, 2]), dtype='float32')
                corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]), dtype='float32')
                corners2D_gt[:, 0] = corners2D_gt[:, 0] * im_width
                corners2D_gt[:, 1] = corners2D_gt[:, 1] * im_height          
                corners2D_pr[:, 0] = corners2D_pr[:, 0] * im_width
                corners2D_pr[:, 1] = corners2D_pr[:, 1] * im_height
                preds_corners2D.append(corners2D_pr)
                gts_corners2D.append(corners2D_gt)

                # Compute corner prediction error
                corner_norm = np.linalg.norm(corners2D_gt - corners2D_pr, axis=1)
                corner_dist = np.mean(corner_norm)
                errs_corner2D.append(corner_dist)
                
                # Compute [R|t] by pnp
                R_gt, t_gt = pnp(np.array(np.transpose(np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)), dtype='float32'),  corners2D_gt, np.array(intrinsic_calibration, dtype='float32'))
                R_pr, t_pr = pnp(np.array(np.transpose(np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)), dtype='float32'),  corners2D_pr, np.array(intrinsic_calibration, dtype='float32'))
                
                # Compute translation error
                trans_dist   = np.sqrt(np.sum(np.square(t_gt - t_pr)))
                errs_trans.append(trans_dist)
                
                # Compute angle error
                angle_dist   = calcAngularDistance(R_gt, R_pr)
                errs_angle.append(angle_dist)
                
                # Compute pixel error
                Rt_gt        = np.concatenate((R_gt, t_gt), axis=1)
                Rt_pr        = np.concatenate((R_pr, t_pr), axis=1)
                proj_2d_gt   = compute_projection(vertices, Rt_gt, intrinsic_calibration)
                proj_2d_pred = compute_projection(vertices, Rt_pr, intrinsic_calibration) 
                norm         = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)
                pixel_dist   = np.mean(norm)
                errs_2d.append(pixel_dist)

                # Compute 3D distances
                transform_3d_gt   = compute_transformation(vertices, Rt_gt) 
                transform_3d_pred = compute_transformation(vertices, Rt_pr)  
                norm3d            = np.linalg.norm(transform_3d_gt - transform_3d_pred, axis=0)
                vertex_dist       = np.mean(norm3d)    
                errs_3d.append(vertex_dist)  

                # Sum errors
                testing_error_trans  += trans_dist
                testing_error_angle  += angle_dist
                testing_error_pixel  += pixel_dist
                testing_samples      += 1
                count = count + 1

                if save:
                    preds_trans.append(t_pr)
                    gts_trans.append(t_gt)
                    preds_rot.append(R_pr)
                    gts_rot.append(R_gt)

                    np.savetxt(backupdir + '/test/gt/R_' + valid_files[count][-8:-3] + 'txt', np.array(R_gt, dtype='float32'))
                    np.savetxt(backupdir + '/test/gt/t_' + valid_files[count][-8:-3] + 'txt', np.array(t_gt, dtype='float32'))
                    np.savetxt(backupdir + '/test/pr/R_' + valid_files[count][-8:-3] + 'txt', np.array(R_pr, dtype='float32'))
                    np.savetxt(backupdir + '/test/pr/t_' + valid_files[count][-8:-3] + 'txt', np.array(t_pr, dtype='float32'))
                    np.savetxt(backupdir + '/test/gt/corners_' + valid_files[count][-8:-3] + 'txt', np.array(corners2D_gt, dtype='float32'))
                    np.savetxt(backupdir + '/test/pr/corners_' + valid_files[count][-8:-3] + 'txt', np.array(corners2D_pr, dtype='float32'))


        t5 = time.time()

    # Compute 2D projection error, 6D pose error, 5cm5degree error
    px_threshold = 5 # 5 pixel threshold for 2D reprojection error is standard in recent sota 6D object pose estimation works 
    eps          = 1e-5
    acc          = len(np.where(np.array(errs_2d) <= px_threshold)[0]) * 100. / (len(errs_2d)+eps)
    acc5cm5deg   = len(np.where((np.array(errs_trans) <= 0.05) & (np.array(errs_angle) <= 5))[0]) * 100. / (len(errs_trans)+eps)
    acc3d10      = len(np.where(np.array(errs_3d) <= diam * 0.1)[0]) * 100. / (len(errs_3d)+eps)
    corner_acc   = len(np.where(np.array(errs_corner2D) <= px_threshold)[0]) * 100. / (len(errs_corner2D)+eps)
    mean_err_2d  = np.mean(errs_2d)
    mean_corner_err_2d = np.mean(errs_corner2D)
    nts = float(testing_samples)

    if testtime:
        print('-----------------------------------')
        print('  tensor to cuda : %f' % (t2 - t1))
        print('    forward pass : %f' % (t3 - t2))
        print('get_region_boxes : %f' % (t4 - t3))
        print(' prediction time : %f' % (t4 - t1))
        print('            eval : %f' % (t5 - t4))
        print('-----------------------------------')

    # Print test statistics
    logging('Results of {}'.format(name))
    logging('   Acc using {} px 2D Projection = {:.2f}%'.format(px_threshold, acc))
    logging('   Acc using 10% threshold - {} vx 3D Transformation = {:.2f}%'.format(diam * 0.1, acc3d10))
    logging('   Acc using 5 cm 5 degree metric = {:.2f}%'.format(acc5cm5deg))
    logging("   Mean 2D pixel error is %f, Mean vertex error is %f, mean corner error is %f" % (mean_err_2d, np.mean(errs_3d), mean_corner_err_2d))
    logging('   Translation error: %f m, angle error: %f degree, pixel error: % f pix' % (testing_error_trans/nts, testing_error_angle/nts, testing_error_pixel/nts) )


    result_data = {
        'model': modelcfg,
        'acc': acc,
        'acc3d10': acc3d10,
        'acc5cm5deg': acc5cm5deg,
        'mean_err_2d': mean_err_2d,
        'errs_3d': np.mean(errs_3d),
        'mean_corner_err_2d': mean_corner_err_2d,
        'translation_err': testing_error_trans/nts,
        'angle_err': testing_error_angle/nts,
        'px_err': testing_error_pixel/nts
    }

    print(result_data)

    try:
        df = pd.read_csv('test_metrics.csv')
        df = pd.concat([df, pd.DataFrame([result_data])], ignore_index=True)
        df.to_csv('test_metrics.csv', index=False)
    except FileNotFoundError:
        df = pd.DataFrame.from_records([result_data])
        df.to_csv('test_metrics.csv', index=False)

    if save:
        predfile = backupdir + '/predictions_linemod_' + name +  '.mat'
        scipy.io.savemat(predfile, {'R_gts': gts_rot, 't_gts':gts_trans, 'corner_gts': gts_corners2D, 'R_prs': preds_rot, 't_prs':preds_trans, 'corner_prs': preds_corners2D})
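calcAngularDistance() and compute_projection() above are pose-evaluation helpers that are not shown. Minimal sketches consistent with how they are called (implementations may differ):

import numpy as np

def calcAngularDistance(R_gt, R_pr):
    # geodesic distance between two rotation matrices, in degrees
    R_rel = np.dot(R_pr, R_gt.T)
    trace = np.clip((np.trace(R_rel) - 1.0) / 2.0, -1.0, 1.0)
    return np.degrees(np.arccos(trace))

def compute_projection(points_3D, Rt, K):
    # project homogeneous 3D points (4xN) through [R|t] (3x4) and intrinsics K (3x3)
    cam = np.dot(K, np.dot(Rt, points_3D))
    return cam[:2] / cam[2]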
Example #7
def valid(datacfg, cfgfile, weightfile, outfile):
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    # backup = cfgs.backup
    backup = weightfile.split('/')[-2]
    ckpt = weightfile.split('/')[-1].split('.')[0]
    prefix = 'results/' + backup.split('/')[-1] + '/e' + ckpt
    print('saving to: ' + prefix)
    names = cfg.classes

    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    m = Darknet(cfgfile)
    m.print_network()
    m.load_weights(weightfile)
    m.cuda()
    m.eval()

    valid_dataset = dataset.listDataset(valid_images,
                                        shape=(m.width, m.height),
                                        shuffle=False,
                                        transform=transforms.Compose([
                                            transforms.ToTensor(),
                                        ]))
    valid_batchsize = 2
    assert (valid_batchsize > 1)

    kwargs = {'num_workers': 4, 'pin_memory': True}
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=valid_batchsize,
                                               shuffle=False,
                                               **kwargs)

    fps = [0] * m.num_classes
    if not os.path.exists(prefix):
        # os.mkdir(prefix)
        os.makedirs(prefix)
    for i in range(m.num_classes):
        buf = '%s/%s%s.txt' % (prefix, outfile, names[i])
        fps[i] = open(buf, 'w')

    lineId = -1

    conf_thresh = 0.005
    nms_thresh = 0.45
    for batch_idx, (data, target) in enumerate(valid_loader):
        data = data.cuda()
        data = Variable(data, volatile=True)
        output = m(data).data
        batch_boxes = get_region_boxes(output, conf_thresh, m.num_classes,
                                       m.anchors, m.num_anchors, 0, 1)
        for i in range(output.size(0)):
            lineId = lineId + 1
            fileId = os.path.basename(valid_files[lineId]).split('.')[0]
            width, height = get_image_size(valid_files[lineId])
            print(valid_files[lineId])
            boxes = batch_boxes[i]
            boxes = nms(boxes, nms_thresh)
            for box in boxes:
                x1 = (box[0] - box[2] / 2.0) * width
                y1 = (box[1] - box[3] / 2.0) * height
                x2 = (box[0] + box[2] / 2.0) * width
                y2 = (box[1] + box[3] / 2.0) * height

                det_conf = box[4]
                # import pdb
                # pdb.set_trace()
                for j in range((len(box) - 5) // 2):
                    cls_conf = box[5 + 2 * j]
                    cls_id = int(box[6 + 2 * j])
                    prob = det_conf * cls_conf
                    fps[cls_id].write('%s %f %f %f %f %f\n' %
                                      (fileId, prob, x1, y1, x2, y2))
                    # fps[cls_id].write('%s %f %f %f %f %f %f\n' % (fileId, det_conf, cls_conf, x1, y1, x2, y2))

    for i in range(m.num_classes):
        fps[i].close()
Example #8
model = Darknet(args.cfgfile)
model.load_weights(args.weightsfile)
print("Network successfully loaded")

model.net_info["height"] = args.reso
inp_dim = int(model.net_info["height"])
assert inp_dim % 32 == 0 
assert inp_dim > 32

#If there's a GPU availible, put the model on GPU
if CUDA:
    model.cuda()


#Set the model in evaluation mode
model.eval()

read_dir = time.time()
#Detection phase
try:
    imlist = [osp.join(osp.realpath('.'), images, img) for img in os.listdir(images)]
except NotADirectoryError:
    imlist = []
    imlist.append(osp.join(osp.realpath('.'), images))
except FileNotFoundError:
    print ("No file or directory with the name {}".format(images))
    exit()
    
if not os.path.exists(args.det):
    os.makedirs(args.det)
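This fragment relies on args, CUDA, and images being defined earlier in the script. A minimal sketch of the assumed setup (argument names are assumptions based on how they are used):

import argparse
import os.path as osp
import time
import torch

parser = argparse.ArgumentParser(description='YOLO v3 detection module')
parser.add_argument('--images', dest='images', default='imgs')  # input image or directory
parser.add_argument('--det', dest='det', default='det')  # output directory
parser.add_argument('--cfg', dest='cfgfile', default='cfg/yolov3.cfg')
parser.add_argument('--weights', dest='weightsfile', default='yolov3.weights')
parser.add_argument('--reso', dest='reso', default='416')  # input resolution
args = parser.parse_args()

images = args.images
CUDA = torch.cuda.is_available()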
Example #9
def eval_list(cfgfile, weightfile, imglist):
    #m = TinyYoloFace14Net()
    #m.eval()
    #m.load_darknet_weights(tiny_yolo_weight)

    m = Darknet(cfgfile)
    m.eval()
    m.load_weights(weightfile)
    eval_wid = m.width
    eval_hei = m.height

    use_cuda = True
    if use_cuda:
        m.cuda()

    conf_thresh = 0.25
    nms_thresh = 0.4
    iou_thresh = 0.5
    min_box_scale = 8. / m.width

    with open(imglist) as fp:
        lines = fp.readlines()

    total = 0.0
    proposals = 0.0
    correct = 0.0
    lineId = 0
    avg_iou = 0.0
    for line in lines:
        img_path = line.rstrip()
        if img_path[0] == '#':
            continue
        lineId = lineId + 1
        lab_path = img_path.replace('images', 'labels')
        lab_path = lab_path.replace('JPEGImages', 'labels')
        lab_path = lab_path.replace('.jpg', '.txt').replace('.png', '.txt')
        #truths = read_truths(lab_path)
        truths = read_truths_args(lab_path, min_box_scale)
        #print(truths)

        img = Image.open(img_path).convert('RGB').resize((eval_wid, eval_hei))
        boxes = do_detect(m, img, conf_thresh, nms_thresh, use_cuda)

        if False:
            savename = "tmp/%06d.jpg" % (lineId)
            print("save %s" % savename)
            plot_boxes(img, boxes, savename)

        total = total + truths.shape[0]

        for i in range(len(boxes)):
            if boxes[i][4] > conf_thresh:
                proposals = proposals + 1

        for i in range(truths.shape[0]):
            box_gt = [
                truths[i][1], truths[i][2], truths[i][3], truths[i][4], 1.0
            ]
            best_iou = 0
            for j in range(len(boxes)):
                iou = bbox_iou(box_gt, boxes[j], x1y1x2y2=False)
                best_iou = max(iou, best_iou)
            if best_iou > iou_thresh:
                avg_iou += best_iou
                correct = correct + 1

    precision = 1.0 * correct / proposals
    recall = 1.0 * correct / total
    fscore = 2.0 * precision * recall / (precision + recall)
    print("%d IOU: %f, Recall: %f, Precision: %f, Fscore: %f\n" %
          (lineId - 1, avg_iou / correct, recall, precision, fscore))
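bbox_iou() with x1y1x2y2=False is assumed to take boxes in (cx, cy, w, h) form. A minimal sketch of such an IoU helper (the project's version may differ):

def bbox_iou(box1, box2, x1y1x2y2=True):
    if not x1y1x2y2:
        # convert center format (cx, cy, w, h) to corner format (x1, y1, x2, y2)
        b1 = [box1[0] - box1[2] / 2, box1[1] - box1[3] / 2,
              box1[0] + box1[2] / 2, box1[1] + box1[3] / 2]
        b2 = [box2[0] - box2[2] / 2, box2[1] - box2[3] / 2,
              box2[0] + box2[2] / 2, box2[1] + box2[3] / 2]
    else:
        b1, b2 = box1[:4], box2[:4]
    inter_w = max(0.0, min(b1[2], b2[2]) - max(b1[0], b2[0]))
    inter_h = max(0.0, min(b1[3], b2[3]) - max(b1[1], b2[1]))
    inter = inter_w * inter_h
    union = ((b1[2] - b1[0]) * (b1[3] - b1[1]) +
             (b2[2] - b2[0]) * (b2[3] - b2[1]) - inter)
    return inter / union if union > 0 else 0.0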
Example #10
# Initialize the network and load the weights
print("Loading the network...")
model = Darknet(args.cfgfile)  # Darknet builds the network structure and its parameters at init time, stored in its net_info and module_list attributes
model.load_weights(args.weightsfile)  # load the weight file and copy the weights into the corresponding layers of model
print("Model loaded successfully.")
# Network input size
model.net_info["height"] = args.reso  # net_info is a dict; 'height' is the image width and height (images are resized to 416x416, so both are equal)
inp_dim = int(model.net_info["height"])  # inp_dim is the network input image size (e.g. 416x416)
assert inp_dim % 32 == 0  # raise if the configured input size is not a multiple of 32 or not greater than 32
assert inp_dim > 32

# If a GPU is available, move the model to CUDA
if CUDA:
    model.cuda()

model.eval()  # switch to evaluation mode; dropout and batch normalization behave differently at train and test time

read_dir = time.time()  # read_dir is a checkpoint used for timing; start the clock
# Load the list of images to run detection on
try:
    # Read a single image from disk, or every image in a directory. The full path of each
    # image is stored in the list imlist, one element per image in the images folder.
    # osp.realpath('.') is the absolute path of the working directory, images is the
    # test-image folder, and os.listdir(images) gives the names of all images inside it;
    # join() combines the folder's absolute path with each image name to form a full path.
    imlist = [osp.join(osp.realpath('.'), images, img) for img in os.listdir(images)]   # e.g. 'D:\\PyCharm Professional\\projects\\YOLO_tutorial\\imgs\\dog-cycle-car.png'
except NotADirectoryError:  # if images is a single file rather than a directory, just use its absolute path
    imlist = []
    imlist.append(osp.join(osp.realpath('.'), images))
except FileNotFoundError:
    print("No file or directory with the name {}".format(images))
    exit()
# Directory for storing the results
Example #11
def main():
    # Parsing arguments
    arguments_parser = ArgumentsParser()
    args = arguments_parser.parse_arguments()
    images = args.images
    batch_size = int(args.bs)
    confidence = float(args.confidence)
    nms_thresh = float(args.nms_thresh)

    # Set up the neural network
    print("Loading network.....")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    # If there's a GPU availible, put the model on GPU
    if CUDA:
        model.cuda()

    # Set the model in evaluation mode
    model.eval()

    read_dir = time.time()

    # Detection phase
    load_batch = time.time()
    image_manager = Cv2ImageManager()
    loaded_images, list_of_images = image_manager.read_images(images)
    im_batches = list(
        map(prep_image, loaded_images,
            [inp_dim for x in range(len(list_of_images))]))
    im_dim_list = [(x.shape[1], x.shape[0]) for x in loaded_images]
    im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)

    leftover = 0
    if (len(im_dim_list) % batch_size):
        leftover = 1

    if batch_size != 1:
        num_batches = len(list_of_images) // batch_size + leftover
        im_batches = [
            torch.cat(
                (im_batches[i * batch_size:min((i + 1) *
                                               batch_size, len(im_batches))]))
            for i in range(num_batches)
        ]

    if CUDA:
        im_dim_list = im_dim_list.cuda()

    start_det_loop = time.time()
    detector = Detector(model, im_batches, batch_size, inp_dim, confidence,
                        nms_thresh, CLASSES, NUMBER_OF_CLASSES, CUDA)
    output = detector.detect(list_of_images, im_dim_list)

    output_recast = time.time()
    class_load = time.time()

    draw = time.time()

    det_images = list(
        map(
            lambda x: image_manager.draw_bounding_boxes(
                x, loaded_images, CLASSES), output))
    det_names = list(
        map(lambda x: "{det}/{x}".format(det=args.det, x=x),
            [osp.basename(image_name) for image_name in list_of_images]))
    image_manager.write_images(det_names, det_images)

    end = time.time()

    print("SUMMARY")
    print("----------------------------------------------------------")
    print("{:25s}: {}".format("Task", "Time Taken (in seconds)"))
    print()
    print("{:25s}: {:2.3f}".format("Reading addresses", load_batch - read_dir))
    print("{:25s}: {:2.3f}".format("Loading batch",
                                   start_det_loop - load_batch))
    print("{:25s}: {:2.3f}".format(
        "Detection (" + str(len(list_of_images)) + " images)",
        output_recast - start_det_loop))
    print("{:25s}: {:2.3f}".format("Output Processing",
                                   class_load - output_recast))
    print("{:25s}: {:2.3f}".format("Drawing Boxes", end - draw))
    print("{:25s}: {:2.3f}".format("Average time_per_img",
                                   (end - load_batch) / len(list_of_images)))
    print("----------------------------------------------------------")

    torch.cuda.empty_cache()
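prep_image() is used in several of these examples; it is assumed to letterbox an OpenCV image to inp_dim x inp_dim and return (tensor, original image, original (w, h)). A minimal sketch consistent with those call sites:

import cv2
import numpy as np
import torch

def prep_image(img, inp_dim):
    orig_im = img
    dim = (img.shape[1], img.shape[0])  # original (width, height)
    h, w = img.shape[:2]
    scale = min(inp_dim / w, inp_dim / h)
    resized = cv2.resize(img, (int(w * scale), int(h * scale)))
    canvas = np.full((inp_dim, inp_dim, 3), 128, dtype=np.uint8)
    top = (inp_dim - resized.shape[0]) // 2
    left = (inp_dim - resized.shape[1]) // 2
    canvas[top:top + resized.shape[0], left:left + resized.shape[1]] = resized
    rgb = canvas[:, :, ::-1].transpose(2, 0, 1).copy()  # BGR -> RGB, HWC -> CHW
    tensor = torch.from_numpy(rgb).float().div(255.0).unsqueeze(0)
    return tensor, orig_im, dim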
Example #12
    num_classes = 2

    CUDA = torch.cuda.is_available()

    bbox_attrs = 5 + num_classes

    print("Loading network.....")
    model = Darknet(args.cfgfile)
    if args.weights_path.endswith(".weights"):
        # Load darknet weights
        model.load_darknet_weights(args.weights_path)
    else:
        # Load checkpoint weights
        model.load_state_dict(torch.load(args.weights_path))

    model.eval()  # Set in evaluation mode
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()

    model(get_test_input(inp_dim, CUDA), CUDA)

    model.eval()

    videofile = args.video
Example #13
def run():

    logger = logging.getLogger()

    # Parse command window input
    parser = argparse.ArgumentParser(description='SingleShotPose')
    parser.add_argument('--datacfg', type=str,
                        default='cfg/ape.data')  # data config
    parser.add_argument('--modelcfg', type=str,
                        default='cfg/yolo-pose.cfg')  # network config
    parser.add_argument(
        '--initweightfile', type=str,
        default='backup/init.weights')  # initialization weights
    parser.add_argument('--pretrain_num_epochs', type=int,
                        default=0)  # how many epoch to pretrain
    args = parser.parse_args()
    datacfg = args.datacfg
    modelcfg = args.modelcfg
    initweightfile = args.initweightfile
    pretrain_num_epochs = args.pretrain_num_epochs

    print("ARGS: ", args)

    # Parse data configuration file
    data_options = read_data_cfg(datacfg)
    trainlist = data_options['valid']
    gpus = data_options['gpus']
    num_workers = int(data_options['num_workers'])
    backupdir = data_options['backup']
    im_width = int(data_options['width'])
    im_height = int(data_options['height'])
    fx = float(data_options['fx'])
    fy = float(data_options['fy'])
    u0 = float(data_options['u0'])
    v0 = float(data_options['v0'])

    print("DATA OPTIONS: ", data_options)

    # Parse network and training configuration parameters
    net_options = parse_cfg(modelcfg)[0]
    loss_options = parse_cfg(modelcfg)[-1]
    batch_size = int(net_options['batch'])
    max_batches = int(net_options['max_batches'])
    max_epochs = int(net_options['max_epochs'])
    learning_rate = float(net_options['learning_rate'])
    momentum = float(net_options['momentum'])
    decay = float(net_options['decay'])
    conf_thresh = float(net_options['conf_thresh'])
    num_keypoints = int(net_options['num_keypoints'])
    num_classes = int(loss_options['classes'])
    num_anchors = int(loss_options['num'])
    steps = [float(step) for step in net_options['steps'].split(',')]
    scales = [float(scale) for scale in net_options['scales'].split(',')]
    # anchors       = [float(anchor) for anchor in loss_options['anchors'].split(',')]

    print("NET OPTIONS: ", net_options)
    print("LOSS OPTIONS: ", loss_options)

    # Specify the model and the loss
    model = Darknet(modelcfg)

    # # Model settings
    model.load_weights(initweightfile)
    model.print_network()
    # model.seen        = 0
    # processed_batches = model.seen/batch_size
    init_width = 416  # model.width
    init_height = 416  # model.height
    batch_size = 1
    num_workers = 0

    # print("Size: ", init_width, init_height)

    bg_file_names = get_all_files('../VOCdevkit/VOC2012/JPEGImages')
    # Specify the number of workers
    use_cuda = True
    kwargs = {
        'num_workers': num_workers,
        'pin_memory': True
    } if use_cuda else {}

    logger.info("Loading data")

    # valid_dataset = dataset_multi.listDataset("../LINEMOD/duck/test_occlusion.txt", shape=(init_width, init_height),
    #                                             shuffle=False,
    #                                             objclass="duck",
    #                                             transform=transforms.Compose([
    #                                                 transforms.ToTensor(),
    #                                             ]))

    # Get the dataloader for training dataset

    dataloader = torch.utils.data.DataLoader(dataset.listDataset(
        trainlist,
        shape=(init_width, init_height),
        shuffle=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
        ]),
        train=False,
        seen=0,
        batch_size=batch_size,
        num_workers=num_workers,
        bg_file_names=bg_file_names),
                                             batch_size=batch_size,
                                             shuffle=False,
                                             **kwargs)

    model.cuda()
    model.eval()

    delay = {True: 0, False: 1}
    paused = True

    # print("Classes in dataset ", num_classes)
    print("Batches in dataloader: ", len(dataloader))
    tbar = tqdm(dataloader, ascii=True, dynamic_ncols=True)
    for ii, s in enumerate(tbar):
        images, targets = s
        # print(ii, "IMAGES:" , images.shape)
        # print(ii, "TARGET\n", targets.shape)
        bs = images.shape[0]
        t = targets.cpu().numpy().reshape(bs, 50, -1)
        # print("TARGET [0, 0:1] \n", t[0, :1])
        # print("CLASSES ", t[0, :, 0])

        images_gpu = images.cuda()

        model_out = model(images_gpu).detach()
        all_boxes = np.array(
            get_region_boxes(model_out,
                             num_classes,
                             num_keypoints,
                             anchor_dim=num_anchors)).reshape(
                                 batch_size, 1, -1)

        # print("Model OUT", all_boxes.shape)

        pred = np.zeros_like(all_boxes)
        pred[:, 0, 0] = all_boxes[:, 0, -1]
        pred[:, 0, 1:-2] = all_boxes[:, 0, :-3]

        viz = visualize_results(images, t, pred, img_size=416, show_3d=True)

        cv2.imshow("Res ", viz)

        k = cv2.waitKey(delay[paused])
        if k & 0xFF == ord('q'):
            break
        if k & 0xFF == ord('p'):
            paused = not paused
Example #14
def demo():

    params = {
        "video": "video.avi",  # Video to run detection upon
        "dataset": "pascal",  # Dataset on which the network has been trained
        "confidence": 0.5,  # Object Confidence to filter predictions
        "nms_thresh": 0.4,  # NMS Threshold
        "cfgfile": "cfg/yolov3.cfg",  # Config file
        "weightsfile": "yolov3.weights",  # Weightsfile
        "reso": 416  # Input resolution of the network. Increase to improve accuracy, decrease to improve speed.
    }

    confidence = float(params["confidence"])
    nms_thesh = float(params["nms_thresh"])
    start = 0

    CUDA = torch.cuda.is_available()

    num_classes = 80

    bbox_attrs = 5 + num_classes

    bboxes = []
    xywh = []

    print("Loading network.....")
    model = Darknet(params["cfgfile"])
    model.load_weights(params["weightsfile"])
    print("Network successfully loaded")

    model.net_info["height"] = params["reso"]
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()

    model.eval()

    videofile = params["video"]

    # use camera index 0 (webcam) for debugging; pass videofile to run on the saved video
    cap = cv2.VideoCapture(0)

    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    start = time.time()
    while cap.isOpened():

        ret, frame = cap.read()
        print("ret: ", ret)
        if ret:
            print("frame: ", frame.shape)

            img, orig_im, dim = prep_image(frame, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            with torch.no_grad():
                output = model(Variable(img), CUDA)
            output = write_results(output,
                                   confidence,
                                   num_classes,
                                   nms=True,
                                   nms_conf=nms_thresh)

            if type(output) == int:
                frames += 1
                print("No detections in this frame")
                print("FPS of the video is {:5.2f}".format(
                    frames / (time.time() - start)))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            im_dim = im_dim.repeat(output.size(0), 1)
            scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)

            output[:,
                   [1, 3]] -= (inp_dim -
                               scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
            output[:,
                   [2, 4]] -= (inp_dim -
                               scaling_factor * im_dim[:, 1].view(-1, 1)) / 2

            output[:, 1:5] /= scaling_factor

            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0,
                                                im_dim[i, 0])
                output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0,
                                                im_dim[i, 1])

            print("output: ", output)
            print("output: ", output.shape)

            for i in output:
                x0 = i[1].int()
                y0 = i[2].int()
                x1 = i[3].int()
                y1 = i[4].int()
                bbox = (x0, y0, x1, y1)
                bboxes.append(bbox)
                print(bbox)
                w = x1 - x0
                h = y1 - y0
                xywh.append((x0, y0, w, h))
                print(x0, y0, w, h)

            #return bboxes

            # NOTE: loading the class names and palette once, before the loop, would avoid re-reading them every frame
            classes = load_classes('data/coco.names')
            colors = pkl.load(open("pallete", "rb"))

            # write bbox
            list(map(lambda x: write(x, orig_im, classes, colors), output))

            cv2.imshow("frame", orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            print("FPS of the video is {:5.2f}g7".format(
                frames / (time.time() - start)))
            #return xywh

        else:
            break
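The block above that subtracts the letterbox padding and divides by scaling_factor maps detections from the network's square input back to original-frame pixels. A minimal standalone sketch of that inverse mapping (the helper name unletterbox_boxes is ours, not part of the snippet):

import torch

def unletterbox_boxes(output, inp_dim, orig_w, orig_h):
    """Map corner boxes from letterboxed network space back to the original frame.

    output: (N, 8) float tensor with columns [batch_idx, x1, y1, x2, y2,
    objectness, class_conf, class_id] in inp_dim x inp_dim coordinates.
    """
    scale = min(inp_dim / orig_w, inp_dim / orig_h)
    pad_x = (inp_dim - scale * orig_w) / 2  # horizontal letterbox border
    pad_y = (inp_dim - scale * orig_h) / 2  # vertical letterbox border
    output[:, [1, 3]] = (output[:, [1, 3]] - pad_x) / scale
    output[:, [2, 4]] = (output[:, [2, 4]] - pad_y) / scale
    # clip to the original image bounds
    output[:, [1, 3]] = output[:, [1, 3]].clamp(0, orig_w - 1)
    output[:, [2, 4]] = output[:, [2, 4]].clamp(0, orig_h - 1)
    return output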
class SegPoseNet(nn.Module):
    def __init__(self, data_options):
        super(SegPoseNet, self).__init__()

        pose_arch_cfg = data_options['pose_arch_cfg']
        self.width = int(data_options['width'])
        self.height = int(data_options['height'])
        self.channels = int(data_options['channels'])
        self.domains = int(data_options['domains'])

        # note you need to change this after modifying the network
        self.output_h = 76
        self.output_w = 76

        self.coreModel = Darknet(pose_arch_cfg, self.width, self.height, self.channels, self.domains)
        self.segLayer = PoseSegLayer(data_options)
        self.regLayer = Pose2DLayer(data_options)
        self.discLayer = Discriminator()
        self.training = False

    def forward(self, x, y = None, adapt = False, domains = None):
        outlayers = self.coreModel(x, domains=domains)

        if self.training and adapt:
            in1 = source_only(outlayers[0], domains)
            in2 = source_only(outlayers[1], domains)
        else:
            in1 = outlayers[0]
            in2 = outlayers[1]

        out3 = self.discLayer(outlayers[2])
        out4 = outlayers[3]
        out5 = outlayers[4]

        out1 = self.segLayer(in1)
        out2 = self.regLayer(in2)

        out_preds = [out1, out2, out3, out4, out5]
        return out_preds

    # NOTE: these overrides shadow nn.Module.train(mode) and nn.Module.eval(), and take no mode argument
    def train(self):
        self.coreModel.train()
        self.segLayer.train()
        self.regLayer.train()
        self.discLayer.train()
        self.training = True

    def eval(self):
        self.coreModel.eval()
        self.segLayer.eval()
        self.regLayer.eval()
        self.discLayer.eval()
        self.training = False

    def print_network(self):
        self.coreModel.print_network()

    def load_weights(self, weightfile):
        self.coreModel.load_state_dict(torch.load(weightfile))

    def save_weights(self, weightfile):
        torch.save(self.coreModel.state_dict(), weightfile)
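For orientation, a hypothetical way to construct and run this network from a parsed options dict. The keys mirror those read in __init__ above; the paths and values are placeholders, and PoseSegLayer / Pose2DLayer may read additional keys:

# Hypothetical usage; key names taken from SegPoseNet.__init__, values are placeholders.
data_options = {
    'pose_arch_cfg': 'cfg/pose_arch.cfg',
    'width': '608', 'height': '608',
    'channels': '3', 'domains': '2',
}
net = SegPoseNet(data_options)
net.print_network()
net.eval()
# out_preds = net(images)  # [segmentation, 2D pose, discriminator, aux1, aux2]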
Exemple #16
0
class Patch():
    def __init__(self, config, device):
        self.config = config
        self.device = device
        
        # Create pytorch3D renderer
        self.renderer = self.create_renderer()

        # Datasets
        self.mesh_dataset = MeshDataset(config.mesh_dir, device, max_num=config.num_meshes)
        self.bg_dataset = BackgroundDataset(config.bg_dir, config.img_size, max_num=config.num_bgs)
        self.test_bg_dataset = BackgroundDataset(config.test_bg_dir, config.img_size, max_num=config.num_test_bgs)

        # Initialize adversarial patch
        self.patch = None
        self.idx = None

        # Yolo model:
        self.dnet = Darknet(self.config.cfgfile)
        self.dnet.load_weights(self.config.weightfile)
        self.dnet = self.dnet.eval()
        self.dnet = self.dnet.to(self.device)

        if self.config.patch_dir is not None:
          self.patch = torch.load(self.config.patch_dir + '/patch_save.pt').to(self.device)
          self.idx = torch.load(self.config.patch_dir + '/idx_save.pt').to(self.device)

        self.test_bgs = DataLoader(
          self.test_bg_dataset, 
          batch_size=1, 
          shuffle=True, 
          num_workers=1)
  
        self.min_contrast = 0.8
        self.max_contrast = 1.2
        self.min_brightness = -0.1
        self.max_brightness = 0.1
        self.noise_factor = 0.10

    def attack_faster_rcnn(self):
        path_to_checkpoint='model-180000.pth'
        dataset_name="coco2017"
        backbone_name="resnet101"
        prob_thresh=0.6

        dataset_class = DatasetBase.from_name(dataset_name)
        backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
        model = FasterRCNN(backbone, dataset_class.num_classes(), pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
        model.load(path_to_checkpoint)

        train_bgs = DataLoader(
            self.bg_dataset, 
            batch_size=self.config.batch_size, 
            shuffle=True, 
            num_workers=1)

        if self.patch is None or self.idx is None:
          self.initialize_patch()
        
        mesh = self.mesh_dataset.meshes[0]
        total_variation = TotalVariation_3d(mesh, self.idx).to(self.device)

        optimizer = torch.optim.SGD([self.patch], lr=1e-1, momentum=0.9)

        for epoch in range(self.config.epochs):
            ep_loss = 0.0
            ep_acc = 0.0
            n = 0.0

            for mesh in self.mesh_dataset:
                # Copy mesh for each camera angle
                mesh = mesh.extend(self.num_angles_train)

                for bg_batch in train_bgs:
                    bg_batch = bg_batch.to(self.device)

                    optimizer.zero_grad()

                    texture_image = mesh.textures.atlas_padded()

                    # Random patch augmentation
                    contrast = torch.FloatTensor(1).uniform_(self.min_contrast, self.max_contrast).to(self.device)
                    brightness = torch.FloatTensor(1).uniform_(self.min_brightness, self.max_brightness).to(self.device)
                    noise = torch.FloatTensor(self.patch.shape).uniform_(-1, 1) * self.noise_factor
                    noise = noise.to(self.device)
                    augmented_patch = (self.patch * contrast) + brightness + noise

                    # Clamp patch to avoid PyTorch3D issues
                    clamped_patch = augmented_patch.clone().clamp(min=1e-6, max=0.99999)
                    mesh.textures._atlas_padded[:,self.idx,:,:,:] = clamped_patch
      
                    mesh.textures.atlas = mesh.textures._atlas_padded
                    mesh.textures._atlas_list = None

                    # Render mesh onto background image
                    rand_translation = torch.randint(
                      -self.config.rand_translation, 
                      self.config.rand_translation, 
                      (2,)
                      )

                    images = self.render_mesh_on_bg_batch(mesh, bg_batch, self.num_angles_train, x_translation=rand_translation[0].item(),
                                                          y_translation=rand_translation[1].item())
                    
                    reshape_img = images[:,:,:,:3].permute(0, 3, 1, 2)
                    reshape_img = reshape_img.to(self.device)

                    # image_tensor, scale = dataset_class.preprocess(reshape_img, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)
                    detection_bboxes, detection_classes, detection_probs, _ = \
                        model.eval().forward(reshape_img.cuda())
                    # detection_bboxes /= scale

                    kept_indices = detection_probs > prob_thresh
                    detection_bboxes = detection_bboxes[kept_indices]
                    detection_classes = detection_classes[kept_indices]
                    detection_probs = detection_probs[kept_indices]
                    human_dets = torch.where(detection_classes == 1, torch.ones(1), torch.zeros(1)).cuda()

                    disap_loss = torch.mean(human_dets * detection_probs)

                    tv = total_variation(self.patch)
                    tv_loss = tv * 2.5
                    
                    loss = disap_loss + tv_loss
                    
                    n += bg_batch.shape[0]

                    if torch.isnan(loss).item():
                      continue

                    ep_loss += loss.item()

                    loss.backward(retain_graph=True)
                    optimizer.step()
            
            # Save image and print performance statistics
            print('tv={}, dis={}'.format(tv_loss, disap_loss))
            patch_save = self.patch.cpu().detach().clone()
            idx_save = self.idx.cpu().detach().clone()
            torch.save(patch_save, 'patch_save.pt')
            torch.save(idx_save, 'idx_save.pt')
            
            print('epoch={} loss={}'.format(
              epoch, 
              (ep_loss / n)
              )
            )

            if epoch % 5 == 0:
              self.test_patch()
              self.change_cameras('train')

    def attack(self):
        train_bgs = DataLoader(
            self.bg_dataset, 
            batch_size=self.config.batch_size, 
            shuffle=True, 
            num_workers=1)

        if self.patch is None or self.idx is None:
          self.initialize_patch()
        
        mesh = self.mesh_dataset.meshes[0]
        total_variation = TotalVariation_3d(mesh, self.idx).to(self.device)

        optimizer = torch.optim.SGD([self.patch], lr=1e-1, momentum=0.9)
        
        for epoch in range(self.config.epochs):
            ep_loss = 0.0
            ep_acc = 0.0
            n = 0.0

            for mesh in self.mesh_dataset:
                # Copy mesh for each camera angle
                mesh = mesh.extend(self.num_angles_train)

                for bg_batch in train_bgs:
                    bg_batch = bg_batch.to(self.device)

                    # To enable random camera distance training, uncomment this line:
                    # self.change_cameras('train', camera_dist=random.uniform(1.4, 3.0))

                    optimizer.zero_grad()

                    texture_image = mesh.textures.atlas_padded()

                    # Random patch augmentation
                    contrast = torch.FloatTensor(1).uniform_(self.min_contrast, self.max_contrast).to(self.device)
                    brightness = torch.FloatTensor(1).uniform_(self.min_brightness, self.max_brightness).to(self.device)
                    noise = torch.FloatTensor(self.patch.shape).uniform_(-1, 1) * self.noise_factor
                    noise = noise.to(self.device)
                    augmented_patch = (self.patch * contrast) + brightness + noise

                    # Clamp patch to avoid PyTorch3D issues
                    clamped_patch = augmented_patch.clone().clamp(min=1e-6, max=0.99999)
                    mesh.textures._atlas_padded[:,self.idx,:,:,:] = clamped_patch
      
                    mesh.textures.atlas = mesh.textures._atlas_padded
                    mesh.textures._atlas_list = None

                    # Render mesh onto background image
                    rand_translation = torch.randint(
                      -self.config.rand_translation, 
                      self.config.rand_translation, 
                      (2,)
                      )

                    images = self.render_mesh_on_bg_batch(mesh, bg_batch, self.num_angles_train, x_translation=rand_translation[0].item(),
                                                          y_translation=rand_translation[1].item())
                    
                    reshape_img = images[:,:,:,:3].permute(0, 3, 1, 2)
                    reshape_img = reshape_img.to(self.device)

                    # Run detection model on images
                    output = self.dnet(reshape_img)

                    d_loss = dis_loss(output, self.dnet.num_classes, self.dnet.anchors, self.dnet.num_anchors, 0)
                    acc_loss = calc_acc(output, self.dnet.num_classes, self.dnet.num_anchors, 0)

                    tv = total_variation(self.patch)
                    tv_loss = tv * 2.5
                    
                    loss = d_loss + tv_loss

                    ep_loss += loss.item()
                    ep_acc += acc_loss.item()
                    
                    n += bg_batch.shape[0]

                    loss.backward(retain_graph=True)
                    optimizer.step()
            
            # Save image and print performance statistics
            patch_save = self.patch.cpu().detach().clone()
            idx_save = self.idx.cpu().detach().clone()
            torch.save(patch_save, 'patch_save.pt')
            torch.save(idx_save, 'idx_save.pt')

            save_image(reshape_img[0].cpu().detach(), "TEST_RENDER.png")
        
            print('epoch={} loss={} success_rate={}'.format(
              epoch, 
              (ep_loss / n), 
              (ep_acc / n) / self.num_angles_train)
            )

            if epoch % 5 == 0:
              self.test_patch()
              self.change_cameras('train')
    
    def test_patch(self):
        self.change_cameras('test')
        angle_success = torch.zeros(self.num_angles_test)
        total_loss = 0.0
        n = 0.0
        for mesh in self.mesh_dataset:
            mesh = mesh.extend(self.num_angles_test)
            for bg_batch in self.test_bgs:
                bg_batch = bg_batch.to(self.device)

                texture_image=mesh.textures.atlas_padded()
                                
                clamped_patch = self.patch.clone().clamp(min=1e-6, max=0.99999)
                mesh.textures._atlas_padded[:,self.idx,:,:,:] = clamped_patch
      
                mesh.textures.atlas = mesh.textures._atlas_padded
                mesh.textures._atlas_list = None
                
                rand_translation = torch.randint(
                  -self.config.rand_translation, 
                  self.config.rand_translation, 
                  (2,)
                  )

                images = self.render_mesh_on_bg_batch(mesh, bg_batch, self.num_angles_test, x_translation=rand_translation[0].item(),
                                                y_translation=rand_translation[1].item())
                
                reshape_img = images[:,:,:,:3].permute(0, 3, 1, 2)
                reshape_img = reshape_img.to(self.device)
                output = self.dnet(reshape_img)
                
                for angle in range(self.num_angles_test):
                    acc_loss = calc_acc(output[angle], self.dnet.num_classes, self.dnet.num_anchors, 0)
                    angle_success[angle] += acc_loss.item()

                n += bg_batch.shape[0]
        
        save_image(reshape_img[0].cpu().detach(), "TEST.png")
        unseen_success_rate = torch.sum(angle_success) / (n * self.num_angles_test)
        print('Angle success rates: ', angle_success / n)
        print('Unseen bg success rate: ', unseen_success_rate.item())

    def test_patch_faster_rcnn(self, path_to_checkpoint: str, dataset_name: str, backbone_name: str, prob_thresh: float):
        dataset_class = DatasetBase.from_name(dataset_name)
        backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
        model = FasterRCNN(backbone, dataset_class.num_classes(), pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
        model.load(path_to_checkpoint)

        angle_success = torch.zeros(self.num_angles_test)
        total_loss = 0.0
        n = 0.0
        with torch.no_grad():
            for mesh in self.mesh_dataset:
                mesh = mesh.extend(self.num_angles_test)
                for bg_batch in self.test_bgs:
                    bg_batch = bg_batch.to(self.device)
                    
                    texture_image=mesh.textures.atlas_padded()
                    clamped_patch = self.patch.clone().clamp(min=1e-6, max=0.99999)
                    mesh.textures._atlas_padded[:,self.idx,:,:,:] = clamped_patch
          
                    mesh.textures.atlas = mesh.textures._atlas_padded
                    mesh.textures._atlas_list = None

                    rand_translation = torch.randint(
                      -self.config.rand_translation, 
                      self.config.rand_translation, 
                      (2,)
                      )

                    images = self.render_mesh_on_bg_batch(
                      mesh, 
                      bg_batch, 
                      self.num_angles_test, 
                      x_translation=rand_translation[0].item(),
                      y_translation=rand_translation[1].item()
                      )

                    reshape_img = images[:,:,:,:3].permute(0, 3, 1, 2)
                    save_image(reshape_img[0].cpu().detach(), "TEST_PRE.png")

                    for angle in range(self.num_angles_test):
                        image = torchvision.transforms.ToPILImage()(reshape_img[angle,:,:,:].cpu())
                        # image_tensor, scale = dataset_class.preprocess(image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)
                        image_tensor = reshape_img[angle, ..., :]
                        scale = 1.0
                        save_image(image_tensor.cpu().detach(), "TEST_POST.png")

                        img = Image.open('TEST_POST.png').convert('RGB')
                        img = torchvision.transforms.ToTensor()(img)
                        image_tensor = img.cuda()

                        detection_bboxes, detection_classes, detection_probs, _ = \
                            model.eval().forward(image_tensor.unsqueeze(dim=0).cuda())
                        detection_bboxes /= scale

                        kept_indices = detection_probs > prob_thresh
                        detection_bboxes = detection_bboxes[kept_indices]
                        detection_classes = detection_classes[kept_indices]
                        detection_probs = detection_probs[kept_indices]

                        draw = ImageDraw.Draw(image)

                        for bbox, cls, prob in zip(detection_bboxes.tolist(), detection_classes.tolist(), detection_probs.tolist()):
                            color = random.choice(['red', 'green', 'blue', 'yellow', 'purple', 'white'])
                            bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3])
                            category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]

                            draw.rectangle(((bbox.left, bbox.top), (bbox.right, bbox.bottom)), outline=color, width=3)
                            draw.text((bbox.left, bbox.top), text=f'{category:s} {prob:.3f}', fill=color)
                        if angle==0:
                            image.save("out/images/test_%d.png" % n)

                    n += 1.0

    def initialize_patch(self):
        print('Initializing patch...')
        # Code for sampling faces:
        # mesh = self.mesh_dataset.meshes[0]
        # box = mesh.get_bounding_boxes()
        # max_x = box[0,0,1]
        # max_y = box[0,1,1]
        # max_z = box[0,2,1]
        # min_x = box[0,0,0]
        # min_y = box[0,1,0]
        # min_z = box[0,2,0]

        # len_z = max_z - min_z
        # len_x = max_x - min_x
        # len_y = max_y - min_y

        # verts = mesh.verts_padded()
        # v_shape = verts.shape
        # sampled_verts = torch.zeros(v_shape[1]).to('cuda')

        # for i in range(v_shape[1]):
        #   #original human1 not SMPL
        #   #if verts[0,i,2] > min_z + len_z * 0.55 and verts[0,i,0] > min_x + len_x*0.3 and verts[0,i,0] < min_x + len_x*0.7 and verts[0,i,1] > min_y + len_y*0.6 and verts[0,i,1] < min_y + len_y*0.7:
        #   #SMPL front
        #   if verts[0,i,2] > min_z + len_z * 0.55 and verts[0,i,0] > min_x + len_x*0.35 and verts[0,i,0] < min_x + len_x*0.65 and verts[0,i,1] > min_y + len_y*0.65 and verts[0,i,1] < min_y + len_y*0.75:
        #   #back
        #   #if verts[0,i,2] < min_z + len_z * 0.5 and verts[0,i,0] > min_x + len_x*0.35 and verts[0,i,0] < min_x + len_x*0.65 and verts[0,i,1] > min_y + len_y*0.65 and verts[0,i,1] < min_y + len_y*0.75:
        #   #leg
        #   #if verts[0,i,0] > min_x + len_x*0.5 and verts[0,i,0] < min_x + len_x and verts[0,i,1] > min_y + len_y*0.2 and verts[0,i,1] < min_y + len_y*0.3:
        #     sampled_verts[i] = 1

        # faces = mesh.faces_padded()
        # f_shape = faces.shape

        # sampled_planes = list()
        # for i in range(faces.shape[1]):
        #   v1 = faces[0,i,0]
        #   v2 = faces[0,i,1]
        #   v3 = faces[0,i,2]
        #   if sampled_verts[v1]+sampled_verts[v2]+sampled_verts[v3]>=1:
        #     sampled_planes.append(i)
        
        # Sample faces from index file:
        sampled_planes = np.load(self.config.idx).tolist()
        idx = torch.Tensor(sampled_planes).long().to(self.device)
        self.idx = idx
        patch = torch.rand(len(sampled_planes), 1, 1, 3, device=(self.device), requires_grad=True)
        self.patch = patch

    def create_renderer(self):
        self.num_angles_train = self.config.num_angles_train
        self.num_angles_test = self.config.num_angles_test

        azim_train = torch.linspace(-1 * self.config.angle_range_train, self.config.angle_range_train, self.num_angles_train)
        azim_test = torch.linspace(-1 * self.config.angle_range_test, self.config.angle_range_test, self.num_angles_test)

        # Cameras for SMPL meshes:
        camera_dist = 2.2
        R, T = look_at_view_transform(camera_dist, 6, azim_train)
        train_cameras = FoVPerspectiveCameras(device=self.device, R=R, T=T)
        self.train_cameras = train_cameras

        R, T = look_at_view_transform(camera_dist, 6, azim_test)
        test_cameras = FoVPerspectiveCameras(device=self.device, R=R, T=T)
        self.test_cameras = test_cameras
        
        raster_settings = RasterizationSettings(
            image_size=self.config.img_size, 
            blur_radius=0.0, 
            faces_per_pixel=1, 
        )

        lights = PointLights(device=self.device, location=[[0.0, 85, 100.0]])

        renderer = MeshRenderer(
            rasterizer=MeshRasterizer(
                cameras=train_cameras, 
                raster_settings=raster_settings
            ),
            shader=HardPhongShader(
                device=self.device, 
                cameras=train_cameras,
                lights=lights
            )
        )

        return renderer
    
    def change_cameras(self, mode, camera_dist=2.2):
      azim_train = torch.linspace(-1 * self.config.angle_range_train, self.config.angle_range_train, self.num_angles_train)
      azim_test = torch.linspace(-1 * self.config.angle_range_test, self.config.angle_range_test, self.num_angles_test)

      R, T = look_at_view_transform(camera_dist, 6, azim_train)
      train_cameras = FoVPerspectiveCameras(device=self.device, R=R, T=T)
      self.train_cameras = train_cameras

      R, T = look_at_view_transform(camera_dist, 6, azim_test)
      test_cameras = FoVPerspectiveCameras(device=self.device, R=R, T=T)
      self.test_cameras = test_cameras

      if mode == 'train':
        self.renderer.rasterizer.cameras=self.train_cameras
        self.renderer.shader.cameras=self.train_cameras
      elif mode == 'test':
        self.renderer.rasterizer.cameras=self.test_cameras
        self.renderer.shader.cameras=self.test_cameras

    def render_mesh_on_bg(self, mesh, bg_img, num_angles, location=None, x_translation=0, y_translation=0):
        images = self.renderer(mesh)
        bg = bg_img.unsqueeze(0)
        bg_shape = bg.shape
        new_bg = torch.zeros(bg_shape[2], bg_shape[3], 3)
        new_bg[:,:,0] = bg[0,0,:,:]
        new_bg[:,:,1] = bg[0,1,:,:]
        new_bg[:,:,2] = bg[0,2,:,:]

        human = images[:, ..., :3]
        
        human_size = self.renderer.rasterizer.raster_settings.image_size

        if location is None:
            dH = bg_shape[2] - human_size
            dW = bg_shape[3] - human_size
            location = (
                dW // 2 + x_translation,
                dW - (dW // 2) - x_translation,
                dH // 2 + y_translation,
                dH - (dH // 2) - y_translation
            )

        contour = torch.where((human == 1).cpu(), torch.zeros(1).cpu(), torch.ones(1).cpu())
        new_contour = torch.zeros(num_angles, bg_shape[2], bg_shape[3], 3)
        
        new_contour[:,:,:,0] = F.pad(contour[:,:,:,0], location, "constant", value=0)
        new_contour[:,:,:,1] = F.pad(contour[:,:,:,1], location, "constant", value=0)
        new_contour[:,:,:,2] = F.pad(contour[:,:,:,2], location, "constant", value=0)

        new_human = torch.zeros(num_angles, bg_shape[2], bg_shape[3], 3)
        new_human[:,:,:,0] = F.pad(human[:,:,:,0], location, "constant", value=0)
        new_human[:,:,:,1] = F.pad(human[:,:,:,1], location, "constant", value=0)
        new_human[:,:,:,2] = F.pad(human[:,:,:,2], location, "constant", value=0)

        final = torch.where((new_contour == 0).cpu(), new_bg.cpu(), new_human.cpu())
        return final

    def render_mesh_on_bg_batch(self, mesh, bg_imgs, num_angles, location=None, x_translation=0, y_translation=0):
        num_bgs = bg_imgs.shape[0]

        images = self.renderer(mesh) # (num_angles, 416, 416, 4)
        images = torch.cat(num_bgs*[images], dim=0) # (num_angles * num_bgs, 416, 416, 4)

        bg_shape = bg_imgs.shape

        # bg_imgs: (num_bgs, 3, 416, 416) -> (num_bgs, 416, 416, 3)
        bg_imgs = bg_imgs.permute(0, 2, 3, 1)

        # bg_imgs: (num_bgs, 416, 416, 3) -> (num_bgs * num_angles, 416, 416, 3)
        bg_imgs = bg_imgs.repeat_interleave(repeats=num_angles, dim=0)

        # human: RGB channels of render (num_angles * num_bgs, 416, 416, 3)
        human = images[:, ..., :3]
        human_size = self.renderer.rasterizer.raster_settings.image_size

        if location is None:
            dH = bg_shape[2] - human_size
            dW = bg_shape[3] - human_size
            location = (
                dW // 2 + x_translation,
                dW - (dW // 2) - x_translation,
                dH // 2 + y_translation,
                dH - (dH // 2) - y_translation
            )

        contour = torch.where((human == 1), torch.zeros(1).to(self.device), torch.ones(1).to(self.device))
        new_contour = torch.zeros(num_angles * num_bgs, bg_shape[2], bg_shape[3], 3, device=self.device)
        
        new_contour[:,:,:,0] = F.pad(contour[:,:,:,0], location, "constant", value=0)
        new_contour[:,:,:,1] = F.pad(contour[:,:,:,1], location, "constant", value=0)
        new_contour[:,:,:,2] = F.pad(contour[:,:,:,2], location, "constant", value=0)

        new_human = torch.zeros(num_angles * num_bgs, bg_shape[2], bg_shape[3], 3, device=self.device)
        new_human[:,:,:,0] = F.pad(human[:,:,:,0], location, "constant", value=0)
        new_human[:,:,:,1] = F.pad(human[:,:,:,1], location, "constant", value=0)
        new_human[:,:,:,2] = F.pad(human[:,:,:,2], location, "constant", value=0)

        # output: (num_angles * num_bgs, 416, 416, 3)
        final = torch.where((new_contour == 0), bg_imgs, new_human)
        return final
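Both render_mesh_on_bg* methods place the square render into the larger background with F.pad, whose 4-tuple is (left, right, top, bottom) over the last two dimensions. A tiny self-contained sketch of that centering trick, assuming a 416 px render and an 800x600 background:

import torch
import torch.nn.functional as F

render = torch.rand(1, 416, 416)  # one channel of a rendered image
H, W = 600, 800                   # background size
dH, dW = H - 416, W - 416
# (left, right, top, bottom); shifting the split moves the render off-center
location = (dW // 2, dW - dW // 2, dH // 2, dH - dH // 2)
placed = F.pad(render, location, "constant", value=0)
assert placed.shape == (1, H, W)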
Exemple #17
0
def valid(datacfg, cfgfile, weightfile, outfile):
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    name_list = options['names']
    prefix = 'results'
    names = load_class_names(name_list)
    print(names)
    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    m = Darknet(cfgfile)
    # print(m)
    m.print_network()
    m.load_weights(weightfile)
    m.cuda()
    m.eval()

    valid_dataset = dataset.listDataset(valid_images,
                                        shape=(m.width, m.height),
                                        shuffle=False,
                                        transform=transforms.Compose([
                                            transforms.ToTensor(),
                                        ]))
    valid_batchsize = 2
    assert (valid_batchsize > 1)

    kwargs = {'num_workers': 4, 'pin_memory': True}
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=valid_batchsize,
                                               shuffle=False,
                                               **kwargs)

    fps = [0] * m.num_classes
    if not os.path.exists('results'):
        os.mkdir('results')
    print(len(names), m.num_classes)
    for i in range(m.num_classes):
        buf = '%s/%s%s.txt' % (prefix, outfile, names[i])
        fps[i] = open(buf, 'w')

    lineId = -1

    conf_thresh = 0.005
    nms_thresh = 0.45
    for _, (data, target) in tqdm(enumerate(valid_loader)):
        data = data.cuda()
        output = m(data)
        batch_boxes = get_all_boxes(output,
                                    conf_thresh,
                                    m.num_classes,
                                    only_objectness=0,
                                    validation=True)

        for i in range(data.size(0)):
            lineId = lineId + 1
            fileId = os.path.basename(valid_files[lineId]).split('.')[0]
            width, height = get_image_size(valid_files[lineId])
            # print(valid_files[lineId])
            boxes = batch_boxes[i]
            boxes = nms(boxes, nms_thresh)
            for box in boxes:
                x1 = (box[0] - box[2] / 2.0) * width
                y1 = (box[1] - box[3] / 2.0) * height
                x2 = (box[0] + box[2] / 2.0) * width
                y2 = (box[1] + box[3] / 2.0) * height

                det_conf = box[4]
                for j in range((len(box) - 5) // 2):
                    cls_conf = box[5 + 2 * j]
                    cls_id = box[6 + 2 * j]
                    prob = det_conf * cls_conf
                    fps[cls_id].write('%s %f %f %f %f %f\n' %
                                      (fileId, prob, x1, y1, x2, y2))

    for i in range(m.num_classes):
        fps[i].close()
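The inner loop converts each box from normalized center format (cx, cy, w, h) to pixel corner coordinates before writing the detection line. The same arithmetic as a small helper (the function name is ours):

def center_to_corners(box, width, height):
    """Convert a normalized (cx, cy, w, h, ...) box to pixel corner coordinates."""
    cx, cy, w, h = box[:4]
    x1 = (cx - w / 2.0) * width
    y1 = (cy - h / 2.0) * height
    x2 = (cx + w / 2.0) * width
    y2 = (cy + h / 2.0) * height
    return x1, y1, x2, y2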
Exemple #18
0
class Darknet_Detector():
    def __init__(self, id_num, cfg_file, wt_file, class_file, pallete_file,
                 nms_threshold=0.3, conf=0.7, resolution=1024,
                 num_classes=80, nms_classwise=True):
        #Set up the neural network
        print("Loading network.....")
        self.model = Darknet(cfg_file)
        self.model.load_weights(wt_file)
        print("Network successfully loaded")
        
        self.nms = nms_threshold
        self.conf = conf
        self.nms_classwise = nms_classwise
        self.resolution = resolution # sets size of max dimension
        
        if id_num == 0:
            self.CUDA = True
            torch.cuda.set_device(0)
            torch.cuda.empty_cache()
            
        elif id_num == 1:
            self.CUDA = True
            torch.cuda.set_device(1)
            torch.cuda.empty_cache()
        else:
            self.CUDA = False
            
        self.colors = pkl.load(open(pallete_file, "rb"))
        self.num_classes = num_classes
        self.classes = load_classes(class_file)

        self.model.net_info["height"] = self.resolution
        inp_dim = int(self.model.net_info["height"])
        assert inp_dim % 32 == 0 
        assert inp_dim > 32
    
        #If there's a GPU availible, put the model on GPU
        if self.CUDA:
            self.model.cuda()
        
        
        #Set the model in evaluation mode
        self.model.eval()
        
        
    def prep_image(self,img,inp_dim):
        """
        Prepare image for inputting to the neural network. 
        Returns a Variable 
        """
        orig_im = img
        dim = orig_im.shape[1], orig_im.shape[0]
        img = cv2.resize(orig_im, (inp_dim, inp_dim))
        img_ = img[:,:,::-1].transpose((2,0,1)).copy()
        img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
        return img_, orig_im, dim

    def write(self,x, img):
        c1 = tuple(x[1:3].int())
        c2 = tuple(x[3:5].int())
        cls = int(x[-1])
        label = "{0}".format(self.classes[cls])
        color = random.choice(self.colors)
        cv2.rectangle(img, c1, c2,color, 1)
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0]
        c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
        cv2.rectangle(img, c1, c2,color, -1)
        cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,255,255], 1)
        return img

    def detect(self,image, show = False,verbose = False,save_file = None):
        start = time.time()
        try:  # image is already a loaded array
            img, orig_im, dim = self.prep_image(image, self.resolution)
        except Exception:  # otherwise assume image is a file path
            image = cv2.imread(image)
            img, orig_im, dim = self.prep_image(image, self.resolution)
            
        im_dim = torch.FloatTensor(dim).repeat(1,2)                        
            
        if self.CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()
        
        
        with torch.no_grad():  # inference only; avoids building the autograd graph
            output = self.model(Variable(img), self.CUDA)
        output = write_results(output, self.conf, self.num_classes, nms=True, nms_conf=self.nms)
        output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(self.resolution)) / self.resolution
        
        im_dim = im_dim.repeat(output.size(0), 1)
        output[:,[1,3]] *= image.shape[1]
        output[:,[2,4]] *= image.shape[0]

                
        out = list(map(lambda x: self.write(x, orig_im), output))

        if verbose:
            print("FPS of the video is {:5.2f}".format(1.0 / (time.time() - start)))

        if save_file is not None:
            cv2.imwrite(save_file, orig_im)

        if show:
            cv2.imshow("frame", orig_im)
            cv2.waitKey(0)

        return output, orig_im
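A hypothetical invocation of this wrapper; all file paths below are placeholders, not taken from the snippet:

# Hypothetical usage of Darknet_Detector; paths are placeholders.
detector = Darknet_Detector(0, 'cfg/yolov3.cfg', 'yolov3.weights',
                            'data/coco.names', 'pallete')
output, annotated = detector.detect('test.jpg', show=False, verbose=True,
                                    save_file='out.jpg')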
import torch
from darknet import Darknet
from caffenet import CaffeNet
from PIL import Image
from utils import image2torch, convert2cpu
from torch.autograd import Variable

cfgfile1 = 'reid.cfg'
weightfile1 = 'reid.weights'
cfgfile2 = 'reid_nbn.cfg'
weightfile2 = 'reid_nbn.weights'
cfgfile3 = 'reid_nbn.prototxt'
weightfile3 = 'reid_nbn.caffemodel'

m1 = Darknet(cfgfile1)
m1.load_weights(weightfile1)
m1.eval()

m2 = Darknet(cfgfile2)
m2.load_weights(weightfile2)
m2.eval()

m3 = CaffeNet(cfgfile3)
m3.load_weights(weightfile3)
m3.eval()

img = torch.rand(8, 3, 128, 64)
img = Variable(img)

output1 = m1(img).clone()
output2 = m2(img).clone()
output3 = m3(img).clone()
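The script ends after the three forward passes without checking agreement. A short follow-up one might add (our addition, assuming PyTorch >= 0.4 so .item() is available) to confirm the Darknet, batchnorm-merged Darknet, and Caffe conversions match:

# Compare the three outputs element-wise (our addition, not in the original script)
diff12 = (output1 - output2).abs().max().item()
diff13 = (output1 - output3).abs().max().item()
print('max |darknet - darknet_nbn| = %g' % diff12)
print('max |darknet - caffe|      = %g' % diff13)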
Exemple #20
0
def valid(datacfg, cfgfile, weightfile, outfile):
    def truths_length(truths):
        for i in range(50):
            if truths[i][1] == 0:
                return i

    # Parse configuration files
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    meshname = options['mesh']
    backupdir = options['backup']
    name = options['name']
    if not os.path.exists(backupdir):
        makedirs(backupdir)

    # Parameters
    prefix = 'results'
    seed = int(time.time())
    gpus = '0'  # Specify which gpus to use
    test_width = 544
    test_height = 544
    torch.manual_seed(seed)
    use_cuda = True
    if use_cuda:
        os.environ['CUDA_VISIBLE_DEVICES'] = gpus
        torch.cuda.manual_seed(seed)
    save = True
    testtime = True
    num_classes = 1
    testing_samples = 0.0
    eps = 1e-5
    notpredicted = 0
    conf_thresh = 0.1
    nms_thresh = 0.4
    match_thresh = 0.5
    if save:
        makedirs(backupdir + '/test')
        makedirs(backupdir + '/test/gt')
        makedirs(backupdir + '/test/pr')

    # To save
    testing_error_trans = 0.0
    testing_error_angle = 0.0
    testing_error_pixel = 0.0
    errs_2d = []
    errs_3d = []
    errs_trans = []
    errs_angle = []
    errs_corner2D = []
    preds_trans = []
    preds_rot = []
    preds_corners2D = []
    gts_trans = []
    gts_rot = []
    gts_corners2D = []

    # Read object model information, get 3D bounding box corners
    mesh = MeshPly(meshname)
    vertices = np.c_[np.array(mesh.vertices),
                     np.ones((len(mesh.vertices), 1))].transpose()
    print('vertices', vertices)
    corners3D = get_3D_corners(vertices)
    print('corners3D', corners3D)
    # diam          = calc_pts_diameter(np.array(mesh.vertices))
    diam = float(options['diam'])

    # Read intrinsic camera parameters
    internal_calibration = get_camera_intrinsic()

    # Get validation file names
    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    # Specify model, load pretrained weights, pass to GPU and set the module in evaluation mode
    model = Darknet(cfgfile)
    model.print_network()
    model.load_weights(weightfile)
    model.cuda()
    model.eval()

    # Get the parser for the test dataset
    valid_dataset = dataset.listDataset(valid_images,
                                        shape=(test_width, test_height),
                                        shuffle=False,
                                        transform=transforms.Compose([
                                            transforms.ToTensor(),
                                        ]))
    valid_batchsize = 1

    # Specify the number of workers for multiple processing, get the dataloader for the test dataset
    kwargs = {'num_workers': 4, 'pin_memory': True}
    test_loader = torch.utils.data.DataLoader(valid_dataset,
                                              batch_size=valid_batchsize,
                                              shuffle=False,
                                              **kwargs)

    logging("   Testing {}...".format(name))
    logging("   Number of test samples: %d" % len(test_loader.dataset))
    # Iterate through test batches (Batch size for test data is 1)
    count = 0
    z = np.zeros((3, 1))
    for batch_idx, (data, target) in enumerate(test_loader):

        t1 = time.time()
        # Pass data to GPU
        if use_cuda:
            data = data.cuda()
            target = target.cuda()

        # Wrap tensors in Variable with volatile=True for inference (pre-0.4 PyTorch idiom; torch.no_grad() is the modern equivalent)
        data = Variable(data, volatile=True)
        t2 = time.time()

        # Forward pass
        output = model(data).data
        t3 = time.time()

        # Using confidence threshold, eliminate low-confidence predictions
        all_boxes = get_region_boxes(output, conf_thresh, num_classes)
        t4 = time.time()

        # Iterate through all images in the batch
        for i in range(output.size(0)):
            print('output.size(0) is ', output.size(0))
            # For each image, get all the predictions
            boxes = all_boxes[i]

            # For each image, get all the targets (for multiple object pose estimation, there might be more than 1 target per image)
            truths = target[i].view(-1, 21)

            # Get how many object are present in the scene
            num_gts = truths_length(truths)

            # Iterate through each ground-truth object
            for k in range(num_gts):
                box_gt = [
                    truths[k][1], truths[k][2], truths[k][3], truths[k][4],
                    truths[k][5], truths[k][6], truths[k][7], truths[k][8],
                    truths[k][9], truths[k][10], truths[k][11], truths[k][12],
                    truths[k][13], truths[k][14], truths[k][15], truths[k][16],
                    truths[k][17], truths[k][18], 1.0, 1.0, truths[k][0]
                ]
                best_conf_est = -1

                # If the prediction has the highest confidence, choose it as our prediction for single object pose estimation
                for j in range(len(boxes)):
                    if (boxes[j][18] > best_conf_est):
                        match = corner_confidence9(
                            box_gt[:18], torch.FloatTensor(boxes[j][:18]))  # confidence score (currently unused)
                        box_pr = boxes[j]
                        best_conf_est = boxes[j][18]

                # Denormalize the corner predictions
                corners2D_gt = np.array(np.reshape(box_gt[:18], [9, 2]),
                                        dtype='float32')
                corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]),
                                        dtype='float32')
                corners2D_gt[:, 0] = corners2D_gt[:, 0] * 1280
                corners2D_gt[:, 1] = corners2D_gt[:, 1] * 720
                corners2D_pr[:, 0] = corners2D_pr[:, 0] * 1280
                corners2D_pr[:, 1] = corners2D_pr[:, 1] * 720
                preds_corners2D.append(corners2D_pr)
                gts_corners2D.append(corners2D_gt)

                # Compute corner prediction error
                corner_norm = np.linalg.norm(corners2D_gt - corners2D_pr,
                                             axis=1)
                corner_dist = np.mean(corner_norm)
                errs_corner2D.append(corner_dist)

                # Compute [R|t] by pnp
                _, R_gt, t_gt = pnp(
                    np.array(np.transpose(
                        np.concatenate((np.zeros((3, 1)), corners3D[:3, :]),
                                       axis=1)),
                             dtype='float32'), corners2D_gt,
                    np.array(internal_calibration, dtype='float32'))
                _, R_pr, t_pr = pnp(
                    np.array(np.transpose(
                        np.concatenate((np.zeros((3, 1)), corners3D[:3, :]),
                                       axis=1)),
                             dtype='float32'), corners2D_pr,
                    np.array(internal_calibration, dtype='float32'))

                if save:
                    preds_trans.append(t_pr)
                    gts_trans.append(t_gt)
                    preds_rot.append(R_pr)
                    gts_rot.append(R_gt)

                    np.savetxt(
                        backupdir + '/test/gt/R_' + valid_files[count][-8:-3] +
                        'txt', np.array(R_gt, dtype='float32'))
                    np.savetxt(
                        backupdir + '/test/gt/t_' + valid_files[count][-8:-3] +
                        'txt', np.array(t_gt, dtype='float32'))
                    np.savetxt(
                        backupdir + '/test/pr/R_' + valid_files[count][-8:-3] +
                        'txt', np.array(R_pr, dtype='float32'))
                    np.savetxt(
                        backupdir + '/test/pr/t_' + valid_files[count][-8:-3] +
                        'txt', np.array(t_pr, dtype='float32'))
                    np.savetxt(
                        backupdir + '/test/gt/corners_' +
                        valid_files[count][-8:-3] + 'txt',
                        np.array(corners2D_gt, dtype='float32'))
                    np.savetxt(
                        backupdir + '/test/pr/corners_' +
                        valid_files[count][-8:-3] + 'txt',
                        np.array(corners2D_pr, dtype='float32'))

                # Compute translation error
                trans_dist = np.sqrt(np.sum(np.square(t_gt - t_pr)))
                errs_trans.append(trans_dist)

                # Compute angle error
                angle_dist = calcAngularDistance(R_gt, R_pr)
                errs_angle.append(angle_dist)

                # Compute pixel error
                Rt_gt = np.concatenate((R_gt, t_gt), axis=1)
                Rt_pr = np.concatenate((R_pr, t_pr), axis=1)
                proj_2d_gt = compute_projection(vertices, Rt_gt,
                                                internal_calibration)
                proj_2d_pred = compute_projection(vertices, Rt_pr,
                                                  internal_calibration)
                norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)
                pixel_dist = np.mean(norm)
                errs_2d.append(pixel_dist)

                # Compute 3D distances
                transform_3d_gt = compute_transformation(vertices, Rt_gt)
                transform_3d_pred = compute_transformation(vertices, Rt_pr)
                norm3d = np.linalg.norm(transform_3d_gt - transform_3d_pred,
                                        axis=0)
                vertex_dist = np.mean(norm3d)
                errs_3d.append(vertex_dist)

                # Sum errors
                testing_error_trans += trans_dist
                testing_error_angle += angle_dist
                testing_error_pixel += pixel_dist
                testing_samples += 1
                count = count + 1

        t5 = time.time()

    # Compute 2D projection error, 6D pose error, 5cm5degree error
    px_threshold = 5
    acc = len(np.where(
        np.array(errs_2d) <= px_threshold)[0]) * 100. / (len(errs_2d) + eps)
    acc5cm5deg = len(
        np.where((np.array(errs_trans) <= 0.05)
                 & (np.array(errs_angle) <= 5))[0]) * 100. / (len(errs_trans) +
                                                              eps)
    acc3d10 = len(np.where(
        np.array(errs_3d) <= diam * 0.1)[0]) * 100. / (len(errs_3d) + eps)
    corner_acc = len(np.where(np.array(errs_corner2D) <= px_threshold)
                     [0]) * 100. / (len(errs_corner2D) + eps)
    mean_err_2d = np.mean(errs_2d)
    mean_corner_err_2d = np.mean(errs_corner2D)
    nts = float(testing_samples)

    if testtime:
        print('-----------------------------------')
        print('  tensor to cuda : %f' % (t2 - t1))
        print('         predict : %f' % (t3 - t2))
        print('get_region_boxes : %f' % (t4 - t3))
        print('            eval : %f' % (t5 - t4))
        print('           total : %f' % (t5 - t1))
        print('-----------------------------------')

    # Print test statistics
    logging('Results of {}'.format(name))
    logging('   Acc using {} px 2D Projection = {:.2f}%'.format(
        px_threshold, acc))
    logging('   Acc using 10% threshold - {} vx 3D Transformation = {:.2f}%'.
            format(diam * 0.1, acc3d10))
    logging('   Acc using 5 cm 5 degree metric = {:.2f}%'.format(acc5cm5deg))
    logging(
        "   Mean 2D pixel error is %f, Mean vertex error is %f, mean corner error is %f"
        % (mean_err_2d, np.mean(errs_3d), mean_corner_err_2d))
    logging(
        '   Translation error: %f m, angle error: %f degree, pixel error: %f pix'
        % (testing_error_trans / nts, testing_error_angle / nts,
           testing_error_pixel / nts))

    if save:
        predfile = backupdir + '/predictions_linemod_' + name + '.mat'
        scipy.io.savemat(
            predfile, {
                'R_gts': gts_rot,
                't_gts': gts_trans,
                'corner_gts': gts_corners2D,
                'R_prs': preds_rot,
                't_prs': preds_trans,
                'corner_prs': preds_corners2D
            })
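The pnp helper called above takes the 3D corner coordinates, the denormalized 2D corner predictions, and the camera intrinsics, and returns a rotation and translation. It presumably wraps OpenCV's solvePnP; a minimal sketch of such a wrapper (our assumption, not the snippet's actual implementation):

import cv2
import numpy as np

def pnp(points_3d, points_2d, camera_matrix):
    # Recover R, t such that points_2d ~ project(camera_matrix, R @ points_3d + t)
    dist_coeffs = np.zeros((4, 1), dtype='float32')  # assume an undistorted camera
    ok, rvec, t = cv2.solvePnP(points_3d, points_2d, camera_matrix, dist_coeffs)
    R, _ = cv2.Rodrigues(rvec)  # axis-angle vector -> 3x3 rotation matrix
    return ok, R, t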
Exemple #21
0
class YOLO_detection:
    def __init__(self):
        self.boxes = BoundingBoxes()
        self.box = BoundingBox()
        self.image_pub = rospy.Publisher("YOLO_detect_result",
                                         Image,
                                         queue_size=1)
        self.boxes_pub = rospy.Publisher("YOLO_detect_result_boxes",
                                         BoundingBoxes,
                                         queue_size=1)
        # self.result = rospy.Publisher('YOLO_detect_result', Float64MultiArray, queue_size=10)
        self.bridge = CvBridge()
        #self.image_sub = rospy.Subscriber("/camera/rgb/image_raw", Image, self.callback)
        self.image_sub = rospy.Subscriber("/wideangle/image_color", Image,
                                          self.callback)
        self.batch_size = 1
        self.reso = 416
        self.confidence = 0.5
        self.nms_thesh = 0.4
        self.CUDA = torch.cuda.is_available()
        self.num_classes = 80

        # self.classes = load_classes("/home/iairiv/code/yolo/src/yolo_detection/src/data/coco.names")
        # self.cfg_file = "/home/iairiv/code/yolo/src/yolo_detection/src/cfg/yolov3.cfg"
        # self.weights_file = "/home/iairiv/code/yolo/src/yolo_detection/src/yolov3.weights"

        self.colors = random_color()

        # self.classes = load_classes("/space/code/rosadas/src/yolo_detection/src/data/coco.names")
        # self.cfg_file = "/space/code/rosadas/src/yolo_detection/src/cfg/yolov3.cfg"
        # self.weights_file = "/space/code/rosadas/src/yolo_detection/src/yolov3.weights"

        self.classes = load_classes(rospy.get_param("yolo_classname"))
        self.cfg_file = rospy.get_param("yolo_cfg")
        self.weights_file = rospy.get_param("yolo_weight")

        self.model = Darknet(self.cfg_file)
        self.model.load_weights(self.weights_file)
        self.model.net_info["height"] = self.reso
        if self.CUDA: self.model.cuda()
        self.model.eval()
        self.send_by_UDP = False
        self.draw_res = True
        # if self.send_by_UDP:
        #     self.UDP = UDPtrans.YOLO_UDP('195.0.0.5', 7800)

    def transform_input(self, img):
        return prep_image(img, self.reso)

    def yolo_detection(self, input):
        if self.CUDA:
            input = input.cuda()
        with torch.no_grad():
            prediction = self.model(Variable(input), self.CUDA)
        # print prediction
        prediction = write_results(prediction,
                                   self.confidence,
                                   self.num_classes,
                                   nms_conf=self.nms_thesh)
        return prediction

    def write(self, output, img):
        # im_dim_list = [(img.shape[1], img.shape[0])]
        # im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
        # if self.CUDA:
        #     im_dim_list = im_dim_list.cuda()
        # scaling_factor = torch.min(self.reso / im_dim_list, 1)[0].view(-1, 1)
        # # print output
        # output[:, [1, 3]] -= (self.reso - scaling_factor * im_dim_list[:, 0]) / 2
        # output[:, [2, 4]] -= (self.reso - scaling_factor * im_dim_list[:, 1]) / 2
        # output[:, 1:5] /= scaling_factor

        # for x in output:
        #     c1 = tuple(x[1:3].int())
        #     c2 = tuple(x[3:5].int())
        #     cls = int(x[-1])
        #     color = self.colors[cls]
        #     label = "{0}".format(self.classes[cls])
        #     print(label)
        #     color = [255,255,0]
        #     cv2.rectangle(img, c1, c2, color, 2)
        #     t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
        #     c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
        #     cv2.rectangle(img, c1, c2, color, 2)
        #     cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 2, [225, 255, 255], 1)
        # return img
        for x in output:
            c1 = tuple(x[1:3].int())
            c2 = tuple(x[3:5].int())
            cls = int(x[-1])
            color = (0, 255, 0)  #self.colors[cls]
            label = "{0}".format(self.classes[cls])
            print(label)
            cv2.rectangle(img, c1, c2, color, 4)
            t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
            c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
            cv2.rectangle(img, c1, c2, color, -1)
            cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4),
                        cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)
        return img

    def callback(self, data):
        startt = time.time()
        try:
            start_time = rospy.Time.now()
            start_time_second = start_time.to_sec()
            timeArray = time.localtime(start_time_second)
            timeArray_H_M_S = time.strftime("%H_%M_%S", timeArray)
            nano_seconds = str(
                int(start_time.to_nsec() -
                    int(start_time_second) * 1e9)).zfill(9)
            timeArray_H_M_S_MS = timeArray_H_M_S + "_" + nano_seconds[:3]
            print(timeArray_H_M_S_MS)
            # YOLO detect
            cv_image = self.bridge.imgmsg_to_cv2(data, "bgr8")
            # cv_image = cv2.resize(cv_image, (0, 0), fx=0.5, fy=0.5, interpolation=cv2.INTER_NEAREST)
            input_image = self.transform_input(cv_image)
            prediction = self.yolo_detection(input_image)
            # print(type(prediction))
            # coordinate transformation
            if type(prediction) == int:
                if self.draw_res == True:
                    result = cv_image
                # if self.send_by_UDP:
                #     self.UDP.send_message(timeArray_H_M_S_MS, None)
            else:
                # image size should be the same with the size when we calibrate
                im_dim_list_list = [(cv_image.shape[1], cv_image.shape[0])]
                # print im_dim_list_list
                im_dim_list = torch.FloatTensor(im_dim_list_list).repeat(1, 2)
                if self.CUDA:
                    im_dim_list = im_dim_list.cuda()
                scaling_factor = torch.min(self.reso / im_dim_list,
                                           1)[0].view(-1, 1)
                prediction[:,
                           [1, 3]] -= (self.reso -
                                       scaling_factor * im_dim_list[:, 0]) / 2
                prediction[:,
                           [2, 4]] -= (self.reso -
                                       scaling_factor * im_dim_list[:, 1]) / 2
                prediction[:, 1:5] /= scaling_factor
                prediction[:, [1, 3]] = torch.clamp(prediction[:, [1, 3]], 0.0,
                                                    im_dim_list_list[0][0])
                prediction[:, [2, 4]] = torch.clamp(prediction[:, [2, 4]], 0.0,
                                                    im_dim_list_list[0][1])
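                # The adjustments above undo the letterbox transform: the
                # frame was resized by s = min(reso/W, reso/H) and padded to
                # a reso x reso canvas, so each box is shifted back by the
                # padding offsets, divided by s, and clamped to the frame.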
                # print prediction
                # UDP send
                # if self.send_by_UDP:
                #     self.UDP.send_message(timeArray_H_M_S_MS, prediction.cpu().numpy().tolist())
                # draw Image
                if self.draw_res:
                    result = self.write(prediction, cv_image)
                else:
                    result = cv_image
                # pub.publish(self.boxes)

        except CvBridgeError as e:
            print(e)

        try:
            # prediction = prediction.cpu().numpy().tolist()
            # boxes = self.boxes.bounding_boxes()
            # print(type(prediction))
            boxes = BoundingBoxes()
            if isinstance(prediction, int):
                detec_len = 0
            else:
                detec_len = len(prediction)
            for i in range(detec_len):
                box = BoundingBox()
                box.num = prediction[i][0]
                box.xmin = prediction[i][1]
                box.ymin = prediction[i][2]
                box.xmax = prediction[i][3]
                box.ymax = prediction[i][4]
                box.probability = prediction[i][6]
                box.id = "{0}".format(self.classes[int(prediction[i][7])])
                #self.box_pub.publish(self.box)

                boxes.bounding_boxes.append(box)
            boxes.objNum = detec_len
            boxes.header.stamp = rospy.Time.now()
            self.image_pub.publish(self.bridge.cv2_to_imgmsg(result, "bgr8"))

            self.boxes_pub.publish(boxes)

        except CvBridgeError as e:
            print(e)
        # time.sleep(0.05)
        print('yolo use:', time.time() - startt)
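
How this callback is wired into the ROS node is not shown in this snippet; a
hedged sketch of the presumed setup (topic names and attribute names are
assumptions, not from the original):

# somewhere in the node's __init__:
# self.bridge = CvBridge()
# self.image_sub = rospy.Subscriber("/camera/image_raw", Image,
#                                   self.callback, queue_size=1)
# self.image_pub = rospy.Publisher("/yolo/image", Image, queue_size=1)
# self.boxes_pub = rospy.Publisher("/yolo/bounding_boxes", BoundingBoxes,
#                                  queue_size=1)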
Example #22
class YOLO3(object):
    def __init__(self,
                 cfgfile,
                 weightfile,
                 namesfile,
                 use_cuda=True,
                 is_plot=False,
                 is_xywh=False):
        # net definition
        self.net = Darknet(cfgfile)
        self.net.load_weights(weightfile)
        print('Loading weights from %s... Done!' % (weightfile))
        self.device = "cuda" if use_cuda else "cpu"
        self.net.eval()
        self.net.to(self.device)

        # constants
        self.size = self.net.width, self.net.height
        self.conf_thresh = 0.5
        self.nms_thresh = 0.4
        self.use_cuda = use_cuda
        self.is_plot = is_plot
        self.is_xywh = is_xywh
        self.class_names = self.load_class_names(namesfile)

    def __call__(self, ori_img):
        # img to tensor
        assert isinstance(ori_img, np.ndarray), "input must be a numpy array!"
        img = ori_img.astype(np.float32) / 255.  # np.float was removed in NumPy 1.24
        img = cv2.resize(img, self.size)
        img = torch.from_numpy(img).float().permute(2, 0, 1).unsqueeze(0)
        # forward
        with torch.no_grad():
            img = img.to(self.device)
            out_boxes = self.net(img)
            boxes = get_all_boxes(out_boxes, self.conf_thresh,
                                  self.net.num_classes, self.use_cuda)[0]
            boxes = nms(boxes, self.nms_thresh)
            # print(boxes)
        # plot boxes
        if self.is_plot:
            return self.plot_bbox(ori_img, boxes)
        if len(boxes) == 0:
            return None, None, None

        height, width = ori_img.shape[:2]
        boxes = np.vstack(boxes)
        bbox = np.empty_like(boxes[:, :4])
        if self.is_xywh:
            # bbox x y w h
            bbox[:, 0] = boxes[:, 0] * width
            bbox[:, 1] = boxes[:, 1] * height
            bbox[:, 2] = boxes[:, 2] * width
            bbox[:, 3] = boxes[:, 3] * height
        else:
            # bbox xmin ymin xmax ymax
            bbox[:, 0] = (boxes[:, 0] - boxes[:, 2] / 2.0) * width
            bbox[:, 1] = (boxes[:, 1] - boxes[:, 3] / 2.0) * height
            bbox[:, 2] = (boxes[:, 0] + boxes[:, 2] / 2.0) * width
            bbox[:, 3] = (boxes[:, 1] + boxes[:, 3] / 2.0) * height
        cls_conf = boxes[:, 5]
        cls_ids = boxes[:, 6]
        return bbox, cls_conf, cls_ids

    def load_class_names(self, namesfile):
        with open(namesfile, 'r', encoding='utf8') as fp:
            class_names = [line.strip() for line in fp.readlines()]
        return class_names

    def plot_bbox(self, ori_img, boxes):
        img = ori_img
        height, width = img.shape[:2]
        for box in boxes:
            # get x1 y1 x2 y2
            x1 = int(round(((box[0] - box[2] / 2.0) * width).item()))
            y1 = int(round(((box[1] - box[3] / 2.0) * height).item()))
            x2 = int(round(((box[0] + box[2] / 2.0) * width).item()))
            y2 = int(round(((box[1] + box[3] / 2.0) * height).item()))
            cls_conf = box[5]
            cls_id = int(box[6])  # cast to int so it can index class_names
            # import random
            # color = random.choices(range(256),k=3)
            color = [int(x) for x in np.random.randint(256, size=3)]
            # put texts and rectangles
            img = cv2.putText(img, self.class_names[cls_id], (x1, y1),
                              cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)
            img = cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
        return img
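
A minimal usage sketch for the YOLO3 wrapper above; the cfg, weights and
names paths below are placeholders, not files shipped with this example:

import cv2
import torch

detector = YOLO3('cfg/yolo_v3.cfg', 'yolo_v3.weights', 'data/coco.names',
                 use_cuda=torch.cuda.is_available())
frame = cv2.imread('demo.jpg')
bbox, cls_conf, cls_ids = detector(frame)
if bbox is not None:
    # with is_xywh=False, boxes come back as pixel xmin/ymin/xmax/ymax
    for x1, y1, x2, y2 in bbox.astype(int):
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)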
Example #23
class YOLOv3(object):
    def __init__(self,
                 cfgfile,
                 weightfile,
                 namesfile,
                 score_thresh=0.7,
                 conf_thresh=0.01,
                 nms_thresh=0.45,
                 is_xywh=False,
                 use_cuda=True):
        # net definition
        self.net = Darknet(cfgfile)
        self.net.load_weights(weightfile)
        logger = logging.getLogger("root.detector")
        logger.info('Loading weights from %s... Done!' % (weightfile))
        self.device = "cuda" if use_cuda else "cpu"
        self.net.eval()
        self.net.to(self.device)

        # constants
        self.size = self.net.width, self.net.height
        self.score_thresh = score_thresh
        self.conf_thresh = conf_thresh
        self.nms_thresh = nms_thresh
        self.use_cuda = use_cuda
        self.is_xywh = is_xywh
        self.num_classes = self.net.num_classes
        self.class_names = self.load_class_names(namesfile)

    def __call__(self, ori_img):
        # img to tensor
        assert isinstance(ori_img, np.ndarray), "input must be a numpy array!"
        img = ori_img.astype(np.float32) / 255.  # np.float was removed in NumPy 1.24

        img = cv2.resize(img, self.size)
        img = torch.from_numpy(img).float().permute(2, 0, 1).unsqueeze(0)

        # forward
        with torch.no_grad():
            img = img.to(self.device)
            out_boxes = self.net(img)
            boxes = get_all_boxes(out_boxes,
                                  self.conf_thresh,
                                  self.num_classes,
                                  use_cuda=self.use_cuda)  # batch size is 1
            # boxes = nms(boxes, self.nms_thresh)

            boxes = post_process(boxes, self.net.num_classes, self.conf_thresh,
                                 self.nms_thresh)[0].cpu()
            boxes = boxes[boxes[:, -2] >
                          self.score_thresh, :]  # bbox xmin ymin xmax ymax

        if len(boxes) == 0:
            bbox = torch.FloatTensor([]).reshape([0, 4])
            cls_conf = torch.FloatTensor([])
            cls_ids = torch.LongTensor([])
        else:
            height, width = ori_img.shape[:2]
            bbox = boxes[:, :4]
            if self.is_xywh:
                # bbox x y w h
                bbox = xyxy_to_xywh(bbox)

            bbox = bbox * torch.FloatTensor([[width, height, width, height]])
            cls_conf = boxes[:, 5]
            cls_ids = boxes[:, 6].long()
        return bbox.numpy(), cls_conf.numpy(), cls_ids.numpy()

    def load_class_names(self, namesfile):
        with open(namesfile, 'r', encoding='utf8') as fp:
            class_names = [line.strip() for line in fp.readlines()]
        return class_names
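
The xyxy_to_xywh helper used above is not shown in this snippet; a plausible
minimal version, assuming normalized corner boxes in a torch tensor:

def xyxy_to_xywh(bbox):
    # (xmin, ymin, xmax, ymax) -> (center_x, center_y, width, height)
    xywh = bbox.clone()
    xywh[:, 0] = (bbox[:, 0] + bbox[:, 2]) / 2.0
    xywh[:, 1] = (bbox[:, 1] + bbox[:, 3]) / 2.0
    xywh[:, 2] = bbox[:, 2] - bbox[:, 0]
    xywh[:, 3] = bbox[:, 3] - bbox[:, 1]
    return xywh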
Example #24
def valid(datacfg, modelcfg, weightfile):

    # Parameters
    options = read_data_cfg(datacfg)
    dataDir = options['dataDir']
    meshname = options['mesh']
    name = options['name']
    filetype = options['rgbfileType']
    fx = float(options['fx'])
    fy = float(options['fy'])
    u0 = float(options['u0'])
    v0 = float(options['v0'])
    seed = int(time.time())
    gpus = options['gpus']
    img_width = 640
    img_height = 480
    torch.manual_seed(seed)

    use_cuda = True
    if use_cuda:
        os.environ['CUDA_VISIBLE_DEVICES'] = gpus
        torch.cuda.manual_seed(seed)

    visualize = True
    num_classes = 1
    conf_thresh = 0.5
    # nms_thresh   = 0.4
    # match_thresh = 0.5

    # Read object model information, get 3D bounding box corners
    mesh = MeshPly(meshname)
    vertices = np.c_[np.array(mesh.vertices),
                     np.ones((len(mesh.vertices), 1))].transpose()
    corners3D = get_3D_corners(vertices)

    # Read intrinsic camera parameters
    internal_calibration = get_camera_intrinsic(u0, v0, fx, fy)

    # Specify model, load pretrained weights, pass to GPU and set the module in evaluation mode
    model = Darknet(modelcfg)
    model.load_weights(weightfile)
    model.cuda()
    model.eval()

    # apply transformation on the input images
    transform = transforms.Compose([
        transforms.ToTensor(),
        # transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        # transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # read still images as per the test set
    with open(os.path.join(dataDir, 'test.txt'), 'r') as file:
        lines = file.readlines()
    imgindex = lines[2].rstrip()
    imgpath = os.path.join(dataDir, 'rgb', str(imgindex) + filetype)

    # read image for visualization
    img = cv2.imread(imgpath)
    # cv2.imshow('yolo6d', img), # cv2.waitKey(1)

    # read image using PIL
    img_ = Image.open(imgpath).convert('RGB')
    img_ = img_.resize((img_width, img_height))
    t1 = time.time()

    # transform into Tensor
    img_ = transform(img_)
    data = img_.cuda().unsqueeze(0)  # Variable is deprecated since PyTorch 0.4
    t2 = time.time()

    # Forward pass
    output = model(data).data
    t3 = time.time()

    # Using confidence threshold, eliminate low-confidence predictions
    all_boxes = get_region_boxes2(output, conf_thresh, num_classes)
    # all_boxes = do_detect(model, img, 0.1, 0.4)
    t4 = time.time()

    # For each image, get all the predictions
    allBoxes = []
    boxes = all_boxes[0]
    print(len(boxes) - 1, 'onigiri(s) found')
    for j in range(len(boxes) - 1):

        # ignore the 1st box (NOTE: not sure why it's incorrect)
        box_pr = boxes[j + 1]

        # Denormalize the corner predictions
        corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]),
                                dtype='float32')
        corners2D_pr[:, 0] = corners2D_pr[:, 0] * img_width
        corners2D_pr[:, 1] = corners2D_pr[:, 1] * img_height

        # Compute [R|t] by PnP
        R_pr, t_pr = pnp(
            np.array(np.transpose(
                np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)),
                     dtype='float32'), corners2D_pr,
            np.array(internal_calibration, dtype='float32'))
        Rt_pr = np.concatenate((R_pr, t_pr), axis=1)
        proj_corners_pr = np.transpose(
            compute_projection(corners3D, Rt_pr, internal_calibration))

        allBoxes.append(proj_corners_pr)

    t5 = time.time()

    # Visualize
    if visualize:
        # Projections
        for corner in allBoxes:
            color = (0, 0, 255)
            linewidth = 2
            # cv2.line needs integer pixel coordinates
            pts = corner.astype(int)
            # the 12 edges of the projected 3D bounding cube
            edges = [(0, 1), (0, 2), (0, 4), (1, 3), (1, 5), (2, 3),
                     (2, 6), (3, 7), (4, 5), (4, 6), (5, 7), (6, 7)]
            for a, b in edges:
                img = cv2.line(img, tuple(pts[a]), tuple(pts[b]), color,
                               linewidth)
        cv2.imshow('yolo6d pose', img)
        key = cv2.waitKey(10000) & 0xFF
        if key == 27:
            print('stopping, keyboard interrupt')
            sys.exit()
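
The pnp helper used above is not shown in this snippet; a minimal sketch
consistent with how it is called (3D points, 2D points, camera intrinsics),
assuming OpenCV's iterative solver and no lens distortion:

import cv2
import numpy as np

def pnp(points_3D, points_2D, camera_matrix):
    dist_coeffs = np.zeros((4, 1), dtype='float32')  # assume no distortion
    _, rvec, tvec = cv2.solvePnP(points_3D, points_2D, camera_matrix,
                                 dist_coeffs, flags=cv2.SOLVEPNP_ITERATIVE)
    R, _ = cv2.Rodrigues(rvec)  # axis-angle vector -> 3x3 rotation matrix
    return R, tvec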
class Patch():
    def __init__(self, config, device):
        self.config = config
        self.device = device

        # Create pytorch3D renderer
        self.renderer = self.create_renderer()

        # Datasets
        self.mesh_dataset = MeshDataset(config.mesh_dir, device)
        self.bg_dataset = BackgroundDataset(config.bg_dir,
                                            config.img_size,
                                            max_num=config.num_bgs)
        self.test_bg_dataset = BackgroundDataset(config.test_bg_dir,
                                                 config.img_size,
                                                 max_num=config.num_test_bgs)

        # Initialize adversarial patch, and TV loss
        #self.patch = torch.rand((100, 100, 3), device=device, requires_grad=True)
        self.total_variation = TotalVariation().to(device)
        self.patch = torch.load("data/patch_save_2.pt").to(device)
        self.idx = torch.load("data/idx_save_2.pt").to(device)

        # Yolo model:
        self.dnet = Darknet(self.config.cfgfile)
        self.dnet.load_weights(self.config.weightfile)
        self.dnet = self.dnet.eval()
        self.dnet = self.dnet.to(self.device)

    def attack(self):
        train_bgs = DataLoader(self.bg_dataset,
                               batch_size=self.config.batch_size,
                               shuffle=True,
                               num_workers=1)
        mesh = self.mesh_dataset.meshes[0]
        print(self.patch.shape)
        optimizer = torch.optim.SGD([self.patch], lr=1.0, momentum=0.9)

        for epoch in range(self.config.epochs):
            ep_loss = 0.0
            ep_acc = 0.0
            n = 0.0

            for mesh in self.mesh_dataset:
                # Copy mesh for each camera angle
                mesh = mesh.extend(self.num_angles)
                #mesh_texture = mesh.textures.maps_padded()
                #c = 0
                for bg_batch in train_bgs:
                    #c = c+1
                    #print('iter'+ str(c))
                    bg_batch = bg_batch.to(self.device)

                    optimizer.zero_grad()

                    # Apply patch to mesh texture (hard coded for now)
                    #mesh_texture[:, 575:675, 475:575, :] = self.patch[None]

                    texture_image = mesh.textures.atlas_padded()
                    mesh.textures._atlas_padded[0,
                                                self.idx, :, :, :] = self.patch

                    mesh.textures.atlas = mesh.textures._atlas_padded
                    mesh.textures._atlas_list = None

                    # Render mesh onto background image
                    # images = self.render_mesh_on_bg(mesh, bg)
                    #images = self.render_mesh_on_bg_batch(mesh, bg_batch)
                    rand_translation = torch.randint(-100, 100, (2, ))
                    images = self.render_mesh_on_bg_batch(
                        mesh,
                        bg_batch,
                        x_translation=rand_translation[0].item(),
                        y_translation=rand_translation[1].item())
                    # print('images: ', images.shape)
                    reshape_img = images[:, :, :, :3].permute(0, 3, 1, 2)
                    reshape_img = reshape_img.to(self.device)

                    # Run detection model on images
                    output = self.dnet(reshape_img)

                    # Compute losses:
                    d_loss = dis_loss(output, self.dnet.num_classes,
                                      self.dnet.anchors, self.dnet.num_anchors,
                                      0)
                    acc_loss = calc_acc(output, self.dnet.num_classes,
                                        self.dnet.num_anchors, 0)

                    tv = self.total_variation(self.patch)
                    tv_loss = tv * 2.5

                    loss = d_loss + torch.sum(
                        torch.max(tv_loss,
                                  torch.tensor(0.1).to(self.device)))
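                    # torch.max(tv_loss, 0.1) floors the smoothness term, so
                    # its gradient is zero once tv_loss drops below 0.1 and
                    # only the detection loss keeps driving the patch update.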

                    ep_loss += loss.item()
                    ep_acc += acc_loss.item()

                    n += bg_batch.shape[0]

                    #TODO: Remove Retain Graph
                    loss.backward(retain_graph=True)
                    optimizer.step()

            # Save image and print performance statistics
            patch_save = self.patch.cpu().detach().clone()
            idx_save = self.idx.cpu().detach().clone()
            # torch.save(patch_save, 'patch_save.pt')
            # torch.save(idx_save, 'idx_save.pt')
            #save_image(self.patch.cpu().detach().permute(2, 0, 1), self.config.output + '_{}.png'.format(epoch))
            print('epoch={} loss={} success_rate={}'.format(
                epoch, (ep_loss / n), (ep_acc / n) / self.num_angles))
            self.test_patch()
            #TODO: Pass the variable value
            if epoch % 10 == 0:
                self.test_patch_faster_rcnn(
                    path_to_checkpoint="faster_rcnn/model-180000.pth",
                    dataset_name="coco2017",
                    backbone_name="resnet101",
                    prob_thresh=0.6)

    def test_patch(self):
        angle_success = torch.zeros(self.num_angles)
        total_loss = 0.0
        n = 0.0
        for mesh in self.mesh_dataset:
            mesh = mesh.extend(self.num_angles)
            #mesh_texture = mesh.textures.maps_padded()
            for bg in self.test_bg_dataset:

                #mesh_texture[:, 575:675, 475:575, :] = self.patch[None]
                texture_image = mesh.textures.atlas_padded()
                mesh.textures._atlas_padded[0, self.idx, :, :, :] = self.patch

                mesh.textures.atlas = mesh.textures._atlas_padded
                mesh.textures._atlas_list = None

                #images = self.render_mesh_on_bg(mesh, bg)

                rand_translation = torch.randint(-100, 100, (2, ))
                images = self.render_mesh_on_bg(
                    mesh,
                    bg,
                    x_translation=rand_translation[0].item(),
                    y_translation=rand_translation[1].item())

                reshape_img = images[:, :, :, :3].permute(0, 3, 1, 2)
                reshape_img = reshape_img.to(self.device)
                output = self.dnet(reshape_img)

                d_loss = dis_loss(output, self.dnet.num_classes,
                                  self.dnet.anchors, self.dnet.num_anchors, 0)

                for angle in range(self.num_angles):
                    acc_loss = calc_acc(output[angle], self.dnet.num_classes,
                                        self.dnet.num_anchors, 0)
                    angle_success[angle] += acc_loss.item()

                tv = self.total_variation(self.patch)
                tv_loss = tv * 2.5

                loss = d_loss + torch.sum(
                    torch.max(tv_loss,
                              torch.tensor(0.1).to(self.device)))

                total_loss += loss.item()
                n += 1.0

        unseen_success_rate = angle_success.mean() / len(self.test_bg_dataset)
        print('Unseen bg success rate: ', unseen_success_rate.item())

    def test_patch_faster_rcnn(self, path_to_checkpoint: str,
                               dataset_name: str, backbone_name: str,
                               prob_thresh: float):
        #TODO: Make it for general model(even though this might be difficult)
        dataset_class = DatasetBase.from_name(dataset_name)
        backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
        model = FasterRCNN(
            backbone,
            dataset_class.num_classes(),
            pooler_mode=Config.POOLER_MODE,
            anchor_ratios=Config.ANCHOR_RATIOS,
            anchor_sizes=Config.ANCHOR_SIZES,
            rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
            rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
        model.load(path_to_checkpoint)

        angle_success = torch.zeros(self.num_angles)
        total_loss = 0.0
        with torch.no_grad():
            for mesh in self.mesh_dataset:
                mesh = mesh.extend(self.num_angles)
                #mesh_texture = mesh.textures.maps_padded()
                n = 0.0
                for bg in self.test_bg_dataset:

                    texture_image = mesh.textures.atlas_padded()
                    mesh.textures._atlas_padded[0,
                                                self.idx, :, :, :] = self.patch

                    mesh.textures.atlas = mesh.textures._atlas_padded
                    mesh.textures._atlas_list = None

                    images = self.render_mesh_on_bg(mesh, bg)
                    reshape_img = images[:, :, :, :3].permute(0, 3, 1, 2)
                    #reshape_img = reshape_img.to(self.device)

                    for angle in range(self.num_angles):
                        save_image(reshape_img[angle].cpu().detach(),
                                   "out/tmp.png")
                        image = Image.open("out/tmp.png")  # assumes PIL's Image is imported
                        image_tensor, scale = dataset_class.preprocess(
                            image, Config.IMAGE_MIN_SIDE,
                            Config.IMAGE_MAX_SIDE)

                        detection_bboxes, detection_classes, detection_probs, _ = \
                            model.eval().forward(image_tensor.unsqueeze(dim=0).cuda())
                        detection_bboxes /= scale

                        kept_indices = detection_probs > prob_thresh
                        detection_bboxes = detection_bboxes[kept_indices]
                        detection_classes = detection_classes[kept_indices]
                        detection_probs = detection_probs[kept_indices]

                        draw = ImageDraw.Draw(image)

                        for bbox, cls, prob in zip(detection_bboxes.tolist(),
                                                   detection_classes.tolist(),
                                                   detection_probs.tolist()):
                            color = random.choice([
                                'red', 'green', 'blue', 'yellow', 'purple',
                                'white'
                            ])
                            bbox = BBox(left=bbox[0],
                                        top=bbox[1],
                                        right=bbox[2],
                                        bottom=bbox[3])
                            category = dataset_class.LABEL_TO_CATEGORY_DICT[
                                cls]

                            draw.rectangle(((bbox.left, bbox.top),
                                            (bbox.right, bbox.bottom)),
                                           outline=color)
                            draw.text((bbox.left, bbox.top),
                                      text=f'{category:s} {prob:.3f}',
                                      fill=color)
                        if angle == 0:
                            image.save("out/images/test_%d.png" % n)

                        #angle_success[angle] += success
                    save_image(reshape_img[0].cpu().detach(),
                               "rendered_output.png")
                    n += 1.0

        unseen_success_rate = angle_success.mean() / len(self.test_bg_dataset)
        print('Unseen model (faster_rcnn) success rate: ',
              unseen_success_rate.item())

    def create_renderer(self):
        self.num_angles = self.config.num_angles
        azim = torch.linspace(-1 * self.config.angle_range,
                              self.config.angle_range, self.num_angles)

        R, T = look_at_view_transform(dist=1.0, elev=0, azim=azim)

        T[:, 1] = -85
        T[:, 2] = 200
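        # Overriding T puts every camera at the same height and distance;
        # only the azimuth differs across the num_angles views.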

        cameras = FoVPerspectiveCameras(device=self.device, R=R, T=T)

        raster_settings = RasterizationSettings(
            image_size=self.config.img_size,
            blur_radius=0.0,
            faces_per_pixel=1,
        )

        lights = PointLights(device=self.device, location=[[0.0, 85, 100.0]])

        renderer = MeshRenderer(rasterizer=MeshRasterizer(
            cameras=cameras, raster_settings=raster_settings),
                                shader=HardPhongShader(device=self.device,
                                                       cameras=cameras,
                                                       lights=lights))
        return renderer

    def render_mesh_on_bg(self,
                          mesh,
                          bg_img,
                          location=None,
                          x_translation=0,
                          y_translation=0):
        images = self.renderer(mesh)
        bg = bg_img.unsqueeze(0)
        bg_shape = bg.shape
        new_bg = torch.zeros(bg_shape[2], bg_shape[3], 3)
        new_bg[:, :, 0] = bg[0, 0, :, :]
        new_bg[:, :, 1] = bg[0, 1, :, :]
        new_bg[:, :, 2] = bg[0, 2, :, :]

        human = images[:, ..., :3]

        human_size = self.renderer.rasterizer.raster_settings.image_size

        if location is None:
            dH = bg_shape[2] - human_size
            dW = bg_shape[3] - human_size
            location = (dW // 2 + x_translation,
                        dW - (dW // 2) - x_translation,
                        dH // 2 + y_translation,
                        dH - (dH // 2) - y_translation)

        contour = torch.where((human == 1).cpu(),
                              torch.zeros(1).cpu(),
                              torch.ones(1).cpu())
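        # contour is 0 where the renderer left pure white (no mesh) and 1
        # where the mesh was drawn; the final torch.where below keeps the
        # background image wherever contour is 0.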
        new_contour = torch.zeros(self.num_angles, bg_shape[2], bg_shape[3], 3)

        new_contour[:, :, :, 0] = F.pad(contour[:, :, :, 0],
                                        location,
                                        "constant",
                                        value=0)
        new_contour[:, :, :, 1] = F.pad(contour[:, :, :, 1],
                                        location,
                                        "constant",
                                        value=0)
        new_contour[:, :, :, 2] = F.pad(contour[:, :, :, 2],
                                        location,
                                        "constant",
                                        value=0)

        new_human = torch.zeros(self.num_angles, bg_shape[2], bg_shape[3], 3)
        new_human[:, :, :, 0] = F.pad(human[:, :, :, 0],
                                      location,
                                      "constant",
                                      value=0)
        new_human[:, :, :, 1] = F.pad(human[:, :, :, 1],
                                      location,
                                      "constant",
                                      value=0)
        new_human[:, :, :, 2] = F.pad(human[:, :, :, 2],
                                      location,
                                      "constant",
                                      value=0)

        final = torch.where((new_contour == 0).cpu(), new_bg.cpu(),
                            new_human.cpu())
        return final

    def render_mesh_on_bg_batch(self,
                                mesh,
                                bg_imgs,
                                location=None,
                                x_translation=0,
                                y_translation=0):
        num_bgs = bg_imgs.shape[0]

        images = self.renderer(mesh)  # (num_angles, 416, 416, 4)

        save_image(images[0, ..., :3].cpu().detach().permute(2, 0, 1),
                   "rendered_output_here.png")
        images = torch.cat(num_bgs * [images],
                           dim=0)  # (num_angles * num_bgs, 416, 416, 4)

        bg_shape = bg_imgs.shape

        # bg_imgs: (num_bgs, 3, 416, 416) -> (num_bgs, 416, 416, 3)
        bg_imgs = bg_imgs.permute(0, 2, 3, 1)

        # bg_imgs: (num_bgs, 416, 416, 3) -> (num_bgs * num_angles, 416, 416, 3)
        bg_imgs = bg_imgs.repeat_interleave(repeats=self.num_angles, dim=0)

        # human: RGB channels of render (num_angles * num_bgs, 416, 416, 3)
        human = images[:, ..., :3]
        human_size = self.renderer.rasterizer.raster_settings.image_size

        if location is None:
            dH = bg_shape[2] - human_size
            dW = bg_shape[3] - human_size
            location = (dW // 2 + x_translation,
                        dW - (dW // 2) - x_translation,
                        dH // 2 + y_translation,
                        dH - (dH // 2) - y_translation)

        contour = torch.where((human == 1),
                              torch.zeros(1).to(self.device),
                              torch.ones(1).to(self.device))
        new_contour = torch.zeros(self.num_angles * num_bgs,
                                  bg_shape[2],
                                  bg_shape[3],
                                  3,
                                  device=self.device)

        new_contour[:, :, :, 0] = F.pad(contour[:, :, :, 0],
                                        location,
                                        "constant",
                                        value=0)
        new_contour[:, :, :, 1] = F.pad(contour[:, :, :, 1],
                                        location,
                                        "constant",
                                        value=0)
        new_contour[:, :, :, 2] = F.pad(contour[:, :, :, 2],
                                        location,
                                        "constant",
                                        value=0)

        new_human = torch.zeros(self.num_angles * num_bgs,
                                bg_shape[2],
                                bg_shape[3],
                                3,
                                device=self.device)
        new_human[:, :, :, 0] = F.pad(human[:, :, :, 0],
                                      location,
                                      "constant",
                                      value=0)
        new_human[:, :, :, 1] = F.pad(human[:, :, :, 1],
                                      location,
                                      "constant",
                                      value=0)
        new_human[:, :, :, 2] = F.pad(human[:, :, :, 2],
                                      location,
                                      "constant",
                                      value=0)

        # output: (num_angles * num_bgs, 416, 416, 3)
        final = torch.where((new_contour == 0).cpu(), bg_imgs.cpu(),
                            new_human.cpu())
        return final
def main(args):
    '''main
    '''
    # Image preprocessing
    transform = transforms.Compose([transforms.ToTensor()])

    num_classes = 80
    yolov3 = Darknet(args.cfg_file)
    yolov3.load_weights(args.weights_file)
    yolov3.net_info["height"] = args.reso
    inp_dim = int(yolov3.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32
    print("yolo-v3 network successfully loaded")

    attribute_size = [15, 7, 3, 5, 8, 4, 15, 7, 3, 5, 3, 3, 4]

    encoder = EncoderClothing(args.embed_size, device, args.roi_size,
                              attribute_size)

    yolov3.to(device)
    encoder.to(device)

    yolov3.eval()
    encoder.eval()

    encoder.load_state_dict(torch.load(args.encoder_path))

    # cap = cv2.VideoCapture('demo2.mp4')

    cap = cv2.VideoCapture(0)
    assert cap.isOpened(), "Cannot capture source"

    frames = 0
    start = time.time()

    counter = Counter()
    color_stream = list()
    pattern_stream = list()
    gender_stream = list()
    season_stream = list()
    class_stream = list()
    sleeves_stream = list()

    ret, frame = cap.read()
    if ret:

        image, orig_img, dim = prep_image2(frame, inp_dim)
        im_dim = torch.FloatTensor(dim).repeat(1, 2)

        image_tensor = image.to(device)
        # these lines must stay inside the "if ret:" block, since they use
        # image_tensor / orig_img / dim defined there
        detections = yolov3(image_tensor, device, True)

        os.system("clear")
        cv2.imshow("frame", orig_img)
        cv2.moveWindow("frame", 50, 50)
        text_img = np.zeros((200, 1750, 3))
        cv2.imshow("text", text_img)
        cv2.moveWindow("text", 50, dim[1] + 110)

    while cap.isOpened():

        ret, frame = cap.read()
        if ret:

            image, orig_img, dim = prep_image2(frame, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            image_tensor = image.to(device)
            im_dim = im_dim.to(device)

            # Generate a caption from the image
            # prediction mode for yolo-v3
            detections = yolov3(image_tensor, device, True)
            detections = write_results(
                detections,
                args.confidence,
                device,
                num_classes,
                nms=True,
                nms_conf=args.nms_thresh,
            )

            # original image dimension --> im_dim

            # view_image(detections)
            text_img = np.zeros((200, 1750, 3))

            if type(detections) != int:
                if detections.shape[0]:
                    bboxs = detections[:, 1:5].clone()

                    im_dim = im_dim.repeat(detections.shape[0], 1)
                    scaling_factor = torch.min(inp_dim / im_dim,
                                               1)[0].view(-1, 1)

                    detections[:, [1, 3]] -= (inp_dim - scaling_factor *
                                              im_dim[:, 0].view(-1, 1)) / 2
                    detections[:, [2, 4]] -= (inp_dim - scaling_factor *
                                              im_dim[:, 1].view(-1, 1)) / 2

                    detections[:, 1:5] /= scaling_factor

                    small_object_ratio = \
                        torch.FloatTensor(detections.shape[0])

                    for i in range(detections.shape[0]):
                        detections[i, [1, 3]] = torch.clamp(
                            detections[i, [1, 3]], 0.0, im_dim[i, 0])
                        detections[i, [2, 4]] = torch.clamp(
                            detections[i, [2, 4]], 0.0, im_dim[i, 1])

                        object_area = (detections[i, 3] - detections[i, 1]) * (
                            detections[i, 4] - detections[i, 2])
                        orig_img_area = im_dim[i, 0] * im_dim[i, 1]
                        small_object_ratio[i] = object_area / orig_img_area

                    detections = detections[small_object_ratio > 0.05]
                    im_dim = im_dim[small_object_ratio > 0.05]

                    if detections.size(0) > 0:
                        feature = yolov3.get_feature()
                        feature = feature.repeat(detections.size(0), 1, 1, 1)

                        orig_img_dim = im_dim[:, 1:]
                        orig_img_dim = orig_img_dim.repeat(1, 2)

                        scaling_val = 16
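                        # assumption: the reused Darknet feature map has
                        # stride 16, so boxes are divided by 16 to land in
                        # feature-map coordinates for RoIAlign below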

                        bboxs /= scaling_val
                        bboxs = bboxs.round()
                        bboxs_index = torch.arange(bboxs.size(0),
                                                   dtype=torch.int)
                        bboxs_index = bboxs_index.to(device)
                        bboxs = bboxs.to(device)

                        roi_align = RoIAlign(args.roi_size,
                                             args.roi_size,
                                             transform_fpcoor=True).to(device)
                        roi_features = roi_align(feature, bboxs, bboxs_index)

                        outputs = encoder(roi_features)

                        for i in range(detections.shape[0]):

                            sampled_caption = []
                            # attr_fc = outputs[]
                            for j in range(len(outputs)):
                                max_index = torch.max(outputs[j][i].data, 0)[1]
                                word = attribute_pool[j][max_index]
                                sampled_caption.append(word)

                            sentence = " ".join(sampled_caption)

                            sys.stdout.write("                     " + "\r")

                            sys.stdout.write(sentence + "            " + "\r")
                            sys.stdout.flush()
                            write(
                                detections[i],
                                orig_img,
                                sentence,
                                i + 1,
                                coco_classes,
                                colors,
                            )

                            cv2.putText(
                                text_img,
                                sentence,
                                (0, i * 40 + 35),
                                cv2.FONT_HERSHEY_PLAIN,
                                2,
                                [255, 255, 255],
                                1,
                            )

            cv2.imshow("frame", orig_img)
            cv2.imshow("text", text_img)

            key = cv2.waitKey(1)
            if key & 0xFF == ord("q"):
                break
            if key & 0xFF == ord("w"):
                wait(0)
            if key & 0xFF == ord("s"):
                continue
            frames += 1
            # print("FPS of the video is {:5.2f}".
            # format( frames / (time.time() - start)))

        else:
            break
class Car_DC():
    def __init__(self,
                 src_dir,
                 dst_dir,
                 car_cfg_path=local_car_cfg_path,
                 car_det_weights_path=local_car_det_weights_path,
                 inp_dim=768,
                 prob_th=0.2,
                 nms_th=0.4,
                 num_classes=1):
        """
        model initialization
        """
        # super parameters
        self.inp_dim = inp_dim
        self.prob_th = prob_th
        self.nms_th = nms_th
        self.num_classes = num_classes
        self.dst_dir = dst_dir

        # clear dst_dir
        if os.path.exists(self.dst_dir):
            for x in os.listdir(self.dst_dir):
                if x.endswith('.jpg'):
                    os.remove(self.dst_dir + '/' + x)
        else:
            os.makedirs(self.dst_dir)

        # initialize vehicle detection model
        self.detector = Darknet(car_cfg_path)
        self.detector.load_weights(car_det_weights_path)
        # set input dimension of image
        self.detector.net_info['height'] = self.inp_dim
        self.detector.to(device)
        self.detector.eval()  # evaluation mode
        print('=> car detection model initiated.')

        # initiate multilabel classifier
        self.classifier = Car_Classifier(num_cls=19,
                                         model_path=local_model_path)

        # initiate imgs_path
        self.imgs_path = [os.path.join(src_dir, x) for x in os.listdir(
            src_dir) if x.endswith('.jpg')]

    def cls_draw_bbox(self, output, orig_img):
        """
        1. predict vehicle's attributes based on bbox of vehicle
        2. draw bbox to orig_img
        """
        labels = []
        pt_1s = []
        pt_2s = []

        # 1
        for det in output:
            # rectangle points
            pt_1 = tuple(det[1:3].int())  # the left-up point
            pt_2 = tuple(det[3:5].int())  # the right down point
            pt_1s.append(pt_1)
            pt_2s.append(pt_2)

            # turn BGR back to RGB
            ROI = Image.fromarray(
                orig_img[pt_1[1]: pt_2[1],
                         pt_1[0]: pt_2[0]][:, :, ::-1])
            # ROI.show()

            # call classifier to predict
            car_color, car_direction, car_type = self.classifier.predict(ROI)
            label = str(car_color + ' ' + car_direction + ' ' + car_type)
            labels.append(label)
            print('=> predicted label: ', label)

        # 2
        color = (0, 215, 255)
        for i, det in enumerate(output):
            pt_1 = pt_1s[i]
            pt_2 = pt_2s[i]

            # draw bounding box
            cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=2)

            # get str text size
            txt_size = cv2.getTextSize(
                labels[i], cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
            # pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] + txt_size[1] + 5
            pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] - txt_size[1] - 5

            # draw text background rect
            cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=-1)  # text

            # draw text
            cv2.putText(orig_img, labels[i], (pt_1[0], pt_1[1]),  # pt_1[1] + txt_size[1] + 4
                        cv2.FONT_HERSHEY_PLAIN, 2, [225, 255, 255], 2)

    def process_predict(self,
                        prediction,
                        prob_th,
                        num_cls,
                        nms_th,
                        inp_dim,
                        orig_img_size):
        """
        processing detections
        """
        scaling_factor = min([inp_dim / float(x)
                              for x in orig_img_size])  # W, H scaling factor
        output = post_process(prediction,
                              prob_th,
                              num_cls,
                              nms=True,
                              nms_conf=nms_th,
                              CUDA=True)  # post-process such as nms

        if type(output) != int:
            output[:, [1, 3]] -= (inp_dim - scaling_factor *
                                  orig_img_size[0]) / 2.0  # x, w
            output[:, [2, 4]] -= (inp_dim - scaling_factor *
                                  orig_img_size[1]) / 2.0  # y, h
            output[:, 1:5] /= scaling_factor
            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(
                    output[i, [1, 3]], 0.0, orig_img_size[0])
                output[i, [2, 4]] = torch.clamp(
                    output[i, [2, 4]], 0.0, orig_img_size[1])
        return output

    def detect_classify(self):
        """
        detect and classify
        """
        for x in self.imgs_path:
            # read image data
            img = Image.open(x)
            img2det = process_img(img, self.inp_dim)
            img2det = img2det.to(device)  # put image data to device

            # vehicle detection
            prediction = self.detector.forward(img2det, CUDA=True)

            # calculating scaling factor
            orig_img_size = list(img.size)
            output = self.process_predict(prediction,
                                          self.prob_th,
                                          self.num_classes,
                                          self.nms_th,
                                          self.inp_dim,
                                          orig_img_size)

            orig_img = cv2.cvtColor(np.asarray(
                img), cv2.COLOR_RGB2BGR)  # RGB => BGR
            if type(output) != int:
                self.cls_draw_bbox(output, orig_img)
                dst_path = self.dst_dir + '/' + os.path.split(x)[1]
                if not os.path.exists(dst_path):
                    cv2.imwrite(dst_path, orig_img)
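
A minimal usage sketch for Car_DC above; the directory paths are
placeholders:

dc = Car_DC(src_dir='./test_imgs', dst_dir='./test_result')
dc.detect_classify()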
Example #28
# start Flask application
app = Flask(__name__)
CORS(app)

if METHOD == 'yolo_608_coco':  # '==' not 'is': compare values, not identity
    MODEL = Darknet(YOLOV3_608_CFG_PATH)

elif METHOD == 'yolo_416_coco':
    MODEL = Darknet(YOLOV3_416_CFG_PATH)

else:
    raise Exception(f'Undefined method: "{METHOD}"')

MODEL.load_weights(YOLOV3_WEIGHTS_PATH)
MODEL.eval()

assert os.path.exists(
    PROJECT_PATH
), f'{PROJECT_PATH} does not exist. Consider cloning the repo with git.'

# if there is no folder for archiving, create
if not os.path.exists(ARCHIVE_PATH):
    os.makedirs(ARCHIVE_PATH)


def show_image_w_bboxes_for_server(img_path, model, orientation):
    '''
    Reads an image from the disk and applies a detection algorithm specified in model.

    Arguments
    '''
Example #29
def selection(x, rec, privacy, detected_obj):
    if x[2] in privacy:
        print('[DETECT] {}'.format(x[2]))
        detected_obj.append(x[2])
        up = x[0][1].item()
        left = x[0][0].item()
        height = (x[1][1] - up).item()
        width = (x[1][0] - left).item()
        rec.append([up, left, height, width])

    return rec, detected_obj


if __name__ == '__main__':
    from utils.util import load_classes, write_results
    from darknet import Darknet
    from utils.preprocess import prep_image, inp_to_image
    image = cv2.imread('imgs/dog.jpg')
    conf = 0.5
    nms = 0.4
    rec = []
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    detecter = Darknet('cfgs/yolov3.cfg')
    detecter.load_weights('weights/yolov3.weights')
    detecter.to(device)
    detecter.eval()

    rec = yolo_detecter(image, detecter, conf, nms, rec, device)
    print(rec)
Example #30
print("Loading network.....")
model = Darknet(args.cfgfile)
model.load_weights(args.weightsfile)
print("Network successfully loaded")

model.net_info["height"] = args.reso
inp_dim = int(model.net_info["height"])
assert inp_dim % 32 == 0
assert inp_dim > 32

# If there's a GPU available, put the model on GPU
if CUDA:
    model.cuda()

# Set the model in evaluation mode
model.eval()

read_dir = time.time()
# Detection phase
try:
    imlist = [
        osp.join(osp.realpath('.'), images, img) for img in os.listdir(images)
    ]
except NotADirectoryError:
    imlist = []
    imlist.append(osp.join(osp.realpath('.'), images))
except FileNotFoundError:
    print("No file or directory with the name {}".format(images))
    exit()

if not os.path.exists(args.det):
    os.makedirs(args.det)
Example #31
def test(datacfg, cfgfile, weightfile, imgfile):

    # ******************************************#
    #			PARAMETERS PREPARATION			#
    # ******************************************#

    #parse configuration files
    options = read_data_cfg(datacfg)
    meshname = options['mesh']
    name = options['name']

    #Parameters for the network
    seed = int(time.time())
    gpus = '0'  # define gpus to use
    test_width = 544  # define test image size
    test_height = 544
    torch.manual_seed(seed)  # seed torch random
    use_cuda = True
    if use_cuda:
        os.environ['CUDA_VISIBLE_DEVICES'] = gpus
        torch.cuda.manual_seed(seed)  # seed cuda random
    conf_thresh = 0.1
    num_classes = 1

    # Read object 3D model, get 3D Bounding box corners
    mesh = MeshPly(meshname)
    vertices = np.c_[np.array(mesh.vertices),
                     np.ones((len(mesh.vertices), 1))].transpose()
    corners3D = get_3D_corners(vertices)
    diam = float(options['diam'])

    # now configure camera intrinsics
    internal_calibration = get_camera_intrinsic()

    # ******************************************#
    #	NETWORK CREATION						#
    # ******************************************#

    # Create the network based on cfg file
    model = Darknet(cfgfile)
    model.print_network()
    model.load_weights(weightfile)
    model.cuda()
    model.eval()

    # ******************************************#
    #	INPUT IMAGE PREPARATION FOR NN 			#
    # ******************************************#

    # Now prepare image: convert to RGB, resize, transform to Tensor,
    # and move to CUDA
    img = Image.open(imgfile).convert('RGB')
    ori_size = img.size  # store original size
    img = img.resize((test_width, test_height))
    t1 = time.time()
    img = transforms.Compose([
        transforms.ToTensor(),
    ])(img)  #.float()
    # Variable is deprecated since PyTorch 0.4 and gradients are not
    # needed for inference
    img = img.unsqueeze(0)  # add a fake batch dimension
    img = img.cuda()

    # ******************************************#
    #	PASS IT TO NETWORK AND GET PREDICTION	#
    # ******************************************#

    # Forward pass
    output = model(img).data
    #print("Output Size: {}".format(output.size(0)))
    t2 = time.time()

    # ******************************************#
    #		EXTRACT PREDICTIONS 				#
    # ******************************************#

    # Using confidence threshold, eliminate low-confidence predictions
    # and get only boxes over the confidence threshold
    all_boxes = get_region_boxes(output, conf_thresh, num_classes)

    boxes = all_boxes[0]

    # iterate through boxes to find the one with highest confidence
    best_conf_est = -1
    best_box_index = -1
    for j in range(len(boxes)):
        # the confidence is in index = 18
        if (boxes[j][18] > best_conf_est):
            box_pr = boxes[j]  # get bounding box
            best_conf_est = boxes[j][18]
            best_box_index = j
    #print("Best box is: {} and 2D prediction is {}".format(best_box_index,box_pr))

    # Denormalize the corner predictions
    # These are the predicted 2D points with which a bounding cube can be drawn
    corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]), dtype='float32')
    corners2D_pr[:, 0] = corners2D_pr[:, 0] * ori_size[0]  # Width
    corners2D_pr[:, 1] = corners2D_pr[:, 1] * ori_size[1]  # Height
    t3 = time.time()

    # **********************************************#
    #	GET OBJECT POSE ESTIMATION					#
    #  Remember the problem in 6D Pose estimation 	#
    #  is exactly to estimate the pose - position 	#
    #  and orientation of the object of interest 	#
    #  with reference to a camera frame. That is 	#
    #  why although the 2D projection of the 3D 	#
    #  bounding cube are ready, we still need to  	#
    #  compute the rotation matrix -orientation- 	#
    #  and a translation vector -position- for the  #
    #  object 										#
    #											 	#
    # **********************************************#

    # get rotation matrix and transform
    R_pr, t_pr = pnp(
        np.array(np.transpose(
            np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)),
                 dtype='float32'), corners2D_pr,
        np.array(internal_calibration, dtype='float32'))
    t4 = time.time()

    # ******************************************#
    #	DISPLAY IMAGE WITH BOUNDING CUBE		#
    # ******************************************#

    # Reload Original img
    img = cv2.imread(imgfile)

    # create a window to display image
    wname = "Prediction"
    cv2.namedWindow(wname)
    # draw each predicted 2D point
    for i, (x, y) in enumerate(corners2D_pr):
        # get colors to draw the lines
        col1 = 28 * i
        col2 = 255 - (28 * i)
        col3 = np.random.randint(0, 256)
        cv2.circle(img, (int(x), int(y)), 3, (col1, col2, col3), -1)
        cv2.putText(img, str(i), (int(x) + 5, int(y) + 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (col1, col2, col3), 1)

    # Get each predicted point and the centroid as integer pixel
    # coordinates (cv2.line requires ints)
    p1 = corners2D_pr[1].astype(int)
    p2 = corners2D_pr[2].astype(int)
    p3 = corners2D_pr[3].astype(int)
    p4 = corners2D_pr[4].astype(int)
    p5 = corners2D_pr[5].astype(int)
    p6 = corners2D_pr[6].astype(int)
    p7 = corners2D_pr[7].astype(int)
    p8 = corners2D_pr[8].astype(int)
    center = corners2D_pr[0].astype(int)

    # Draw cube lines around detected object
    # draw front face
    line_point = 3
    cv2.line(img, (p1[0], p1[1]), (p2[0], p2[1]), (0, 255, 0), line_point)
    cv2.line(img, (p2[0], p2[1]), (p4[0], p4[1]), (0, 255, 0), line_point)
    cv2.line(img, (p4[0], p4[1]), (p3[0], p3[1]), (0, 255, 0), line_point)
    cv2.line(img, (p3[0], p3[1]), (p1[0], p1[1]), (0, 255, 0), line_point)

    # draw back face
    cv2.line(img, (p5[0], p5[1]), (p6[0], p6[1]), (0, 255, 0), line_point)
    cv2.line(img, (p7[0], p7[1]), (p8[0], p8[1]), (0, 255, 0), line_point)
    cv2.line(img, (p6[0], p6[1]), (p8[0], p8[1]), (0, 255, 0), line_point)
    cv2.line(img, (p5[0], p5[1]), (p7[0], p7[1]), (0, 255, 0), line_point)

    # draw right face
    cv2.line(img, (p2[0], p2[1]), (p6[0], p6[1]), (0, 255, 0), line_point)
    cv2.line(img, (p1[0], p1[1]), (p5[0], p5[1]), (0, 255, 0), line_point)

    # draw left face
    cv2.line(img, (p3[0], p3[1]), (p7[0], p7[1]), (0, 255, 0), line_point)
    cv2.line(img, (p4[0], p4[1]), (p8[0], p8[1]), (0, 255, 0), line_point)

    # Show the image and wait key press
    cv2.imshow(wname, img)
    cv2.waitKey()

    print("Rotation: {}".format(R_pr))
    print("Translation: {}".format(t_pr))
    print(" Predict time: {}".format(t2 - t1))
    print(" 2D Points extraction time: {}".format(t3 - t2))
    print(" Pose calculation time: {}:".format(t4 - t3))
    print(" Total time: {}".format(t4 - t1))
    print("Press any key to close.")