Ejemplo n.º 1
def get_positive_features(train_path_pos, feature_params):
    """
    FUNC: Return HoG features for all positive training examples (faces)
        stored as '*.jpg' files in 'train_path_pos'. Every image contributes
        two rows: one for the image itself and one for its horizontal mirror.
    ARG:
        - train_path_pos: a string; directory that contains 36x36 images
                          of faces.
        - feature_params: a dict; with keys,
                          > template_size: int (probably 36); the number of
                            pixels spanned by each train/test template.
                            (Not read by this function.)
                          > hog_cell_size: int (default 6); the number of pixels
                            in each HoG cell.
                          Template size should be evenly divisible by hog_cell_size.
                          Smaller HoG cell sizes tend to work better, but they
                          make things slower because the feature dimensionality
                          increases and more importantly the step size of the
                          classifier decreases at test time.
    RET:
        - features_pos: (N,D) ndarray; N is twice the number of '*.jpg' files
                        (original + mirrored) and D is the template
                        dimensionality, which would be,
                        (template_size/hog_cell_size)^2 * 31,
                        if you're using default HoG parameters.
                        When no image is found the result is an empty array
                        of shape (0,).
    """
    print("START: get_positive_features")

    cell_size = feature_params['hog_cell_size']

    features_pos = []
    for filename in glob.glob(train_path_pos + '/*.jpg'):
        img = imread(filename)
        # Reverse the column axis to obtain a left-right mirrored face.
        img_mirror = img[:, ::-1]
        # Store both descriptors; without these appends the function would
        # always return an empty array.
        features_pos.append(hog(img, cell_size=cell_size).flatten())
        features_pos.append(hog(img_mirror, cell_size=cell_size).flatten())

    features_pos = np.asarray(features_pos)

    print("DONE: get_positive_features")
    print("Shape:", features_pos.shape)

    return features_pos
Ejemplo n.º 2
def calcHog(im,points,patchsize,ncells,idx):
    # Computes one HOG descriptor per point in points[idx], using a square
    # patch of side `patchsize` placed relative to each point, and keeps the
    # descriptors (and indices) of the points that pass the bounds check.
    # NOTE(review): the visible excerpt ends on the final `for` header, so the
    # rest of the function (including what it returns) is not shown here.
    imsz = im.shape
    # Half of the patch size after rounding odd sizes down to an even value.
    halfsize = (np.float32(patchsize) - np.float32(patchsize%2))/2
    valid_points = []
    # points[idx] is indexed as [0][j] and [1][j] below, i.e. two parallel
    # coordinate sequences.
    npoints = len(points[idx][0])
    # roi = [first coordinate, second coordinate, width, height]; the first
    # two entries are overwritten for every point.
    roi = [1,1,patchsize,patchsize]
    validPointIdx = np.zeros((1,npoints),np.uint32)
    validPointCount = np.zeros((1),np.uint32)
    # One row per candidate point, 36 values per cell.
    hogdim = [npoints,36*ncells**2]
    hogvalues = np.zeros(hogdim,np.float32)
    for j in range(npoints):
        roi[0:2] = [np.uint32(points[idx][0][j]-halfsize),np.uint32(points[idx][1][j]-halfsize)]
        # NOTE(review): `all(roi[0:2]) >= 1` compares the boolean returned by
        # all() with 1, so it only checks that both values are non-zero; a
        # per-element `>= 1` test was probably intended - confirm.
        if all(roi[0:2]) >= 1 and roi[1]+roi[3]-1 <= imsz[0] and roi[0]+roi[2]-1 <= imsz[1]:
            im_tmp = im[(roi[0]-1):(roi[0]+roi[2]),(roi[1]-1):(roi[1]+roi[3])]
            # NOTE(review): `cellsize` is not defined in the visible code (the
            # parameters are `patchsize` and `ncells`) - verify where it comes from.
            hogi = hog.hog(im_tmp,cellsize,'DalalTriggs')
            validPointCount = validPointCount + 1;
            # validPointCount is a 1-element array, so these are array-valued indices.
            hogvalues[validPointCount-1,:] = hogi[:]
            validPointIdx[0,validPointCount-1] = j  #store valid indices
    # Trim the preallocated buffers down to the number of accepted points.
    hogvalues = hogvalues[0:validPointCount,:]
    # NOTE(review): validPointIdx has shape (1, npoints); this slices its first
    # axis rather than the point axis - confirm this is intended.
    validPointIdx = validPointIdx[0:validPointCount]
    for m in range(np.size(points[idx][0])):
Ejemplo n.º 3
def extract_hog(img_path,data_path,cellsize=8,hog_orientations = 9):	
	# Walks every directory under `img_path`, loads each file found there as a
	# grayscale image, rescales it and computes a HOG descriptor with
	# `cellsize` pixels per cell and `hog_orientations` orientation bins.
	# NOTE(review): several lines appear to be missing from this excerpt (see
	# the notes below), and nothing is done with `hog_descriptor` in the
	# visible code.
	subdirs = [x[0] for x in os.walk(img_path,True)]
# 	cell_size = (cellsize,cellsize)
# 	block_size = (2,2)
# 	nbins=hog_orientations
	for subdir in subdirs:
		print(' ')
		sys.stdout.write("extracting hog " + subdir.split("/")[-1])
		# Output directory is named after the last path component of `subdir`.
		hog_data_subdir = data_path+'hog/'+subdir.split('/')[-1]+'/'
		# NOTE(review): this `if` has no body in the visible text; presumably
		# the directory was created here - confirm against the original.
		if not os.path.exists(hog_data_subdir):
		# File names directly inside `subdir` (first entry yielded by os.walk).
		imgs = [x[2] for x in os.walk(subdir,True)][0]
		num_files = len(imgs)
		count = 0
		for img in imgs:
			# `count` is reset after roughly every tenth of the files.
			if count >= round(num_files/10):
				count = 0
# 			Do not use RGB here - it fails catastrophically ("avalanche")!
#			RGB_f=misc.imread(subdir+'/'+img,mode='RGB')			
# 			height,width,_=RGB_f.shape
# 			ratio = 1.0*max(height,width)/300
# 			RGB_f = misc.imresize(RGB_f,(round(height/ratio),round(width/ratio)),mode='RGB').astype(float)
# 			hog_descriptor = hog(RGB_f,cellsize,n_orientations=32,variant = 'DalalTriggs')

			# NOTE(review): the image is read twice; the cv2 result overwrites
			# the misc.imread result.
			gray_f = misc.imread(subdir+'/'+img,True)
			gray_f = cv2.imread(subdir+'/'+img,0)
			# NOTE(review): `height` and `width` are only assigned in the
			# commented-out code above, never in the visible live code.
			# Scale factor that maps the longer image side to 300.
			ratio = 1.0*max(height,width)/300	
			gray_f = misc.imresize(gray_f,(round(height/ratio),round(width/ratio))).astype(float)
			gray_f = cv2.resize(gray_f,(round(height/ratio),round(width/ratio)))
			hog_descriptor = hog(gray_f,cellsize,n_orientations=hog_orientations)			
# 			hogcv = cv2.HOGDescriptor(_winSize=(gray_f.shape[1] // cell_size[1] * cell_size[1],
#                                   gray_f.shape[0] // cell_size[0] * cell_size[0]),
#                         _blockSize=(block_size[1] * cell_size[1],
#                                     block_size[0] * cell_size[0]),
#                         _blockStride=(cell_size[1], cell_size[0]),
#                         _cellSize=(cell_size[1], cell_size[0]),
#                         _nbins=nbins)
# 			n_cells = (gray_f.shape[0] // cell_size[0], gray_f.shape[1] // cell_size[1])
# 			hog_descriptor = hogcv.compute(gray_f).reshape(n_cells[1] - block_size[1] + 1,n_cells[0] - block_size[0] + 1,block_size[0], block_size[1], nbins).transpose((1, 0, 2, 3, 4))
# 			hog_descriptor = hog_descriptor.reshape((hog_descriptor.shape[0],hog_descriptor.shape[1],-1)).astype(float)			
			count += 1
Ejemplo n.º 4
def get_random_negative_features(non_face_scn_path, feature_params,
                                 num_samples):
    """
    FUNC: Return HoG features of randomly cropped negative training examples
        (non-faces) taken from the '*.jpg' images in 'non_face_scn_path'.
        Colour images are converted to grayscale, because the positive
        training data is only available in grayscale. Each sample is a single
        template_size x template_size crop at the original image scale.
    ARG:
        - non_face_scn_path: a string; directory contains many images which have no
                             faces in them.
        - feature_params: a dict; with keys,
                          > template_size: int (probably 36); the number of
                            pixels spanned by each train/test template.
                          > hog_cell_size: int (default 6); the number of pixels
                            in each HoG cell.
                          Template size should be evenly divisible by hog_cell_size.
                          Smaller HoG cell sizes tend to work better, but they
                          make things slower because the feature dimensionality
                          increases and more importantly the step size of the
                          classifier decreases at test time.
        - num_samples: int; number of random negative crops to extract.
    RET:
        - features_neg: (num_samples,D) ndarray; D is the template
                        dimensionality, which would be,
                        (template_size/hog_cell_size)^2 * 31,
                        if you're using default HoG parameters.
        - neg_examples: placeholder, always the string 'TODO'.
    RAISES:
        - ValueError: if 'non_face_scn_path' contains no '*.jpg' file.
    """
    # get image filenames
    image_paths = glob(os.path.join(non_face_scn_path, '*.jpg'))
    if not image_paths:
        raise ValueError(
            "no '*.jpg' images found in {!r}".format(non_face_scn_path))

    # initialization
    hog_cell_size = feature_params['hog_cell_size']
    template_size = feature_params['template_size']
    D = (template_size // hog_cell_size) ** 2 * 31
    features_neg = np.zeros((num_samples, D))
    neg_examples = 'TODO'

    # choice(n, k) draws k indices from 0..n-1, so passing the full count
    # lets every image (including the last one) be selected.
    img_num = np.random.choice(len(image_paths), num_samples)
    for i in tqdm(range(num_samples)):
        image = imread(image_paths[img_num[i]])
        # Only colour images need conversion; 2-D images are already gray.
        if image.ndim == 3:
            image = color.rgb2gray(image)
        # Top-left corner of a crop that fits entirely inside the image.
        start_i = random.randint(0, image.shape[0] - template_size)
        start_j = random.randint(0, image.shape[1] - template_size)
        patch = image[start_i:start_i + template_size,
                      start_j:start_j + template_size]

        hog_feats = hog(patch, hog_cell_size)
        features_neg[i, :] = np.reshape(hog_feats, D)

    return features_neg, neg_examples
Ejemplo n.º 5
def test_hog_cell_size_32_uoctti_non_square():
    # Runs hog() with cell size 32 on a copy of `half_img` and checks the
    # output dtype, its (16, 8, 31) shape and the top-left 2x2 values of
    # channel 0.
    # NOTE(review): the assert_allclose call below is never closed in the
    # visible excerpt; its trailing arguments appear to be cut off.
    i = half_img.copy()
    output = hog(i, 32)
    assert output.dtype == np.float32
    assert output.shape == (16, 8, 31)
    assert_allclose(output[:2, :2, 0],
                    np.array([[0.163912, 0.167787], [0.086294, 0.079365]]),
Ejemplo n.º 6
def test_hog_channel_order(variant):
    # Compares hog() results for `img_rgb` and for the same image with its
    # last axis moved to the front; descriptor and visualisation are expected
    # to have identical shapes and (approximately) identical values.
    # NOTE(review): both hog(...) calls are cut off after their first
    # argument in the visible excerpt, so the remaining arguments are unknown.
    img_rgb_chanel_first = img_rgb.transpose([2, 0, 1])
    des_rgb_channel_first, viz_rgb_channel_first = hog(img_rgb_chanel_first,
    des_rgb, viz_rgb = hog(img_rgb,

    assert des_rgb.shape == des_rgb_channel_first.shape
    assert viz_rgb.shape == viz_rgb_channel_first.shape
    assert_allclose(des_rgb, des_rgb_channel_first)
    assert_allclose(viz_rgb, viz_rgb_channel_first)
Ejemplo n.º 7
def test_hog_cell_size_32_dalaltriggs():
    # Runs hog() with cell size 32 and variant 'DalalTriggs' on a copy of
    # `img` and checks the output dtype, its (16, 16, 36) shape and the
    # top-left 2x2 values of channel 0.
    # NOTE(review): the assert_allclose call below is never closed in the
    # visible excerpt; its trailing arguments appear to be cut off.
    i = img.copy()
    output = hog(i, 32, variant='DalalTriggs')
    assert output.dtype == np.float32
    assert output.shape == (16, 16, 36)
    assert_allclose(output[:2, :2, 0],
                    np.array([[0.139408, 0.093407], [0.070996, 0.065033]]),
Ejemplo n.º 8
def test_hog_cell_size_32_dalaltriggs_non_square():
    """DalalTriggs HOG, cell size 32, on the non-square ``half_img``.

    Verifies the float32 dtype, the (16, 8, 36) output shape and the
    top-left 2x2 block of channel 0.
    """
    descriptor = hog(half_img.copy(), 32, variant='DalalTriggs')

    assert descriptor.dtype == np.float32
    assert descriptor.shape == (16, 8, 36)

    expected_corner = np.array([[0.2, 0.2],
                                [0.2, 0.2]])
    assert_allclose(descriptor[:2, :2, 0], expected_corner, rtol=1e-5)
Ejemplo n.º 9
def test_hog_cell_size_32_uoctti():
    # Runs hog() with cell size 32 (no variant argument) on a copy of `img`
    # and checks the output dtype, its (16, 16, 31) shape and the top-left
    # 2x2 values of channel 0.
    # NOTE(review): the assert_allclose call below is never closed in the
    # visible excerpt; its trailing arguments appear to be cut off.
    i = img.copy()
    output = hog(i, 32)
    assert output.dtype == np.float32
    assert output.shape == (16, 16, 31)
    assert_allclose(output[:2, :2, 0],
                    np.array([[0.104189, 0.056746], [0.051333, 0.03721]]),
Ejemplo n.º 10
def test_hog_cell_size_32_dalaltriggs_bilinear_interpolation():
    """DalalTriggs HOG with bilinear interpolation, cell size 32, on ``img``.

    Verifies the float32 dtype, the (16, 16, 36) output shape and the
    top-right 2x2 block (first two rows, last two columns) of channel 0.
    """
    descriptor = hog(
        img.copy(), 32, variant='DalalTriggs', bilinear_interpolation=True)

    assert descriptor.dtype == np.float32
    assert descriptor.shape == (16, 16, 36)

    expected_corner = np.array([[0.082442075, 0.13325043],
                                [0.094600961, 0.090005033]])
    assert_allclose(descriptor[:2, -2:, 0], expected_corner, rtol=1e-5)
Ejemplo n.º 11
def test_hog_cell_size_32_uoctti_4_orientations():
    # Runs hog() with cell size 32 and n_orientations=4 on a copy of `img`
    # and checks the output dtype, its (16, 16, 16) shape and the top-left
    # 2x2 values of channel 0.
    # NOTE(review): the assert_allclose call below is never closed in the
    # visible excerpt; its trailing arguments appear to be cut off.
    i = img.copy()
    output = hog(i, 32, n_orientations=4)
    assert output.dtype == np.float32
    assert output.shape == (16, 16, 16)
    assert_allclose(output[:2, :2, 0],
                    np.array([[0.158388, 0.085595], [0.078716, 0.062816]]),
Ejemplo n.º 12
def test_hog_cell_size_32_uoctti_4_orientations():
    """HOG with 4 orientations (default variant), cell size 32, on ``img``.

    Verifies the float32 dtype, the (16, 16, 16) output shape and the
    top-left 2x2 block of channel 0.
    """
    descriptor = hog(img.copy(), 32, n_orientations=4)

    assert descriptor.dtype == np.float32
    assert descriptor.shape == (16, 16, 16)

    expected_corner = np.array([[0.23047766, 0.17703892],
                                [0.30189356, 0.25290328]])
    assert_allclose(descriptor[:2, :2, 0], expected_corner, rtol=1e-5)
Ejemplo n.º 13
def test_hog_cell_size_32_uoctti_non_square():
    """HOG (default variant), cell size 32, on the non-square ``half_img``.

    Verifies the float32 dtype, the (16, 8, 31) output shape and the
    top-left 2x2 block of channel 0.
    """
    descriptor = hog(half_img.copy(), 32)

    assert descriptor.dtype == np.float32
    assert descriptor.shape == (16, 8, 31)

    expected_corner = np.array([[0.16276041, 0.13416694],
                                [0.22367628, 0.17556792]])
    assert_allclose(descriptor[:2, :2, 0], expected_corner, rtol=1e-5)
Ejemplo n.º 14
def test_hog_cell_size_32_dalaltriggs_4_orientations():
    """DalalTriggs HOG with 4 orientations, cell size 32, on ``img``.

    Verifies the float32 dtype, the (16, 16, 16) output shape and the
    top-left 2x2 block of channel 0 (default assert_allclose tolerance).
    """
    descriptor = hog(img.copy(), 32, n_orientations=4, variant='DalalTriggs')

    assert descriptor.dtype == np.float32
    assert descriptor.shape == (16, 16, 16)

    expected_corner = np.array([[0.2, 0.2],
                                [0.2, 0.2]])
    assert_allclose(descriptor[:2, :2, 0], expected_corner)
Ejemplo n.º 15
def test_hog_cell_size_32_dalaltriggs_bilinear_interpolation():
    # Runs hog() with cell size 32, variant 'DalalTriggs' and
    # bilinear_interpolation=True on a copy of `img`, then checks the output
    # dtype, its (16, 16, 36) shape and the first-two-rows / last-two-columns
    # values of channel 0.
    # NOTE(review): the assert_allclose call below is never closed in the
    # visible excerpt; its trailing arguments appear to be cut off.
    i = img.copy()
    output = hog(i, 32, variant='DalalTriggs', bilinear_interpolation=True)
    assert output.dtype == np.float32
    assert output.shape == (16, 16, 36)
    assert_allclose(output[:2, -2:, 0],
                    np.array([[0.01523, 0.017774], [0.012941, 0.012733]]),
Ejemplo n.º 16
def test_hog_cell_size_32_dalaltriggs(bilinear_interpolation):
    # Computes HOG descriptors for `img_gray` and `img_rgb` and then branches
    # on `bilinear_interpolation`.
    # NOTE(review): both hog(...) calls are cut off after their first argument
    # and the body of the `if` is missing from the visible excerpt, so the
    # actual assertions of this test cannot be seen here.
    des_gray = hog(img_gray,
    des_rgb = hog(img_rgb,

    if bilinear_interpolation:
Ejemplo n.º 17
def get_positive_features(train_path_pos, feature_params):
    """
    FUNC: Return HoG features for all positive training examples (faces)
        stored as '*.jpg' files in 'train_path_pos'. Each face is converted
        into one HoG template according to 'feature_params'.
    ARG:
        - train_path_pos: a string; directory that contains 36x36 images
                          of faces.
        - feature_params: a dict; with keys,
                          > template_size: int (probably 36); the number of
                            pixels spanned by each train/test template.
                          > hog_cell_size: int (default 6); the number of pixels
                            in each HoG cell.
                          Template size should be evenly divisible by hog_cell_size.
                          Smaller HoG cell sizes tend to work better, but they
                          make things slower because the feature dimensionality
                          increases and more importantly the step size of the
                          classifier decreases at test time.
    RET:
        - features_pos: (N,D) ndarray; N is the number of faces and D is the
                        template dimensionality, which would be,
                        (template_size/hog_cell_size)^2 * 31,
                        if you're using default HoG parameters.
    """
    # get image filenames
    image_paths = glob(os.path.join(train_path_pos, '*.jpg'))

    # initialization
    hog_cell_size = feature_params['hog_cell_size']
    template_size = feature_params['template_size']
    # Integer arithmetic keeps D an exact int (no float round-trip).
    D = (template_size // hog_cell_size) ** 2 * 31
    features_pos = np.zeros((len(image_paths), D))

    # One flattened HoG template per image; the reshape fails loudly if an
    # image does not yield exactly D values.
    for img_idx, img_name in enumerate(image_paths):
        hog_result = hog(imread(img_name), hog_cell_size)
        features_pos[img_idx, :] = np.reshape(hog_result, D)

    return features_pos
def get_positive_features(train_path_pos, feature_params):
    # NOTE(review): in this excerpt the description below is bare text, not a
    # quoted docstring (the delimiters seem to have been lost), and most of
    # the function body is missing: `num_img` and `features_pos` are used but
    # never assigned in the visible code and the loop has no statements.
    FUNC: This function should return all positive training examples (faces)
        from 36x36 images in 'train_path_pos'. Each face should be converted
        into a HoG template according to 'feature_params'. For improved performances,
        try mirroring or warping the positive training examples.
        - train_path_pos: a string; directory that contains 36x36 images
                          of faces.
        - feature_params: a dict; with keys,
                          > template_size: int (probably 36); the number of
                            pixels spanned by each train/test template.
                          > hog_cell_size: int (default 6); the number of pixels
                            in each HoG cell.
                          Template size should be evenly divisible by hog_cell_size.
                          Smaller HoG cell sizez tend to work better, but they
                          make things slower because the feature dimenionality
                          increases and more importantly the step size of the
                          classifier decreases at test time.
        - features_pos: (N,D) ndarray; N is the number of faces and D is the
                        template dimensionality, which would be,
                        (template_size/hog_cell_size)^2 * 31,
                        if you're using default HoG parameters.
    ##          you code here              ##
    # Names of the '.jpg' entries found directly inside train_path_pos.
    image_files=[f for f in listdir(train_path_pos) if f.endswith('.jpg')]
    # for k, v in feature_params.items():
    #     print(k,v,feature_params[k])
    # print(D)
    # NOTE(review): `num_img` is presumably len(image_files) - confirm; the
    # per-image feature extraction that belongs in this loop is not shown.
    for i in range(num_img):
        # print(path)
    ##          you code here              ##
    # print(len(features_pos),len(features_pos[0]))
    return features_pos 
def get_positive_features(train_path_pos, feature_params):
    """
    FUNC: Return HoG features for all positive training examples (faces)
        stored as files in 'train_path_pos'. Each face is converted into one
        flattened HoG template according to 'feature_params'.
    ARG:
        - train_path_pos: a string; directory that contains 36x36 images
                          of faces.
        - feature_params: a dict; with keys,
                          > template_size: int (probably 36); the number of
                            pixels spanned by each train/test template.
                            (Not read by this function.)
                          > hog_cell_size: int (default 6); the number of pixels
                            in each HoG cell.
                          Template size should be evenly divisible by hog_cell_size.
                          Smaller HoG cell sizes tend to work better, but they
                          make things slower because the feature dimensionality
                          increases and more importantly the step size of the
                          classifier decreases at test time.
    RET:
        - features_pos: (N,D) ndarray; N is the number of faces and D is the
                        template dimensionality, which would be,
                        (template_size/hog_cell_size)^2 * 31,
                        if you're using default HoG parameters.
                        Rows follow the sorted order of the file names. An
                        empty directory yields an empty array of shape (0,).
    """
    hog_cell_size = feature_params['hog_cell_size']
    features_pos = []
    print('Load positive features')

    # os.listdir order is arbitrary; sorting makes the row order reproducible.
    for image_name in tqdm(sorted(os.listdir(train_path_pos))):
        image_path = os.path.join(train_path_pos, image_name)
        # Sub-directories cannot be read as images, so skip them.
        if not os.path.isfile(image_path):
            continue
        im = imread(image_path)
        features_pos.append(np.reshape(hog(im, hog_cell_size), -1))

    features_pos = np.array(features_pos)

    return features_pos
Ejemplo n.º 20
def get_positive_features(train_path_pos, feature_params):
    """
    FUNC: Return HoG features for all positive training examples (faces)
        stored as '*.jpg' files in 'train_path_pos'. Each image is loaded as
        grayscale and converted into one flattened HoG template according to
        'feature_params'.
    ARG:
        - train_path_pos: a string; directory that contains 36x36 images
                          of faces.
        - feature_params: a dict; with keys,
                          > template_size: int (probably 36); the number of
                            pixels spanned by each train/test template.
                          > hog_cell_size: int (default 6); the number of pixels
                            in each HoG cell.
                          Template size should be evenly divisible by hog_cell_size.
                          Smaller HoG cell sizes tend to work better, but they
                          make things slower because the feature dimensionality
                          increases and more importantly the step size of the
                          classifier decreases at test time.
    RET:
        - features_pos: (N,D) ndarray; N is the number of faces and D is the
                        template dimensionality, which would be,
                        (template_size/hog_cell_size)^2 * 31,
                        if you're using default HoG parameters.
    """
    filenames = glob.glob(train_path_pos + '/*.jpg')
    cell_size = feature_params['hog_cell_size']
    # Integer arithmetic keeps the dimensionality an exact int.
    dim = (feature_params['template_size'] // cell_size) ** 2 * 31

    features_pos = np.zeros(shape=(len(filenames), dim))
    for idx, filename in enumerate(filenames):
        # NOTE(review): `as_grey` is the keyword this code was written
        # against; newer scikit-image releases spell it `as_gray` - confirm
        # which imread is imported before changing it.
        img = imread(filename, as_grey=True)
        features_pos[idx] = np.reshape(hog(img, cell_size), (-1,))

    return features_pos
Ejemplo n.º 21
def test_hog_shape(cell_size, n_orientations, variant, visualize):
    # Checks the dtype and shape of hog() output on `img_rgb` against the
    # expected number of cells (h, w) and channels (c), and, when `visualize`
    # is set, the shape of the visualisation image as well.
    # NOTE(review): the hog(...) call is cut off after its first argument in
    # the visible excerpt.
    output = hog(img_rgb,
    # Expected cell counts: image size divided by cell size, rounded to nearest.
    h = int((img_rgb.shape[0] + cell_size / 2) // cell_size)
    w = int((img_rgb.shape[1] + cell_size / 2) // cell_size)
    if visualize:
        # The visualisation is expected to use 21 pixels per cell.
        viz_h = 21 * h
        viz_w = 21 * w
    if variant == 'UoCTTI':
        c = 4 + 3 * n_orientations
        # NOTE(review): as shown, this second assignment always overwrites the
        # one above; an `else:` line appears to have been lost - confirm.
        c = 4 * n_orientations
    if visualize:
        assert output[0].dtype == np.float32
        assert output[0].shape == (h, w, c)
        assert output[1].shape == (viz_h, viz_w)
        # NOTE(review): the two asserts below treat `output` as a single
        # array, which suggests they belonged to a missing `else:` branch.
        assert output.dtype == np.float32
        assert output.shape == (h, w, c)
def run_detector(test_scn_path, model, feature_params):
    """
    FUNC: Return detections on all of the images in a given path. Every image
        is scanned with a sliding window in HoG space at successively smaller
        scales (factor 0.9 per step); windows whose classifier score is above
        0 are kept and then filtered with non-maximum suppression on a
        per-image basis (the evaluation counts a duplicate detection as
        wrong).
    ARG:
        - test_scn_path: a string; This directory contains images which may or
                        may not have faces in them. This function should work for
                        the MIT+CMU test set but also for any other images.
        - model: the linear classifier model; must provide
                        decision_function().
        - feature_params: a dict; 'template_size': the number of pixels spanned
                        by each train / test template (probably 36).
                        'hog_cell_size': the number of pixels in each HoG cell
                        (default 6).
                        Template size should be evenly divisible by hog_cell_size.
                        Smaller HoG cell sizes tend to work better, but they make
                        things slower because the feature dimensionality increases
                        and more importantly the step size of the classifier
                        decreases at test time.
    RET:
        - bboxes: (N, 4) ndarray; N is the number of non-overlapping detections, bboxes[i,:] is
                        [x_min, y_min, x_max, y_max] for detection i.
        - confidences: (N, 1) ndarray; confidences[i, :] is the real valued confidence
                        of detection i.
        - image_ids: (N, 1) ndarray;  image_ids[i, :] is the image file name for detection i.
    """
    test_images = os.listdir(test_scn_path)

    # initialize these as empty and incrementally expand them.
    bboxes = np.zeros([0, 4])
    confidences = np.zeros([0, 1])
    image_ids = np.zeros([0, 1])

    template_size = feature_params['template_size']
    cell_size = feature_params['hog_cell_size']
    # number of HoG cells along one side of the template
    cell_num = int(template_size / cell_size)

    for image_name in test_images:
        path = test_scn_path + '/' + image_name
        img = imread(path, as_grey=True)
        mindim = min(len(img), len(img[0]))

        cur_bboxes = np.zeros([0, 4])
        cur_image_ids = np.zeros([0, 1])
        cur_confidences = np.zeros([0, 1])

        # `count` is the current scale factor; keep shrinking the image while
        # its shorter side is still larger than the template.
        count = 1
        while count * mindim > template_size:
            frame = resize(img,
                           [int(len(img) * count),
                            int(len(img[0]) * count)])
            # A single HoG computation per scale; windows are cut from it.
            hogged = hog(frame, cell_size)

            for k in range(int(len(hogged) - cell_num + 1)):
                for j in range(int(len(hogged[0]) - cell_num + 1)):
                    # k indexes HoG rows (y), j indexes HoG columns (x).
                    mini_hog = hogged[k:k + cell_num, j:j + cell_num]
                    bfeat = np.reshape(mini_hog, (1, -1))
                    tmp_score = np.reshape(model.decision_function(bfeat),
                                           (1, -1))
                    if tmp_score[0, 0] > 0:
                        # Map cell coordinates back to pixels of the
                        # original (unscaled) image.
                        x_min = int(j * cell_size / count)
                        x_max = int((j + cell_num) * cell_size / count)
                        y_min = int(k * cell_size / count)
                        y_max = int((k + cell_num) * cell_size / count)
                        cur_bboxes = np.concatenate(
                            [cur_bboxes,
                             np.array([[x_min, y_min, x_max, y_max]])],
                            axis=0)
                        cur_image_ids = np.concatenate(
                            [cur_image_ids, [[image_name]]], axis=0)
                        cur_confidences = np.concatenate(
                            [cur_confidences, tmp_score], axis=0)
            count *= 0.9

        # non_max_supr_bbox can actually get somewhat slow with thousands of
        # initial detections. You could pre-filter the detections by confidence,
        # e.g. a detection with confidence -1.1 will probably never be
        # meaningful. You probably _don't_ want to threshold at 0.0, though. You
        # can get higher recall with a lower threshold.
        is_maximum = non_max_supr_bbox(cur_bboxes, cur_confidences, img.shape)

        cur_bboxes = cur_bboxes[is_maximum[:, 0], :]
        cur_confidences = cur_confidences[is_maximum[:, 0], :]
        cur_image_ids = cur_image_ids[is_maximum[:, 0]]

        bboxes = np.concatenate([bboxes, cur_bboxes], 0)
        confidences = np.concatenate([confidences, cur_confidences], 0)
        image_ids = np.concatenate([image_ids, cur_image_ids], 0)

    return bboxes, confidences, image_ids
Ejemplo n.º 23
     # NOTE(review): this is a fragment without a visible enclosing function;
     # several `if`/`else` lines appear to be missing, so paired assignments
     # such as `top = 0` / `top = d.top()` are shown back to back.
     # The visible code replaces the face rectangle `d` using left/top values
     # that default to 0, crops and resizes the face, builds a combined
     # landmark + HOG feature vector, classifies it and draws the predicted
     # label on `frame`.
     top = 0
     top = d.top()
 if d.left() < 0:
     left = 0
     left = d.left()
 d = dlib.rectangle(left, top, right, bottom)
 w = d.right() - d.left()
 h = d.bottom() - d.top()
 crop = image[d.top():d.top() + h, d.left():d.left() + w]
 frame = cv2.rectangle(frame, (d.left(), d.top()),
                       (d.right(), d.bottom()), (0, 0, 255), 2)
 # NOTE(review): this cv2.resize call is not closed in the visible excerpt.
 crop = cv2.resize(crop, (face_width, face_height),
 hog_image = hog(crop, cell)
 landmarks_vectorised = landmark_feats(
     crop, dlib.rectangle(0, 0, face_width - 1, face_height - 1),
     predictor, width, height)
 # Flatten the HOG output to a single row and append it to the landmark features.
 hog_feats = np.reshape(hog_image, [1, hog_d])
 feats = np.concatenate([landmarks_vectorised, hog_feats], axis=1)
 conf = clf.predict_proba(feats)
 tmp_conf = conf[0, :].copy()
 # Index of the most probable class.
 idx = np.argmax(tmp_conf)
 if conf[0, idx] >= THRESH1:
     text = emotions[idx]
     # NOTE(review): as shown, this assignment always overwrites the one
     # above; presumably it belonged to a missing `else:` branch - confirm.
     text = emotions[-1]
 #tmp_conf[idx] = -1
 cv2.putText(frame, text, (d.left(), d.top()), cv2.FONT_HERSHEY_SIMPLEX,
             1, (255, 255, 255), 2, cv2.LINE_AA)
def get_random_negative_features(non_face_scn_path, feature_params,
    # NOTE(review): the signature is not closed in the visible excerpt; the
    # body uses `num_samples`, which is presumably the missing last parameter.
    # The description below is bare text here, not a quoted docstring (the
    # delimiters seem to have been lost).
    FUNC: This funciton should return negative training examples (non-faces) from
        any images in 'non_face_scn_path'. Images should be converted to grayscale,
        because the positive training data is only available in grayscale. For best
        performance, you should sample random negative examples at multiple scales.
        - non_face_scn_path: a string; directory contains many images which have no
                             faces in them.
        - feature_params: a dict; with keys,
                          > template_size: int (probably 36); the number of
                            pixels spanned by each train/test template.
                          > hog_cell_size: int (default 6); the number of pixels
                            in each HoG cell. 
                          Template size should be evenly divisible by hog_cell_size.
                          Smaller HoG cell sizez tend to work better, but they 
                          make things slower because the feature dimenionality 
                          increases and more importantly the step size of the 
                          classifier decreases at test time.
        - features_neg: (N,D) ndarray; N is the number of non-faces and D is 
                        the template dimensionality, which would be, 
                        (template_size/hog_cell_size)^2 * 31,
                        if you're using default HoG parameters.
        - neg_examples: TODO
    ##          you code here              ##

    print("START: get_random_negative_features")

    temp_size = feature_params['template_size']
    cell_size = feature_params['hog_cell_size']

    features_neg = []

    # Keep cycling over the image files until `num_samples` crops were taken.
    i = 0
    while i < num_samples:
        for filename in glob.glob(non_face_scn_path + '/*.jpg'):
            # NOTE(review): this `if` has no body in the visible text;
            # presumably it left the loop (e.g. `break`) - confirm.
            if i >= num_samples:
            img = imread(filename)
            img = color.rgb2gray(img)
            #             img = resize(img, (temp_size, temp_size))

            sometimes = lambda aug: iaa.Sometimes(0.5, aug)

            # NOTE(review): the augmenter definition below is incomplete in
            # this excerpt - opening lines of several calls are missing and
            # the brackets are unbalanced, so the exact pipeline is unknown.
            # Define our sequence of augmentation steps that will be applied to every image
            # All augmenters with per_channel=0.5 will sample one value _per image_
            # in 50% of all cases. In all other cases they will sample new values
            # _per channel_.
            seq = iaa.Sequential(
                    # apply the following augmenters to most images
                    iaa.Fliplr(0.5),  # horizontally flip 50% of all images
                    iaa.Flipud(0.5),  # vertically flip 50% of all images
                    # crop images by -5% to 10% of their height/width
                        iaa.CropAndPad(percent=(-0.05, 0.1),
                                       pad_cval=(0, 255))),
                                "x": (0.8, 1.2),
                                "y": (0.8, 1.2)
                            },  # scale images to 80-120% of their size, individually per axis
                                "x": (-0.2, 0.2),
                                "y": (-0.2, 0.2)
                            },  # translate by -20 to +20 percent (per axis)
                            rotate=(-45, 45),  # rotate by -45 to +45 degrees
                            shear=(-16, 16),  # shear by -16 to +16 degrees
                            ],  # use nearest neighbour or bilinear interpolation (fast)
                                0, 255
                            ),  # if mode is constant, use a cval between 0 and 255
                            ALL  # use any of scikit-image's warping modes (see 2nd image from the top for examples)

            seq_det = seq.to_deterministic()
            img_aug = seq_det.augment_images([img])[0]

            # Random top-left corner of a temp_size x temp_size crop.
            rand_x = np.random.randint(img_aug.shape[0] - temp_size)
            rand_y = np.random.randint(img_aug.shape[1] - temp_size)
            img_aug = img_aug[rand_x:rand_x + temp_size,
                              rand_y:rand_y + temp_size]

            hog_features = hog(img_aug, cell_size=cell_size)
            hog_features = hog_features.flatten()
            # NOTE(review): `hog_features` is never added to `features_neg` in
            # the visible code, so the returned array would stay empty - an
            # append appears to be missing here.
            i += 1

    features_neg = np.asarray(features_neg)
    # The second return value is simply the requested sample count.
    neg_examples = num_samples

    print("DONE: get_random_negative_features")
    print("Shape:", features_neg.shape)

    ##          you code here              ##

    return features_neg, neg_examples
Ejemplo n.º 25
def get_data_fer2013(clahe, detector, predictor, selected_label, save_images):
    """Extract landmark and HoG features from fer2013.csv, one set per split.

    For every value of the CSV's 'Usage' column a folder
    OUTPUT_FOLDER_NAME/<usage> is created and three pickles are written
    into it: landmarks_feats.pkl, hog_feats.pkl and labels.pkl. Rows of the
    three outputs are aligned: one row per detected face.

    Args:
        clahe: object whose ``apply(image)`` is run on each grayscale image
            before detection.
        detector: callable invoked as ``detector(image, 1)``; must return a
            sized iterable of detections.
        predictor: passed through unchanged to ``landmark_feats``.
        selected_label: container of 'emotion' values to keep; other rows
            are skipped.
        save_images: when true, each kept image is also written as
            OUTPUT_FOLDER_NAME/<usage>/<row index>.jpg.

    Notes:
        The scraped original re-read every image from the .jpg it had just
        written, which fails whenever ``save_images`` is false. The decoded
        pixel array is used directly here; it is already single-channel
        uint8, so features are computed without the JPEG round trip.
    """
    image_height = FER2013.height
    image_width = FER2013.width
    cell = FER2013.cell
    hog_d = int((image_height / cell) * (image_width / cell) * 31)

    print("importing csv file")
    data = pd.read_csv(OUTPUT_FOLDER_NAME + '/fer2013.csv')

    for category in data['Usage'].unique():
        print("converting fer2013 set: " + category + "...")
        # The original tested os.path.exists(category), i.e. a path relative
        # to the working directory rather than the folder actually created.
        category_dir = OUTPUT_FOLDER_NAME + '/' + category
        os.makedirs(category_dir, exist_ok=True)

        # get samples and labels of the actual category
        category_data = data[data['Usage'] == category]
        samples = category_data['pixels'].values
        labels = category_data['emotion'].values

        # Rows are gathered in lists and stacked once at the end; growing an
        # ndarray with np.concatenate per face is quadratic.
        landmark_rows = []
        hog_rows = []
        feats_labels = []

        for i, (pixels, label) in enumerate(zip(samples, labels)):
            if label not in selected_label:
                continue
            image = np.fromstring(pixels, dtype=np.uint8, sep=" ").reshape(
                (image_height, image_width))
            if save_images:
                # NOTE(review): the writer call was lost in extraction;
                # cv2.imwrite is assumed since cv2 is used in this function.
                cv2.imwrite(category_dir + '/' + str(i) + '.jpg', image)

            # image pre-processing
            image = clahe.apply(image)

            # detect the face; images without any detection are skipped
            detections = detector(image, 1)
            if len(detections) < 1:
                continue

            # HoG is computed once per image and repeated for each face so
            # that hog_feats stays row-aligned with feats_data.
            hog_row = np.reshape(hog(image, cell), [1, hog_d])
            for d in detections:
                landmark_rows.append(
                    landmark_feats(image, d, predictor, FER2013.width,
                                   FER2013.height))
                hog_rows.append(hog_row)
                feats_labels.append(label)

        # Leading empty arrays keep the (0, 274) / (0, hog_d) shapes when a
        # split yields no faces, exactly as the incremental version did.
        feats_data = np.concatenate(
            [np.zeros([0, 274])] + landmark_rows, axis=0)
        hog_feats = np.concatenate([np.zeros([0, hog_d])] + hog_rows, axis=0)

        print("saving features and labels...")
        with open(category_dir + '/landmarks_feats.pkl', 'wb') as f:
            pickle.dump(feats_data, f)
        with open(category_dir + '/hog_feats.pkl', 'wb') as f:
            pickle.dump(hog_feats, f)
        with open(category_dir + '/labels.pkl', 'wb') as f:
            pickle.dump(feats_labels, f)
Ejemplo n.º 26
def run_detector(test_scn_path, model, feature_params, threshold=-0.5):
    """Return face detections for every image found in a directory.

    Each image is rescaled to ten scales (1.0 down to 0.1), converted to HoG
    space with a single ``hog`` call per scale, and every template-sized
    group of HoG cells is scored with ``model.decision_function``. Windows
    scoring above ``threshold`` are kept and then filtered per image with
    ``non_max_supr_bbox`` (the evaluation counts duplicate detections as
    wrong).

    Args:
        test_scn_path: directory whose entries are read as test images.
        model: linear classifier exposing ``decision_function``.
        feature_params: dict with
            'template_size': pixels spanned by each template (probably 36);
            'hog_cell_size': pixels per HoG cell (default 6).
            Template size should be evenly divisible by hog_cell_size.
        threshold: minimum classifier score for a window to be kept. The
            default reproduces the previously hard-coded value. A threshold
            below 0.0 gives higher recall.

    Returns:
        bboxes: (N, 4) ndarray; row i is [x_min, y_min, x_max, y_max] in
            original image coordinates.
        confidences: (N, 1) ndarray of classifier scores.
        image_ids: (N, 1) ndarray of image file names. When nothing is
            detected all three are the empty (0, k) float arrays the
            original code started from.
    """
    test_images = os.listdir(test_scn_path)

    cell_size = feature_params['hog_cell_size']
    # number of cells along one side of a template
    cell_num = int(feature_params['template_size'] / cell_size)

    # Per-image results are collected and concatenated once at the end.
    # This avoids quadratic array growth, and avoids concatenating a float
    # placeholder with string file names, which NumPy has deprecated.
    all_bboxes = []
    all_confidences = []
    all_image_ids = []

    for image_name in tqdm(test_images):
        # NOTE(review): newer scikit-image spells this keyword `as_gray` --
        # confirm against the installed imread before changing it.
        img = imread(os.path.join(test_scn_path, image_name), as_grey=True)

        det_boxes = []
        det_scores = []
        for scale in np.arange(1.0, 0, -0.1):
            new_h = int(img.shape[0] * scale)
            new_w = int(img.shape[1] * scale)
            if new_h < 1 or new_w < 1:
                # Scales only get smaller from here on.
                break
            test_image_hog = hog(resize(img, [new_h, new_w]), cell_size)

            n_rows = test_image_hog.shape[0] - cell_num + 1
            n_cols = test_image_hog.shape[1] - cell_num + 1
            if n_rows < 1 or n_cols < 1:
                continue

            for h in range(n_rows):
                # Score a whole row of windows in one classifier call.
                windows = np.stack([
                    np.reshape(
                        test_image_hog[h:h + cell_num, w:w + cell_num, :],
                        -1) for w in range(n_cols)
                ])
                scores = np.ravel(model.decision_function(windows))
                for w in np.flatnonzero(scores > threshold):
                    # Map HoG-cell coordinates back to original pixels.
                    det_boxes.append([
                        int(w * cell_size / scale),
                        int(h * cell_size / scale),
                        int((w + cell_num) * cell_size / scale),
                        int((h + cell_num) * cell_size / scale),
                    ])
                    det_scores.append(scores[w])

        if not det_boxes:
            # Nothing to suppress; do not hand empty arrays to the NMS.
            continue

        cur_bboxes = np.array(det_boxes, dtype=float)
        cur_confidences = np.array(det_scores, dtype=float).reshape(-1, 1)
        cur_image_ids = np.array([[image_name]] * len(det_boxes))

        # non_max_supr_bbox can get slow with thousands of initial
        # detections; raise `threshold` to pre-filter if that happens.
        is_maximum = non_max_supr_bbox(cur_bboxes, cur_confidences, img.shape)
        keep = is_maximum[:, 0]

        all_bboxes.append(cur_bboxes[keep, :])
        all_confidences.append(cur_confidences[keep, :])
        all_image_ids.append(cur_image_ids[keep])

    if not all_bboxes:
        return np.zeros([0, 4]), np.zeros([0, 1]), np.zeros([0, 1])

    bboxes = np.concatenate(all_bboxes, 0)
    confidences = np.concatenate(all_confidences, 0)
    image_ids = np.concatenate(all_image_ids, 0)
    return bboxes, confidences, image_ids
Ejemplo n.º 27
def get_random_negative_features(non_face_scn_path, feature_params,
                                 num_samples):
    """Sample random non-face HoG templates from the images in a directory.

    Every directory entry whose name contains '.jpg' is read as grayscale
    (the positive training data is grayscale only), and an equal share of
    ``num_samples`` random template-sized patches is cut from each image and
    converted to a flattened HoG descriptor.

    Args:
        non_face_scn_path: directory containing images without faces.
        feature_params: dict with
            'template_size': pixels spanned by each template (probably 36);
            'hog_cell_size': pixels per HoG cell (default 6).
            Template size should be evenly divisible by hog_cell_size.
        num_samples: target number of negatives. The per-image share is
            rounded up and images that are too small contribute fewer (or
            no) patches, so the actual count may differ from the target.

    Returns:
        features_neg: (N, D) ndarray, one flattened HoG descriptor per row;
            D would be (template_size / hog_cell_size)^2 * 31 with default
            HoG parameters.
        neg_examples: N, the number of rows actually produced.
    """
    template_size = feature_params['template_size']
    hog_cell_size = feature_params['hog_cell_size']

    print('Load negative features')
    images_path = [
        os.path.join(non_face_scn_path, file_name)
        for file_name in os.listdir(non_face_scn_path)
        if file_name.find('.jpg') != -1
    ]
    # Equal share per image, rounded up; invariant across the loop.
    quota = math.ceil(num_samples / len(images_path)) if images_path else 0

    features_neg = []
    for image_path in tqdm(images_path):
        im = imread(image_path, as_grey=True)
        # Only the first pyramid level was ever sampled, so take just that
        # level instead of materialising the whole pyramid.
        level = next(pyramid_gaussian(im))

        max_offset = min(level.shape[0], level.shape[1]) - template_size
        if max_offset <= 0:
            # Image is not larger than a template: nothing to sample.
            continue
        # Offsets are drawn without replacement, so the count cannot exceed
        # the number of distinct offsets along the shorter side.
        # NOTE(review): the size/replace arguments were lost in extraction;
        # replace=False is inferred from this cap -- confirm.
        count = min(quota, max_offset)

        height_list = np.random.choice(
            level.shape[0] - template_size, count, replace=False)
        width_list = np.random.choice(
            level.shape[1] - template_size, count, replace=False)
        for top, left in zip(height_list, width_list):
            patch = level[top:top + template_size, left:left + template_size]
            features_neg.append(np.reshape(hog(patch, hog_cell_size), -1))

    features_neg = np.array(features_neg)
    print('Number of training examples: {0}'.format(len(features_neg)))
    neg_examples = len(features_neg)

    return features_neg, neg_examples
def get_random_negative_features(non_face_scn_path, feature_params,
                                 num_samples):
    """Sample random non-face HoG templates from the images in a directory.

    Files ending in '.jpg' are read as grayscale, an equal share of random
    template-sized patches is cut from each, and each patch is converted to
    a flattened HoG descriptor. The pooled descriptors are then randomly
    subsampled down to ``num_samples`` rows.

    Args:
        non_face_scn_path: directory containing images without faces.
        feature_params: dict with
            'template_size': pixels spanned by each template (probably 36);
            'hog_cell_size': pixels per HoG cell (default 6).
            Template size should be evenly divisible by hog_cell_size.
        num_samples: number of negatives wanted.

    Returns:
        features_neg: (N, D) ndarray with D = (template_size /
            hog_cell_size)^2 * 31 and N <= num_samples (fewer only when the
            images cannot supply enough patches).
        neg_examples: N.
    """
    template_size = feature_params['template_size']
    hog_cell_size = feature_params['hog_cell_size']
    dim = int(pow(template_size / hog_cell_size, 2) * 31)

    image_files = [f for f in listdir(non_face_scn_path) if f.endswith('.jpg')]
    if not image_files:
        # Avoids dividing by zero below; nothing can be sampled.
        return np.zeros((0, dim)), 0
    smp_per_img = math.ceil(num_samples / len(image_files))

    rows = []
    # The original indexed image_files with a constant (i = 1), so the same
    # image was read on every iteration; iterate over the files instead.
    for file_name in image_files:
        img = imread(non_face_scn_path + '/' + file_name, as_grey=True)
        max_row = len(img) - template_size
        max_col = len(img[0]) - template_size
        if min(max_row, max_col) <= 0:
            # Image is not larger than a template: nothing to sample.
            continue
        # Offsets are drawn without replacement, so cap the count at the
        # number of distinct offsets along the shorter side.
        # NOTE(review): the size/replace arguments were lost in extraction;
        # replace=False is inferred from this cap -- confirm.
        num_sampd = min(smp_per_img, max_row, max_col)

        idx_i = np.random.choice(max_row, num_sampd, replace=False)
        idx_j = np.random.choice(max_col, num_sampd, replace=False)
        for top, left in zip(idx_i, idx_j):
            portion = img[top:top + template_size, left:left + template_size]
            rows.append(np.reshape(hog(portion, hog_cell_size), (1, -1)))

    # Stack once; the leading empty array fixes the (0, dim) shape when no
    # patch was collected.
    all_images = np.concatenate([np.zeros([0, dim])] + rows, axis=0)

    # Rounding the per-image share up can overshoot, so subsample; never ask
    # for more rows than exist.
    wanted = min(num_samples, len(all_images))
    chosen = np.random.choice(len(all_images), wanted, replace=False)
    features_neg = all_images[chosen]
    neg_examples = len(features_neg)
    return features_neg, neg_examples
Ejemplo n.º 29
def get_data_ckplus(clahe, detector, predictor, selected_label, save_images):
    """Extract landmark and HoG features for CK+ and pickle them.

    Walks OUTPUT_FOLDER_NAME/emotion/<subject>/<sequence>/, reads the label
    from the first .txt file found there, loads the matching frame from
    OUTPUT_FOLDER_NAME/image/<subject>/<sequence>/<frame>.png, detects
    faces, and for every face stores landmark features, a HoG descriptor of
    the resized face crop, and the label remapped to fer2013 numbering.
    Results go to landmarks_feats.pkl, hog_feats.pkl and labels.pkl in
    OUTPUT_FOLDER_NAME; rows are aligned, one per detected face.

    Args:
        clahe: object whose ``apply(image)`` is run on the grayscale frame.
        detector: callable invoked as ``detector(image, 1)``.
        predictor: passed through unchanged to ``landmark_feats``.
        selected_label: container of CK+ emotion codes to keep (checked
            before the fer2013 remapping).
        save_images: when true, each face crop is written to
            OUTPUT_FOLDER_NAME/face_image/<frame>.png.
    """
    face_height = CKPLUS.face_height
    face_width = CKPLUS.face_width
    cell = CKPLUS.cell
    hog_d = int((face_height / cell) * (face_width / cell) * 31)

    # CK+ emotion code -> fer2013 label:
    # neutral, anger, disgust, fear, happy, sad, surprise.
    # NOTE(review): CK+ code 2 has no entry and passes through as 2, which
    # collides with fear (4 -> 2); behaviour kept from the original.
    ckplus_to_fer = {0: 6, 1: 0, 3: 1, 4: 2, 5: 3, 6: 4, 7: 5}

    emotion_root = OUTPUT_FOLDER_NAME + '/emotion'
    face_dir = OUTPUT_FOLDER_NAME + '/face_image'

    # Rows are gathered in lists and stacked once at the end.
    landmark_rows = []
    hog_rows = []
    feats_labels = []

    print("converting CK+...")
    for subject in os.listdir(emotion_root):
        for video_dir in os.listdir(emotion_root + '/' + subject):
            txt = glob(emotion_root + '/' + subject + '/' + video_dir +
                       '/*.txt')
            if not txt:
                continue

            # Frame name = first three '_'-separated parts of the label
            # file's base name.
            stem = os.path.splitext(os.path.basename(txt[0]))[0]
            im_name = '_'.join(stem.split('_')[:3])

            with open(txt[0], 'r') as f:
                # The file holds one number in float notation.
                label = int(float(f.read().strip()))
            if label not in selected_label:
                continue
            label = ckplus_to_fer.get(label, label)

            # Built from the loop variables instead of positional indices
            # into the split glob path, which only worked when
            # OUTPUT_FOLDER_NAME was a single dot-free path component.
            image = cv2.imread(OUTPUT_FOLDER_NAME + '/image/' + subject +
                               '/' + video_dir + '/' + im_name + '.png')
            if image is None:
                # cv2.imread returns None for a missing/unreadable file.
                continue
            # shape[0:2], not [0:1]: a 1-tuple never equals the 2-tuple, so
            # the original resized every image.
            if image.shape[0:2] != (CKPLUS.height, CKPLUS.width):
                image = cv2.resize(image, (CKPLUS.width, CKPLUS.height))
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            image = clahe.apply(image)

            # detect the face
            detections = detector(image, 1)
            if len(detections) < 1:
                continue

            for d in detections:
                # Clip the detection box to the image bounds.
                left = max(d.left(), 0)
                top = max(d.top(), 0)
                right = min(d.right(), CKPLUS.width - 1)
                bottom = min(d.bottom(), CKPLUS.height - 1)
                if right <= left or bottom <= top:
                    # Empty crop after clipping; cv2.resize would fail.
                    continue
                d = dlib.rectangle(left, top, right, bottom)

                crop = image[top:bottom, left:right]
                # NOTE(review): a trailing argument of this call (likely an
                # interpolation flag) was lost in extraction; the OpenCV
                # default is used.
                crop = cv2.resize(crop, (face_width, face_height))
                if save_images:
                    os.makedirs(face_dir, exist_ok=True)
                    cv2.imwrite(face_dir + '/' + im_name + '.png', crop)

                # extract HOG features
                hog_rows.append(np.reshape(hog(crop, cell), [1, hog_d]))
                # NOTE(review): trailing arguments were lost in extraction;
                # width/height are assumed to be passed as in the fer2013
                # counterpart -- confirm.
                landmark_rows.append(
                    landmark_feats(image, d, predictor, CKPLUS.width,
                                   CKPLUS.height))
                feats_labels.append(label)

    # Leading empty arrays keep the (0, 274) / (0, hog_d) shapes when no
    # face was found.
    feats_data = np.concatenate([np.zeros([0, 274])] + landmark_rows, axis=0)
    hog_feats = np.concatenate([np.zeros([0, hog_d])] + hog_rows, axis=0)

    print("saving features and labels...")
    with open(OUTPUT_FOLDER_NAME + '/landmarks_feats.pkl', 'wb') as f:
        pickle.dump(feats_data, f)
    with open(OUTPUT_FOLDER_NAME + '/hog_feats.pkl', 'wb') as f:
        pickle.dump(hog_feats, f)
    with open(OUTPUT_FOLDER_NAME + '/labels.pkl', 'wb') as f:
        pickle.dump(feats_labels, f)
Ejemplo n.º 30
def test_hog_cell_size_32_uoctti():
    """Check hog() at cell size 32 against the stored reference descriptors.

    The gray and RGB fixtures are each compared with their entry in
    ``hog_features`` to a relative tolerance of 1e-3.
    """
    reference_keys = ('hog_des_gray_uoctti', 'hog_des_rgb_uoctti')
    # Compute both descriptors up front, then compare them in order.
    descriptors = [hog(image, 32) for image in (img_gray, img_rgb)]

    for key, descriptor in zip(reference_keys, descriptors):
        assert_allclose(hog_features[key], descriptor, rtol=1e-3)