def get_positive_features(train_path_pos, feature_params):
    """
    FUNC: This function should return all positive training examples (faces)
          from 36x36 images in 'train_path_pos'. Each face should be converted
          into a HoG template according to 'feature_params'. For improved
          performance, try mirroring or warping the positive training
          examples.
    ARG:
        - train_path_pos: a string; directory that contains 36x36 images of
          faces.
        - feature_params: a dict; with keys,
            > template_size: int (probably 36); the number of pixels spanned
              by each train/test template.
            > hog_cell_size: int (default 6); the number of pixels in each
              HoG cell. Template size should be evenly divisible by
              hog_cell_size. Smaller HoG cell sizes tend to work better, but
              they make things slower because the feature dimensionality
              increases and, more importantly, the step size of the classifier
              decreases at test time.
    RET:
        - features_pos: (N,D) ndarray; N is the number of faces and D is the
          template dimensionality, which would be
          (template_size/hog_cell_size)^2 * 31 if you're using the default
          HoG parameters.
    """
    #########################################
    ##          your code here             ##
    #########################################
    print("START: get_positive_features")
    temp_size = feature_params['template_size']
    cell_size = feature_params['hog_cell_size']

    features_pos = []
    for filename in glob.glob(train_path_pos + '/*.jpg'):
        img = imread(filename)
        img_mirror = img[:, ::-1]  # horizontal flip doubles the training set

        hog_features = hog(img, cell_size=cell_size).flatten()
        hog_features_mirror = hog(img_mirror, cell_size=cell_size).flatten()

        features_pos.append(hog_features)
        features_pos.append(hog_features_mirror)

    features_pos = np.asarray(features_pos)
    print("DONE: get_positive_features")
    print("Shape:", features_pos.shape)
    #########################################
    ##          your code here             ##
    #########################################
    return features_pos
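# Usage sketch (not from the original repo): with the default parameters the
# mirroring above doubles N, and D = (36/6)^2 * 31 = 1116. The directory path
# below is an assumption for illustration only.
if __name__ == '__main__':
    feature_params = {'template_size': 36, 'hog_cell_size': 6}
    features_pos = get_positive_features('../data/train_faces', feature_params)
    assert features_pos.shape[1] == (36 // 6) ** 2 * 31  # D == 1116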
def calcHog(im, points, patchsize, ncells, idx):
    """Compute a HOG descriptor for the patch around every valid point.

    A point is valid when the full patchsize x patchsize window around it
    lies inside the image. Returns the descriptors together with the
    coordinates of the valid points.
    """
    imsz = im.shape
    halfsize = int((patchsize - patchsize % 2) / 2)
    cellsize = patchsize // ncells  # DalalTriggs yields 36 values per cell
    npoints = len(points[idx][0])

    valid_points = []
    validPointIdx = np.zeros(npoints, np.uint32)
    validPointCount = 0
    hogvalues = np.zeros((npoints, 36 * ncells ** 2), np.float32)

    for j in range(npoints):
        # top-left corner of the patch (1-based, as in the original MATLAB ROI)
        r0 = int(points[idx][0][j] - halfsize)
        c0 = int(points[idx][1][j] - halfsize)
        if (r0 >= 1 and c0 >= 1 and
                r0 + patchsize - 1 <= imsz[0] and
                c0 + patchsize - 1 <= imsz[1]):
            im_tmp = im[(r0 - 1):(r0 - 1 + patchsize),
                        (c0 - 1):(c0 - 1 + patchsize)]
            hogi = hog.hog(im_tmp, cellsize, 'DalalTriggs')
            hogvalues[validPointCount, :] = hogi.ravel()
            validPointIdx[validPointCount] = j  # store valid indices
            validPointCount += 1

    hogvalues = hogvalues[:validPointCount, :]
    validPointIdx = validPointIdx[:validPointCount]
    for m in validPointIdx:
        valid_points.append([points[idx][0][m], points[idx][1][m]])
    return [(hogvalues, valid_points)]
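# A hedged usage sketch (the nested 'points' layout is an assumption read off
# the indexing above: points[idx] = (rows, cols)). The second point sits too
# close to the border, so only the first survives the validity check.
demo_im = np.random.rand(64, 64).astype(np.float32)
demo_points = [(np.array([32, 2]), np.array([32, 2]))]
(hogvalues, valid_points), = calcHog(demo_im, demo_points, patchsize=16,
                                     ncells=2, idx=0)
print(hogvalues.shape)  # (1, 144): one valid point, 36 * 2^2 features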
def extract_hog(img_path, data_path, cellsize=8, hog_orientations=9):
    subdirs = [x[0] for x in os.walk(img_path, True)]
    subdirs.pop(0)  # drop the root directory itself

    for subdir in subdirs:
        print(' ')
        sys.stdout.write("extracting hog " + subdir.split("/")[-1])
        hog_data_subdir = data_path + 'hog/' + subdir.split('/')[-1] + '/'
        if not os.path.exists(hog_data_subdir):
            os.makedirs(hog_data_subdir)
        imgs = [x[2] for x in os.walk(subdir, True)][0]
        num_files = len(imgs)
        count = 0
        for img in imgs:
            if count >= round(num_files / 10):  # progress dot every ~10%
                sys.stdout.write('.')
                count = 0
            # NOTE: grayscale only -- RGB input made performance collapse.
            gray_f = cv2.imread(subdir + '/' + img, 0)
            height, width = gray_f.shape
            ratio = 1.0 * max(height, width) / 300  # longest side -> 300 px
            # cv2.resize takes (width, height), not (height, width)
            gray_f = cv2.resize(
                gray_f, (round(width / ratio), round(height / ratio))
            ).astype(float)
            hog_descriptor = hog(gray_f, cellsize,
                                 n_orientations=hog_orientations)
            hog_descriptor = hog_descriptor.astype(float)
            pickle.dump(
                hog_descriptor,
                open(hog_data_subdir + img.split('.')[-2] + '.pkl', 'wb'))
            count += 1
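# A minimal sketch of reading one of the pickled descriptors back. The path
# is a placeholder; the actual file name is the image stem dumped above.
hog_pkl_path = 'data/hog/some_class/example.pkl'  # placeholder path
with open(hog_pkl_path, 'rb') as f:
    hog_descriptor = pickle.load(f)
print(hog_descriptor.shape)  # (n_cells_y, n_cells_x, n_features)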
def get_random_negative_features(non_face_scn_path, feature_params,
                                 num_samples):
    '''
    FUNC: This function should return negative training examples (non-faces)
          from any images in 'non_face_scn_path'. Images should be converted
          to grayscale, because the positive training data is only available
          in grayscale. For best performance, you should sample random
          negative examples at multiple scales.
    ARG:
        - non_face_scn_path: a string; directory that contains many images
          which have no faces in them.
        - feature_params: a dict; with keys,
            > template_size: int (probably 36); the number of pixels spanned
              by each train/test template.
            > hog_cell_size: int (default 6); the number of pixels in each
              HoG cell. Template size should be evenly divisible by
              hog_cell_size. Smaller HoG cell sizes tend to work better, but
              they make things slower because the feature dimensionality
              increases and, more importantly, the step size of the classifier
              decreases at test time.
    RET:
        - features_neg: (N,D) ndarray; N is the number of non-faces and D is
          the template dimensionality, which would be
          (template_size/hog_cell_size)^2 * 31 if you're using the default
          HoG parameters.
        - neg_examples: TODO
    '''
    #########################################
    ##          your code here             ##
    #########################################
    # get image filenames
    image_paths = glob(os.path.join(non_face_scn_path, '*.jpg'))

    # initialization
    hog_cell_size = feature_params['hog_cell_size']
    N = len(image_paths)
    template_size = feature_params['template_size']
    D = int(((template_size / hog_cell_size) ** 2) * 31)
    features_neg = np.zeros((num_samples, D))
    neg_examples = 'TODO'

    # sample images uniformly with replacement; the original used
    # np.random.choice(N - 1, ...), which silently never picked the last image
    img_num = np.random.choice(N, num_samples)
    for i in tqdm(range(num_samples)):
        image = imread(image_paths[img_num[i]])
        image = color.rgb2grey(image)
        start_i = random.randint(0, image.shape[0] - template_size)
        start_j = random.randint(0, image.shape[1] - template_size)
        patch = image[start_i:start_i + template_size,
                      start_j:start_j + template_size]
        # hog
        hog_feats = hog(patch, hog_cell_size)
        features_neg[i, :] = np.reshape(hog_feats, (1, D))
    #########################################
    ##          your code here             ##
    #########################################
    return features_neg, neg_examples
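# A quick worked check of the template dimensionality formula from the
# docstring, D = (template_size / hog_cell_size)^2 * 31, with the defaults:
template_size, hog_cell_size = 36, 6
D = int((template_size / hog_cell_size) ** 2 * 31)
print(D)  # (36/6)^2 * 31 = 6^2 * 31 = 1116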
def test_hog_cell_size_32_uoctti_non_square():
    i = half_img.copy()
    output = hog(i, 32)
    assert output.dtype == np.float32
    assert output.shape == (16, 8, 31)
    assert_allclose(output[:2, :2, 0],
                    np.array([[0.163912, 0.167787],
                              [0.086294, 0.079365]]), rtol=1e-5)
def test_hog_channel_order(variant):
    img_rgb_channel_first = img_rgb.transpose([2, 0, 1])
    des_rgb_channel_first, viz_rgb_channel_first = hog(
        img_rgb_channel_first, 32, variant=variant, channels_first=True,
        visualize=True)
    des_rgb, viz_rgb = hog(img_rgb, 32, variant=variant, channels_first=False,
                           visualize=True)
    assert des_rgb.shape == des_rgb_channel_first.shape
    assert viz_rgb.shape == viz_rgb_channel_first.shape
    assert_allclose(des_rgb, des_rgb_channel_first)
    assert_allclose(viz_rgb, viz_rgb_channel_first)
def test_hog_cell_size_32_dalaltriggs():
    i = img.copy()
    output = hog(i, 32, variant='DalalTriggs')
    assert output.dtype == np.float32
    assert output.shape == (16, 16, 36)
    assert_allclose(output[:2, :2, 0],
                    np.array([[0.139408, 0.093407],
                              [0.070996, 0.065033]]), rtol=1e-5)
def test_hog_cell_size_32_dalaltriggs_non_square():
    i = half_img.copy()
    output = hog(i, 32, variant='DalalTriggs')
    assert output.dtype == np.float32
    assert output.shape == (16, 8, 36)
    assert_allclose(output[:2, :2, 0],
                    np.array([[0.2, 0.2],
                              [0.2, 0.2]]), rtol=1e-5)
def test_hog_cell_size_32_uoctti():
    i = img.copy()
    output = hog(i, 32)
    assert output.dtype == np.float32
    assert output.shape == (16, 16, 31)
    assert_allclose(output[:2, :2, 0],
                    np.array([[0.104189, 0.056746],
                              [0.051333, 0.03721]]), rtol=1e-4)
def test_hog_cell_size_32_dalaltriggs_bilinear_interpolation():
    i = img.copy()
    output = hog(i, 32, variant='DalalTriggs', bilinear_interpolation=True)
    assert output.dtype == np.float32
    assert output.shape == (16, 16, 36)
    assert_allclose(output[:2, -2:, 0],
                    np.array([[0.082442075, 0.13325043],
                              [0.094600961, 0.090005033]]), rtol=1e-5)
def test_hog_cell_size_32_uoctti_4_orientations():
    i = img.copy()
    output = hog(i, 32, n_orientations=4)
    assert output.dtype == np.float32
    assert output.shape == (16, 16, 16)
    assert_allclose(output[:2, :2, 0],
                    np.array([[0.158388, 0.085595],
                              [0.078716, 0.062816]]), rtol=1e-5)
def test_hog_cell_size_32_uoctti_4_orientations():
    i = img.copy()
    output = hog(i, 32, n_orientations=4)
    assert output.dtype == np.float32
    assert output.shape == (16, 16, 16)
    assert_allclose(output[:2, :2, 0],
                    np.array([[0.23047766, 0.17703892],
                              [0.30189356, 0.25290328]]), rtol=1e-5)
def test_hog_cell_size_32_uoctti_non_square():
    i = half_img.copy()
    output = hog(i, 32)
    assert output.dtype == np.float32
    assert output.shape == (16, 8, 31)
    assert_allclose(output[:2, :2, 0],
                    np.array([[0.16276041, 0.13416694],
                              [0.22367628, 0.17556792]]), rtol=1e-5)
def test_hog_cell_size_32_dalaltriggs_4_orientations():
    i = img.copy()
    output = hog(i, 32, n_orientations=4, variant='DalalTriggs')
    assert output.dtype == np.float32
    assert output.shape == (16, 16, 16)
    assert_allclose(output[:2, :2, 0],
                    np.array([[0.2, 0.2],
                              [0.2, 0.2]]))
def test_hog_cell_size_32_dalaltriggs_bilinear_interpolation():
    i = img.copy()
    output = hog(i, 32, variant='DalalTriggs', bilinear_interpolation=True)
    assert output.dtype == np.float32
    assert output.shape == (16, 16, 36)
    assert_allclose(output[:2, -2:, 0],
                    np.array([[0.01523, 0.017774],
                              [0.012941, 0.012733]]), rtol=1e-4)
def test_hog_cell_size_32_dalaltriggs(bilinear_interpolation):
    des_gray = hog(img_gray, 32, variant='DalalTriggs',
                   bilinear_interpolation=bilinear_interpolation)
    des_rgb = hog(img_rgb, 32, variant='DalalTriggs',
                  bilinear_interpolation=bilinear_interpolation)
    if bilinear_interpolation:
        assert_allclose(hog_features['hog_des_gray_dalaltriggs_bilinear'],
                        des_gray, rtol=1e-4)
        assert_allclose(hog_features['hog_des_rgb_dalaltriggs_bilinear'],
                        des_rgb, rtol=1e-4)
    else:
        assert_allclose(hog_features['hog_des_gray_dalaltriggs'], des_gray,
                        rtol=1e-4)
        assert_allclose(hog_features['hog_des_rgb_dalaltriggs'], des_rgb,
                        rtol=1e-4)
def get_positive_features(train_path_pos, feature_params):
    """
    FUNC: This function should return all positive training examples (faces)
          from 36x36 images in 'train_path_pos'. Each face should be converted
          into a HoG template according to 'feature_params'. For improved
          performance, try mirroring or warping the positive training
          examples.
    ARG:
        - train_path_pos: a string; directory that contains 36x36 images of
          faces.
        - feature_params: a dict; with keys,
            > template_size: int (probably 36); the number of pixels spanned
              by each train/test template.
            > hog_cell_size: int (default 6); the number of pixels in each
              HoG cell. Template size should be evenly divisible by
              hog_cell_size. Smaller HoG cell sizes tend to work better, but
              they make things slower because the feature dimensionality
              increases and, more importantly, the step size of the classifier
              decreases at test time.
    RET:
        - features_pos: (N,D) ndarray; N is the number of faces and D is the
          template dimensionality, which would be
          (template_size/hog_cell_size)^2 * 31 if you're using the default
          HoG parameters.
    """
    #########################################
    ##          your code here             ##
    #########################################
    # get image filenames
    image_paths = glob(os.path.join(train_path_pos, '*.jpg'))

    # initialization
    hog_cell_size = feature_params['hog_cell_size']
    N = len(image_paths)
    template_size = feature_params['template_size']
    D = int(((template_size / hog_cell_size) ** 2) * 31)
    features_pos = np.zeros((N, D))

    # hog
    for img_idx, img_name in enumerate(image_paths):
        hog_result = hog(imread(img_name), hog_cell_size)
        features_pos[img_idx, :] = np.reshape(hog_result, (1, D))
    #########################################
    ##          your code here             ##
    #########################################
    return features_pos
def get_positive_features(train_path_pos, feature_params):
    """
    FUNC: This function should return all positive training examples (faces)
          from 36x36 images in 'train_path_pos'. Each face should be converted
          into a HoG template according to 'feature_params'. For improved
          performance, try mirroring or warping the positive training
          examples.
    ARG:
        - train_path_pos: a string; directory that contains 36x36 images of
          faces.
        - feature_params: a dict; with keys,
            > template_size: int (probably 36); the number of pixels spanned
              by each train/test template.
            > hog_cell_size: int (default 6); the number of pixels in each
              HoG cell. Template size should be evenly divisible by
              hog_cell_size. Smaller HoG cell sizes tend to work better, but
              they make things slower because the feature dimensionality
              increases and, more importantly, the step size of the classifier
              decreases at test time.
    RET:
        - features_pos: (N,D) ndarray; N is the number of faces and D is the
          template dimensionality, which would be
          (template_size/hog_cell_size)^2 * 31 if you're using the default
          HoG parameters.
    """
    #########################################
    ##          your code here             ##
    #########################################
    image_files = [f for f in listdir(train_path_pos) if f.endswith('.jpg')]
    num_img = len(image_files)
    D = int(pow(feature_params['template_size'] /
                feature_params['hog_cell_size'], 2) * 31)
    features_pos = np.zeros((num_img, D))
    for i in range(num_img):
        path = train_path_pos + '/' + image_files[i]
        img = imread(path, as_grey=True)
        features_pos[i] = np.reshape(
            hog(img, feature_params['hog_cell_size']), (-1,))
    #########################################
    ##          your code here             ##
    #########################################
    return features_pos
def get_positive_features(train_path_pos, feature_params):
    """
    FUNC: This function should return all positive training examples (faces)
          from 36x36 images in 'train_path_pos'. Each face should be converted
          into a HoG template according to 'feature_params'. For improved
          performance, try mirroring or warping the positive training
          examples.
    ARG:
        - train_path_pos: a string; directory that contains 36x36 images of
          faces.
        - feature_params: a dict; with keys,
            > template_size: int (probably 36); the number of pixels spanned
              by each train/test template.
            > hog_cell_size: int (default 6); the number of pixels in each
              HoG cell. Template size should be evenly divisible by
              hog_cell_size. Smaller HoG cell sizes tend to work better, but
              they make things slower because the feature dimensionality
              increases and, more importantly, the step size of the classifier
              decreases at test time.
    RET:
        - features_pos: (N,D) ndarray; N is the number of faces and D is the
          template dimensionality, which would be
          (template_size/hog_cell_size)^2 * 31 if you're using the default
          HoG parameters.
    """
    #########################################
    ##          your code here             ##
    #########################################
    template_size = feature_params['template_size']
    hog_cell_size = feature_params['hog_cell_size']
    features_pos = []
    print('Load positive features')
    for image_path in tqdm(os.listdir(train_path_pos)):
        image_path = os.path.join(train_path_pos, image_path)
        im = imread(image_path)
        features_pos.append(np.reshape(hog(im, hog_cell_size), -1))
    features_pos = np.array(features_pos)
    #########################################
    ##          your code here             ##
    #########################################
    return features_pos
def get_positive_features(train_path_pos, feature_params):
    """
    FUNC: This function should return all positive training examples (faces)
          from 36x36 images in 'train_path_pos'. Each face should be converted
          into a HoG template according to 'feature_params'. For improved
          performance, try mirroring or warping the positive training
          examples.
    ARG:
        - train_path_pos: a string; directory that contains 36x36 images of
          faces.
        - feature_params: a dict; with keys,
            > template_size: int (probably 36); the number of pixels spanned
              by each train/test template.
            > hog_cell_size: int (default 6); the number of pixels in each
              HoG cell. Template size should be evenly divisible by
              hog_cell_size. Smaller HoG cell sizes tend to work better, but
              they make things slower because the feature dimensionality
              increases and, more importantly, the step size of the classifier
              decreases at test time.
    RET:
        - features_pos: (N,D) ndarray; N is the number of faces and D is the
          template dimensionality, which would be
          (template_size/hog_cell_size)^2 * 31 if you're using the default
          HoG parameters.
    """
    #########################################
    ##          your code here             ##
    #########################################
    filenames = glob.glob(train_path_pos + '/*.jpg')
    dim = (feature_params['template_size'] /
           feature_params['hog_cell_size']) ** 2 * 31
    features_pos = np.zeros(shape=(len(filenames), int(dim)))
    for idx in range(len(filenames)):
        img = imread(filenames[idx], as_grey=True)
        tmp = np.reshape(hog(img, feature_params['hog_cell_size']), (-1,))
        features_pos[idx] = tmp
    #########################################
    ##          your code here             ##
    #########################################
    return features_pos
def test_hog_shape(cell_size, n_orientations, variant, visualize):
    output = hog(img_rgb, cell_size=cell_size, n_orientations=n_orientations,
                 variant=variant, visualize=visualize)
    h = int((img_rgb.shape[0] + cell_size / 2) // cell_size)
    w = int((img_rgb.shape[1] + cell_size / 2) // cell_size)
    if visualize:
        viz_h = 21 * h
        viz_w = 21 * w
    if variant == 'UoCTTI':
        c = 4 + 3 * n_orientations
    else:
        c = 4 * n_orientations
    if visualize:
        assert output[0].dtype == np.float32
        assert output[0].shape == (h, w, c)
        assert output[1].shape == (viz_h, viz_w)
    else:
        assert output.dtype == np.float32
        assert output.shape == (h, w, c)
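# A worked instance of the shape rules encoded in the test above (assuming a
# 512x512 image, which matches the (16, 16, ...) shapes in the other tests):
# cell_size=32 gives a 16x16 cell grid; UoCTTI with 9 orientations gives
# 4 + 3*9 = 31 channels, while DalalTriggs gives 4*9 = 36.
h = int((512 + 32 / 2) // 32)   # 16
c_uoctti = 4 + 3 * 9            # 31
c_dalaltriggs = 4 * 9           # 36
print((h, h, c_uoctti), (h, h, c_dalaltriggs))  # (16, 16, 31) (16, 16, 36)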
def run_detector(test_scn_path, model, feature_params):
    """
    FUNC: This function returns detections on all of the images in a given
          path. You will want to use non-maximum suppression on your
          detections or your performance will be poor (the evaluation counts
          a duplicate detection as wrong). The non-maximum suppression is
          done on a per-image basis. The starter code includes a call to a
          provided non-max suppression function.
    ARG:
        - test_scn_path: a string; This directory contains images which may
          or may not have faces in them. This function should work for the
          MIT+CMU test set but also for any other images.
        - model: the linear classifier model.
        - feature_params: a dict;
            'template_size': the number of pixels spanned by each train/test
            template (probably 36).
            'hog_cell_size': the number of pixels in each HoG cell
            (default 6). Template size should be evenly divisible by
            hog_cell_size. Smaller HoG cell sizes tend to work better, but
            they make things slower because the feature dimensionality
            increases and more importantly the step size of the classifier
            decreases at test time.
        - MORE... You can add additional arguments for advanced work like
          multi-scale, pixel shift and so on.
    RET:
        - bboxes: (N, 4) ndarray; N is the number of non-overlapping
          detections, bboxes[i,:] is [x_min, y_min, x_max, y_max] for
          detection i.
        - confidences: (N, 1) ndarray; confidences[i, :] is the real-valued
          confidence of detection i.
        - image_ids: (N, 1) ndarray; image_ids[i, :] is the image file name
          for detection i.
    """
    # The placeholder version of this code will return random bounding boxes
    # in each test image. It will even do non-maximum suppression on the
    # random bounding boxes to give you an example of how to call the
    # function.
    # Your actual code should convert each test image to HoG feature space
    # with a _single_ call to vl_hog for each scale. Then step over the HoG
    # cells, taking groups of cells that are the same size as your learned
    # template, and classifying them. If the classification is above some
    # confidence, keep the detection and then pass all the detections for an
    # image to non-maximum suppression. For your initial debugging, you can
    # operate only at a single scale and you can skip calling non-maximum
    # suppression.
    test_images = os.listdir(test_scn_path)

    # initialize these as empty and incrementally expand them.
    bboxes = np.zeros([0, 4])
    confidences = np.zeros([0, 1])
    image_ids = np.zeros([0, 1])
    cell_size = feature_params['hog_cell_size']
    cell_num = int(feature_params['template_size'] /
                   feature_params['hog_cell_size'])  # cells per template side

    for i in range(len(test_images)):
        #########################################
        ##          your code here             ##
        #########################################
        path = test_scn_path + '/' + test_images[i]
        img = imread(path, as_grey=True)
        mindim = min(img.shape[0], img.shape[1])
        scale = 1.0
        cur_bboxes = np.zeros([0, 4])
        cur_image_ids = np.zeros([0, 1])
        cur_confidences = np.zeros([0, 1])
        # multi-scale sliding window: shrink the image by 0.9 each pass until
        # the shorter side is no larger than one template
        while scale * mindim > feature_params['template_size']:
            frame = resize(img, [int(img.shape[0] * scale),
                                 int(img.shape[1] * scale)])
            hogged = hog(frame, cell_size)
            for k in range(int(len(hogged) - cell_num + 1)):         # rows (y)
                for j in range(int(len(hogged[0]) - cell_num + 1)):  # cols (x)
                    mini_hog = hogged[k:k + cell_num, j:j + cell_num]
                    bfeat = np.reshape(mini_hog, (1, -1))
                    tmp_score = np.reshape(model.decision_function(bfeat),
                                           (1, -1))
                    if tmp_score[0, 0] > 0:
                        # map HoG-cell coordinates back to original pixels
                        x_min = int(j * cell_size / scale)
                        x_max = int((j + cell_num) * cell_size / scale)
                        y_min = int(k * cell_size / scale)
                        y_max = int((k + cell_num) * cell_size / scale)
                        cur_bboxes = np.concatenate(
                            [cur_bboxes,
                             np.array([[x_min, y_min, x_max, y_max]])], axis=0)
                        cur_image_ids = np.concatenate(
                            [cur_image_ids, [[test_images[i]]]], axis=0)
                        cur_confidences = np.concatenate(
                            [cur_confidences, tmp_score], axis=0)
            scale *= 0.9
        #########################################
        ##          your code here             ##
        #########################################

        # non_max_supr_bbox can actually get somewhat slow with thousands of
        # initial detections. You could pre-filter the detections by
        # confidence, e.g. a detection with confidence -1.1 will probably
        # never be meaningful. You probably _don't_ want to threshold at 0.0,
        # though. You can get higher recall with a lower threshold. You don't
        # need to modify anything in non_max_supr_bbox, but you can.
        is_maximum = non_max_supr_bbox(cur_bboxes, cur_confidences, img.shape)
        cur_bboxes = cur_bboxes[is_maximum[:, 0], :]
        cur_confidences = cur_confidences[is_maximum[:, 0], :]
        cur_image_ids = cur_image_ids[is_maximum[:, 0]]

        bboxes = np.concatenate([bboxes, cur_bboxes], 0)
        confidences = np.concatenate([confidences, cur_confidences], 0)
        image_ids = np.concatenate([image_ids, cur_image_ids], 0)

    return bboxes, confidences, image_ids
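# A hedged sketch of the confidence pre-filter suggested in the comment
# above: drop hopeless detections before NMS to speed it up. The helper name
# and the -1.0 threshold are illustrative choices, not tuned values.
def prefilter_by_confidence(bboxes, confidences, image_ids, thresh=-1.0):
    """Drop detections with confidence below `thresh` before running NMS."""
    keep = confidences[:, 0] > thresh
    return bboxes[keep, :], confidences[keep, :], image_ids[keep]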
            top = 0
        else:
            top = d.top()
        if d.left() < 0:
            left = 0
        else:
            left = d.left()
        d = dlib.rectangle(left, top, right, bottom)
        w = d.right() - d.left()
        h = d.bottom() - d.top()
        crop = image[d.top():d.top() + h, d.left():d.left() + w]
        frame = cv2.rectangle(frame, (d.left(), d.top()),
                              (d.right(), d.bottom()), (0, 0, 255), 2)
        crop = cv2.resize(crop, (face_width, face_height),
                          interpolation=cv2.INTER_CUBIC)
        hog_image = hog(crop, cell)
        landmarks_vectorised = landmark_feats(
            crop, dlib.rectangle(0, 0, face_width - 1, face_height - 1),
            predictor, width, height)
        hog_feats = np.reshape(hog_image, [1, hog_d])
        feats = np.concatenate([landmarks_vectorised, hog_feats], axis=1)
        conf = clf.predict_proba(feats)
        tmp_conf = conf[0, :].copy()
        idx = np.argmax(tmp_conf)
        if conf[0, idx] >= THRESH1:
            text = emotions[idx]
        else:
            text = emotions[-1]
        # tmp_conf[idx] = -1
        cv2.putText(frame, text, (d.left(), d.top()),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2,
                    cv2.LINE_AA)
def get_random_negative_features(non_face_scn_path, feature_params,
                                 num_samples):
    '''
    FUNC: This function should return negative training examples (non-faces)
          from any images in 'non_face_scn_path'. Images should be converted
          to grayscale, because the positive training data is only available
          in grayscale. For best performance, you should sample random
          negative examples at multiple scales.
    ARG:
        - non_face_scn_path: a string; directory that contains many images
          which have no faces in them.
        - feature_params: a dict; with keys,
            > template_size: int (probably 36); the number of pixels spanned
              by each train/test template.
            > hog_cell_size: int (default 6); the number of pixels in each
              HoG cell. Template size should be evenly divisible by
              hog_cell_size. Smaller HoG cell sizes tend to work better, but
              they make things slower because the feature dimensionality
              increases and, more importantly, the step size of the classifier
              decreases at test time.
    RET:
        - features_neg: (N,D) ndarray; N is the number of non-faces and D is
          the template dimensionality, which would be
          (template_size/hog_cell_size)^2 * 31 if you're using the default
          HoG parameters.
        - neg_examples: TODO
    '''
    #########################################
    ##          your code here             ##
    #########################################
    print("START: get_random_negative_features")
    temp_size = feature_params['template_size']
    cell_size = feature_params['hog_cell_size']

    sometimes = lambda aug: iaa.Sometimes(0.5, aug)
    # Build the augmentation pipeline once (the original rebuilt it inside
    # the loop for every sample, which is needlessly slow). All augmenters
    # with per_channel=0.5 sample one value _per image_ in 50% of all cases;
    # in all other cases they sample new values _per channel_.
    seq = iaa.Sequential(
        [
            iaa.Fliplr(0.5),  # horizontally flip 50% of all images
            iaa.Flipud(0.5),  # vertically flip 50% of all images
            # crop images by -5% to 10% of their height/width
            sometimes(iaa.CropAndPad(percent=(-0.05, 0.1),
                                     pad_mode=ia.ALL,
                                     pad_cval=(0, 255))),
            sometimes(iaa.Affine(
                # scale images to 80-120% of their size, per axis
                scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
                # translate by -20 to +20 percent (per axis)
                translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
                rotate=(-45, 45),  # rotate by -45 to +45 degrees
                shear=(-16, 16),   # shear by -16 to +16 degrees
                # use nearest-neighbour or bilinear interpolation (fast)
                order=[0, 1],
                # if mode is constant, use a cval between 0 and 255
                cval=(0, 255),
                # use any of scikit-image's warping modes
                mode=ia.ALL)),
        ],
        random_order=True)

    features_neg = []
    i = 0
    while i < num_samples:
        for filename in glob.glob(non_face_scn_path + '/*.jpg'):
            if i >= num_samples:
                break
            img = imread(filename)
            img = color.rgb2gray(img)
            seq_det = seq.to_deterministic()
            img_aug = seq_det.augment_images([img])[0]
            rand_x = np.random.randint(img_aug.shape[0] - temp_size)
            rand_y = np.random.randint(img_aug.shape[1] - temp_size)
            img_aug = img_aug[rand_x:rand_x + temp_size,
                              rand_y:rand_y + temp_size]
            hog_features = hog(img_aug, cell_size=cell_size).flatten()
            features_neg.append(hog_features)
            i += 1

    features_neg = np.asarray(features_neg)
    neg_examples = num_samples
    print("DONE: get_random_negative_features")
    print("Shape:", features_neg.shape)
    #########################################
    ##          your code here             ##
    #########################################
    return features_neg, neg_examples
def get_data_fer2013(clahe, detector, predictor, selected_label, save_images):
    OUTPUT_FOLDER_NAME = FER2013.name
    image_height = FER2013.height
    image_width = FER2013.width
    cell = FER2013.cell
    print("importing csv file")
    data = pd.read_csv(OUTPUT_FOLDER_NAME + '/fer2013.csv')
    for category in data['Usage'].unique():
        print("converting fer2013 set: " + category + "...")
        # create folder (the original tested os.path.exists(category),
        # which ignored the output folder prefix)
        if not os.path.exists(OUTPUT_FOLDER_NAME + '/' + category):
            try:
                os.makedirs(OUTPUT_FOLDER_NAME + '/' + category)
            except OSError as e:
                if e.errno == errno.EEXIST and os.path.isdir(
                        OUTPUT_FOLDER_NAME):
                    pass
                else:
                    raise

        # get samples and labels of the actual category
        category_data = data[data['Usage'] == category]
        samples = category_data['pixels'].values
        labels = category_data['emotion'].values

        # initialize
        feats_data = np.zeros([0, 274])
        feats_labels = []
        hog_d = int((image_height / cell) * (image_width / cell) * 31)
        hog_feats = np.zeros([0, hog_d])

        # get images and extract features
        for i in range(len(samples)):
            if labels[i] in selected_label:
                # each sample is a space-separated string of grayscale pixels
                image = np.array(samples[i].split(), dtype=np.uint8).reshape(
                    (image_height, image_width))

                # save images
                if save_images:
                    cv2.imwrite(OUTPUT_FOLDER_NAME + '/' + category + '/' +
                                str(i) + '.jpg', image)

                # image pre-processing; the pixels are already grayscale, so
                # apply CLAHE directly instead of re-reading the saved .jpg
                # (which crashed when save_images was False)
                image = clahe.apply(image)

                # detect the face
                detections = detector(image, 1)
                if len(detections) < 1:
                    continue

                # extract HOG features
                hog_image = hog(image, cell)

                # for all detected faces
                for k, d in enumerate(detections):
                    landmarks_vectorised = landmark_feats(
                        image, d, predictor, FER2013.width, FER2013.height)
                    # concat feature
                    feats_data = np.concatenate(
                        [feats_data, landmarks_vectorised], axis=0)
                    hog_feats = np.concatenate(
                        [hog_feats, np.reshape(hog_image, [1, hog_d])], axis=0)
                    feats_labels.append(labels[i])

        print("saving features and labels...")
        with open(OUTPUT_FOLDER_NAME + '/' + category +
                  '/landmarks_feats.pkl', 'wb') as f:
            pickle.dump(feats_data, f)
        with open(OUTPUT_FOLDER_NAME + '/' + category + '/hog_feats.pkl',
                  'wb') as f:
            pickle.dump(hog_feats, f)
        with open(OUTPUT_FOLDER_NAME + '/' + category + '/labels.pkl',
                  'wb') as f:
            pickle.dump(feats_labels, f)
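# A tiny worked example of the fer2013 pixel-string format parsed above:
# each CSV row stores a 48x48 grayscale image as space-separated integers
# (2304 values). The 2x2 string below is illustrative only.
pixels = "0 128 255 64"
demo = np.array(pixels.split(), dtype=np.uint8).reshape((2, 2))
print(demo)  # [[  0 128]
             #  [255  64]]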
def run_detector(test_scn_path, model, feature_params):
    """
    FUNC: This function returns detections on all of the images in a given
          path. You will want to use non-maximum suppression on your
          detections or your performance will be poor (the evaluation counts
          a duplicate detection as wrong). The non-maximum suppression is
          done on a per-image basis. The starter code includes a call to a
          provided non-max suppression function.
    ARG:
        - test_scn_path: a string; This directory contains images which may
          or may not have faces in them. This function should work for the
          MIT+CMU test set but also for any other images.
        - model: the linear classifier model.
        - feature_params: a dict;
            'template_size': the number of pixels spanned by each train/test
            template (probably 36).
            'hog_cell_size': the number of pixels in each HoG cell
            (default 6). Template size should be evenly divisible by
            hog_cell_size. Smaller HoG cell sizes tend to work better, but
            they make things slower because the feature dimensionality
            increases and more importantly the step size of the classifier
            decreases at test time.
        - MORE... You can add additional arguments for advanced work like
          multi-scale, pixel shift and so on.
    RET:
        - bboxes: (N, 4) ndarray; N is the number of non-overlapping
          detections, bboxes[i,:] is [x_min, y_min, x_max, y_max] for
          detection i.
        - confidences: (N, 1) ndarray; confidences[i, :] is the real-valued
          confidence of detection i.
        - image_ids: (N, 1) ndarray; image_ids[i, :] is the image file name
          for detection i.
    """
    # The placeholder version of this code will return random bounding boxes
    # in each test image. It will even do non-maximum suppression on the
    # random bounding boxes to give you an example of how to call the
    # function.
    # Your actual code should convert each test image to HoG feature space
    # with a _single_ call to vl_hog for each scale. Then step over the HoG
    # cells, taking groups of cells that are the same size as your learned
    # template, and classifying them. If the classification is above some
    # confidence, keep the detection and then pass all the detections for an
    # image to non-maximum suppression. For your initial debugging, you can
    # operate only at a single scale and you can skip calling non-maximum
    # suppression.
    test_images = os.listdir(test_scn_path)

    # initialize these as empty and incrementally expand them.
    bboxes = np.zeros([0, 4])
    confidences = np.zeros([0, 1])
    image_ids = np.zeros([0, 1])
    cell_size = feature_params['hog_cell_size']
    cell_num = int(feature_params['template_size'] /
                   feature_params['hog_cell_size'])  # cells per template side

    for i in tqdm(range(len(test_images))):
        #########################################
        ##          your code here             ##
        #########################################
        cur_bboxes = np.zeros([0, 4])
        cur_confidences = np.zeros([0, 1])
        cur_image_ids = np.zeros([0, 1])
        image_path = os.path.join(test_scn_path, test_images[i])
        img = imread(image_path, as_grey=True)
        scale_arr = np.arange(1.0, 0, -0.1)  # scales 1.0, 0.9, ..., 0.1
        for scale in scale_arr:
            img_resize = resize(img, [int(img.shape[0] * scale),
                                      int(img.shape[1] * scale)])
            test_image_hog = hog(img_resize, cell_size)
            for h in range(test_image_hog.shape[0] - cell_num + 1):
                for w in range(test_image_hog.shape[1] - cell_num + 1):
                    hog_window = np.reshape(
                        test_image_hog[h:h + cell_num, w:w + cell_num, :],
                        (1, -1))
                    score = model.decision_function(hog_window)
                    if score > -0.5:
                        min_y = int(h * cell_size / scale)
                        min_x = int(w * cell_size / scale)
                        max_y = int((h + cell_num) * cell_size / scale)
                        max_x = int((w + cell_num) * cell_size / scale)
                        cur_confidence = np.array([score])
                        cur_image_id = np.array([[test_images[i]]])
                        cur_bbox = np.array([[min_x, min_y, max_x, max_y]])
                        cur_bboxes = np.concatenate([cur_bboxes, cur_bbox], 0)
                        cur_confidences = np.concatenate(
                            [cur_confidences, cur_confidence], 0)
                        cur_image_ids = np.concatenate(
                            [cur_image_ids, cur_image_id], 0)
        #########################################
        ##          your code here             ##
        #########################################

        # non_max_supr_bbox can actually get somewhat slow with thousands of
        # initial detections. You could pre-filter the detections by
        # confidence, e.g. a detection with confidence -1.1 will probably
        # never be meaningful. You probably _don't_ want to threshold at 0.0,
        # though. You can get higher recall with a lower threshold. You don't
        # need to modify anything in non_max_supr_bbox, but you can.
        is_maximum = non_max_supr_bbox(cur_bboxes, cur_confidences, img.shape)
        cur_bboxes = cur_bboxes[is_maximum[:, 0], :]
        cur_confidences = cur_confidences[is_maximum[:, 0], :]
        cur_image_ids = cur_image_ids[is_maximum[:, 0]]

        bboxes = np.concatenate([bboxes, cur_bboxes], 0)
        confidences = np.concatenate([confidences, cur_confidences], 0)
        image_ids = np.concatenate([image_ids, cur_image_ids], 0)

    return bboxes, confidences, image_ids
def get_random_negative_features(non_face_scn_path, feature_params,
                                 num_samples):
    '''
    FUNC: This function should return negative training examples (non-faces)
          from any images in 'non_face_scn_path'. Images should be converted
          to grayscale, because the positive training data is only available
          in grayscale. For best performance, you should sample random
          negative examples at multiple scales.
    ARG:
        - non_face_scn_path: a string; directory that contains many images
          which have no faces in them.
        - feature_params: a dict; with keys,
            > template_size: int (probably 36); the number of pixels spanned
              by each train/test template.
            > hog_cell_size: int (default 6); the number of pixels in each
              HoG cell. Template size should be evenly divisible by
              hog_cell_size. Smaller HoG cell sizes tend to work better, but
              they make things slower because the feature dimensionality
              increases and, more importantly, the step size of the classifier
              decreases at test time.
    RET:
        - features_neg: (N,D) ndarray; N is the number of non-faces and D is
          the template dimensionality, which would be
          (template_size/hog_cell_size)^2 * 31 if you're using the default
          HoG parameters.
        - neg_examples: TODO
    '''
    #########################################
    ##          your code here             ##
    #########################################
    template_size = feature_params['template_size']
    hog_cell_size = feature_params['hog_cell_size']
    features_neg = []
    images_path = []
    print('Load negative features')
    for image_path in os.listdir(non_face_scn_path):
        if image_path.find('.jpg') != -1:
            image_path = os.path.join(non_face_scn_path, image_path)
            images_path.append(image_path)
    for image_path in tqdm(images_path):
        im = imread(image_path, as_grey=True)
        im_pyramids = tuple(pyramid_gaussian(im))
        # NOTE: only the first pyramid level (the original scale) is used
        # here; widen the slice to sample at multiple scales.
        for im_pyramid in im_pyramids[:1]:
            num_sample_per_image = math.ceil(num_samples / len(images_path))
            if min(im_pyramid.shape[0], im_pyramid.shape[1]) <= template_size:
                break
            elif (min(im_pyramid.shape[0], im_pyramid.shape[1])
                  < template_size + num_sample_per_image):
                num_sample_per_image = (min(im_pyramid.shape[0],
                                            im_pyramid.shape[1])
                                        - template_size)
            row_list = np.random.choice(im_pyramid.shape[0] - template_size,
                                        num_sample_per_image, replace=False)
            col_list = np.random.choice(im_pyramid.shape[1] - template_size,
                                        num_sample_per_image, replace=False)
            for row, col in zip(row_list, col_list):
                features_neg.append(np.reshape(
                    hog(im_pyramid[row:row + template_size,
                                   col:col + template_size],
                        hog_cell_size), -1))
    features_neg = np.array(features_neg)
    print('Number of training examples: {0}'.format(len(features_neg)))
    neg_examples = len(features_neg)
    #########################################
    ##          your code here             ##
    #########################################
    return features_neg, neg_examples
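# A hedged sketch of the multi-scale sampling the docstring asks for: limit
# the pyramid with max_layer and iterate over every level instead of only
# the first. The helper name is illustrative; downscale=2 is skimage's
# default and n_levels=3 is an arbitrary choice.
from skimage.transform import pyramid_gaussian

def sample_patches_multiscale(im, template_size, n_levels=3):
    patches = []
    for level in pyramid_gaussian(im, max_layer=n_levels - 1, downscale=2):
        if min(level.shape[:2]) <= template_size:
            break  # level too small to hold a full template
        r = np.random.randint(level.shape[0] - template_size)
        c = np.random.randint(level.shape[1] - template_size)
        patches.append(level[r:r + template_size, c:c + template_size])
    return patches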
def get_random_negative_features(non_face_scn_path, feature_params,
                                 num_samples):
    '''
    FUNC: This function should return negative training examples (non-faces)
          from any images in 'non_face_scn_path'. Images should be converted
          to grayscale, because the positive training data is only available
          in grayscale. For best performance, you should sample random
          negative examples at multiple scales.
    ARG:
        - non_face_scn_path: a string; directory that contains many images
          which have no faces in them.
        - feature_params: a dict; with keys,
            > template_size: int (probably 36); the number of pixels spanned
              by each train/test template.
            > hog_cell_size: int (default 6); the number of pixels in each
              HoG cell. Template size should be evenly divisible by
              hog_cell_size. Smaller HoG cell sizes tend to work better, but
              they make things slower because the feature dimensionality
              increases and, more importantly, the step size of the classifier
              decreases at test time.
    RET:
        - features_neg: (N,D) ndarray; N is the number of non-faces and D is
          the template dimensionality, which would be
          (template_size/hog_cell_size)^2 * 31 if you're using the default
          HoG parameters.
        - neg_examples: TODO
    '''
    #########################################
    ##          your code here             ##
    #########################################
    image_files = [f for f in listdir(non_face_scn_path)
                   if f.endswith('.jpg')]
    num_img = len(image_files)
    D = int(pow(feature_params['template_size'] /
                feature_params['hog_cell_size'], 2) * 31)
    all_images = np.zeros([0, D])
    smp_per_img = math.ceil(num_samples / num_img)

    for j in range(num_img):
        # NOTE: the original indexed image_files with a fixed i = 1, so every
        # iteration read the same image; index by the loop variable instead.
        path = non_face_scn_path + '/' + image_files[j]
        img = imread(path, as_grey=True)
        # cap the per-image sample count by the number of valid offsets
        max_offsets = min(len(img[0]) - feature_params['template_size'],
                          len(img) - feature_params['template_size'])
        num_sampd = min(max_offsets, smp_per_img)
        idx_i = np.random.choice(
            np.arange(len(img) - feature_params['template_size']),
            num_sampd, replace=False)
        idx_j = np.random.choice(
            np.arange(len(img[0]) - feature_params['template_size']),
            num_sampd, replace=False)
        for x in range(num_sampd):
            portion = img[idx_i[x]:idx_i[x] + feature_params['template_size'],
                          idx_j[x]:idx_j[x] + feature_params['template_size']]
            hogged = hog(portion, feature_params['hog_cell_size'])
            port = np.reshape(hogged, (1, -1))
            all_images = np.concatenate([all_images, port], axis=0)
    #########################################
    ##          your code here             ##
    #########################################
    features_neg = all_images[np.random.choice(np.arange(len(all_images)),
                                               size=num_samples,
                                               replace=False)]
    neg_examples = len(features_neg)
    return features_neg, neg_examples
def get_data_ckplus(clahe, detector, predictor, selected_label, save_images):
    OUTPUT_FOLDER_NAME = CKPLUS.name
    face_height = CKPLUS.face_height
    face_width = CKPLUS.face_width
    cell = CKPLUS.cell

    # initialize
    feats_data = np.zeros([0, 274])
    feats_labels = []
    hog_d = int((face_height / cell) * (face_width / cell) * 31)
    hog_feats = np.zeros([0, hog_d])

    dirs = os.listdir(OUTPUT_FOLDER_NAME + '/emotion')
    print("converting CK+...")
    for dir in dirs:
        video_dirs = os.listdir(OUTPUT_FOLDER_NAME + '/emotion/' + dir)
        for video_dir in video_dirs:
            txt = glob(OUTPUT_FOLDER_NAME + '/emotion/' + dir + '/' +
                       video_dir + '/*.txt')
            if not txt:
                continue
            name = txt[0].split('.')
            name = name[0].split('/')
            im_name = name[-1].split('_')
            im_name = im_name[0] + '_' + im_name[1] + '_' + im_name[2]
            with open(txt[0], 'r') as f:
                label = f.read()
            label = label.split('.')
            label = label[0].split(' ')
            label = int(label[-1])
            if label not in selected_label:
                continue

            # label transform to match fer2013
            if label == 0:    # neutral
                label = 6
            elif label == 1:  # anger
                label = 0
            elif label == 3:  # disgust
                label = 1
            elif label == 4:  # fear
                label = 2
            elif label == 5:  # happy
                label = 3
            elif label == 6:  # sad
                label = 4
            elif label == 7:  # surprise
                label = 5

            image = cv2.imread(name[0] + '/image/' + name[2] + '/' + name[3] +
                               '/' + im_name + '.png')
            # compare the full (height, width) pair; the original sliced
            # shape[0:1], which only compared a one-element tuple
            if image.shape[:2] != (CKPLUS.height, CKPLUS.width):
                image = cv2.resize(image, (CKPLUS.width, CKPLUS.height))
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            image = clahe.apply(image)

            # detect the face
            detections = detector(image, 1)
            if len(detections) < 1:
                continue

            # for all detected faces
            for k, d in enumerate(detections):
                # clip the box to the image bounds
                if d.right() >= CKPLUS.width:
                    right = CKPLUS.width - 1
                else:
                    right = d.right()
                if d.bottom() >= CKPLUS.height:
                    bottom = CKPLUS.height - 1
                else:
                    bottom = d.bottom()
                if d.top() < 0:
                    top = 0
                else:
                    top = d.top()
                if d.left() < 0:
                    left = 0
                else:
                    left = d.left()
                d = dlib.rectangle(left, top, right, bottom)
                w = d.right() - d.left()
                h = d.bottom() - d.top()
                crop = image[d.top():d.top() + h, d.left():d.left() + w]
                crop = cv2.resize(crop, (face_width, face_height),
                                  interpolation=cv2.INTER_LINEAR)
                if save_images:
                    if not os.path.exists(OUTPUT_FOLDER_NAME + '/face_image'):
                        os.makedirs(OUTPUT_FOLDER_NAME + '/face_image')
                    cv2.imwrite(OUTPUT_FOLDER_NAME + '/face_image/' +
                                im_name + '.png', crop)

                # extract HOG features
                hog_image = hog(crop, cell)
                landmarks_vectorised = landmark_feats(image, d, predictor,
                                                      CKPLUS.width,
                                                      CKPLUS.height)
                # concat feature
                feats_data = np.concatenate(
                    [feats_data, landmarks_vectorised], axis=0)
                hog_feats = np.concatenate(
                    [hog_feats, np.reshape(hog_image, [1, hog_d])], axis=0)
                feats_labels.append(label)

    print("saving features and labels...")
    with open(OUTPUT_FOLDER_NAME + '/landmarks_feats.pkl', 'wb') as f:
        pickle.dump(feats_data, f)
    with open(OUTPUT_FOLDER_NAME + '/hog_feats.pkl', 'wb') as f:
        pickle.dump(hog_feats, f)
    with open(OUTPUT_FOLDER_NAME + '/labels.pkl', 'wb') as f:
        pickle.dump(feats_labels, f)
def test_hog_cell_size_32_uoctti():
    des_gray = hog(img_gray, 32)
    des_rgb = hog(img_rgb, 32)
    assert_allclose(hog_features['hog_des_gray_uoctti'], des_gray, rtol=1e-3)
    assert_allclose(hog_features['hog_des_rgb_uoctti'], des_rgb, rtol=1e-3)