def loadDataSet():
    """Load the face / non-face NPD dataset and split it 3:1.

    When ``tmp.pkl`` does not exist in the working directory, every ``*.jpg``
    under ``datasets/original/face`` and ``datasets/original/nonface`` is
    resized to 24x24, converted to greyscale (mode ``"L"``) and passed through
    ``NPDFeature(...).extract()``.  The label (1 for face, -1 for non-face) is
    appended as the last element of each sample.  The samples are shuffled and
    pickled into ``tmp.pkl``.  The cache file is then always read back.

    Returns:
        tuple: ``(X_train, X_validation, y_train, y_validation)``; the first
        three quarters of the rows are the training split and the last column
        of the cached matrix is the label.
    """
    cache_file = 'tmp.pkl'
    if not os.path.exists(cache_file):
        # (glob pattern, label) pairs, positive samples first.
        sources = (
            ('datasets/original/face/*.jpg', 1),
            ('datasets/original/nonface/*.jpg', -1),
        )
        samples = []
        for pattern, label in sources:
            for img_name in sorted(glob.glob(pattern)):
                print(img_name)
                grey = Image.open(img_name).resize((24, 24)).convert("L")
                features = NPDFeature(np.array(grey)).extract()
                # The label rides along as the last element of the sample.
                samples.append(np.r_[features, label])
        # Shuffle before caching so the later split mixes both classes.
        random.shuffle(samples)
        with open(cache_file, 'wb') as cache_out:
            pickle.dump(np.array(samples), cache_out, True)

    with open(cache_file, 'rb') as cache_in:
        dataset = pickle.load(cache_in)
    print(dataset.shape)

    # Split rows into training and validation sets; the last column is the label.
    split = dataset.shape[0] * 3 // 4
    label_col = dataset.shape[1] - 1
    X_train = dataset[:split, :label_col]
    y_train = dataset[:split, label_col]
    X_validation = dataset[split:, :label_col]
    y_validation = dataset[split:, label_col]
    return X_train, X_validation, y_train, y_validation
def getFeature(Path, savePath):
    """Recursively extract NPD features for every file below ``Path``.

    Each regular file is opened with PIL, converted to an array and passed
    through ``NPDFeature(...).extract()``.  The result is pickled (protocol 0)
    to ``savePath/<name of the file's parent directory>/<file name without
    extension>``, unless that target already exists.

    Args:
        Path: Directory to walk; sub-directories are processed recursively.
        savePath: Root directory under which the feature files are written.

    Returns:
        None.
    """
    for file in os.listdir(Path):
        file_abs_path = os.path.join(Path, file)
        if os.path.isdir(file_abs_path):
            getFeature(file_abs_path, savePath)
            continue
        if not os.path.isfile(file_abs_path):
            continue

        # Use the NPDFeature class to get the feature vector.
        im = np.array(Image.open(file_abs_path))
        pic_features = NPDFeature(im).extract()
        print("pic_features", pic_features.shape)

        # Mirror the image's parent directory name below savePath.
        # os.path.basename is separator-agnostic, unlike splitting on "/".
        parent_dir_name = os.path.basename(os.path.dirname(file_abs_path))
        target_dir = os.path.join(savePath, parent_dir_name)
        save_path = os.path.join(target_dir, os.path.splitext(file)[0])
        if not os.path.exists(save_path):
            # Create the class sub-directory on demand so open() cannot fail
            # only because the folder is missing.
            os.makedirs(target_dir, exist_ok=True)
            # The context manager closes the handle even if dump() raises.
            with open(save_path, "wb") as output:
                pickle.dump(pic_features, output, 0)
def extract_to_list(path_from):
    """Return the NPD features of every file in ``path_from``.

    Each directory entry is opened with PIL, converted to a NumPy array and
    passed through ``NPDFeature(...).extract()``.

    Args:
        path_from: Directory whose entries are all treated as images.

    Returns:
        list: One extracted feature object per directory entry, in
        ``os.listdir`` order.
    """
    return [
        NPDFeature(np.array(Image.open(path_from + "/" + entry))).extract()
        for entry in os.listdir(path_from)
    ]
def get_npdArray_from_diskImg(pathDir):
    """Build an array of NPD features from every image file in ``pathDir``.

    Each file is resized to 24x24, converted to greyscale (mode ``'L'``) and
    passed through ``NPDFeature(...).extract()``.

    Args:
        pathDir: Directory containing the images.  A trailing path separator
            is accepted but no longer required.

    Returns:
        numpy.ndarray: ``np.array`` of the per-image feature vectors, in
        ``os.listdir`` order (an empty array for an empty directory).
    """
    features = []  # do not shadow the builtin ``list``
    for file_name in os.listdir(pathDir):
        # os.path.join yields the same string as '%s%s' when pathDir already
        # ends with a separator, and a valid path when it does not.
        img = Image.open(os.path.join(pathDir, file_name))
        img = img.resize((24, 24))
        pixels = np.array(img.convert('L'))
        features.append(NPDFeature(pixels).extract())
    return np.array(features)
def exact_nonface():
    """Extract NPD features for the 500 non-face images and save them.

    Images ``non0.jpg`` .. ``non499.jpg`` are read from a fixed absolute
    directory, passed through ``NPDFeature(...).extract()`` and collected
    with a label of -1 each.  The features are written to ``nonfeature.npy``
    and the labels, reshaped to a single row, to ``nonlabel.npy``.
    """
    # Process the non-face samples.
    features = []
    labels = []
    for index in range(500):
        pixels = np.array(Image.open("/home/kodgv/第三次实验/ML2017-lab-03/huidutu/non" + str(index) + ".jpg"))
        extractor = NPDFeature(pixels)
        labels.append(-1)
        features.append(list(extractor.extract()))
    np.save("nonfeature.npy", np.array(features))
    np.save("nonlabel.npy", np.array(labels).reshape(1, len(labels)))
def get_feature(path):
    """Return the NPD features of all images in ``path`` as one flat array.

    For every directory entry a 24x24 integer matrix is filled pixel by pixel
    via ``im.getpixel`` and passed through ``NPDFeature(...).extract()``.  All
    feature vectors are concatenated end to end into a single 1-D array.

    Args:
        path: Directory prefix; file names are appended directly, so it must
            end with a path separator.

    Returns:
        numpy.ndarray: 1-D concatenation of every image's features (empty
        for an empty directory).
    """
    # Start from the same empty float array the result has always been built
    # from, but concatenate once at the end instead of re-copying the growing
    # array on every iteration.
    chunks = [numpy.array([])]
    for name in os.listdir(path):
        im = Image.open(path + name)
        image = numpy.ones(shape=(24, 24), dtype=int)
        # NOTE(review): Pillow's getpixel takes (x, y), so image[i][j] holds
        # the pixel at column i / row j, i.e. the matrix is transposed.
        # Kept as-is so features stay comparable with earlier runs; confirm
        # whether this is intended.
        for i in range(24):
            for j in range(24):
                image[i][j] = im.getpixel((i, j))
        chunks.append(NPDFeature(image).extract())
    return numpy.concatenate(chunks)
def npd_features(imgs):
    """Stack the NPD features of every image in ``imgs`` into one matrix.

    Each image is multiplied by 255, cast to an 8-bit unsigned array and
    passed through ``NPDFeature(...).extract()``.  The running matrix shape
    is printed after each image.

    Args:
        imgs: Sequence of image arrays.  Presumably floats in [0, 1], given
            the ``* 255`` scaling; verify against the caller.

    Returns:
        numpy.ndarray: One row per image.
    """
    # NOTE(review): the width is derived from SCALE_SIZE[0] alone; confirm it
    # matches the length NPDFeature.extract() actually returns.
    features = np.empty(shape=(0, SCALE_SIZE[0] * (SCALE_SIZE[0] - 1) // 2))
    for img in imgs:
        # uint8, not int8: a signed byte cannot represent 128..255, so bright
        # pixels would be cast out of range.
        pixels = (img * 255).astype(np.uint8)
        features = np.vstack((features, NPDFeature(pixels).extract()))
        print(features.shape)
    return features
def extract_fea(img_dirs, img_labels, store_name):
    """Extract NPD features for a list of images and pickle them with labels.

    Preprocessing: each image is read, converted to greyscale, resized to
    24x24 and converted to unsigned bytes before being passed through
    ``NPDFeature(...).extract()``.

    Args:
        img_dirs: Sequence of image paths.
        img_labels: Sequence of labels, indexed in parallel with ``img_dirs``.
        store_name: Path of the pickle file to write.

    Returns:
        None.  ``store_name`` receives a list of ``(feature, label)`` tuples
        pickled with the highest available protocol.

    Raises:
        IndexError: If ``img_labels`` is shorter than ``img_dirs``.
    """
    fea_list = []
    for i, img_dir in enumerate(img_dirs):
        temp_img = io.imread(img_dir)
        temp_gray_img = color.rgb2gray(temp_img)
        temp_resized_img = transform.resize(temp_gray_img, (24, 24))
        temp_resized_img = img_as_ubyte(temp_resized_img)
        # Extract the features.
        temp_fea = NPDFeature(temp_resized_img).extract()
        fea_list.append((temp_fea, img_labels[i]))
    # The context manager closes the file even if dump() raises.
    with open(store_name, 'wb') as o_file:
        pickle.dump(fea_list, o_file, -1)
def getNPDFeature(imageList):
    """Extract the NPD feature of every greyscale image in ``imageList``.

    :param imageList: list whose elements are greyscale images
    :return: list holding the NPD feature of each image, in input order
    """
    featureList = []
    for picture in imageList:
        extractor = NPDFeature(picture)
        featureList.append(extractor.extract())
    return featureList
def get_grayscale(path, x=None, y=None):
    """Append the NPD features of every image in ``path`` to ``x`` and its label to ``y``.

    Each file is opened, converted to greyscale (mode ``'L'``), resized to
    24x24 and passed through ``NPDFeature(...).extract()``.  The label is
    ``[1]`` when ``path`` is exactly ``'datasets/original/face/'``, ``[-1]``
    when it is exactly ``'datasets/original/nonface/'`` and ``None`` for any
    other string.

    Args:
        path: Directory to read; compared verbatim against the two known paths.
        x: Previously stacked feature rows, or ``None`` to start fresh.
        y: Previously stacked label rows, or ``None`` to start fresh.

    Returns:
        tuple: ``(x, y)`` with one extra row per file; the inputs are
        returned untouched when the directory is empty.
    """
    # The label depends only on the directory, so decide it once.
    if path == 'datasets/original/face/':
        labels = [1]
    elif path == 'datasets/original/nonface/':
        labels = [-1]
    else:
        labels = None

    for file in os.listdir(path):
        # Convert the RGB picture to a 24x24 greyscale image.
        image = Image.open(os.path.join(path, file)).convert('L').resize((24, 24))
        feature = NPDFeature(np.asarray(image)).extract()
        if x is None and y is None:
            x = np.array([feature])
            y = np.array([labels])
        else:
            # Stack face and non-face samples together.
            x = np.vstack((x, feature))
            y = np.vstack((y, labels))
    return x, y
def to_gray_resize(path, x=None, y=None):
    """Stack NPD features and labels for every image found in ``path``.

    Every file is opened, converted to greyscale (mode ``'L'``) and resized
    to 24x24 before ``NPDFeature(...).extract()`` is applied.  Rows are added
    to ``x`` (features) and ``y`` (labels) with ``np.vstack``.

    Args:
        path: Directory to read.  Only the exact strings
            ``'datasets/original/face/'`` (label ``[1]``) and
            ``'datasets/original/nonface/'`` (label ``[-1]``) get a label;
            any other value yields ``None``.
        x: Existing feature matrix to extend, or ``None``.
        y: Existing label matrix to extend, or ``None``.

    Returns:
        tuple: The extended ``(x, y)`` pair.
    """
    known_labels = {
        'datasets/original/face/': [1],
        'datasets/original/nonface/': [-1],
    }
    label = known_labels.get(path)

    for file in os.listdir(path):
        file_path = os.path.join(path, file)
        # Open the picture and convert it to a 24x24 greyscale image.
        grey = Image.open(file_path).convert('L').resize((24, 24))
        row = NPDFeature(np.asarray(grey)).extract()
        # Identity checks, because == on arrays is element-wise.
        starting_fresh = x is None and y is None
        if starting_fresh:
            x, y = np.array([row]), np.array([label])
            continue
        # Stack face and non-face pictures together (same as concatenating
        # along axis 0).
        x = np.vstack((x, row))
        y = np.vstack((y, label))
    return x, y
def convert_images(path, label):
    """Turn every image in ``path`` into an NPD feature row with a fixed label.

    Each file is converted to greyscale (mode ``"L"``), resized to 24x24 and
    passed through ``NPDFeature(...).extract()``.

    Args:
        path: Directory holding the images.
        label: Label assigned to every image in the directory.

    Returns:
        tuple: ``(X, y)`` where ``X`` is reshaped to ``(n_images, -1)`` and
        ``y`` to ``(n_images, 1)``.
    """
    imgs = os.listdir(path)
    rows = [
        np.array(
            NPDFeature(
                np.asarray(Image.open(path + "/" + img).convert("L").resize((24, 24)))
            ).extract()
        )
        for img in imgs
    ]
    tags = [label] * len(imgs)
    X = np.array(rows).reshape(len(imgs), -1)
    y = np.array(tags).reshape(len(imgs), 1)
    return X, y
def pre_process(dir, file):
    """Extract NPD features for every image in ``dir`` and pickle the matrix.

    Each image is resized to 24x24, converted to greyscale (mode ``"L"``) and
    passed through ``NPDFeature(...).extract()``.  The features are stored as
    rows of a ``(n, 165600)`` float matrix; the running shape is printed
    after each image.

    Args:
        dir: Directory containing the images.
        file: Path of the pickle file to write.

    Returns:
        None.  For an empty directory an empty 1-D array is pickled.

    Raises:
        ValueError: If a feature's size is not a multiple of 165600.
    """
    blocks = []
    n_rows = 0
    for filename in os.listdir(dir):
        img = Image.open(os.path.join(dir, filename))
        gray_img = np.array(img.resize((24, 24)).convert("L"))
        feature = NPDFeature(gray_img).extract()
        # Validate and shape each feature on its own, then join once at the
        # end: appending to and reshaping the growing matrix every iteration
        # re-copies all previous rows (quadratic in the number of images).
        block = np.asarray(feature, dtype=float).reshape(-1, 165600)
        blocks.append(block)
        n_rows += block.shape[0]
        print((n_rows, 165600))
    features = np.concatenate(blocks) if blocks else np.array([])
    with open(file, "wb") as f:
        pickle.dump(features, f)
def get_data():
    """Extract NPD features for all face and non-face images and cache them.

    Every file in ``./datasets/original/face/`` and then in
    ``./datasets/original/nonface/`` is read as a greyscale image, converted
    to a float array, resized to 24x24 with ``imresize`` and passed through
    ``NPDFeature(...).extract()``.  The resulting list (faces first) is
    pickled into ``feature.txt``.

    Returns:
        None.
    """
    # NOTE(review): ``imresize`` is presumably scipy.misc.imresize, which
    # newer SciPy releases no longer ship; confirm the pinned SciPy version.
    List = []
    for folder in ('./datasets/original/face/', './datasets/original/nonface/'):
        for name in os.listdir(folder):
            location = folder + name
            # Read the picture as greyscale, then scale it to 24x24.
            im = Image.open(location).convert('L')
            array = np.array(im).astype(float)
            array = imresize(array, (24, 24))
            # Extract the NPD features of the preprocessed image.
            List.append(NPDFeature(array).extract())
    # Save the preprocessed feature data with pickle.dump(); the context
    # manager closes the file even if dump() raises.
    with open('feature.txt', 'wb') as out_file:
        pickle.dump(List, out_file)
def mk_dataset():
    """Build and save the training and validation NPD feature sets.

    ``resize_image`` is called on the fixed face and non-face directories.
    Items 0-249 of each result form the training set and items 250-499 the
    validation set (faces first in both).  The first 500 entries of each set
    are passed through ``NPDFeature(...).extract()`` and the two feature
    arrays are stored via ``AdaBoostClassifier.save`` under ``'train'`` and
    ``'validate'``.
    """
    face_dir = "/Users/zoushuai/Python/lab/datasets/original/face"
    nonface_dir = "/Users/zoushuai/Python/lab/datasets/original/nonface"
    face_list = resize_image(face_dir)
    nonface_list = resize_image(nonface_dir)

    # The training set contains 250 faces and 250 non-faces.
    train_set = np.array(list(face_list[0:250]) + list(nonface_list[0:250]))
    # The validation set contains 250 faces and 250 non-faces.
    validate_set = np.array(list(face_list[250:500]) + list(nonface_list[250:500]))

    train_features = []
    validate_features = []
    for index in range(500):
        train_features.append(NPDFeature(train_set[index]).extract())
        validate_features.append(NPDFeature(validate_set[index]).extract())

    train_matrix = np.array(train_features)
    validate_matrix = np.array(validate_features)
    AdaBoostClassifier.save(train_matrix, 'train')
    AdaBoostClassifier.save(validate_matrix, 'validate')
def preprocess_data(dataset):
    """Convert each image to greyscale, resize to (24, 24) and extract NPD features.

    Args:
        dataset: Array-like with a ``shape`` attribute; ``dataset[i]`` is one
            image accepted by ``convert_to_grey``.

    Returns:
        numpy.ndarray: ``float32`` matrix of shape ``(len(dataset), 165600)``
        holding one feature vector per row.
    """
    feature_width = 165600  # NPD feature size used throughout this file
    sample_count = dataset.shape[0]
    result = np.empty((sample_count, feature_width), dtype=np.float32)
    for row in range(sample_count):
        grey = convert_to_grey(dataset[row])
        small = imresize(grey, (24, 24))
        result[row] = NPDFeature(small).extract()
    return result
def extract_feature(img_path, feature_path):
    """Extract NPD features for every image in ``img_path`` and dump them.

    Each file is converted to greyscale (mode ``'L'``), resized to 24x24 with
    Lanczos resampling and passed through ``NPDFeature(...).extract()``.  The
    list of features is written with ``joblib.dump``.

    Args:
        img_path: Directory containing the images.
        feature_path: Destination passed to ``joblib.dump``.

    Returns:
        list: The extracted features, in ``os.listdir`` order.

    Raises:
        AssertionError: If ``img_path`` does not exist.
    """
    # Raised explicitly (same exception type and message as before) so the
    # check is not stripped when Python runs with -O.
    if not os.path.exists(img_path):
        raise AssertionError("image path is not exist")

    fds = []  # feature list
    for childDir in os.listdir(img_path):
        f = os.path.join(img_path, childDir)
        # Image.LANCZOS is the same filter as the former Image.ANTIALIAS
        # alias, which Pillow 10 removed.
        pil_im = Image.open(f).convert('L').resize((24, 24), Image.LANCZOS)
        fds.append(NPDFeature(np.array(pil_im)).extract())
    joblib.dump(fds, feature_path)
    print("%d sample features are extracted and saved." % len(fds))
    return fds
def read_data():
    """Load 500 face and 500 non-face images and extract their NPD features.

    Image paths come from ``get_i_face_image_path(i)`` and
    ``get_i_nonface_image_path(i)`` for ``i`` in 0..499.  Every image is
    converted to greyscale (mode ``'L'``) and resized to 16x16.  Features are
    then extracted with ``NPDFeature(...).extract()`` under a ``tqdm``
    progress bar.

    Returns:
        tuple: ``(feature, y)`` - the list of feature vectors and the list of
        labels (1 for face, -1 for non-face), faces first.
    """
    X = []
    y = []
    groups = (
        (get_i_face_image_path, 1),
        (get_i_nonface_image_path, -1),
    )
    for path_of, tag in groups:
        for i in range(500):
            picture = Image.open(path_of(i)).convert('L').resize((16, 16))
            X.append(picture)
            y.append(tag)

    feature = []
    for i in tqdm(range(len(X)), desc='pre_train', leave=True):
        feature.append(NPDFeature(np.array(X[i])).extract())
    return feature, y
def load_data():
    """Load all face / non-face images and return labelled NPD feature rows.

    Every file in the two fixed directories is converted to greyscale (mode
    ``'L'``), resized to 24x24 with Lanczos resampling and passed through
    ``NPDFeature(...).extract()``.  The label (1 for face, -1 for non-face)
    is appended as the last element of each row.

    Returns:
        list: One 1-D array per image, faces first, each ending in its label.
    """
    face_path = u'C:/Users/47864/Desktop/Data/datasets/original/face'
    nonface_path = u'C:/Users/47864/Desktop/Data/datasets/original/nonface'
    # List both directories up front so a missing one fails before any
    # feature extraction starts.
    face_image = os.listdir(face_path)
    nonface_image = os.listdir(nonface_path)

    dataset = []
    groups = ((face_path, face_image, 1), (nonface_path, nonface_image, -1))
    for folder, names, label in groups:
        for name in names:
            img = Image.open(folder + '/' + name)
            img = img.convert('L')
            # Image.LANCZOS is the same filter as the former Image.ANTIALIAS
            # alias, which Pillow 10 removed.
            img = img.resize((24, 24), Image.LANCZOS)
            extractor = NPDFeature(np.array(img))
            dataset.append(np.concatenate((extractor.extract(), np.array([label]))))
    return dataset
def readimg():
    """Read 500 face and 500 non-face images and record features and labels.

    Files ``face_000.jpg`` .. ``face_499.jpg`` and ``nonface_000.jpg`` ..
    ``nonface_499.jpg`` are read with ``mpimg.imread``, converted with
    ``rgb2gray`` and passed through ``NPDFeature(...).extract()``.  Results
    are appended to the module-level ``feature`` and ``label`` lists
    (1 for faces, -1 for non-faces).
    """
    currentpath1 = './datasets/original/face/face_'
    currentpath2 = './datasets/original/nonface/nonface_'
    for prefix, tag in ((currentpath1, 1), (currentpath2, -1)):
        for i in range(0, 500):
            picture = mpimg.imread(prefix + "{:0>3d}".format(i) + ".jpg")
            grey = rgb2gray(picture)
            feature.append(NPDFeature(grey).extract())
            label.append(tag)
def preprocess_image():
    """Extract NPD features for the 500 face and 500 non-face images.

    Each ``face_NNN.jpg`` / ``nonface_NNN.jpg`` is converted to greyscale
    (mode ``'L'``), resized to 24x24 and passed through
    ``NPDFeature(...).extract()``.  Every feature is stored through
    ``save_data`` as ``datasets\\features\\faceNNN.pickle`` or
    ``datasets\\features\\nonfaceNNN.pickle``.

    The paths use backslash separators - presumably this targets Windows;
    confirm before running elsewhere.
    """
    # exist_ok lets the function be re-run; without it the second run fails
    # because the directory already exists.
    os.makedirs("datasets\\features", exist_ok=True)
    # Faces first, then non-faces.
    for kind in ("face", "nonface"):
        for i in range(500):
            # Source image address.
            fetch_address = "datasets\\original\\%s\\%s_%03d.jpg" % (kind, kind, i)
            # Transform to a 24x24 grey image.
            xi = Image.open(fetch_address).convert('L').resize((24, 24))
            xi = np.array(xi)
            # Extract the feature.
            feature = NPDFeature(xi).extract()
            # Save the feature in a pickle file.
            filename = "datasets\\features\\%s%03d.pickle" % (kind, i)
            save_data(filename, feature)
def Feature_extract():
    """Extract NPD features for 500 face / non-face pairs and save them.

    Faces land in the even rows and non-faces in the odd rows of a
    ``(1000, 165600)`` matrix.  Every image is converted to greyscale (mode
    ``'L'``) and resized to 24x24 first.  The matrix is stored through
    ``AdaBoostClassifier.save`` as ``'train.txt'``.
    """
    # Build the file lists.
    face_path = '.\\datasets\\original\\face\\face_%03d.jpg'
    nonface_path = '.\\datasets\\original\\nonface\\nonface_%03d.jpg'
    faces_path = [face_path % i for i in range(500)]
    nonfaces_path = [nonface_path % i for i in range(500)]

    train = np.zeros((1000, 165600))
    for i, pair in enumerate(zip(faces_path, nonfaces_path)):
        # offset 0 -> face row (even), offset 1 -> non-face row (odd)
        for offset, source in enumerate(pair):
            img = Image.open(source).convert('L').resize((24, 24))
            train[i * 2 + offset] = NPDFeature(np.array(img)).extract()
    AdaBoostClassifier.save(train, 'train.txt')
def getFeature(image):
    """Wrap ``image`` in an ``NPDFeature`` object and return that object.

    Note that the extractor itself is returned; ``extract()`` is not called
    here.
    """
    return NPDFeature(image)
nonfacepath = 'datasets/original/nonface' face = [] nonface = [] #for each image, convert it into grayscale presentation #scale to 24x24 #and extract its NPD feature facedir = os.listdir(facepath) for i in range(0, len(facedir)): if facedir[i].endswith('jpg'): path = os.path.join(facepath, facedir[i]) img = mpimg.imread(path) img = rgb2gray(img) img = misc.imresize(img, [24, 24]) face.append(NPDFeature(img).extract()) nonfacedir = os.listdir(nonfacepath) for i in range(0, len(nonfacedir)): if nonfacedir[i].endswith('jpg'): path = os.path.join(nonfacepath, nonfacedir[i]) img = mpimg.imread(path) img = rgb2gray(img) img = misc.imresize(img, [24, 24]) nonface.append(NPDFeature(img).extract()) X = np.array(face + nonface) y = np.ones([1000]) y[500:999] = -1 X_train, X_vali, y_train, y_vali = train_test_split(X,
def extra_img_features():
    """Append the NPD feature of every entry of the global ``img`` to ``img_features``.

    Both ``img`` and ``img_features`` are module-level names defined outside
    this function.
    """
    img_features.extend(
        NPDFeature(img[index]).extract() for index in range(len(img))
    )
# Count the samples up front so the progress lines can show "current / total".
n_face = len(os.listdir(pos_img))
n_noface = len(os.listdir(neg_img))

# Positive (face) samples.
pos_ds = []
n = 0
for n, file in enumerate(os.listdir(pos_img), 1):
    print("Face: %4d / %4d" % (n, n_face), end='')
    sys.stdout.write('\r')
    # Load the image file in grey mode.
    img = cv2.imread(os.path.join(pos_img, file), cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img, (24, 24))
    feature = NPDFeature(img).extract()
    pos_ds.append(feature)
pos_ds = np.asarray(pos_ds)
print(pos_ds.shape)

# Negative (non-face) samples.
neg_ds = []
n = 0
for n, file in enumerate(os.listdir(neg_img), 1):
    print("No Face: %4d / %4d" % (n, n_noface), end='')
    sys.stdout.write('\r')
    img = cv2.imread(os.path.join(neg_img, file), cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img, (24, 24))
    feature = NPDFeature(img).extract()
    neg_ds.append(feature)
neg_ds = np.asarray(neg_ds)
from PIL import Image
import pylab
import numpy as np
import os
from feature import NPDFeature
import pickle


def save(model, filename):
    """Pickle ``model`` into the file named ``filename`` (opened in binary write mode)."""
    with open(filename, "wb") as f:
        pickle.dump(model, f)


# NOTE(review): the names below say "pos" / "face", but the directory read is
# the non-face one and the output file is 'features_nonface'.
# The path must end with a separator because file names are appended directly.
pos_img_path = './datasets/original/nonface/'
#pos_img_path = '.\\datasets\\original\\nonface'
#pos_img_path = '/Users/limuyi/Downloads/study/大三上/机器学习/6班谭明奎全英班/机器学习实验/机器学习实验内容/3AdaBoost人脸分类/ML2017-lab-03/datasets/original/nonface'
features_face = []
pos_list_dir = os.listdir(pos_img_path)
count = 1  # 1-based progress counter used only for the printout
for filename in pos_list_dir:
    # Greyscale (mode 'L'), 24x24, then NPD feature extraction.
    im = Image.open(pos_img_path + filename).convert('L').resize((24, 24))
    im_array = np.array(im)
    im_feature = NPDFeature(im_array).extract()
    print('pos:', count)
    count += 1
    features_face.append(im_feature)
# Persist the list of feature vectors with pickle.
save(features_face, 'features_nonface')
def get_features(img):
    """Resize ``img`` with ``resize_image`` and return its NPD features.

    Args:
        img: Image accepted by ``resize_image``.

    Returns:
        The result of ``NPDFeature(...).extract()`` on the resized image.
    """
    resized = resize_image(img)
    extractor = NPDFeature(resized)
    return extractor.extract()
from sklearn.model_selection import train_test_split
from ensemble import AdaBoostClassifier
from feature import NPDFeature
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report

# NOTE(review): ``Image`` and ``np`` are used below but not imported in the
# lines visible here; confirm they are brought into scope elsewhere.
if __name__ == "__main__":
    # write your code here
    X = []
    y = []
    # (path template, label) pairs: faces are +1, non-faces are -1.
    sources = (
        ("C:\\Users\\Administrator\\Desktop\\ML2017-lab-03\\datasets\\original\\face\\face_%.3d.jpg", 1),
        ("C:\\Users\\Administrator\\Desktop\\ML2017-lab-03\\datasets\\original\\nonface\\nonface_%.3d.jpg", -1),
    )
    for template, label in sources:
        for i in range(0, 500):
            path = template % (i)
            # Greyscale (mode 'L'), 24x24, then NPD feature extraction.
            img = Image.open(path).convert('L').resize((24, 24))
            X.append(NPDFeature(np.array(img)).extract())
            y.append(label)
            print(i)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42)
def npd_feature():
    """Extract the NPD feature of every entry in the global ``imgs``.

    The index of each image is printed as progress and the extracted feature
    is appended to the module-level ``img_features`` list.
    """
    for position in range(len(imgs)):
        print(position)
        extractor = NPDFeature(imgs[position])
        img_features.append(extractor.extract())