from sklearn.preprocessing import StandardScaler

# Directory that holds the dataset; every ``*.jpg`` directly inside it is used.
basedir = 'SimpleImageDataset/'

# Per-image accumulators, filled in lockstep by the loop below.
haralick, labels, chistogram = [], [], []

print(
    'This script will test (with cross-validation) classification of the simple 3 class dataset'
)
print('Computing features...')

# Use glob to get all the images
images = glob('{}/*.jpg'.format(basedir))

# Iterate over a sorted copy so the processing order does not depend on the
# order in which glob happens to return the files. ``images`` itself is left
# untouched.
for fname in sorted(images):
    imc = mh.imread(fname)
    haralick.append(texture(mh.colors.rgb2gray(imc)))
    chistogram.append(chist(imc))
    # The label is the path with its last len('xx.jpg') characters removed,
    # i.e. without the two-character index and the extension.
    labels.append(fname[:-len('xx.jpg')])

print('Finished computing features.')
haralick = np.array(haralick)
chistogram = np.array(chistogram)
labels = np.array(labels)

# Colour-histogram columns first, texture columns second.
haralick_plus_chist = np.hstack([chistogram, haralick])

clf = Pipeline([('preproc', StandardScaler()),
                ('classifier', LogisticRegression())])

# ``sklearn.cross_validation`` was removed in scikit-learn 0.20. Keep using it
# where it still exists (behaviour unchanged there); otherwise fall back to
# ``sklearn.model_selection`` bound under the same name, so that later
# ``cross_validation.<...>`` references still resolve.
try:
    from sklearn import cross_validation
except ImportError:
    from sklearn import model_selection as cross_validation
    # The model_selection splitter takes no size argument; the number of
    # samples is derived from the data when the splitter is used.
    cv = cross_validation.LeaveOneOut()
else:
    cv = cross_validation.LeaveOneOut(len(images))
def chist_file(fname):
    """Read the image stored at ``fname`` and return ``chist`` applied to it."""
    # Imported at call time, as in the original, rather than at module import.
    from features import chist

    return chist(mh.imread(fname))
def features_for(im):
    """Build one feature vector for the image file named by ``im``.

    The result is the flattened ``mh.features.haralick`` output computed on
    the ``uint8`` grey-scale version of the image, followed by ``chist`` of
    the image as loaded.
    """
    # Imported at call time, as in the original, rather than at module import.
    from features import chist

    pixels = mh.imread(im)
    grey = mh.colors.rgb2grey(pixels).astype(np.uint8)

    texture_part = mh.features.haralick(grey).ravel()
    colour_part = chist(pixels)
    return np.concatenate([texture_part, colour_part])
# Per-image feature accumulators: one texture entry and one colour-histogram
# entry are appended for every image processed below.
haralicks = []
chists = []

print('Computing features...')
# Use glob to get all the images
images = glob('{}/*.jpg'.format(basedir))
# We sort the images to ensure that they are always processed in the same order
# Otherwise, this would introduce some variation just based on the random
# ordering that the filesystem uses
images.sort()

for fname in images:
    imc = mh.imread(fname)
    # Drop the first and last 200 entries along each of the first two axes,
    # so features are computed on the central region only.
    imc = imc[200:-200, 200:-200]
    haralicks.append(texture(mh.colors.rgb2grey(imc)))
    chists.append(chist(imc))

haralicks = np.array(haralicks)
chists = np.array(chists)
# Colour-histogram columns first, texture columns second.
features = np.hstack([chists, haralicks])

print('Computing neighbors...')
# Rescale the combined features before measuring distances.
# NOTE(review): presumably sklearn.preprocessing.StandardScaler - confirm import.
sc = StandardScaler()
features = sc.fit_transform(features)
# Compute the distance matrix
# NOTE(review): ``distance`` is presumably scipy.spatial.distance, in which
# case this is the square matrix of pairwise distances - confirm import.
dists = distance.squareform(distance.pdist(features))

print('Plotting...')
fig, axes = plt.subplots(2, 9, figsize=(16, 8))

# Remove ticks from all subplots
for ax in axes.flat:
.format(k, v.mean()))

# Wrap the NumPy constructors in TaskGenerator so they are invoked through it
# instead of being called directly.
# NOTE(review): presumably jug's TaskGenerator (deferred tasks) - confirm import.
to_array = TaskGenerator(np.array)
hstack = TaskGenerator(np.hstack)

# One entry per image is appended to each of these lists, in the same order.
haralicks = []
chists = []
lbps = []
labels = []

# Use glob to get all the images
images = glob('{0}/*.jpg'.format(basedir))
# Iterate over a sorted copy so the order does not depend on what glob returns.
for fname in sorted(images):
    # The helpers receive the file name here, not a loaded image.
    haralicks.append(compute_texture(fname))
    chists.append(chist(fname))
    lbps.append(compute_lbp(fname))
    # The class is encoded in the filename as xxxx00.jpg
    labels.append(fname[:-len('00.jpg')])

haralicks = to_array(haralicks)
chists = to_array(chists)
lbps = to_array(lbps)
labels = to_array(labels)

# Score each feature family on its own ...
scores_base = accuracy(haralicks, labels)
scores_chist = accuracy(chists, labels)
scores_lbps = accuracy(lbps, labels)

# ... and the colour-histogram + texture combination.
combined = hstack([chists, haralicks])
scores_combined = accuracy(combined, labels)
# Per-image accumulators, kept in lockstep by the loop below.
haralicks, labels, chists = [], [], []

print('This script will test (with cross-validation) classification of the simple 3 class dataset')
print('Computing features...')

# Collect every JPEG found directly under basedir.
images = glob('{}/*.jpg'.format(basedir))

# Files are visited in sorted order so that every run processes them in the
# same sequence, whatever order the filesystem reports them in.
for fname in sorted(images):
    imc = mh.imread(fname)
    haralicks.append(
        texture(mh.colors.rgb2grey(imc))
    )
    chists.append(
        chist(imc)
    )
    # Names look like building00.jpg, scene23.jpg...; cutting the trailing
    # len('xx.jpg') characters leaves the class part of the path.
    labels.append(
        fname[:-len('xx.jpg')]
    )

print('Finished computing features.')

# Turn the accumulated Python lists into NumPy arrays.
haralicks, labels, chists = (
    np.array(haralicks),
    np.array(labels),
    np.array(chists),
)

# Colour-histogram columns first, texture columns second.
haralick_plus_chists = np.hstack([chists, haralicks])

# We use Logistic Regression because it achieves high accuracy on small(ish) datasets
# Feel free to experiment with other classifiers