import numpy as np
from scipy.stats import skew


def compute_color_moments(img, x_window_size=100, y_window_size=100):
    # Split the image into non-overlapping windows and compute the first three
    # color moments (mean, standard deviation, skewness) per window and channel.
    x_num_windows = img.shape[0] // x_window_size
    y_num_windows = img.shape[1] // y_window_size
    x_splitted = np.split(img, x_num_windows)
    y_splitted = [np.asarray(np.split(x, y_num_windows, axis=1)) for x in x_splitted]
    # shape: (x_num_windows, y_num_windows, x_window_size, y_window_size, channels)
    y_splitted_xsize_ysize = np.asarray(y_splitted)
    # channel order 0/1/2 = Y/U/V is used consistently for all three moments
    mean_Y = np.mean(y_splitted_xsize_ysize[:, :, :, :, 0], (2, 3), dtype=np.float32)
    mean_U = np.mean(y_splitted_xsize_ysize[:, :, :, :, 1], (2, 3), dtype=np.float32)
    mean_V = np.mean(y_splitted_xsize_ysize[:, :, :, :, 2], (2, 3), dtype=np.float32)
    std_Y = np.std(y_splitted_xsize_ysize[:, :, :, :, 0], (2, 3), dtype=np.float32)
    std_U = np.std(y_splitted_xsize_ysize[:, :, :, :, 1], (2, 3), dtype=np.float32)
    std_V = np.std(y_splitted_xsize_ysize[:, :, :, :, 2], (2, 3), dtype=np.float32)
    skew_Y = skew(y_splitted_xsize_ysize[:, :, :, :, 0].reshape(
        x_num_windows, y_num_windows, x_window_size * y_window_size), axis=2)
    skew_U = skew(y_splitted_xsize_ysize[:, :, :, :, 1].reshape(
        x_num_windows, y_num_windows, x_window_size * y_window_size), axis=2)
    skew_V = skew(y_splitted_xsize_ysize[:, :, :, :, 2].reshape(
        x_num_windows, y_num_windows, x_window_size * y_window_size), axis=2)
    all_features = np.dstack((mean_Y, std_Y, skew_Y,
                              mean_U, std_U, skew_U,
                              mean_V, std_V, skew_V))
    return all_features.reshape(x_num_windows * y_num_windows * all_features.shape[2])
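# A minimal usage sketch for compute_color_moments above. The synthetic `yuv`
# array, its 400x600 size, and the expected output length are illustrative
# assumptions, not part of the original snippet.
import numpy as np
from scipy.stats import skew

rng = np.random.default_rng(0)
yuv = rng.integers(0, 256, size=(400, 600, 3), dtype=np.uint8)

features = compute_color_moments(yuv, x_window_size=100, y_window_size=100)
# 4 x 6 windows, 9 moments per window (mean/std/skew for Y, U and V)
assert features.shape == (4 * 6 * 9,)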
def test_skewness(self):
    y = mstats.skew(self.testmathworks)
    assert_almost_equal(y, -0.29322304336607, 10)
    y = mstats.skew(self.testmathworks, bias=0)
    assert_almost_equal(y, -0.437111105023940, 10)
    y = mstats.skew(self.testcase)
    assert_almost_equal(y, 0.0, 10)
def test_skewness(self):
    # sum((testmathworks-mean(testmathworks,axis=0))**3,axis=0) /
    #     ((sqrt(var(testmathworks)*4/5))**3)/5
    y = mstats.skew(self.testmathworks)
    assert_almost_equal(y, -0.29322304336607, 10)
    y = mstats.skew(self.testmathworks, bias=0)
    assert_almost_equal(y, -0.437111105023940, 10)
    y = mstats.skew(self.testcase)
    assert_almost_equal(y, 0.0, 10)
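# A worked version of the formula in the comment above: the biased sample
# skewness is g1 = m3 / m2**1.5, and the bias-corrected value (bias=0) is
# G1 = g1 * sqrt(n*(n-1)) / (n-2). The data values below are an assumption
# (the MATLAB example the testmathworks fixture appears to be based on).
import numpy as np

x = np.array([1.165, 0.6268, 0.0751, 0.3516, -0.6965])
n = x.size
m2 = np.mean((x - x.mean()) ** 2)          # population variance (second central moment)
m3 = np.mean((x - x.mean()) ** 3)          # third central moment

g1 = m3 / m2 ** 1.5                        # biased skewness (scipy's default, bias=True)
G1 = g1 * np.sqrt(n * (n - 1)) / (n - 2)   # bias-corrected skewness (bias=0)

print(round(g1, 10), round(G1, 10))        # ~ -0.2932230434  -0.4371111050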
def skew(self, i=-1, **kwargs):
    if i >= 0:
        a = self.masked_data(i)
        r = np.asscalar(mst.skew(a, **kwargs).data)
    else:
        r = []
        for k in range(self.nmaps):
            a = self.masked_data(k)
            r.append(np.asscalar(mst.skew(a, **kwargs).data))
        r = np.array(r)
    return r
def skewness_(self):
    """
    Calculates the skewness of the image over the binarised segmentation.

    :return: skewness of the masked image values along axis 0
    """
    return mstats.skew(self.masked_img, 0)
def extractFeatV2(image_file):
    region_feat = extractFeatV1(image_file)
    image = imread(image_file, as_grey=True)
    image = image.copy()

    # global features
    idx = np.nonzero(255 - image)
    nonzero = image[idx]
    global_feat = [
        np.mean(nonzero),
        np.std(nonzero),
        kurtosis(nonzero),
        skew(nonzero),
        gini(nonzero, image_file),
    ]
    global_feat = np.asarray(global_feat, dtype='float32')

    # mahotas texture and shape features
    image2 = mh.imread(image_file, as_grey=True)
    haralick = mh.features.haralick(image2, ignore_zeros=False,
                                    preserve_haralick_bug=False,
                                    compute_14th_feature=False)
    lbp = mh.features.lbp(image2, radius=20, points=7, ignore_zeros=False)
    pftas = mh.features.pftas(image2)
    zernike_moments = mh.features.zernike_moments(image2, radius=20, degree=8)
    #surf_feat = surf.surf(image2)
    haralick = np.reshape(haralick, np.prod(haralick.shape))
    #surf_feat = np.reshape(surf_feat, np.prod(surf_feat.shape))
    #mh_feat = np.hstack((haralick, lbp, pftas, zernike_moments, surf_feat))
    mh_feat = np.hstack((haralick, lbp, pftas, zernike_moments))

    # concat all the features
    feat = np.hstack((global_feat, region_feat, mh_feat))
    return feat
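# The gini() helper used above (and in get_features further down) is not
# defined in these snippets. A minimal sketch of what it plausibly computes is
# given below: the Gini coefficient of the pixel values, a standard
# concentration measure for galaxy light profiles. The signature
# (values, filename) and the use of filename purely for error reporting are
# assumptions.
import numpy as np

def gini(values, filename=None):
    """Gini coefficient of a 1-D array of non-negative pixel values."""
    x = np.sort(np.asarray(values, dtype='float'))
    n = x.size
    total = x.sum()
    if n == 0 or total == 0:
        raise ValueError("cannot compute Gini coefficient for %s" % filename)
    # G = sum_i (2*i - n - 1) * x_(i) / (n * sum(x)), i = 1..n over sorted values
    i = np.arange(1, n + 1)
    return np.sum((2 * i - n - 1) * x) / (n * total)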
def basicstats_calc(data):
    """ Calculate stats """
    # Minimum, maximum, mean, std dev, median, median abs deviation
    # no samples, no samples in x dir, no samples in y dir, band
    stats = []
    for i in data:
        srow = []
        dtmp = i.data.compressed()
        srow.append(dtmp.min())
        srow.append(dtmp.max())
        srow.append(dtmp.mean())
        srow.append(dtmp.std())
        srow.append(np.median(dtmp))
        srow.append(np.median(abs(dtmp - srow[-1])))
        srow.append(i.data.size)
        srow.append(i.data.shape[1])
        srow.append(i.data.shape[0])
        srow.append(st.skew(dtmp))
        srow.append(st.kurtosis(dtmp))
        srow = np.array(srow).tolist()
        stats.append([i.dataid] + srow)

    bands = ['Data Column']
    cols = ['Band', 'Minimum', 'Maximum', 'Mean', 'Std Dev', 'Median',
            'Median Abs Dev', 'No Samples', 'No cols (samples in x-dir)',
            'No rows (samples in y-dir)', 'Skewness', 'Kurtosis']
    dattmp = [np.array(stats, dtype=object)]

    return bands, cols, dattmp
def CMForSingleImage(self, filename):
    # Computing feature descriptors for color moments for task 1.
    input_image = cv2.imread(filename)
    # converting the input image to yuv before computing image color moments
    yuv_image = cv2.cvtColor(input_image, cv2.COLOR_BGR2YUV)
    windows_set = get_100_by_100_windows(yuv_image)
    y_channel_descriptor = []
    u_channel_descriptor = []
    v_channel_descriptor = []
    for i in windows_set:
        y_channel = i[:, :, 0]
        u_channel = i[:, :, 1]
        v_channel = i[:, :, 2]
        # computing the mean (first moment) for each channel
        first_moment_y = np.mean(y_channel)
        first_moment_u = np.mean(u_channel)
        first_moment_v = np.mean(v_channel)
        # computing the standard deviation (second moment) for each channel
        second_moment_y = np.std(y_channel)
        second_moment_u = np.std(u_channel)
        second_moment_v = np.std(v_channel)
        # computing the skewness (third moment) for each channel
        third_moment_y = skew(y_channel, axis=None)
        third_moment_u = skew(u_channel, axis=None)
        third_moment_v = skew(v_channel, axis=None)
        # each moment value is rounded to three decimals for readability
        y_channel_descriptor.extend([
            np.around(first_moment_y, 3),
            np.around(second_moment_y, 3),
            np.around(third_moment_y, 3)
        ])
        u_channel_descriptor.extend([
            np.around(first_moment_u, 3),
            np.around(second_moment_u, 3),
            np.around(third_moment_u, 3)
        ])
        v_channel_descriptor.extend([
            np.around(first_moment_v, 3),
            np.around(second_moment_v, 3),
            np.around(third_moment_v, 3)
        ])
    return np.asarray(y_channel_descriptor + u_channel_descriptor + v_channel_descriptor)
def color_moments(input_image, filename):
    # converting the input image to yuv before computing image color moments
    yuv_image = cv2.cvtColor(input_image, cv2.COLOR_BGR2YUV)
    windows_set = get_100_by_100_windows(yuv_image)
    y_channel_descriptor = []
    u_channel_descriptor = []
    v_channel_descriptor = []
    for i in windows_set:
        y_channel = i[:, :, 0]
        u_channel = i[:, :, 1]
        v_channel = i[:, :, 2]
        # computing the mean (first moment) for each channel
        first_moment_y = np.mean(y_channel)
        first_moment_u = np.mean(u_channel)
        first_moment_v = np.mean(v_channel)
        # computing the standard deviation (second moment) for each channel
        second_moment_y = np.std(y_channel)
        second_moment_u = np.std(u_channel)
        second_moment_v = np.std(v_channel)
        # computing the skewness (third moment) for each channel
        third_moment_y = skew(y_channel, axis=None)
        third_moment_u = skew(u_channel, axis=None)
        third_moment_v = skew(v_channel, axis=None)
        # each moment value is rounded to three decimals for readability
        y_channel_descriptor.extend([
            round(first_moment_y, 3),
            round(second_moment_y, 3),
            round(third_moment_y, 3)
        ])
        u_channel_descriptor.extend([
            round(first_moment_u, 3),
            round(second_moment_u, 3),
            round(third_moment_u, 3)
        ])
        v_channel_descriptor.extend([
            round(first_moment_v, 3),
            round(second_moment_v, 3),
            round(third_moment_v, 3)
        ])
    return [filename] + y_channel_descriptor + u_channel_descriptor + v_channel_descriptor
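# get_100_by_100_windows() is referenced by both functions above but is not
# defined in these snippets. A minimal sketch of what it plausibly does is
# shown below: tile the image into non-overlapping 100x100 windows, dropping
# any partial windows at the right/bottom edges. The name, signature and
# behaviour are assumptions based on how the result is used.
import numpy as np

def get_100_by_100_windows(image, window_size=100):
    """Return non-overlapping window_size x window_size blocks of `image`."""
    rows, cols = image.shape[:2]
    windows = []
    for r in range(0, rows - window_size + 1, window_size):
        for c in range(0, cols - window_size + 1, window_size):
            windows.append(image[r:r + window_size, c:c + window_size])
    return windows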
def __init__(self, stats=None):
    self._statsfn = {
        'mean': lambda x_: x_.mean(),
        'std': lambda x_: x_.std(),
        'kurtosis': lambda x_: kurtosis(x_, axis=None, fisher=True),
        'skewness': lambda x_: skew(x_, axis=None, bias=True),
    }
    if stats is None:
        self.stats = ['mean', 'std']
    else:
        self.stats = [s.lower() for s in stats]
    for s in self.stats:
        if s not in self._statsfn:
            raise ValueError('Unknown summary statistic')
def basicstats_calc(data):
    """
    Calculate statistics.

    Parameters
    ----------
    data : PyGMI Data
        PyGMI raster dataset.

    Returns
    -------
    bands : list
        Band list, currently only 'Data Column'.
    cols : list
        Columns for the table.
    dattmp : list
        List of arrays containing statistics.
    """
    # Minimum, maximum, mean, std dev, median, median abs deviation
    # no samples, no samples in x dir, no samples in y dir, band
    stats = []
    for i in data:
        srow = []
        dtmp = i.data.compressed()
        srow.append(dtmp.min())
        srow.append(dtmp.max())
        srow.append(dtmp.mean())
        srow.append(dtmp.std())
        srow.append(np.median(dtmp))
        srow.append(np.median(abs(dtmp - srow[-1])))
        srow.append(i.data.size)
        srow.append(i.data.shape[1])
        srow.append(i.data.shape[0])
        srow.append(st.skew(dtmp))
        srow.append(st.kurtosis(dtmp))
        srow = np.array(srow).tolist()
        stats.append([i.dataid] + srow)

    bands = ['Data Column']
    cols = ['Band', 'Minimum', 'Maximum', 'Mean', 'Std Dev', 'Median',
            'Median Abs Dev', 'No Samples', 'No cols (samples in x-dir)',
            'No rows (samples in y-dir)', 'Skewness', 'Kurtosis']
    dattmp = [np.array(stats, dtype=object)]

    return bands, cols, dattmp
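# A small self-contained illustration of the per-band row computed above,
# using a toy masked array in place of a PyGMI raster band. The data values
# are made up; the point is that .compressed() drops masked cells before the
# ordinary scipy.stats skew/kurtosis calls are applied.
import numpy as np
import numpy.ma as ma
from scipy import stats as st

band = ma.masked_invalid(np.array([[1.0, 2.0, np.nan],
                                   [4.0, 8.0, 16.0]]))
dtmp = band.compressed()                       # 1-D array of unmasked values
med = np.median(dtmp)
row = [dtmp.min(), dtmp.max(), dtmp.mean(), dtmp.std(),
       med, np.median(abs(dtmp - med)),        # median absolute deviation
       band.size, band.shape[1], band.shape[0],
       st.skew(dtmp), st.kurtosis(dtmp)]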
def get_features(f, dsift_learn=False, debug=False, tiny_img=False,
                 tiny_grad=False, image_statistics=False,
                 color_histogram=False, orientation_histogram=False,
                 transform=0):
    img_color = cv2.imread(f).astype('float')
    if transform == 1:
        img_color = img_color[::-1, :].copy()
    if transform == 2:
        img_color = img_color[:, ::-1].copy()
    original_img = img_color.copy()

    # denoise
    img_color = cv2.GaussianBlur(img_color, (0, 0), 2.0)
    img = cv2.cvtColor(img_color.astype('uint8'), cv2.COLOR_BGR2GRAY).astype('float')

    # TEDYouth 2013 - Filmed November 2013 - 6:43
    # Henry Lin: What we can learn from galaxies far, far away
    # "I subtract away all of the starlight"
    #t = img[np.nonzero(img)].mean()
    #t = img.mean()
    #t = np.max(img_color[np.nonzero(img)].mean(axis=0))
    t = np.max(np.median(img_color[np.nonzero(img)], axis=0))
    img_color[img_color < t] = t
    img_color = rescale(img_color).astype('uint8')

    if debug:
        cv2.imwrite("start.png", img_color)

    img = cv2.cvtColor(img_color.astype('uint8'), cv2.COLOR_BGR2GRAY)
    saturated = saturate(img, q1=0.75)
    labels, nb_maxima = nd.label(saturated == saturated.max(), output='int64')
    if debug:
        cv2.imwrite("adaptive_labels.png", random_colors(labels))
    thresholded = largest_connected_component(img, labels, nb_maxima)
    center = nd.center_of_mass(img, thresholded)

    # features from original image
    original_size = img_color.shape[0] * img_color.shape[1]
    original_shape = img_color.shape[:2]
    idx = np.nonzero(original_img.mean(axis=2))
    nonzero = original_img[idx]
    mean = nonzero.mean(axis=0)
    std = nonzero.std(axis=0)
    mean_center = original_img[thresholded > 0].mean(axis=0)
    features = np.array([mean_center[0], mean_center[1], mean_center[2],
                         mean[0], mean[1], mean[2],
                         std[0], std[1], std[2],
                         kurtosis(nonzero[:, 0]),
                         kurtosis(nonzero[:, 1]),
                         kurtosis(nonzero[:, 2]),
                         skew(nonzero[:, 0]),
                         skew(nonzero[:, 1]),
                         skew(nonzero[:, 2]),
                         gini(nonzero[:, 0], f),
                         gini(nonzero[:, 1], f),
                         gini(nonzero[:, 1], f)], dtype='float')
    # features = np.empty( 0, dtype='float' )

    img_color = recenter(img_color, (center[1], center[0]),
                         interpolation=cv2.INTER_LINEAR)
    img = cv2.cvtColor(img_color.astype('uint8'), cv2.COLOR_BGR2GRAY)

    # offset from center
    center_offset = np.linalg.norm(
        np.array([center[0], center[1]], dtype='float')
        - np.array(original_shape, dtype='float') / 2)

    # adaptive thresholding
    thresholded = cv2.adaptiveThreshold(
        img,
        maxValue=255,
        adaptiveMethod=cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        thresholdType=cv2.THRESH_BINARY,
        blockSize=301,
        C=0)

    # select largest connected component
    # which is closest to the center
    # use a weighting size/distance**2
    labels, nb_labels = nd.label(thresholded, output='int64')
    if debug:
        cv2.imwrite("debug.png", random_colors(labels))
    thresholded = largest_connected_component(img, labels, nb_labels)

    if debug:
        cv2.imwrite("debug_tresholded.png", thresholded)

    # ellipse fitting
    # cv2.fitEllipse returns a tuple.
    # Tuples are immutable, they can't have their values changed.
    # we have the choice between the least-square minimization implemented in
    # OpenCV or a plain PCA.
    XY = np.transpose(np.nonzero(thresholded))[:, ::-1]
    #((cx,cy),(w,h),angle) = cv2.fitEllipse(XY)
    # eccentricity = math.sqrt(1-(np.min((w,h))/np.max((w,h)))**2)
    ((cx, cy), (w, h), angle) = fit_ellipse(XY, f=f)
    #eccentricity = math.sqrt(1-(h/w)**2)
    eccentricity = h / w

    if w == 0 or h == 0:
        print "bad ellipse:", ((cx, cy), (w, h), angle), f
        exit(1)

    if debug:
        print ((cx, cy), (w, h), angle)

    # rotate image
    img_color = rotate(img_color, (cx, cy), angle, interpolation=cv2.INTER_LINEAR)
    thresholded = rotate(thresholded, (cx, cy), angle, interpolation=cv2.INTER_NEAREST)
    cx = float(img.shape[1]) / 2
    cy = float(img.shape[0]) / 2

    if debug:
        cv2.imwrite("rotated.png", img_color)

    # crop
    img_color = img_color[
        max(0, int(cy - h / 2)):min(img.shape[0], int(cy + h / 2 + 1)),
        max(0, int(cx - w / 2)):min(img.shape[1], int(cx + w / 2 + 1))]
    thresholded = thresholded[
        max(0, int(cy - h / 2)):min(img.shape[0], int(cy + h / 2 + 1)),
        max(0, int(cx - w / 2)):min(img.shape[1], int(cx + w / 2 + 1))]

    if debug:
        cv2.imwrite("cropped_thresholded.png", thresholded)

    color_hist = get_color_histogram(img_color)
    if color_histogram:
        return color_hist

    if orientation_histogram:
        return get_orientation_histogram(img_color)

    img = cv2.cvtColor(img_color, cv2.COLOR_BGR2GRAY).astype('float')
    img = rescale(img)
    saturated = saturate(img, q1=0.95)
    labels, nb_maxima = nd.label(saturated == saturated.max(), output='int64')
    if debug:
        cv2.imwrite("labels.png", random_colors(labels))

    if img_color.shape[0] == 0 or img_color.shape[1] == 0:
        print "bad size", img_color.shape, f
        exit(1)

    img_thumbnail = cv2.resize(img.astype('uint8'), (64, 64),
                               interpolation=cv2.INTER_AREA)
    if tiny_img:
        return img_thumbnail.flatten()
    if debug:
        cv2.imwrite("tiny_img.png", img_thumbnail)

    grad_color = nd.gaussian_gradient_magnitude(img_color, 1.0)
    grad_img = rescale(img_color[:, :, 0] + img_color[:, :, 2])
    if debug:
        cv2.imwrite("channel.png", grad_img)
    grad_thumbnail = cv2.resize(grad_img, (64, 64), interpolation=cv2.INTER_AREA)
    if debug:
        cv2.imwrite("tiny_grad.png", grad_thumbnail)
    if tiny_grad == True:
        # return np.append( [eccentricity*100],
        #                   grad_thumbnail.flatten() )
        return grad_thumbnail.flatten()

    if debug:
        cv2.imwrite("cropped.png", img_color)

    # chirality
    # http://en.wikipedia.org/wiki/Chirality
    # An object is chiral if it is not identical to its mirror image.
    #mirror_spiral_img = labels[:,::-1]
    mirror_grad_img = grad_img[:, ::-1]
    chirality = np.sum(np.sqrt((grad_img - mirror_grad_img)**2)) / (grad_img.sum())

    # compare size of the thresholded area to the size of the fitted ellipse
    # and to the size of the whole image
    size_to_ellipse = float(thresholded.sum()) / (math.pi * w * h / 4)
    box_to_image = float(img.shape[0] * img.shape[1]) / original_size

    if size_to_ellipse < 0.1:
        print "SIZE_TO_ELLIPSE debug", f
    if box_to_image > 0.5:
        print "BOX_TO_IMAGE debug", f

    # color features
    # central pixel and mean channel values
    idx = np.nonzero(thresholded)
    mean = img_color[idx].mean(axis=0)
    grey_mean = img[idx].mean()
    img_center = img[img.shape[0]/2 - img.shape[0]/4:img.shape[0]/2 + img.shape[0]/4,
                     img.shape[1]/2 - img.shape[1]/4:img.shape[1]/2 + img.shape[1]/4]
    img_center_color = img_color[img.shape[0]/2 - img.shape[0]/4:img.shape[0]/2 + img.shape[0]/4,
                                 img.shape[1]/2 - img.shape[1]/4:img.shape[1]/2 + img.shape[1]/4]
    center_mean = img_center[np.nonzero(img_center)].mean()
    center_mean_color = img_center_color[np.nonzero(img_center)].mean(axis=0)
    color_features = [img_color[img_color.shape[0]/2, img_color.shape[1]/2, 0],
                      img_color[img_color.shape[0]/2, img_color.shape[1]/2, 1],
                      img_color[img_color.shape[0]/2, img_color.shape[1]/2, 2],
                      mean[0], mean[1], mean[2],
                      center_mean_color[0], center_mean_color[1], center_mean_color[2],
                      float(img[img.shape[0]/2, img.shape[1]/2]) / grey_mean,
                      float(center_mean) / grey_mean]

    entropy = get_entropy(img.astype('uint8'))
    light_radius = get_light_radius(img)

    features = np.append(features, [eccentricity,
                                    w, h,
                                    thresholded.sum(),
                                    entropy,
                                    chirality,
                                    size_to_ellipse,
                                    box_to_image,
                                    center_offset,
                                    light_radius[0],
                                    light_radius[1],
                                    nb_maxima,
                                    kurtosis(img_color[idx][:, 0]),
                                    kurtosis(img_color[idx][:, 1]),
                                    kurtosis(img_color[idx][:, 2]),
                                    skew(img_color[idx][:, 0]),
                                    skew(img_color[idx][:, 1]),
                                    skew(img_color[idx][:, 2]),
                                    gini(img_color[idx][:, 0], f),
                                    gini(img_color[idx][:, 1], f),
                                    gini(img_color[idx][:, 2], f),
                                    kurtosis(img[idx]),
                                    skew(img[idx]),
                                    gini(img[idx], f)])
    features = np.append(features, color_features)

    # Hu moments from segmentation
    m = cv2.moments(thresholded.astype('uint8'), binaryImage=True)
    hu1 = cv2.HuMoments(m)

    # Hu moments from taking pixel intensities into account
    m = cv2.moments(img, binaryImage=False)
    hu2 = cv2.HuMoments(m)

    m = cv2.moments(grad_img, binaryImage=False)
    hu3 = cv2.HuMoments(m)

    hu = np.append(hu1.flatten(), hu2.flatten())
    hu = np.append(hu.flatten(), hu3.flatten())
    features = np.append(features, hu.flatten())
    features = np.append(features, hu)

    if image_statistics:
        return features

    # features = np.empty( 0, dtype='float' )
    average_prediction = np.zeros(37, dtype='float')

    # PCA features
    if not debug:
        image_statistics = features
        for Class in xrange(1, 12):
            scaler = joblib.load(get_data_folder() + "/scaler_statistics_Class" +
                                 str(Class) + "_")
            clf = joblib.load(get_data_folder() + "/svm_statistics_Class" +
                              str(Class) + "_")
            features = np.append(features,
                                 clf.predict_proba(scaler.transform(image_statistics)))
            average_prediction += features[-37:]

        grad_thumbnail = grad_thumbnail.flatten()
        for Class in xrange(1, 12):
            pca = joblib.load(get_data_folder() + "/pca_Class" +
                              str(Class) + "_")
            thumbnail_pca = pca.transform(grad_thumbnail)
            clf = joblib.load(get_data_folder() + "/pca_SVM_Class" +
                              str(Class) + "_")
            features = np.append(features,
                                 clf.predict_proba(thumbnail_pca).flatten())
            average_prediction += features[-37:]

        img_thumbnail = img_thumbnail.flatten()
        for Class in xrange(1, 12):
            pca = joblib.load(get_data_folder() + "/pca_img_Class" +
                              str(Class) + "_")
            thumbnail_pca = pca.transform(img_thumbnail)
            clf = joblib.load(get_data_folder() + "/pca_img_SVM_Class" +
                              str(Class) + "_")
            features = np.append(features,
                                 clf.predict_proba(thumbnail_pca).flatten())
            average_prediction += features[-37:]

        for Class in xrange(1, 12):
            pca = joblib.load(get_data_folder() + "/pca_color_Class" +
                              str(Class) + "_")
            hist_pca = pca.transform(color_hist)
            clf = joblib.load(get_data_folder() + "/pca_color_SVM_Class" +
                              str(Class) + "_")
            features = np.append(features,
                                 clf.predict_proba(hist_pca).flatten())
            average_prediction += features[-37:]

    average_prediction /= 4
    features = np.append(features, average_prediction)

    return features
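# A small standalone illustration of the chirality measure used in
# get_features above: the normalised absolute difference between an image and
# its left-right mirror (np.sqrt((a - b)**2) is just |a - b|). The toy arrays
# below are made up; a perfectly symmetric image scores 0, an asymmetric one
# scores greater than 0.
import numpy as np

def chirality(grad_img):
    mirror = grad_img[:, ::-1]
    return np.sum(np.abs(grad_img - mirror)) / grad_img.sum()

symmetric = np.array([[1.0, 2.0, 1.0],
                      [3.0, 5.0, 3.0]])
asymmetric = np.array([[1.0, 2.0, 9.0],
                       [3.0, 5.0, 3.0]])
print(chirality(symmetric))    # 0.0
print(chirality(asymmetric))   # > 0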
def calc_vertical_profile(self, field, height_axis=1,
                          points_thresh_fraction=0.5,
                          start_time=None, end_time=None):
    '''
    Calculate vertical profile statistics.

    Parameters
    ----------
    field : str
        Name of the field to use in the profile calculation.
    height_axis : int
        The axis of the data that corresponds to height.
    points_thresh_fraction : float
        Minimum fraction of finite points required at a level for
        statistics to be computed; otherwise NaN is returned.
    start_time : str
        UTC time to use as start time for subsetting in datetime format.
        (e.g. 2014-08-20 12:30:00)
    end_time : str
        UTC time to use as an end time for subsetting in datetime format.
        (e.g. 2014-08-20 16:30:00)
    '''
    # Snag the data from requested field
    xarr = self._get_fields_variable_dict_data_time_subset(
        field, start_time, end_time)

    # Reshape the array so that height axis is first dimension
    if height_axis != 0:
        ht = np.rollaxis(self.heightfield['data'], height_axis)
        xarr = np.rollaxis(xarr, height_axis)
    else:
        ht = self.heightfield['data'].copy()

    # Create arrays to fill
    nh = xarr.shape[0]
    mean = np.ma.empty((nh))
    median = np.ma.empty((nh))
    std_dev = np.ma.empty((nh))
    min = np.ma.empty((nh))
    max = np.ma.empty((nh))
    var = np.ma.empty((nh))
    skew = np.ma.empty((nh))

    for nn in range(nh):
        # Check data for good (finite) points
        condition = np.isfinite(xarr[nn, ...].ravel())
        # Keep only the good data, sorted from low to high values
        data = np.sort(xarr[nn, ...].ravel()[condition])
        # Calculate the fraction of points out of possible total
        ptsfrac = float(len(data)) / float(len(xarr[nn, ...].ravel()))

        if ptsfrac > points_thresh_fraction:
            mean[nn] = np.ma.mean(data)
            median[nn] = np.ma.median(data)
            std_dev[nn] = np.ma.std(data)
            min[nn] = np.ma.min(data)
            max[nn] = np.ma.max(data)
            var[nn] = np.ma.var(data)
            skew[nn] = mstats.skew(data)
        else:
            mean[nn] = np.nan
            median[nn] = np.nan
            std_dev[nn] = np.nan
            min[nn] = np.nan
            max[nn] = np.nan
            var[nn] = np.nan
            skew[nn] = np.nan

    vp_dict = {'field': field,
               'vp_mean': mean,
               'vp_median': median,
               'vp_std_dev': std_dev,
               'vp_min': min,
               'vp_max': max,
               'vp_variance': var,
               'vp_skew': skew,
               'yaxis': self.height['data'][:]}
    return vp_dict
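# calc_vertical_profile relies on scipy.stats.mstats.skew rather than
# scipy.stats.skew. A minimal illustration of the difference: the mstats
# version respects masked entries, so masked-out samples do not contaminate
# the profile statistics. The data values below are made up.
import numpy as np
import numpy.ma as ma
from scipy.stats import mstats, skew

level = ma.masked_values([0.5, 1.0, 2.0, 4.0, -9999.0], -9999.0)
print(mstats.skew(level))            # skewness of the four unmasked values
print(skew(level.filled(np.nan)))    # nan: the fill value would poison plain skew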
def skewness_(self):
    return mstats.skew(self.masked_img, 0)