def entropy_(block):
    """Calculate the entropy of each colour channel of the block."""
    a = entropy(block[:, :, 0])
    b = entropy(block[:, :, 1])
    c = entropy(block[:, :, 2])
    return [a, b, c]
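# Usage sketch (hypothetical data, not part of the original snippet). It
# assumes `entropy` above is sklearn.metrics.cluster.entropy, which treats the
# flattened pixel values of each channel as categorical labels.
import numpy as np
from sklearn.metrics.cluster import entropy

rng = np.random.RandomState(0)
block = rng.randint(0, 256, size=(16, 16, 3))   # dummy RGB block
print(entropy_(block))                          # one entropy value per channel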
def test_v_measure_and_mutual_information(seed=36):
    # Check relation between v_measure, entropy and mutual information
    for i in np.logspace(1, 4, 4).astype(int):
        random_state = np.random.RandomState(seed)
        labels_a, labels_b = random_state.random_integers(0, 10, i),\
            random_state.random_integers(0, 10, i)
        assert_almost_equal(v_measure_score(labels_a, labels_b),
                            2.0 * mutual_info_score(labels_a, labels_b) /
                            (entropy(labels_a) + entropy(labels_b)), 0)
def test_v_measure_and_mutual_information(seed=36):
    # Check relation between v_measure, entropy and mutual information
    for i in np.logspace(1, 4, 4).astype(int):
        random_state = np.random.RandomState(seed)
        labels_a, labels_b = (random_state.randint(0, 10, i),
                              random_state.randint(0, 10, i))
        assert_almost_equal(v_measure_score(labels_a, labels_b),
                            2.0 * mutual_info_score(labels_a, labels_b) /
                            (entropy(labels_a) + entropy(labels_b)), 0)
def test_v_measure_and_mutual_information(seed=36): """Check relation between v_measure, entropy and mutual information""" for i in np.logspace(1, 4, 4): random_state = np.random.RandomState(seed) labels_a, labels_b = random_state.random_integers(0, 10, i),\ random_state.random_integers(0, 10, i) assert_almost_equal(v_measure_score(labels_a, labels_b), 2.0 * mutual_info_score(labels_a, labels_b) / (entropy(labels_a) + entropy(labels_b)), 0)
def test_entropy():
    h = entropy([0, 0, 42.], log_base='e')
    assert_almost_equal(h, 0.6365141, 5)
    h = entropy([0, 0, 42.], log_base=2)
    assert_almost_equal(h, 0.9182958, 5)
    h = entropy([], log_base='e')
    assert_almost_equal(h, 1)
    h = entropy([], log_base=2)
    assert_almost_equal(h, 1)
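# Cross-check of the constants above (a sketch in plain numpy, independent of
# the `log_base` keyword, which appears to come from a modified entropy()).
# The labels [0, 0, 42.] have class proportions p = (2/3, 1/3).
import numpy as np

p = np.array([2.0, 1.0]) / 3.0
h_nats = -np.sum(p * np.log(p))     # ~0.6365141 (natural log)
h_bits = h_nats / np.log(2)         # ~0.9182958 (log base 2)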
def test_v_measure_and_mutual_information(seed=36):
    # Check relation between v_measure, entropy and mutual information
    for i in np.logspace(1, 4, 4).astype(int):
        random_state = np.random.RandomState(seed)
        labels_a, labels_b = (random_state.randint(0, 10, i),
                              random_state.randint(0, 10, i))
        v_m = v_measure_score(labels_a, labels_b)
        mi = mutual_info_score(labels_a, labels_b, log_base='e')
        h_a = entropy(labels_a, log_base='e')
        h_b = entropy(labels_b, log_base='e')
        assert_almost_equal(v_m, 2.0 * mi / (h_a + h_b), 0)
def _evaluate_VI(index_pair, top_partitions):
    """Worker for VI evaluations."""
    MI = mutual_info_score(
        top_partitions[index_pair[0]],
        top_partitions[index_pair[1]],
    )
    Ex = entropy(top_partitions[index_pair[0]])
    Ey = entropy(top_partitions[index_pair[1]])
    JE = Ex + Ey - MI
    if abs(JE) < 1e-8:
        return 0.0
    return (JE - MI) / JE
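# Usage sketch (hypothetical data, not from the original source). It assumes
# `top_partitions` is a list of label sequences; the worker returns the
# normalized variation of information, 0.0 for identical partitions.
top_partitions = [[0, 0, 1, 1, 2, 2],
                  [0, 0, 1, 1, 2, 2],
                  [0, 1, 0, 1, 0, 1]]
print(_evaluate_VI((0, 1), top_partitions))   # 0.0 (identical partitions)
print(_evaluate_VI((0, 2), top_partitions))   # > 0 (diverging partitions)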
def getEntropy(KF):
    entropy_sum = 0
    for i in range(1, TOTAL_KEY_FRAMES - 1):
        while True:
            try:
                im1 = cv2.imread(READ_LOCATION + str(KF[i]) + ".jpg", 0)
                im2 = cv2.imread(READ_LOCATION + str(KF[i + 1]) + ".jpg", 0)
                entropy_sum += abs(entropy(im1) - entropy(im2))
            except Exception:
                print(i, KF, KF[i], KF[i + 1])
                continue
            break
    return entropy_sum / (TOTAL_KEY_FRAMES - 1)
def nmi_score(y_true, y_pred):
    """NMI
    https://nlp.stanford.edu/IR-book/html/htmledition/evaluation-of-clustering-1.html

    This function is not the same as sklearn.metrics.normalized_mutual_info_score:
    it normalizes by [H(y_true) + H(y_pred)] / 2, whereas sklearn uses
    sqrt(H(y_true) * H(y_pred)).
    """
    labels = align_labels(y_true)
    mi = mutual_info_score(y_true, y_pred)
    h1 = entropy(labels)
    h2 = entropy(y_pred)
    return 2 * mi / (h1 + h2)
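# Usage sketch (hypothetical labels; assumes the project-specific
# `align_labels` helper and the sklearn imports used above are available).
y_true = [0, 0, 1, 1, 2, 2]
y_pred = [1, 1, 0, 0, 2, 2]          # same grouping, permuted label ids
print(nmi_score(y_true, y_pred))     # ~1.0: NMI ignores label permutations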
def normalized_information_distance(c1, c2):
    """
    Calculate Normalized Information Distance

    Taken from Vinh, Epps, and Bailey (2010). Information Theoretic Measures
    for Clusterings Comparison: Variants, Properties, Normalization and
    Correction for Chance, JMLR
    <http://jmlr.csail.mit.edu/papers/volume11/vinh10a/vinh10a.pdf>
    """
    denom = max(entropy(c1), entropy(c2))
    # The clusterings are identical (so return 1.0) if both have zero entropy.
    return (1.0 - (mutual_info_score(c1, c2) / denom)) if denom else 1.0
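# Usage sketch (hypothetical labels, not from the original source).
c1 = [0, 0, 1, 1]
print(normalized_information_distance(c1, [1, 1, 0, 0]))   # 0.0: same partition up to relabeling
print(normalized_information_distance(c1, [0, 1, 0, 1]))   # 1.0: independent partitions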
def test_v_measure_and_mutual_information(seed=36):
    # Check relation between v_measure, entropy and mutual information
    for i in np.logspace(1, 4, 4).astype(int):
        random_state = np.random.RandomState(seed)
        labels_a, labels_b = (random_state.randint(0, 10, i),
                              random_state.randint(0, 10, i))
        assert_almost_equal(v_measure_score(labels_a, labels_b),
                            2.0 * mutual_info_score(labels_a, labels_b) /
                            (entropy(labels_a) + entropy(labels_b)), 0)
        avg = 'arithmetic'
        assert_almost_equal(v_measure_score(labels_a, labels_b),
                            normalized_mutual_info_score(labels_a, labels_b,
                                                         average_method=avg))
def sklearn_measures(U, V):
    # http://scikit-learn.org/stable/modules/classes.html#clustering-metrics
    import sklearn.metrics.cluster as sym
    U_labels = np.nonzero(U)[1]
    V_labels = np.nonzero(V)[1]
    print(U_labels, V_labels)
    # V2_labels = np.nonzero(V2)[1]
    print('entro(U)=', sym.entropy(U_labels),
          'entro(V)=', sym.entropy(V_labels),
          'entro(U,V)=', sym.mutual_info_score(U_labels, V_labels))
    res = [['ari', 'nmi', 'ami', 'vm'],
           [sym.adjusted_rand_score(U_labels, V_labels),
            sym.normalized_mutual_info_score(U_labels, V_labels),
            sym.adjusted_mutual_info_score(U_labels, V_labels),
            sym.v_measure_score(U_labels, V_labels)]]
    print(res)
    return res
def entropia_por_bloco(list_blocos_img):
    # Returned list of per-block entropies
    return_lista_entropia_img = []
    for i in range(len(list_blocos_img)):
        return_lista_entropia_img.append(entropy(list_blocos_img[i]))
    return return_lista_entropia_img
def extr_beauty_ftrs(imgFlNm):
    img = os.path.basename(imgFlNm)
    # print("Extracting beauty features for %s" % img)
    try:
        rgbImg = resize_img(io.imread(imgFlNm))
    except Exception as e:
        print("Invalid image!", e, 'image', imgFlNm)
        return e
    if len(rgbImg.shape) != 3 or rgbImg.shape[2] != 3:
        print("Invalid image.. Continuing..")
        final_ftr_obj_global[img] = None
        return None
    hsvImg = color.rgb2hsv(rgbImg)
    grayImg = color.rgb2gray(rgbImg)
    red, green, blue = get_arr(rgbImg)
    hue, saturation, value = get_arr(hsvImg)
    contrast = calc_contrast(red, green, blue)
    ftrs = calc_color_ftrs(hue, saturation, value)
    ftrs['contrast'] = contrast
    # Entropy of the whole grayscale image; more details: http://stackoverflow.com/a/42059758/5759063
    ftrs['entropy'] = entropy(grayImg)
    ftrs.update(get_spat_arrng_ftrs(grayImg))
    final_ftr_obj_global[img] = ftrs
    return final_ftr_obj_global
def sklearn_measures(U, V):
    # http://scikit-learn.org/stable/modules/classes.html#clustering-metrics
    import sklearn.metrics.cluster as sym
    U_labels = np.nonzero(U)[1]
    V_labels = np.nonzero(V)[1]
    print(U_labels, V_labels)
    # V2_labels = np.nonzero(V2)[1]
    print('entro(U)=', sym.entropy(U_labels),
          'entro(V)=', sym.entropy(V_labels),
          'entro(U,V)=', sym.mutual_info_score(U_labels, V_labels))
    res = [['ari', 'nmi', 'ami', 'vm'],
           [sym.adjusted_rand_score(U_labels, V_labels),
            sym.normalized_mutual_info_score(U_labels, V_labels),
            sym.adjusted_mutual_info_score(U_labels, V_labels),
            sym.v_measure_score(U_labels, V_labels)]]
    print(res)
    return res
def extr_beauty_ftrs(imgFlNm):
    img = os.path.basename(imgFlNm)
    print("Extracting beauty features for %s" % imgFlNm)
    try:
        rgbImg = resize_img(io.imread(imgFlNm))
    except Exception as e:
        print("Invalid image")
        return e
    if len(rgbImg.shape) != 3 or rgbImg.shape[2] != 3:
        print("Invalid image.. Continuing..")
        final_ftr_obj_global[img] = None
        return None
    hsvImg = color.rgb2hsv(rgbImg)
    grayImg = color.rgb2gray(rgbImg)
    red, green, blue = get_arr(rgbImg)
    hue, saturation, value = get_arr(hsvImg)
    contrast = calc_contrast(red, green, blue)
    ftrs = calc_color_ftrs(hue, saturation, value)
    ftrs['contrast'] = contrast
    # Entropy of the whole grayscale image; more details: http://stackoverflow.com/a/42059758/5759063
    ftrs['entropy'] = entropy(grayImg)
    ftrs.update(get_spat_arrng_ftrs(grayImg))
    final_ftr_obj_global[img] = ftrs
    return final_ftr_obj_global
def get_texture(output):
    # Texture features from the grey-level co-occurrence matrix (GLCM)
    grayImg = img_as_ubyte(color.rgb2gray(output))
    # "distances" is a list of distances (in pixels) between the pixels being compared
    distances = [1, 2, 3]
    # "angles" is a list of angles (in radians) between pixels being compared
    angles = [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4]
    # properties = ['energy', 'homogeneity']
    glcm = greycomatrix(grayImg, distances=distances, angles=angles,
                        symmetric=True, normed=True)
    # feature 1 for texture
    contrast = greycoprops(glcm, prop='contrast')
    # feature 2 for texture
    energy = greycoprops(glcm, prop='energy')
    # feature 3 for texture
    correlation = greycoprops(glcm, prop='correlation')
    # entropy = shannon_entropy(glcm)
    # feature 4 for texture
    ent = entropy(glcm)
    # entropy = shannon_entropy(glcm, base=np.e)
    return contrast, energy, correlation, ent
def get_features(nodules):
    features = []
    i = 0
    for nodule in nodules:
        nodule_feature = Features(nodule)
        nodule_feature.max_coord = np.max(nodule.pixels)
        nodule_feature.conclusion = nodule.conclusion
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            grey_comatrix = skimg.greycomatrix(
                nodule.pixels, [1], [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4],
                nodule_feature.max_coord + 1)
            nodule_feature.features['contrast'] = skimg.greycoprops(grey_comatrix, 'contrast').flatten().astype(float)
            nodule_feature.features['dissimilarity'] = skimg.greycoprops(grey_comatrix, 'dissimilarity').flatten().astype(float)
            nodule_feature.features['homogeneity'] = skimg.greycoprops(grey_comatrix, 'homogeneity').flatten()
            nodule_feature.features['energy'] = skimg.greycoprops(grey_comatrix, 'energy').flatten()
            nodule_feature.features['correlation'] = skimg.greycoprops(grey_comatrix, 'correlation').flatten()
            nodule_feature.features['ASM'] = skimg.greycoprops(grey_comatrix, 'ASM').flatten().astype(float)
            nodule_feature.features['entropy'] = entropy(nodule.pixels)
        features.append(nodule_feature)
        i += 1
        print('progress =', i, '/', len(nodules), nodule.source_id)
    return np.array(features)
def detect_roughness(patch):
    blur = cv2.GaussianBlur(patch, (5, 5), 5)
    patch = cv2.cvtColor(blur, cv2.COLOR_BGR2GRAY)
    if entropy(patch) > 4:
        return True
    else:
        return False
def extract_entropy_full(path_img):
    ### FEATURE 1 ###
    # Extract the entropy of the whole image.
    # (Image.open(argv_img).convert('LA') would convert to grayscale.)
    rgbImg = io.imread(path_img)
    grayImg = img_as_ubyte(color.rgb2gray(rgbImg))
    return_entropia = entropy(grayImg)
    return return_entropia
def extract_feature(arr):
    radius = 1
    n_points = radius * 8
    arr = cv2.cvtColor(arr, cv2.COLOR_BGR2GRAY)
    distances = [1, 5]
    angles = [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4]
    glcm = greycomatrix(arr, distances=distances, angles=angles, levels=256,
                        symmetric=False, normed=False)
    # properties = ['dissimilarity', 'homogeneity', 'contrast', 'ASM', 'energy', 'correlation']
    # glcm_feats = np.hstack([greycoprops(glcm, prop=prop).ravel() for prop in properties])
    glcm_feats = np.hstack(
        [adadoc.greycoprops(glcm[:, :, i, :]) for i in range(0, 2)]).ravel()
    hog_feats = hog(arr, orientations=9, pixels_per_cell=(8, 8),
                    cells_per_block=(1, 1), block_norm='L2-Hys',
                    feature_vector=True)
    ent = entropy(arr)
    # # prepare filter bank kernels
    # kernels = []
    # for theta in range(4):
    #     theta = theta / 4. * np.pi
    #     for sigma in (1, 3):
    #         for frequency in (0.05, 0.25):
    #             kernel = np.real(gabor_kernel(frequency, theta=theta,
    #                                           sigma_x=sigma, sigma_y=sigma))
    #             kernels.append(kernel)
    # gabor_feat = compute_feats(arr, kernels).ravel()
    thresh_sauvola = threshold_sauvola(arr, window_size=31, k=0.2)
    arr = arr > thresh_sauvola
    arr = (255 - arr * 255).astype('uint8')
    # arr = adadoc.adath(arr, method=adadoc.ADATH_SAUVOLA | adadoc.ADATH_INVTHRESH,
    #                    xblock=21, yblock=21, k=0.2, dR=64, C=0)
    lbp_code = local_binary_pattern(arr, n_points, radius, 'uniform')
    # n_bins = int(lbp_code.max() + 1)
    n_bins = 16
    lbp_feats, _ = np.histogram(lbp_code, density=True, bins=n_bins,
                                range=(0, n_bins))
    data_feat = np.hstack([lbp_feats, ent, glcm_feats, hog_feats])
    return data_feat
def _print_clusteringMetrics(_kMean, _X):
    metrics = [['Clustering K-Means', 'Datos obtenidos'],
               ['Inercia', _kMean.inertia_],
               ['Entropy', entropy(_kMean.labels_)],
               ['Silhouette Score', silhouette_score(_X, _kMean.labels_, random_state=0)],
               ['Calinski-Harabaz Score', calinski_harabaz_score(_X, _kMean.labels_)]]
    print('\nMinería de Datos - Clustering K-Means - <VORT>', '\n')
    print(_kMean, '\n')
    print(look(metrics))
def extract_feature(arr):
    radius = 1
    n_points = radius * 8
    # arr = cv2.cvtColor(arr, cv2.COLOR_BGR2GRAY)
    distances = [1, 5]
    angles = [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4]
    glcm = greycomatrix(arr.copy(), distances=distances, angles=angles,
                        levels=256, symmetric=False, normed=False)
    # properties = ['dissimilarity', 'homogeneity', 'contrast', 'ASM', 'energy', 'correlation']
    # glcm_feats = np.hstack([greycoprops(glcm, prop=prop).ravel() for prop in properties])
    glcm_feats = np.hstack(
        [adadoc.greycoprops(glcm[:, :, i, :]) for i in range(0, 2)]).ravel()
    hog_feats = hog(arr, orientations=9, pixels_per_cell=(8, 8),
                    cells_per_block=(1, 1), block_norm='L2-Hys',
                    feature_vector=True)
    ent = entropy(arr)
    arr = adadoc.adath(arr, method=adadoc.ADATH_SAUVOLA | adadoc.ADATH_INVTHRESH,
                       xblock=21, yblock=21, k=0.2, dR=64, C=0)
    # thresh_sauvola = threshold_sauvola(arr, window_size=31, k=0.2)
    # arr = arr > thresh_sauvola
    # arr = (255 - arr * 255).astype('uint8')
    lbp_code = local_binary_pattern(arr, n_points, radius, 'uniform')
    # n_bins = int(lbp_code.max() + 1)
    n_bins = 16
    lbp_feats, _ = np.histogram(lbp_code, density=True, bins=n_bins,
                                range=(0, n_bins))
    data_feat = np.hstack([lbp_feats, ent, glcm_feats, hog_feats])
    return data_feat
def normalized_max_mutual_info_score(labels_true, labels_pred):
    """
    A variant of NMI given as:
        NMI_max = MI(U, V) / max{ H(U), H(V) }
    based on 'adjusted mutual info score' in sklearn.

    Parameters
    ----------
    :param labels_true: labels of clustering 1 (as a 1-dimensional ndarray)
    :param labels_pred: labels of clustering 2 (as a 1-dimensional ndarray)

    Returns
    -------
    :return: NMI-max between these two clusterings as a float value
    """
    labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
    n_samples = labels_true.shape[0]
    classes = np.unique(labels_true)
    clusters = np.unique(labels_pred)
    # Special limit cases: no clustering since the data is not split.
    # This is a perfect match hence return 1.0.
    if (classes.shape[0] == clusters.shape[0] == 1 or
            classes.shape[0] == clusters.shape[0] == 0):
        return 1.0
    contingency = contingency_matrix(labels_true, labels_pred)
    contingency = np.array(contingency, dtype='float')
    # Calculate the MI for the two clusterings
    mi = metrics.mutual_info_score(labels_true, labels_pred,
                                   contingency=contingency)
    # Calculate entropy for each labeling
    h_true, h_pred = entropy(labels_true), entropy(labels_pred)
    nmi_max = mi / max(h_true, h_pred)
    return nmi_max
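# Usage sketch (hypothetical labels; relies on the sklearn helpers imported by
# this module: check_clusterings, contingency_matrix, metrics, entropy).
import numpy as np

a = np.array([0, 0, 1, 1, 2, 2])
print(normalized_max_mutual_info_score(a, a))                              # 1.0 for identical clusterings
print(normalized_max_mutual_info_score(a, np.array([0, 0, 0, 1, 1, 1])))   # < 1.0 otherwise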
def ExtractFeature(fruit_images):
    list_of_vectors = []
    for img in fruit_images:
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        row, col = gray_img.shape
        canvas = np.zeros((row, col, 1), np.uint8)
        for i in range(row):
            for j in range(col):
                if gray_img[i][j] < 220:
                    canvas.itemset((i, j, 0), 255)
                else:
                    canvas.itemset((i, j, 0), 0)
        kernel = np.ones((3, 3), np.uint8)
        canvas = cv2.morphologyEx(canvas, cv2.MORPH_CLOSE, kernel)
        for i in range(row):
            for j in range(col):
                b, g, r = img[i][j]
                if canvas[i][j] == 255:
                    img.itemset((i, j, 0), b)
                    img.itemset((i, j, 1), g)
                    img.itemset((i, j, 2), r)
                else:
                    img.itemset((i, j, 0), 0)
                    img.itemset((i, j, 1), 0)
                    img.itemset((i, j, 2), 0)
        hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        rgb_means, rgb_std = cv2.meanStdDev(img)
        hsv_means, hsv_std = cv2.meanStdDev(hsv_img)
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        coeff = pywt.dwt2(gray_img, "haar")  # ---- level-1 wavelet decomposition
        LL, (LH, HL, HH) = coeff
        Energy = (LH**2 + HL**2 + HH**2).sum() / img.size
        Entropy = entropy(gray_img)
        b, g, r = img[row // 2 - 1, col // 2 - 1]  # integer division for indexing
        # center_code = tools.CentreClass(b, g, r)
        list_of_vectors.append([
            rgb_means[2], rgb_means[1], rgb_means[0],
            rgb_std[2], rgb_std[1], rgb_std[0],
            hsv_means[2], hsv_means[1], hsv_means[0],
            hsv_std[2], hsv_std[1], hsv_std[0],
            Energy, Entropy
        ])
    list_of_vectors = np.array(list_of_vectors)
    return (list_of_vectors)
def glcm_f(segment_region):
    glcm = greycomatrix(segment_region, [5], [0], 256)
    stats = ["dissimilarity", "correlation", "contrast", "homogeneity",
             "ASM", "energy"]
    dissimilarity = greycoprops(glcm, stats[0])[0, 0]
    correlation = greycoprops(glcm, stats[1])[0, 0]
    contrast = greycoprops(glcm, stats[2])[0, 0]
    homogeneity = greycoprops(glcm, stats[3])[0, 0]
    ASM = greycoprops(glcm, stats[4])[0, 0]
    energy = greycoprops(glcm, stats[5])[0, 0]
    entropy_f = entropy(segment_region)
    temp_features = [dissimilarity, correlation, contrast, homogeneity,
                     ASM, energy, entropy_f]
    return temp_features
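# Usage sketch (dummy patch, not data from the original project). glcm_f
# expects a 2-D uint8 grayscale region, since greycomatrix is called with
# 256 grey levels.
import numpy as np

patch = np.random.RandomState(0).randint(0, 256, (64, 64), dtype=np.uint8)
feats = glcm_f(patch)
# [dissimilarity, correlation, contrast, homogeneity, ASM, energy, entropy]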
print("Entropy") import numpy as np from skimage import io, color, img_as_ubyte from skimage.feature import greycomatrix, greycoprops from sklearn.metrics.cluster import entropy rgbImg = io.imread('D:/PAPERS/With_Pratap_2019/figs/airplane_cipher.png') grayImg = img_as_ubyte(color.rgb2gray(rgbImg)) #grayImg = Image.open('Documents/figs/barbara.jpg') print(entropy(grayImg))
def test_entropy():
    ent = entropy([0, 0, 42.])
    assert_almost_equal(ent, 0.6365141, 5)
def test_entropy():
    ent = entropy([0, 0, 42.])
    assert_almost_equal(ent, 0.6365141, 5)
    assert_almost_equal(entropy([]), 1)
Vprocc2 = shatt.in_range(Vprocc2)
Vprocf2 = shatt.in_range(Vprocf2)
Vprocc3 = shatt.in_range(Vprocc3)
Vprocf3 = shatt.in_range(Vprocf3)
# save_without_retrieve_color(hsv, Vprocc2, Vprocf2, Vprocc3, Vprocf3)

# Retrieve true color to skin
muorig = V.mean()
Vnewc2 = shatt.retrieve_color(Vprocc2, muorig)
Vnewf2 = shatt.retrieve_color(Vprocf2, muorig)
Vnewc3 = shatt.retrieve_color(Vprocc3, muorig)
Vnewf3 = shatt.retrieve_color(Vprocf3, muorig)

# Convert Value into the range 0-1
Vnewc2 = shatt.in_range(Vnewc2)
Vnewf2 = shatt.in_range(Vnewf2)
Vnewc3 = shatt.in_range(Vnewc3)
Vnewf3 = shatt.in_range(Vnewf3)

save_with_retrieve_color(hsv, Vnewc2, Vnewf2, Vnewc3, Vnewf3)

# Select the image which has the least entropy
Vlist = [V, Vnewc2, Vnewf2, Vnewc3, Vnewf3]
values = [img_as_ubyte(v) for v in Vlist]
entropy_vals = [entropy(v) for v in values]
print('entropy: ' + str(entropy_vals))
print('index: ' + str(entropy_vals.index(min(entropy_vals))))
                img.itemset((i, j, 1), g)
                img.itemset((i, j, 2), r)
            else:
                img.itemset((i, j, 0), 0)
                img.itemset((i, j, 1), 0)
                img.itemset((i, j, 2), 0)

    hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    rgb_means, rgb_std = cv2.meanStdDev(img)
    hsv_means, hsv_std = cv2.meanStdDev(hsv_img)
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    coeff = pywt.dwt2(gray_img, "haar")  # ---- level-1 wavelet decomposition
    LL, (LH, HL, HH) = coeff
    Energy = (LH**2 + HL**2 + HH**2).sum() / img.size
    Entropy = entropy(gray_img)
    b, g, r = img[row // 2 - 1, col // 2 - 1]  # integer division for indexing
    center_code = tools.CentreClass(b, g, r)
    list_of_vectors.append([
        rgb_means[2], rgb_means[1], rgb_means[0],
        rgb_std[2], rgb_std[1], rgb_std[0],
        hsv_means[2], hsv_means[1], hsv_means[0],
        hsv_std[2], hsv_std[1], hsv_std[0],
        Energy, Entropy
    ])

list_of_vectors = np.array(list_of_vectors)
# X_train, X_test, y_train, y_test = train_test_split(list_of_vectors, label_ids, test_size=0.30, random_state=14)
X_tr = list_of_vectors
y_tr = label_ids
input = cv2.imread(path + i, cv2.IMREAD_GRAYSCALE)
glcm = skimage.feature.greycomatrix(
    input, [1], [0, np.pi / 4, np.pi / 2, np.pi * 3 / 4], 256,
    symmetric=True, normed=True)
# Contrast, dissimilarity, homogeneity, energy, correlation, ASM
# for prop in {'contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'ASM'}:
contrast = skimage.feature.greycoprops(glcm, "contrast")
dissimilarity = skimage.feature.greycoprops(glcm, "dissimilarity")
homogeneity = skimage.feature.greycoprops(glcm, "homogeneity")
energy = skimage.feature.greycoprops(glcm, "energy")
correlation = skimage.feature.greycoprops(glcm, "correlation")
ASM = skimage.feature.greycoprops(glcm, "ASM")
entropyImg = entropy(input)  # compute the entropy
sheet1.write(flag + 1, 0, i)
sheet1.write(flag + 1, 1, contrast[0][0])
sheet1.write(flag + 1, 2, dissimilarity[0][0])
sheet1.write(flag + 1, 3, homogeneity[0][0])
sheet1.write(flag + 1, 4, energy[0][0])
sheet1.write(flag + 1, 5, correlation[0][0])
sheet1.write(flag + 1, 6, ASM[0][0])
sheet1.write(flag + 1, 7, entropyImg)
print(contrast[0][0], dissimilarity[0][0], homogeneity[0][0],
      energy[0][0], correlation[0][0], ASM[0][0], entropyImg)
print("----------分界线-----------")
flag += 1
wb.save("data/glcm.xls")
def shading_attenuation_method(image, extract, margin):
    """
    Apply the shading attenuation method to an image

    Parameters
    ----------
    image: 3D array
        The image source
    extract: scalar
        Number of pixels to extract, extract x extract
    margin: scalar
        Margin from the borders
    """
    hsv = rgb2hsv(image)
    V = np.copy(hsv[:, :, 2])
    shape = image.shape[0:2]

    # Sampling pixels
    Yc, Xc = shatt.sampling_from_corners(margin=margin, extract=extract, shape=shape)
    Yf, Xf = shatt.sampling_from_frames(margin=margin, extract=extract, shape=shape)
    Zc = np.zeros((Xc.shape))
    Zf = np.zeros((Xf.shape))
    for j in range(0, Zc.shape[0]):
        Zc[j] = np.copy(V[Yc[j], Xc[j]])
    for j in range(0, Zf.shape[0]):
        Zf[j] = np.copy(V[Yf[j], Xf[j]])

    # Quadratic and cubic polynomial coefficients
    Ac2 = shatt.quadratic_polynomial_function(Yc, Xc)
    Af2 = shatt.quadratic_polynomial_function(Yf, Xf)
    Ac3 = shatt.cubic_polynomial_function(Yc, Xc)
    Af3 = shatt.cubic_polynomial_function(Yf, Xf)

    # Fitting polynomial
    coeffc2 = np.linalg.lstsq(Ac2, Zc)[0]
    coefff2 = np.linalg.lstsq(Af2, Zf)[0]
    coeffc3 = np.linalg.lstsq(Ac3, Zc)[0]
    coefff3 = np.linalg.lstsq(Af3, Zf)[0]

    # Processed values
    Vprocc2 = shatt.apply_quadratic_function(V, coeffc2)
    Vprocf2 = shatt.apply_quadratic_function(V, coefff2)
    Vprocc3 = shatt.apply_cubic_function(V, coeffc3)
    Vprocf3 = shatt.apply_cubic_function(V, coefff3)

    # Convert Value into the range 0-1
    Vprocc2 = shatt.in_range(Vprocc2)
    Vprocf2 = shatt.in_range(Vprocf2)
    Vprocc3 = shatt.in_range(Vprocc3)
    Vprocf3 = shatt.in_range(Vprocf3)

    # Retrieve true color to skin
    muorig = V.mean()
    Vnewc2 = shatt.retrieve_color(Vprocc2, muorig)
    Vnewf2 = shatt.retrieve_color(Vprocf2, muorig)
    Vnewc3 = shatt.retrieve_color(Vprocc3, muorig)
    Vnewf3 = shatt.retrieve_color(Vprocf3, muorig)

    # Convert Value into the range 0-1
    Vnewc2 = shatt.in_range(Vnewc2)
    Vnewf2 = shatt.in_range(Vnewf2)
    Vnewc3 = shatt.in_range(Vnewc3)
    Vnewf3 = shatt.in_range(Vnewf3)

    # Select the image which has the least entropy
    Vlist = [V, Vnewc2, Vnewf2, Vnewc3, Vnewf3]
    values = [img_as_ubyte(v) for v in Vlist]
    entropy_vals = [entropy(v) for v in values]
    print('\tentropy: ' + str(entropy_vals))
    index = entropy_vals.index(min(entropy_vals))
    hsv[:, :, 2] = np.copy(Vlist[index])
    attenuated = hsv2rgb(hsv)
    return attenuated
def normalized_mutual_information(cl: np.ndarray, org: np.ndarray):
    """
    Clustering accuracy measure that combines the mutual information
    between two clusterings with the entropy of each clustering.
    """
    assert cl.shape == org.shape
    return mutual_info_score(org, cl) / (abs(entropy(cl) + entropy(org)) / 2)
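# Usage sketch (toy label arrays, not from the original source; both inputs
# must have the same shape).
import numpy as np

org = np.array([0, 0, 1, 1])
cl = np.array([1, 1, 0, 0])
print(normalized_mutual_information(cl, org))   # 1.0: same grouping up to relabeling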
def upload_pic(request):
    if request.POST:
        file_doc = request.FILES['ahoy']
        sv = fruit(document=file_doc)
        sv.save()
        oyo = fruit.objects.last()
        oy = oyo.id
        item = fruit.objects.get(id=oy)
        # item = fruit.objects.get(id=oy)
        img_file = item.document
        ayay = img_file.url
        print(ayay)
        print(img_file)
        image = cv2.imread(img_file.url)
        image = cv2.medianBlur(image, 5)
        fruit_images = []
        fruit_images.append(image)

        ##### Feature Extraction
        list_of_vectors = []
        for img in fruit_images:
            gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            row, col = gray_img.shape
            canvas = np.zeros((row, col, 1), np.uint8)
            for i in range(row):
                for j in range(col):
                    if gray_img[i][j] < 220:
                        canvas.itemset((i, j, 0), 255)
                    else:
                        canvas.itemset((i, j, 0), 0)
            kernel = np.ones((3, 3), np.uint8)
            canvas = cv2.morphologyEx(canvas, cv2.MORPH_CLOSE, kernel)
            for i in range(row):
                for j in range(col):
                    b, g, r = img[i][j]
                    if canvas[i][j] == 255:
                        img.itemset((i, j, 0), b)
                        img.itemset((i, j, 1), g)
                        img.itemset((i, j, 2), r)
                    else:
                        img.itemset((i, j, 0), 0)
                        img.itemset((i, j, 1), 0)
                        img.itemset((i, j, 2), 0)
            hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            rgb_means, rgb_std = cv2.meanStdDev(img)
            hsv_means, hsv_std = cv2.meanStdDev(hsv_img)
            gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            coeff = pywt.dwt2(gray_img, "haar")  # ---- level-1 wavelet decomposition
            LL, (LH, HL, HH) = coeff
            Energy = (LH**2 + HL**2 + HH**2).sum() / img.size
            Entropy = entropy(gray_img)
            b, g, r = img[row // 2 - 1, col // 2 - 1]  # integer division for indexing
            list_of_vectors.append([
                rgb_means[2], rgb_means[1], rgb_means[0],
                rgb_std[2], rgb_std[1], rgb_std[0],
                hsv_means[2], hsv_means[1], hsv_means[0],
                hsv_std[2], hsv_std[1], hsv_std[0],
                Energy, Entropy
            ])
        img_file = item.document
        list_of_vectors = np.array(list_of_vectors)
        X_tr = list_of_vectors
        clf = joblib.load('clfRF70-12-g-wcc.pkl')
        test_predict = clf.predict(X_tr)
        fr = test_predict[0]
        fra = int(fr)
        item.Tipe = fra
        item.save()
        return HttpResponseRedirect(reverse('buah:hasil', args=[item.id]))
def info_var(z, zh):
    """Compute variation of information based on M. Meila (2007)."""
    return entropy(z) + entropy(zh) - 2 * mutual_info_score(z, zh)
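# Sanity-check sketch (hypothetical labels; entropy/mutual_info_score as
# imported above). VI is 0 for identical partitions and grows as they diverge.
z = [0, 0, 1, 1]
print(info_var(z, z))               # 0.0 for identical partitions
print(info_var(z, [0, 1, 0, 1]))    # 2*ln(2) for two independent two-way splits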
def create_clusters(tweetsDF, my_token_pattern, min_dist_thres=0.6, min_max_diff_thres=0.4,
                    max_dist_thres=0.8, iteration_no=1, min_clusters=1, printsize=True,
                    nameprefix='', selection=True, strout=False, user_identifier='screen_name',
                    cluster_list=None):
    """
    Modes:
    mode1: get a certain number of clusters, relaxing the parameters as needed (this is the current mode).
    mode2: get clusters that comply with certain conditions.

    "min_max_diff_thres" should not be too small; otherwise you miss threshold pairs like
    min 0.3 - max 0.7. The top is controlled by the maximum anyway, so do not be afraid
    of making it big: around 0.4.
    """
    if min_dist_thres > 0.85 and max_dist_thres > 0.99:
        logging.info("The parameter values are too high to allow a good selection. We just finish searching for clusters at that stage.")
        logging.info("Threshold Parameters are: \nmin_dist_thres=" + str(min_dist_thres) +
                     "\tmin_max_diff_thres:=" + str(min_max_diff_thres) +
                     "\tmax_dist_thres=" + str(max_dist_thres))
        return cluster_list

    len_clust_list = 0
    if cluster_list is None:
        cluster_list = []
    elif not selection and len(cluster_list) > 0:
        return cluster_list
    else:
        len_clust_list = len(cluster_list)
        logging.info("Starting the iteration with:" + str(len_clust_list) + " clusters.")

        clustered_tweet_ids = []
        for clust_dict in cluster_list:
            clustered_tweet_ids += clust_dict["twids"]

        logging.info("Number of already clustered tweets are:" + str(len(clustered_tweet_ids)))
        logging.info("Tweet set size to be clustered:" + str(len(tweetsDF)))
        tweetsDF = tweetsDF[~tweetsDF.id_str.isin(clustered_tweet_ids)]
        logging.info("Tweet set size to be clustered(after elimination of the already clustered tweets):" + str(len(tweetsDF)))

    if len(tweetsDF) == 0:
        logging.info("Please check that the id_str has a unique value for each item.")
        print("Please check that the id_str has a unique value for each item.")
        return cluster_list

    logging.info('Creating clusters was started!!')
    logging.info("Threshold Parameters are: \nmin_dist_thres=" + str(min_dist_thres) +
                 "\tmin_max_diff_thres:=" + str(min_max_diff_thres) +
                 "\tmax_dist_thres=" + str(max_dist_thres))

    cluster_bigram_cntr = Counter()

    freqcutoff = int(m.log(len(tweetsDF)) / 2)
    if freqcutoff == 0:
        freqcutoff = 1  # make it at least 1.
    # freqcutoff = int(m.log(len(tweetsDF)) / 2)
    # The bigger the frequency threshold, the quicker it is to find similar groups
    # of tweets, although precision will decrease.
    logging.info("Feature extraction parameters are:\tfrequencyCutoff:" + str(freqcutoff))

    word_vectorizer = TfidfVectorizer(ngram_range=(1, 2), lowercase=False, norm='l2',
                                      min_df=freqcutoff, token_pattern=my_token_pattern)
    text_vectors = word_vectorizer.fit_transform(tweetsDF[active_column])
    # logging.info("Number of features:" + str(len(word_vectorizer.get_feature_names())))
    logging.info("Features are:" + str(word_vectorizer.get_feature_names()))

    # The more clusters we need, the more clusters we will create.
    n_clust = int(m.sqrt(len(tweetsDF)) / 2) + iteration_no * (min_clusters - len_clust_list)
    # Up to 1 million tweets, having many KMeans iterations is not a problem;
    # more iterations give a higher chance of finding candidate clusters.
    n_initt = int(m.log10(len(tweetsDF))) + iteration_no
logging.info("Clustering parameters are:\nnclusters="+str(n_clust)+"\tn_initt="+str(n_initt)) if len(tweetsDF) < 1000000: km = KMeans(n_clusters=n_clust, init='k-means++', max_iter=500, n_init=n_initt) # , n_jobs=16 logging.info("The data set is small enough to use Kmeans") else: km = MiniBatchKMeans(n_clusters=n_clust, init='k-means++', max_iter=500, n_init=n_initt, batch_size=1000) logging.info("The data set is BIG, MiniBatchKMeans is used.") km.fit(text_vectors) # Cluster = namedtuple('Cluster', ['cno', 'cstr','tw_ids']) clustersizes = get_cluster_sizes(km, tweetsDF[active_column].values) logging.info("Cluster sizes are:"+str(clustersizes)) for cn, csize in clustersizes.most_common(): # range(args.ksize): cn = int(cn) similar_indices = (km.labels_ == cn).nonzero()[0] similar = [] similar_tuple_list = [] for i in similar_indices: dist = sp.linalg.norm((km.cluster_centers_[cn] - text_vectors[i])) similar_tuple_list.append((dist, tweetsDF['id_str'].values[i], tweetsDF[active_column].values[i], tweetsDF[user_identifier].values[i])) if strout: similar.append(str(dist) + "\t" + tweetsDF['id_str'].values[i] + "\t" + tweetsDF[active_column].values[i] + "\t" + tweetsDF[user_identifier].values[i]) similar_tuple_list = sorted(similar_tuple_list, key=itemgetter(0)) # sort based on the 0th, which is the distance from the center, element. # test sortedness! if strout: similar = sorted(similar, reverse=False) cluster_info_str = '' user_list = [t[3] for t in similar_tuple_list] # t[3] means the third element in the similar_tuple_list. if selection: if (len(similar_tuple_list)>2) and (similar_tuple_list[0][0] < min_dist_thres) and (similar_tuple_list[-1][0] < max_dist_thres) and ((similar_tuple_list[0][0] + min_max_diff_thres) > similar_tuple_list[-1][0]): # the smallest and biggest distance to the centroid should not be very different, we allow 0.4 for now! cluster_info_str += "cluster number and size are: " + str(cn) + ' ' + str(clustersizes[str(cn)]) + "\n" for txt in tweetsDF[active_column].values[similar_indices]: cluster_bigram_cntr.update(get_uni_bigrams(txt)) # regex.findall(r"\b\w+[-]?\w+\s\w+", txt, overlapped=True)) # cluster_bigram_cntr.update(txt.split()) # unigrams frequency = reverse_index_frequency(cluster_bigram_cntr) if strout: topterms = [k+":" + str(v) for k, v in cluster_bigram_cntr.most_common() if k in word_vectorizer.get_feature_names()] cluster_info_str += "Top terms are:" + ", ".join(topterms) + "\n" if strout: cluster_info_str += "distance_to_centroid" + "\t" + "tweet_id" + "\t" + "tweet_text\n" if len(similar) > 20: cluster_info_str += 'First 10 documents:\n' cluster_info_str += "\n".join(similar[:10]) + "\n" # print(*similar[:10], sep='\n', end='\n') cluster_info_str += 'Last 10 documents:\n' cluster_info_str += "\n".join(similar[-10:]) + "\n" else: cluster_info_str += "Tweets for this cluster are:\n" cluster_info_str += "\n".join(similar) + "\n" else: logging.info("Cluster is not good. 
Smallest and largest distance to the cluster center are:"+str(similar_tuple_list[0][0])+"\t"+str(similar_tuple_list[-1][0])) else: cluster_info_str += "cluster number and size are: " + str(cn) + ' ' + str(clustersizes[str(cn)]) + "\n" cluster_bigram_cntr = Counter() for txt in tweetsDF[active_column].values[similar_indices]: cluster_bigram_cntr.update(get_uni_bigrams(txt)) frequency = reverse_index_frequency(cluster_bigram_cntr) if strout: topterms = [k+":"+str(v) for k, v in cluster_bigram_cntr.most_common() if k in word_vectorizer.get_feature_names()] cluster_info_str += "Top terms are:" + ", ".join(topterms) + "\n" if strout: cluster_info_str += "distance_to_centroid" + "\t" + "tweet_id" + "\t" + "tweet_text\n" if len(similar) > 20: cluster_info_str += 'First 10 documents:\n' cluster_info_str += "\n".join(similar[:10]) + "\n" # print(*similar[:10], sep='\n', end='\n') cluster_info_str += 'Last 10 documents:\n' cluster_info_str += "\n".join(similar[-10:]) + "\n" else: cluster_info_str += "Tweets for this cluster are:\n" cluster_info_str += "\n".join(similar) + "\n" if len(cluster_info_str) > 0: # that means there is some information in the cluster. logging.info("\nCluster was appended. cluster_info_str:"+cluster_info_str+"\tmin_dist="+str(similar_tuple_list[0][0])+"\tmax_dist="+str(similar_tuple_list[-1][0])) cluster_list.append({'cno': cn, 'cnoprefix': nameprefix+str(cn), 'user_entropy': entropy(user_list), 'rif': frequency, 'cstr': cluster_info_str, 'ctweettuplelist': similar_tuple_list, 'twids': list(tweetsDF[np.in1d(km.labels_, [cn])]["id_str"].values)}) # 'user_ent':entropy(user_list), logging.info("length of cluster_list:"+str(len(cluster_list))) len_clust_list = len(cluster_list) # use to adjust the threshold steps for the next iteration. If you are closer to the target step smaller. if len_clust_list<min_clusters: logging.info("There is not enough clusters, call the create_clusters again with relaxed threshold parameters (recursively). Iteration no:"+str(iteration_no)) factor = (min_clusters-len_clust_list)/1000 # if it needs more clusters, it will make a big step min_dist_thres2, max_dist_thres2, min_max_diff_thres2 = relax_parameters(min_dist_thres, max_dist_thres, min_max_diff_thres, factor) logging.info("Threshold step sizes are: \nmin_dist_thres="+str(min_dist_thres-min_dist_thres2)+"\tmax_dist_thres="+str(max_dist_thres-max_dist_thres2)+"\tmin_max_diff_thres="+str(min_max_diff_thres-min_max_diff_thres2)) return create_clusters(tweetsDF, my_token_pattern, min_dist_thres=min_dist_thres2, min_max_diff_thres=min_max_diff_thres2, max_dist_thres=max_dist_thres2, \ iteration_no=iteration_no+1, min_clusters=min_clusters, user_identifier=user_identifier, cluster_list=cluster_list) return cluster_list