def entropy_(block):
    """
    Function to calculate the variance of the block

    """
    a = entropy(block[:, :, 0])
    b = entropy(block[:, :, 1])
    c = entropy(block[:, :, 2])
    return [a, b, c]
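A minimal usage sketch (the 16x16 RGB block below is synthetic and stands in for a real image tile; sklearn's entropy treats each distinct pixel value as a label and returns values in nats):

import numpy as np
from sklearn.metrics.cluster import entropy

# Hypothetical block in place of a real image tile.
block = np.random.randint(0, 256, size=(16, 16, 3), dtype=np.uint8)
print(entropy_(block))  # [H(R), H(G), H(B)], one value per colour channel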
Example #2
def test_v_measure_and_mutual_information(seed=36):
    # Check relation between v_measure, entropy and mutual information
    for i in np.logspace(1, 4, 4).astype(int):
        random_state = np.random.RandomState(seed)
        labels_a, labels_b = (random_state.randint(0, 10, i),
                              random_state.randint(0, 10, i))
        assert_almost_equal(v_measure_score(labels_a, labels_b),
                            2.0 * mutual_info_score(labels_a, labels_b) /
                            (entropy(labels_a) + entropy(labels_b)), 0)
def test_v_measure_and_mutual_information(seed=36):
    # Check relation between v_measure, entropy and mutual information
    for i in np.logspace(1, 4, 4).astype(int):
        random_state = np.random.RandomState(seed)
        labels_a, labels_b = (random_state.randint(0, 10, i),
                              random_state.randint(0, 10, i))
        assert_almost_equal(v_measure_score(labels_a, labels_b),
                            2.0 * mutual_info_score(labels_a, labels_b) /
                            (entropy(labels_a) + entropy(labels_b)), 0)
def test_v_measure_and_mutual_information(seed=36):
    """Check relation between v_measure, entropy and mutual information"""
    for i in np.logspace(1, 4, 4).astype(int):
        random_state = np.random.RandomState(seed)
        labels_a, labels_b = (random_state.randint(0, 10, i),
                              random_state.randint(0, 10, i))
        assert_almost_equal(v_measure_score(labels_a, labels_b),
                            2.0 * mutual_info_score(labels_a, labels_b) /
                            (entropy(labels_a) + entropy(labels_b)), 0)
def test_entropy():
    h = entropy([0, 0, 42.], log_base='e')
    assert_almost_equal(h, 0.6365141, 5)
    h = entropy([0, 0, 42.], log_base=2)
    assert_almost_equal(h, 0.9182958, 5)
    h = entropy([], log_base='e')
    assert_almost_equal(h, 1)
    h = entropy([], log_base=2)
    assert_almost_equal(h, 1)
def test_v_measure_and_mutual_information(seed=36):
    # Check relation between v_measure, entropy and mutual information
    for i in np.logspace(1, 4, 4).astype(int):
        random_state = np.random.RandomState(seed)
        labels_a, labels_b = (random_state.randint(0, 10, i),
                              random_state.randint(0, 10, i))

        v_m = v_measure_score(labels_a, labels_b)
        mi = mutual_info_score(labels_a, labels_b, log_base='e')
        h_a = entropy(labels_a, log_base='e')
        h_b = entropy(labels_b, log_base='e')
        assert_almost_equal(v_m, 2.0 * mi / (h_a + h_b), 0)
def _evaluate_VI(index_pair, top_partitions):
    """Worker for VI evaluations."""
    MI = mutual_info_score(
        top_partitions[index_pair[0]],
        top_partitions[index_pair[1]],
    )
    Ex = entropy(top_partitions[index_pair[0]])
    Ey = entropy(top_partitions[index_pair[1]])
    JE = Ex + Ey - MI
    if abs(JE) < 1e-8:
        return 0.0
    return (JE - MI) / JE
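A small sketch of calling the worker directly, assuming two hypothetical partitions and sklearn's natural-log entropy/mutual_info_score:

import numpy as np
from itertools import combinations
from sklearn.metrics import mutual_info_score
from sklearn.metrics.cluster import entropy

# Hypothetical partitions; the normalized VI is 0 for identical partitions
# and approaches 1 for unrelated ones.
top_partitions = [np.array([0, 0, 1, 1, 2, 2]),
                  np.array([0, 0, 0, 1, 1, 2])]
for pair in combinations(range(len(top_partitions)), 2):
    print(pair, _evaluate_VI(pair, top_partitions))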
Example #8
def getEntropy( KF ):
    entropy_sum = 0
    for i in range(1, TOTAL_KEY_FRAMES - 1):
        while True:
            try:
                im1 = cv2.imread(READ_LOCATION + str(KF[i]) + ".jpg",0)
                im2 = cv2.imread(READ_LOCATION + str(KF[i+1]) + ".jpg",0)
                entropy_sum += abs(entropy(im1) - entropy(im2))
            except:
                print(i, KF, KF[i], KF[i+1])
                continue
            break
    return entropy_sum/(TOTAL_KEY_FRAMES - 1)
def nmi_score(y_true, y_pred):
    """NMI
    https://nlp.stanford.edu/IR-book/html/htmledition/evaluation-of-clustering-1.html
    This function is not the same as
    sklearn.metrics.normalized_mutual_info_score:
    this function normalizes by [H(y_true)+H(y_pred)]/2 while
    sklearn normalizes by sqrt(H(y_true)*H(y_pred))
    """
    labels = align_labels(y_true)
    mi = mutual_info_score(y_true, y_pred)
    h1 = entropy(labels)
    h2 = entropy(y_pred)
    return 2 * mi / (h1 + h2)
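For reference, a standalone sketch (hypothetical labels, public sklearn API only) contrasting the arithmetic-mean normalization used above with the sqrt (geometric-mean) variant:

import numpy as np
from sklearn.metrics import mutual_info_score, normalized_mutual_info_score
from sklearn.metrics.cluster import entropy

y_true = np.array([0, 0, 1, 1, 2, 2])
y_pred = np.array([0, 0, 0, 1, 1, 2])
mi = mutual_info_score(y_true, y_pred)
nmi_arith = 2 * mi / (entropy(y_true) + entropy(y_pred))
nmi_geom = mi / np.sqrt(entropy(y_true) * entropy(y_pred))
# Both should agree with sklearn when the matching average_method is chosen.
assert np.isclose(nmi_arith, normalized_mutual_info_score(
    y_true, y_pred, average_method='arithmetic'))
assert np.isclose(nmi_geom, normalized_mutual_info_score(
    y_true, y_pred, average_method='geometric'))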
Example #10
def normalized_information_distance(c1, c2):
    """
    Calculate Normalized Information Distance

    Taken from Vinh, Epps, and Bailey, (2010). Information Theoretic Measures
    for Clusterings Comparison: Variants, Properties, Normalization and
    Correction for Chance, JMLR
    <http://jmlr.csail.mit.edu/papers/volume11/vinh10a/vinh10a.pdf>
    """
    denom = max(entropy(c1), entropy(c2))

    # If both clusterings have zero entropy the denominator is 0; treat that
    # degenerate case specially and return 1.0.
    return (1.0 - (mutual_info_score(c1, c2) / denom)) if denom else 1.0
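A quick sketch with hypothetical label lists; identical (non-degenerate) clusterings give a distance of 0.0, increasingly dissimilar ones approach 1.0:

from sklearn.metrics import mutual_info_score
from sklearn.metrics.cluster import entropy

c1 = [0, 0, 1, 1, 2, 2]
print(normalized_information_distance(c1, c1))                  # -> 0.0
print(normalized_information_distance(c1, [0, 1, 0, 1, 2, 2]))  # strictly between 0 and 1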
def test_v_measure_and_mutual_information(seed=36):
    # Check relation between v_measure, entropy and mutual information
    for i in np.logspace(1, 4, 4).astype(int):
        random_state = np.random.RandomState(seed)
        labels_a, labels_b = (random_state.randint(0, 10, i),
                              random_state.randint(0, 10, i))
        assert_almost_equal(v_measure_score(labels_a, labels_b),
                            2.0 * mutual_info_score(labels_a, labels_b) /
                            (entropy(labels_a) + entropy(labels_b)), 0)
        avg = 'arithmetic'
        assert_almost_equal(v_measure_score(labels_a, labels_b),
                            normalized_mutual_info_score(labels_a, labels_b,
                                                         average_method=avg)
                            )
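A standalone check of the same identity with hypothetical random labels; it relies only on sklearn's public API and on entropy/mutual_info_score both using natural logarithms:

import numpy as np
from sklearn.metrics import (mutual_info_score, normalized_mutual_info_score,
                             v_measure_score)
from sklearn.metrics.cluster import entropy

rng = np.random.RandomState(0)
labels_a, labels_b = rng.randint(0, 10, 1000), rng.randint(0, 10, 1000)

v = v_measure_score(labels_a, labels_b)
# V-measure is the harmonic mean of homogeneity (MI/H(a)) and completeness
# (MI/H(b)), which simplifies algebraically to 2*MI / (H(a) + H(b)).
assert np.isclose(v, 2.0 * mutual_info_score(labels_a, labels_b)
                  / (entropy(labels_a) + entropy(labels_b)))
assert np.isclose(v, normalized_mutual_info_score(labels_a, labels_b,
                                                  average_method='arithmetic'))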
def sklearn_measures(U, V):
    #     http://scikit-learn.org/stable/modules/classes.html#clustering-metrics
    import sklearn.metrics.cluster as sym
    U_labels = np.nonzero(U)[1]
    V_labels = np.nonzero(V)[1]
    print(U_labels, V_labels)
#     V2_labels = np.nonzero(V2)[1]
    print('entro(U)=', sym.entropy(U_labels), 'entro(V)=', sym.entropy(V_labels), 'entro(U,V)=', sym.mutual_info_score(U_labels, V_labels))
    res = [ ['ari', 'nmi', 'ami', 'vm' ], \
            [ sym.adjusted_rand_score(U_labels, V_labels),\
              sym.normalized_mutual_info_score(U_labels, V_labels),\
              sym.adjusted_mutual_info_score(U_labels, V_labels),\
              sym.v_measure_score(U_labels, V_labels)]]
    print(res)
    return res
Example #13
def entropia_por_bloco(list_blocos_img):
    # Return list
    return_lista_entropia_img = []

    for bloco in list_blocos_img:
        return_lista_entropia_img.append(entropy(bloco))
    return return_lista_entropia_img
Example #14
def extr_beauty_ftrs(imgFlNm):
    img = os.path.basename(imgFlNm)

    # print("Extracting beauty features for %s" % img)

    try:
        rgbImg = resize_img(io.imread(imgFlNm))
    except Exception as e:
        print("Invalid image!", e, 'image', imgFlNm)
        return e
    if len(rgbImg.shape) != 3 or rgbImg.shape[2] != 3:
        print("Invalid image.. Continuing..")
        final_ftr_obj_global[img] = None
        return None

    hsvImg = color.rgb2hsv(rgbImg)
    grayImg = color.rgb2gray(rgbImg)

    red, green, blue = get_arr(rgbImg)
    hue, saturation, value = get_arr(hsvImg)

    contrast = calc_contrast(red, green, blue)
    ftrs = calc_color_ftrs(hue, saturation, value)
    ftrs['contrast'] = contrast
    ftrs['entropy'] = entropy(
        grayImg
    )  # added to include entropy of the given image: more details: http://stackoverflow.com/a/42059758/5759063
    ftrs.update(get_spat_arrng_ftrs(grayImg))

    final_ftr_obj_global[img] = ftrs

    return final_ftr_obj_global
def sklearn_measures(U, V):
    #     http://scikit-learn.org/stable/modules/classes.html#clustering-metrics
    import sklearn.metrics.cluster as sym
    U_labels = np.nonzero(U)[1]
    V_labels = np.nonzero(V)[1]
    print(U_labels, V_labels)
    #     V2_labels = np.nonzero(V2)[1]
    print('entro(U)=', sym.entropy(U_labels), 'entro(V)=', sym.entropy(
        V_labels), 'entro(U,V)=', sym.mutual_info_score(U_labels, V_labels))
    res = [ ['ari', 'nmi', 'ami', 'vm' ], \
            [ sym.adjusted_rand_score(U_labels, V_labels),\
              sym.normalized_mutual_info_score(U_labels, V_labels),\
              sym.adjusted_mutual_info_score(U_labels, V_labels),\
              sym.v_measure_score(U_labels, V_labels)]]
    print(res)
    return res
def extr_beauty_ftrs(imgFlNm):
	img = os.path.basename(imgFlNm)
	
	print("Extracting beauty features for %s" %imgFlNm)

	try:
		rgbImg = resize_img(io.imread(imgFlNm))
	except Exception as e:
		print("Invalid image")
		return e
		
	if len(rgbImg.shape) != 3 or rgbImg.shape[2] !=3:
		print("Invalid image.. Continuing..")
		final_ftr_obj_global[img] = None
		return None

	hsvImg = color.rgb2hsv(rgbImg)
	grayImg = color.rgb2gray(rgbImg)

	red, green, blue = get_arr(rgbImg)
	hue, saturation, value = get_arr(hsvImg)

	contrast = calc_contrast(red, green, blue)
	ftrs = calc_color_ftrs(hue, saturation, value)
	ftrs['contrast'] = contrast
	ftrs['entropy'] = entropy(grayImg) # added to include entropy of the given image: more details: http://stackoverflow.com/a/42059758/5759063
	ftrs.update(get_spat_arrng_ftrs(grayImg))
	
	final_ftr_obj_global[img] = ftrs
	
	return final_ftr_obj_global
Example #17
def get_texture(output):
    # texture Feature
    grayImg = img_as_ubyte(color.rgb2gray(output))
    #"distances" is a list of distances (in pixels) between the pixels being compared
    distances = [1, 2, 3]
    #"angles" is a list of angles (in radians) between pixels being compared
    angles = [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4]
    # properties = ['energy', 'homogeneity']

    glcm = greycomatrix(grayImg,
                        distances=distances,
                        angles=angles,
                        symmetric=True,
                        normed=True)
    # feature1 for texture
    contrast = greycoprops(glcm, prop='contrast')
    # feature2 for texture
    energy = greycoprops(glcm, prop='energy')
    # feature3 for texture
    correlation = greycoprops(glcm, prop='correlation')
    # entropy=shannon_entropy(glcm)
    # feature4 for texture
    ent = entropy(glcm)
    # entropy = shannon_entropy(glcm, base=np.e)
    return contrast, energy, correlation, ent
def get_features(nodules):
    features = []

    i = 0

    for nodule in nodules:
        nodule_feature = Features(nodule)

        nodule_feature.max_coord = np.max(nodule.pixels)
        nodule_feature.conclusion = nodule.conclusion

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            grey_comatrix = skimg.greycomatrix(nodule.pixels, [1], [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4],
                                               nodule_feature.max_coord + 1)

        nodule_feature.features['contrast'] = skimg.greycoprops(grey_comatrix, 'contrast').flatten().astype(float)
        nodule_feature.features['dissimilarity'] = skimg.greycoprops(grey_comatrix, 'dissimilarity').flatten().astype(
            float)
        nodule_feature.features['homogeneity'] = skimg.greycoprops(grey_comatrix, 'homogeneity').flatten()
        nodule_feature.features['energy'] = skimg.greycoprops(grey_comatrix, 'energy').flatten()
        nodule_feature.features['correlation'] = skimg.greycoprops(grey_comatrix, 'correlation').flatten()
        nodule_feature.features['ASM'] = skimg.greycoprops(grey_comatrix, 'ASM').flatten().astype(float)
        nodule_feature.features['entropy'] = entropy(nodule.pixels)

        features.append(nodule_feature)

        i += 1
        print('progress =', i, '/', len(nodules), nodule.source_id)

    return np.array(features)
def detect_roughness(patch):
    blur = cv2.GaussianBlur(patch, (5, 5), 5)
    patch = cv2.cvtColor(blur, cv2.COLOR_BGR2GRAY)

    return entropy(patch) > 4
Example #20
def extract_entropy_full(path_img):
    ### FEATURE 1 ###
    # Extract the entropy of the whole image
    # Image.open(argv_img).convert('LA') converts to grayscale
    rgbImg = io.imread(path_img)
    grayImg = img_as_ubyte(color.rgb2gray(rgbImg))
    return_entropia = entropy(grayImg)
    return return_entropia
Example #21
def extract_feature(arr):
    radius = 1
    n_points = radius * 8

    arr = cv2.cvtColor(arr, cv2.COLOR_BGR2GRAY)

    distances = [1, 5]
    angles = [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4]
    glcm = greycomatrix(arr,
                        distances=distances,
                        angles=angles,
                        levels=256,
                        symmetric=False,
                        normed=False)
    # properties = ['dissimilarity', 'homogeneity', 'contrast', 'ASM', 'energy', 'correlation']
    # glcm_feats = np.hstack([greycoprops(glcm, prop=prop).ravel() for prop in properties])
    glcm_feats = np.hstack(
        [adadoc.greycoprops(glcm[:, :, i, :]) for i in range(0, 2)]).ravel()

    hog_feats = hog(arr,
                    orientations=9,
                    pixels_per_cell=(8, 8),
                    cells_per_block=(1, 1),
                    block_norm='L2-Hys',
                    feature_vector=True)

    ent = entropy(arr)

    # # prepare filter bank kernels
    # kernels = []
    # for theta in range(4):
    #     theta = theta / 4. * np.pi
    #     for sigma in (1, 3):
    #         for frequency in (0.05, 0.25):
    #             kernel = np.real(gabor_kernel(frequency, theta=theta,
    #                                           sigma_x=sigma, sigma_y=sigma))
    #             kernels.append(kernel)
    # gabor_feat = compute_feats(arr, kernels).ravel()

    thresh_sauvola = threshold_sauvola(arr, window_size=31, k=0.2)
    arr = arr > thresh_sauvola
    arr = (255 - arr * 255).astype('uint8')

    # arr = adadoc.adath(arr, method=adadoc.ADATH_SAUVOLA | adadoc.ADATH_INVTHRESH,
    #                    xblock=21, yblock=21, k=0.2, dR=64, C=0)

    lbp_code = local_binary_pattern(arr, n_points, radius, 'uniform')

    # n_bins = int(lbp_code.max() + 1)
    n_bins = 16
    lbp_feats, _ = np.histogram(lbp_code,
                                density=True,
                                bins=n_bins,
                                range=(0, n_bins))

    data_feat = np.hstack([lbp_feats, ent, glcm_feats, hog_feats])

    return data_feat
Example #22
def _print_clusteringMetrics(_kMean, _X):
	metrics = [['Clustering K-Means', 'Datos obtenidos'],
			   ['Inercia', _kMean.inertia_],
			   ['Entropy', entropy(_kMean.labels_)],
			   ['Silhouette Score', silhouette_score(_X, _kMean.labels_, random_state = 0)],
			   ['Calinski-Harabaz Score', calinski_harabaz_score(_X, _kMean.labels_)], ]

	print('\nMinería de Datos - Clustering K-Means - <VORT>', '\n')
	print(_kMean, '\n')
	print(look(metrics))
Example #23
def extract_feature(arr):
    radius = 1
    n_points = radius * 8

    # arr = cv2.cvtColor(arr, cv2.COLOR_BGR2GRAY)

    distances = [1, 5]
    angles = [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4]
    glcm = greycomatrix(arr.copy(),
                        distances=distances,
                        angles=angles,
                        levels=256,
                        symmetric=False,
                        normed=False)
    # properties = ['dissimilarity', 'homogeneity', 'contrast', 'ASM', 'energy', 'correlation']
    # glcm_feats = np.hstack([greycoprops(glcm, prop=prop).ravel() for prop in properties])
    glcm_feats = np.hstack(
        [adadoc.greycoprops(glcm[:, :, i, :]) for i in range(0, 2)]).ravel()

    hog_feats = hog(arr,
                    orientations=9,
                    pixels_per_cell=(8, 8),
                    cells_per_block=(1, 1),
                    block_norm='L2-Hys',
                    feature_vector=True)

    ent = entropy(arr)

    arr = adadoc.adath(arr,
                       method=adadoc.ADATH_SAUVOLA | adadoc.ADATH_INVTHRESH,
                       xblock=21,
                       yblock=21,
                       k=0.2,
                       dR=64,
                       C=0)
    # thresh_sauvola = threshold_sauvola(arr, window_size=31, k=0.2)
    # arr = arr > thresh_sauvola
    # arr = (255 - arr * 255).astype('uint8')

    lbp_code = local_binary_pattern(arr, n_points, radius, 'uniform')

    # n_bins = int(lbp_code.max() + 1)
    n_bins = 16
    lbp_feats, _ = np.histogram(lbp_code,
                                density=True,
                                bins=n_bins,
                                range=(0, n_bins))

    data_feat = np.hstack([lbp_feats, ent, glcm_feats, hog_feats])

    return data_feat
Example #24
def normalized_max_mutual_info_score(labels_true, labels_pred):
    """
    A variant version of NMI that is given as:
    NMI_max = MI(U, V) / max{ H(U), H(V) }
    based on 'adjusted mutual info score' in sklearn

    Parameters
    ----------
    :param labels_true: labels of clustering 1 (as a 1-dimensional ndarray)
    :param labels_pred: labels of clustering 2 (as a 1-dimensional ndarray)

    Returns
    -------
    :return: NMI-max between these two clusterings as a float value

    """
    labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
    n_samples = labels_true.shape[0]
    classes = np.unique(labels_true)
    clusters = np.unique(labels_pred)
    # Special limit cases: no clustering since the data is not split.
    # This is a perfect match hence return 1.0.
    if (classes.shape[0] == clusters.shape[0] == 1
            or classes.shape[0] == clusters.shape[0] == 0):
        return 1.0
    contingency = contingency_matrix(labels_true, labels_pred)
    contingency = np.array(contingency, dtype='float')
    # Calculate the MI for the two clusterings
    mi = metrics.mutual_info_score(labels_true,
                                   labels_pred,
                                   contingency=contingency)
    # Calculate the expected value for the mutual information
    # Calculate entropy for each labeling
    h_true, h_pred = entropy(labels_true), entropy(labels_pred)
    nmi_max = mi / max(h_true, h_pred)
    return nmi_max
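The same NMI_max quantity can be sketched with just the public sklearn helpers (the labels below are hypothetical):

import numpy as np
from sklearn.metrics import mutual_info_score
from sklearn.metrics.cluster import entropy

labels_true = np.array([0, 0, 1, 1, 2, 2])
labels_pred = np.array([0, 0, 0, 1, 1, 1])
nmi_max = mutual_info_score(labels_true, labels_pred) / max(entropy(labels_true),
                                                             entropy(labels_pred))
print(nmi_max)  # a value in [0, 1]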
def ExtractFeature(fruit_images):
    list_of_vectors = []
    for img in fruit_images:
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        row, col = gray_img.shape
        canvas = np.zeros((row, col, 1), np.uint8)
        for i in range(row):
            for j in range(col):
                if gray_img[i][j] < 220:
                    canvas.itemset((i, j, 0), 255)
                else:
                    canvas.itemset((i, j, 0), 0)

        kernel = np.ones((3, 3), np.uint8)
        canvas = cv2.morphologyEx(canvas, cv2.MORPH_CLOSE, kernel)

        for i in range(row):
            for j in range(col):
                b, g, r = img[i][j]
                if canvas[i][j] == 255:
                    img.itemset((i, j, 0), b)
                    img.itemset((i, j, 1), g)
                    img.itemset((i, j, 2), r)
                else:
                    img.itemset((i, j, 0), 0)
                    img.itemset((i, j, 1), 0)
                    img.itemset((i, j, 2), 0)

        hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        rgb_means, rgb_std = cv2.meanStdDev(img)
        hsv_means, hsv_std = cv2.meanStdDev(hsv_img)

        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        coeff = pywt.dwt2(gray_img, "haar")  # ---- Dekomposisi lv 1
        LL, (LH, HL, HH) = coeff
        Energy = (LH**2 + HL**2 + HH**2).sum() / img.size
        Entropy = entropy(gray_img)

        b, g, r = img[row // 2 - 1, col // 2 - 1]
        # center_code = tools.CentreClass(b,g,r)
        list_of_vectors.append([
            rgb_means[2], rgb_means[1], rgb_means[0], rgb_std[2], rgb_std[1],
            rgb_std[0], hsv_means[2], hsv_means[1], hsv_means[0], hsv_std[2],
            hsv_std[1], hsv_std[0], Energy, Entropy
        ])

    list_of_vectors = np.array(list_of_vectors)
    return (list_of_vectors)
Example #26
def glcm_f(segment_region):
    glcm = greycomatrix(segment_region, [5], [0], 256)

    stats = [
        "dissimilarity", "correlation", "contrast", "homogeneity", "ASM",
        "energy"
    ]
    dissimilarity = greycoprops(glcm, stats[0])[0, 0]
    correlation = greycoprops(glcm, stats[1])[0, 0]
    contrast = greycoprops(glcm, stats[2])[0, 0]
    homogeneity = greycoprops(glcm, stats[3])[0, 0]
    ASM = greycoprops(glcm, stats[4])[0, 0]
    energy = greycoprops(glcm, stats[5])[0, 0]
    entropy_f = entropy(segment_region)
    temp_features = [
        dissimilarity, correlation, contrast, homogeneity, ASM, energy,
        entropy_f
    ]
    return temp_features
print("Entropy")
import numpy as np
from skimage import io, color, img_as_ubyte
from skimage.feature import greycomatrix, greycoprops
from sklearn.metrics.cluster import entropy

rgbImg = io.imread('D:/PAPERS/With_Pratap_2019/figs/airplane_cipher.png')
grayImg = img_as_ubyte(color.rgb2gray(rgbImg))
#grayImg = Image.open('Documents/figs/barbara.jpg')

print(entropy(grayImg))
Example #28
def test_entropy():
    ent = entropy([0, 0, 42.])
    assert_almost_equal(ent, 0.6365141, 5)
Example #29
def test_entropy():
    ent = entropy([0, 0, 42.])
    assert_almost_equal(ent, 0.6365141, 5)
    assert_almost_equal(entropy([]), 1)
Example #30
Vprocc2 = shatt.in_range(Vprocc2)
Vprocf2 = shatt.in_range(Vprocf2)
Vprocc3 = shatt.in_range(Vprocc3)
Vprocf3 = shatt.in_range(Vprocf3)

#save_without_retrieve_color(hsv, Vprocc2, Vprocf2, Vprocc3, Vprocf3)

# Retrieve true color to skin
muorig = V.mean()
Vnewc2 = shatt.retrieve_color(Vprocc2, muorig)
Vnewf2 = shatt.retrieve_color(Vprocf2, muorig)
Vnewc3 = shatt.retrieve_color(Vprocc3, muorig)
Vnewf3 = shatt.retrieve_color(Vprocf3, muorig)

# Convert Value into the range 0-1
Vnewc2 = shatt.in_range(Vnewc2)
Vnewf2 = shatt.in_range(Vnewf2)
Vnewc3 = shatt.in_range(Vnewc3)
Vnewf3 = shatt.in_range(Vnewf3)

save_with_retrieve_color(hsv, Vnewc2, Vnewf2, Vnewc3, Vnewf3)

# Select the image which has the least entropy
Vlist = [V, Vnewc2, Vnewf2, Vnewc3, Vnewf3]
values = [img_as_ubyte(v) for v in Vlist]

entropy_vals = [entropy(v) for v in values]

print('entropy: ' + str(entropy_vals))
print('index: ' + str(entropy_vals.index(min(entropy_vals))))
                img.itemset((i, j, 1), g)
                img.itemset((i, j, 2), r)
            else:
                img.itemset((i, j, 0), 0)
                img.itemset((i, j, 1), 0)
                img.itemset((i, j, 2), 0)

    hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    rgb_means, rgb_std = cv2.meanStdDev(img)
    hsv_means, hsv_std = cv2.meanStdDev(hsv_img)

    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    coeff = pywt.dwt2(gray_img, "haar")  # ---- Dekomposisi lv 1
    LL, (LH, HL, HH) = coeff
    Energy = (LH**2 + HL**2 + HH**2).sum() / img.size
    Entropy = entropy(gray_img)

    b, g, r = img[row // 2 - 1, col // 2 - 1]
    center_code = tools.CentreClass(b, g, r)
    list_of_vectors.append([
        rgb_means[2], rgb_means[1], rgb_means[0], rgb_std[2], rgb_std[1],
        rgb_std[0], hsv_means[2], hsv_means[1], hsv_means[0], hsv_std[2],
        hsv_std[1], hsv_std[0], Energy, Entropy
    ])

list_of_vectors = np.array(list_of_vectors)

# X_train, X_test, y_train, y_test = train_test_split(list_of_vectors, label_ids, test_size=0.30, random_state=14)
X_tr = list_of_vectors
y_tr = label_ids
        input = cv2.imread(path + i, cv2.IMREAD_GRAYSCALE)
        glcm = skimage.feature.greycomatrix(
            input, [1], [0, np.pi / 4, np.pi / 2, np.pi * 3 / 4],
            256,
            symmetric=True,
            normed=True)
        # contrast, dissimilarity, homogeneity, energy, correlation, ASM (angular second moment)
        # for prop in {'contrast', 'dissimilarity','homogeneity', 'energy', 'correlation', 'ASM'}:
        contrast = skimage.feature.greycoprops(glcm, "contrast")
        dissimilarity = skimage.feature.greycoprops(glcm, "dissimilarity")
        homogeneity = skimage.feature.greycoprops(glcm, "homogeneity")
        energy = skimage.feature.greycoprops(glcm, "energy")
        correlation = skimage.feature.greycoprops(glcm, "correlation")
        ASM = skimage.feature.greycoprops(glcm, "ASM")

        entropyImg = entropy(input)  # get the entropy
        sheet1.write(flag + 1, 0, i)
        sheet1.write(flag + 1, 1, contrast[0][0])
        sheet1.write(flag + 1, 2, dissimilarity[0][0])
        sheet1.write(flag + 1, 3, homogeneity[0][0])
        sheet1.write(flag + 1, 4, energy[0][0])
        sheet1.write(flag + 1, 5, correlation[0][0])
        sheet1.write(flag + 1, 6, ASM[0][0])
        sheet1.write(flag + 1, 7, entropyImg)
        print(contrast[0][0], dissimilarity[0][0], homogeneity[0][0],
              energy[0][0], correlation[0][0], ASM[0][0], entropyImg)

        print("----------分界线-----------")
        flag += 1

wb.save("data/glcm.xls")
def shading_attenuation_method(image, extract, margin):
    """
    Apply the shading attenuation method to an image

    Parameters
    ----------
    image: 3D array
        The image source
    extract: scalar
        Number of pixel to extract, extract x extract
    margin: scalar
        Margin from the borders
    """

    hsv = rgb2hsv(image)
    V = np.copy(hsv[:, :, 2])

    shape = image.shape[0:2]

    """
    Sampling pixels
    ---------------
    """
    Yc, Xc = shatt.sampling_from_corners(margin=margin, extract=extract, shape=shape)
    Yf, Xf = shatt.sampling_from_frames(margin=margin, extract=extract, shape=shape)

    Zc = np.zeros((Xc.shape))
    Zf = np.zeros((Xf.shape))

    for j in range(0, Zc.shape[0]):
        Zc[j] = np.copy(V[Yc[j], Xc[j]])

    for j in range(0, Zf.shape[0]):
        Zf[j] = np.copy(V[Yf[j], Xf[j]])

    """
    Quadratic and cubic polynomial coefficients
    -------------------------------------------
    """
    Ac2 = shatt.quadratic_polynomial_function(Yc, Xc)
    Af2 = shatt.quadratic_polynomial_function(Yf, Xf)

    Ac3 = shatt.cubic_polynomial_function(Yc, Xc)
    Af3 = shatt.cubic_polynomial_function(Yf, Xf)

    """
    Fitting polynomial
    ------------------
    """
    coeffc2 = np.linalg.lstsq(Ac2, Zc, rcond=None)[0]
    coefff2 = np.linalg.lstsq(Af2, Zf, rcond=None)[0]

    coeffc3 = np.linalg.lstsq(Ac3, Zc, rcond=None)[0]
    coefff3 = np.linalg.lstsq(Af3, Zf, rcond=None)[0]

    """
    Processed
    ---------
    """
    Vprocc2 = shatt.apply_quadratic_function(V, coeffc2)
    Vprocf2 = shatt.apply_quadratic_function(V, coefff2)
    Vprocc3 = shatt.apply_cubic_function(V, coeffc3)
    Vprocf3 = shatt.apply_cubic_function(V, coefff3)

    # Convert Value into the range 0-1
    Vprocc2 = shatt.in_range(Vprocc2)
    Vprocf2 = shatt.in_range(Vprocf2)
    Vprocc3 = shatt.in_range(Vprocc3)
    Vprocf3 = shatt.in_range(Vprocf3)

    # Retrieve true color to skin
    muorig = V.mean()
    Vnewc2 = shatt.retrieve_color(Vprocc2, muorig)
    Vnewf2 = shatt.retrieve_color(Vprocf2, muorig)
    Vnewc3 = shatt.retrieve_color(Vprocc3, muorig)
    Vnewf3 = shatt.retrieve_color(Vprocf3, muorig)

    # Convert Value into the range 0-1
    Vnewc2 = shatt.in_range(Vnewc2)
    Vnewf2 = shatt.in_range(Vnewf2)
    Vnewc3 = shatt.in_range(Vnewc3)
    Vnewf3 = shatt.in_range(Vnewf3)

    # Select the image which has the least entropy
    Vlist = [V, Vnewc2, Vnewf2, Vnewc3, Vnewf3]
    values = [img_as_ubyte(v) for v in Vlist]

    entropy_vals = [entropy(v) for v in values]
    print('\tentropy: '+str(entropy_vals))
    index = entropy_vals.index(min(entropy_vals))

    hsv[:, :, 2] = np.copy(Vlist[index])
    attenuated = hsv2rgb(hsv)

    return attenuated
Example #34
def normalized_mutual_information(cl: np.ndarray, org: np.ndarray):
    """ Clustering accuracy measure, which takes into account mutual information between two clusterings and entropy
        of each cluster """
    assert cl.shape == org.shape

    return mutual_info_score(org, cl) / (abs(entropy(cl) + entropy(org)) / 2)
Example #35
def upload_pic(request):
    if request.POST:
        file_doc = request.FILES['ahoy']
        sv = fruit(document=file_doc)
        sv.save()
        oyo = fruit.objects.last()
        oy = oyo.id

        item = fruit.objects.get(id=oy)
        #item = fruit.objects.get(id=oy)
        img_file = item.document
        ayay = img_file.url
        print(ayay)
        print(img_file)
        image = cv2.imread(img_file.url)
        image = cv2.medianBlur(image, 5)
        fruit_images = []
        fruit_images.append(image)

        ##### Feature Extraction
        list_of_vectors = []
        for img in fruit_images:
            gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            row, col = gray_img.shape
            canvas = np.zeros((row, col, 1), np.uint8)
            for i in range(row):
                for j in range(col):
                    if gray_img[i][j] < 220:
                        canvas.itemset((i, j, 0), 255)
                    else:
                        canvas.itemset((i, j, 0), 0)

            kernel = np.ones((3, 3), np.uint8)
            canvas = cv2.morphologyEx(canvas, cv2.MORPH_CLOSE, kernel)

            for i in range(row):
                for j in range(col):
                    b, g, r = img[i][j]
                    if canvas[i][j] == 255:
                        img.itemset((i, j, 0), b)
                        img.itemset((i, j, 1), g)
                        img.itemset((i, j, 2), r)
                    else:
                        img.itemset((i, j, 0), 0)
                        img.itemset((i, j, 1), 0)
                        img.itemset((i, j, 2), 0)

            hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            rgb_means, rgb_std = cv2.meanStdDev(img)
            hsv_means, hsv_std = cv2.meanStdDev(hsv_img)

            gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            coeff = pywt.dwt2(gray_img, "haar")  # ---- Dekomposisi lv 1
            LL, (LH, HL, HH) = coeff
            Energy = (LH**2 + HL**2 + HH**2).sum() / img.size
            Entropy = entropy(gray_img)

            b, g, r = img[row // 2 - 1, col // 2 - 1]
            list_of_vectors.append([
                rgb_means[2], rgb_means[1], rgb_means[0], rgb_std[2],
                rgb_std[1], rgb_std[0], hsv_means[2], hsv_means[1],
                hsv_means[0], hsv_std[2], hsv_std[1], hsv_std[0], Energy,
                Entropy
            ])
        img_file = item.document
        list_of_vectors = np.array(list_of_vectors)

        X_tr = list_of_vectors
        clf = joblib.load('clfRF70-12-g-wcc.pkl')
        test_predict = clf.predict(X_tr)

        fr = test_predict[0]
        fra = int(fr)
        item.Tipe = fra
        item.save()

    return HttpResponseRedirect(reverse('buah:hasil', args=[item.id]))
def info_var(z, zh):
    """Compute variation of information based on M. Meila (2007)."""
    return entropy(z) + entropy(zh) - 2*mutual_info_score(z, zh)
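A quick sanity check for info_var, assuming sklearn's natural-log entropy and mutual_info_score: the variation of information between a clustering and itself is 0, and it grows as the clusterings diverge.

import numpy as np
from sklearn.metrics import mutual_info_score
from sklearn.metrics.cluster import entropy

# Hypothetical clusterings.
z = np.array([0, 0, 1, 1, 2, 2])
zh = np.array([0, 0, 0, 1, 2, 2])
print(info_var(z, z))   # ~0.0
print(info_var(z, zh))  # > 0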
Example #37
def create_clusters(tweetsDF,  my_token_pattern, min_dist_thres=0.6, min_max_diff_thres=0.4, max_dist_thres=0.8, iteration_no=1, min_clusters=1, printsize=True, nameprefix='',  selection=True, strout=False, user_identifier='screen_name', cluster_list=None): 
	"""
	Have modes:
	mode1: get a certain number of clusters. Relax parameters for it. (This is the current Mode!)
	mode2: get clusters that comply with certain conditions.

	"min_max_diff_thres" should not be too small. Then You miss thresholds like: min 0.3 - min 0.7: The top is controlled by the maximum anyway. Do not fear from having it big: around 0.4

	"""

	if min_dist_thres > 0.85 and max_dist_thres>0.99:
		logging.info("The parameter values are too high to allow a good selection. We just finish searching for clusters at that stage.")
		logging.info("Threshold Parameters are: \nmin_dist_thres="+str(min_dist_thres)+"\tmin_max_diff_thres:="+str(min_max_diff_thres)+ "\tmax_dist_thres="+str(max_dist_thres))
		return cluster_list


	len_clust_list = 0
	if cluster_list is None:
		cluster_list = []

	elif not selection and len(cluster_list)>0:
		return cluster_list
	else:
		len_clust_list = len(cluster_list)
		logging.info("Starting the iteration with:"+str(len_clust_list)+" clusters.")

		clustered_tweet_ids = []

		for clust_dict in cluster_list:
			clustered_tweet_ids += clust_dict["twids"]

		logging.info("Number of already clustered tweets are:"+str(len(clustered_tweet_ids)))

		logging.info("Tweet set size to be clustered:"+str(len(tweetsDF)))
		tweetsDF = tweetsDF[~tweetsDF.id_str.isin(clustered_tweet_ids)]
		logging.info("Tweet set size to be clustered(after elimination of the already clustered tweets):"+str(len(tweetsDF)))

		if len(tweetsDF)==0:
			logging.info("Please check that the id_str has a unique value for each item.")
			print("Please check that the id_str has a unique value for each item.")
			return cluster_list

	logging.info('Creating clusters was started!!')
	logging.info("Threshold Parameters are: \nmin_dist_thres="+str(min_dist_thres)+"\tmin_max_diff_thres:="+str(min_max_diff_thres)+ "\tmax_dist_thres="+str(max_dist_thres))
	cluster_bigram_cntr = Counter()

	freqcutoff = int(m.log(len(tweetsDF))/2)
	if freqcutoff == 0:
		freqcutoff = 1 # make it at least 1.

	#freqcutoff = int(m.log(len(tweetsDF))/2) # the bigger freq threshold is the quicker to find similar groups of tweets, although precision will decrease.
	logging.info("Feature extraction parameters are:\tfrequencyCutoff:"+str(freqcutoff))

	word_vectorizer = TfidfVectorizer(ngram_range=(1, 2), lowercase=False, norm='l2', min_df=freqcutoff, token_pattern=my_token_pattern)
	text_vectors = word_vectorizer.fit_transform(tweetsDF[active_column])
	# logging.info("Number of features:"+str(len(word_vectorizer.get_feature_names())))
	logging.info("Features are:"+str(word_vectorizer.get_feature_names()))

	n_clust = int(m.sqrt(len(tweetsDF))/2)+iteration_no*(min_clusters-len_clust_list) # The more clusters we need, the more clusters we will create.
	n_initt = int(m.log10(len(tweetsDF)))+iteration_no  # up to 1 million, in KMeans setting, having many iterations is not a problem. # more iterations higher chance of having candidate clusters.

	logging.info("Clustering parameters are:\nnclusters="+str(n_clust)+"\tn_initt="+str(n_initt))
	

	if len(tweetsDF) < 1000000:
		km = KMeans(n_clusters=n_clust, init='k-means++', max_iter=500, n_init=n_initt)  # , n_jobs=16
		logging.info("The data set is small enough to use Kmeans")
	else: 
		km = MiniBatchKMeans(n_clusters=n_clust, init='k-means++', max_iter=500, n_init=n_initt, batch_size=1000)
		logging.info("The data set is BIG, MiniBatchKMeans is used.")

	km.fit(text_vectors)
	
	# Cluster = namedtuple('Cluster', ['cno', 'cstr','tw_ids'])
	clustersizes = get_cluster_sizes(km, tweetsDF[active_column].values)

	logging.info("Cluster sizes are:"+str(clustersizes))

	for cn, csize in clustersizes.most_common():  # range(args.ksize):
		cn = int(cn)
		similar_indices = (km.labels_ == cn).nonzero()[0]
		similar = []
		similar_tuple_list = []
		for i in similar_indices: 
			dist = sp.linalg.norm((km.cluster_centers_[cn] - text_vectors[i]))
			similar_tuple_list.append((dist, tweetsDF['id_str'].values[i], tweetsDF[active_column].values[i], tweetsDF[user_identifier].values[i])) 
			if strout:
				similar.append(str(dist) + "\t" + tweetsDF['id_str'].values[i] + "\t" + tweetsDF[active_column].values[i] + "\t" + tweetsDF[user_identifier].values[i])
		
		similar_tuple_list = sorted(similar_tuple_list, key=itemgetter(0)) # sort based on the 0th, which is the distance from the center, element.
		# test sortedness!

		if strout:	
			similar = sorted(similar, reverse=False)
		cluster_info_str = ''
		user_list = [t[3] for t in similar_tuple_list]  # t[3] means the third element in the similar_tuple_list.
		if selection:
			if (len(similar_tuple_list)>2) and (similar_tuple_list[0][0] < min_dist_thres) and (similar_tuple_list[-1][0] < max_dist_thres) and ((similar_tuple_list[0][0] + min_max_diff_thres) > similar_tuple_list[-1][0]):  # the smallest and biggest distance to the centroid should not be very different, we allow 0.4 for now!
				cluster_info_str += "cluster number and size are: " + str(cn) + '    ' + str(clustersizes[str(cn)]) + "\n"
				for txt in tweetsDF[active_column].values[similar_indices]:
					cluster_bigram_cntr.update(get_uni_bigrams(txt))  # regex.findall(r"\b\w+[-]?\w+\s\w+", txt, overlapped=True))
					# cluster_bigram_cntr.update(txt.split()) # unigrams
				frequency = reverse_index_frequency(cluster_bigram_cntr)
				if strout:
					topterms = [k+":" + str(v) for k, v in cluster_bigram_cntr.most_common() if k in word_vectorizer.get_feature_names()]  
					cluster_info_str += "Top terms are:" + ", ".join(topterms) + "\n"
				if strout:
					cluster_info_str += "distance_to_centroid" + "\t" + "tweet_id" + "\t" + "tweet_text\n"
					if len(similar) > 20:
						cluster_info_str += 'First 10 documents:\n'
						cluster_info_str += "\n".join(similar[:10]) + "\n"
						# print(*similar[:10], sep='\n', end='\n')

						cluster_info_str += 'Last 10 documents:\n'
						cluster_info_str += "\n".join(similar[-10:]) + "\n"
					else:
						cluster_info_str += "Tweets for this cluster are:\n"
						cluster_info_str += "\n".join(similar) + "\n"
			else:
				logging.info("Cluster is not good. Smallest and largest distance to the cluster center are:"+str(similar_tuple_list[0][0])+"\t"+str(similar_tuple_list[-1][0]))
		else:
				cluster_info_str += "cluster number and size are: " + str(cn) + '    ' + str(clustersizes[str(cn)]) + "\n"
				cluster_bigram_cntr = Counter()
				for txt in tweetsDF[active_column].values[similar_indices]:
					cluster_bigram_cntr.update(get_uni_bigrams(txt))
				frequency = reverse_index_frequency(cluster_bigram_cntr)
				if strout:
					topterms = [k+":"+str(v) for k, v in cluster_bigram_cntr.most_common() if k in word_vectorizer.get_feature_names()]  
					cluster_info_str += "Top terms are:" + ", ".join(topterms) + "\n"
				if strout:
					cluster_info_str += "distance_to_centroid" + "\t" + "tweet_id" + "\t" + "tweet_text\n"
					if len(similar) > 20:
						cluster_info_str += 'First 10 documents:\n'
						cluster_info_str += "\n".join(similar[:10]) + "\n"
						# print(*similar[:10], sep='\n', end='\n')

						cluster_info_str += 'Last 10 documents:\n'
						cluster_info_str += "\n".join(similar[-10:]) + "\n"
					else:
						cluster_info_str += "Tweets for this cluster are:\n"
						cluster_info_str += "\n".join(similar) + "\n"

		if len(cluster_info_str) > 0:  # that means there is some information in the cluster.
			logging.info("\nCluster was appended. cluster_info_str:"+cluster_info_str+"\tmin_dist="+str(similar_tuple_list[0][0])+"\tmax_dist="+str(similar_tuple_list[-1][0]))
			cluster_list.append({'cno': cn, 'cnoprefix': nameprefix+str(cn), 'user_entropy': entropy(user_list), 'rif': frequency, 'cstr': cluster_info_str, 'ctweettuplelist': similar_tuple_list,  'twids': list(tweetsDF[np.in1d(km.labels_, [cn])]["id_str"].values)})  # 'user_ent':entropy(user_list),

	logging.info("length of cluster_list:"+str(len(cluster_list)))
	len_clust_list = len(cluster_list) # use to adjust the threshold steps for the next iteration. If you are closer to the target step smaller.
	if len_clust_list<min_clusters:
		logging.info("There is not enough clusters, call the create_clusters again with relaxed threshold parameters (recursively). Iteration no:"+str(iteration_no))

		factor = (min_clusters-len_clust_list)/1000 # if it needs more clusters, it will make a big step
		
		min_dist_thres2, max_dist_thres2, min_max_diff_thres2 = relax_parameters(min_dist_thres, max_dist_thres, min_max_diff_thres, factor)
		logging.info("Threshold step sizes are: \nmin_dist_thres="+str(min_dist_thres-min_dist_thres2)+"\tmax_dist_thres="+str(max_dist_thres-max_dist_thres2)+"\tmin_max_diff_thres="+str(min_max_diff_thres-min_max_diff_thres2))
		return create_clusters(tweetsDF,  my_token_pattern, min_dist_thres=min_dist_thres2, min_max_diff_thres=min_max_diff_thres2, max_dist_thres=max_dist_thres2,  \
			iteration_no=iteration_no+1, min_clusters=min_clusters, user_identifier=user_identifier, cluster_list=cluster_list)

	return cluster_list