Python kurtosisの例、scipy.stats.mstats.kurtosis Pythonの例

コード例 #1

0

ファイルを表示

ファイル: fits_maps.py プロジェクト: Chemap98/Compact-sources-detection-on-CMB-images.

    def kurtosis(self, i=-1, **kwargs):
        """Return the kurtosis of one masked map, or of every map.

        Parameters
        ----------
        i : int, optional
            Index of the map to evaluate. When ``i >= 0`` only that map is
            processed; otherwise (the default, ``-1``) all ``self.nmaps``
            maps are processed.
        **kwargs
            Forwarded unchanged to ``mst.kurtosis``.

        Returns
        -------
        The value returned by ``mst.kurtosis`` for map ``i``, or a NumPy
        array holding one such value per map.
        """
        if i >= 0:
            return mst.kurtosis(self.masked_data(i), **kwargs)

        # No specific map requested: evaluate each map in index order.
        per_map = [
            mst.kurtosis(self.masked_data(idx), **kwargs)
            for idx in range(self.nmaps)
        ]
        return np.array(per_map)

コード例 #2

0

ファイルを表示

ファイル: test_mstats_basic.py プロジェクト: jnothman/scipy

    def test_kurtosis(self):
        """Check ``mstats.kurtosis`` against stored reference values.

        Covers the ``testmathworks`` fixture (Pearson definition, with and
        without bias correction), the small ``testcase`` fixture, the 2-D
        masked fixture evaluated along axis 1 and row by row (with and
        without bias correction), and agreement with ``stats.kurtosis`` on
        row 2 of the 2-D fixture.
        """
        # Set flags for axis = 0 and fisher=0 (Pearson's definition of kurtosis
        # for compatibility with Matlab)
        y = mstats.kurtosis(self.testmathworks, 0, fisher=0, bias=1)
        assert_almost_equal(y, 2.1658856802973, 10)
        # Note that MATLAB has confusing docs for the following case
        #  kurtosis(x,0) gives an unbiased estimate of Pearson's kurtosis
        #  kurtosis(x)  gives a biased estimate of Fisher's kurtosis (Pearson-3)
        #  The MATLAB docs imply that both should give Fisher's
        y = mstats.kurtosis(self.testmathworks, fisher=0, bias=0)
        assert_almost_equal(y, 3.663542721189047, 10)
        y = mstats.kurtosis(self.testcase, 0, 0)
        assert_almost_equal(y, 1.64)

        # test that kurtosis works on multidimensional masked arrays
        # (the builtin ``bool`` is used as dtype: the ``np.bool`` alias was
        # deprecated in NumPy 1.20 and removed in 1.24)
        correct_2d = ma.array(
            np.array([-1.5, -3.0, -1.47247052385, 0.0, -1.26979517952]),
            mask=np.array([False, False, False, True, False], dtype=bool),
        )
        assert_array_almost_equal(mstats.kurtosis(self.testcase_2d, 1), correct_2d)
        for i, row in enumerate(self.testcase_2d):
            assert_almost_equal(mstats.kurtosis(row), correct_2d[i])

        correct_2d_bias_corrected = ma.array(
            np.array([-1.5, -3.0, -1.88988209538, 0.0, -0.5234638463918877]),
            mask=np.array([False, False, False, True, False], dtype=bool),
        )
        assert_array_almost_equal(mstats.kurtosis(self.testcase_2d, 1, bias=False), correct_2d_bias_corrected)
        for i, row in enumerate(self.testcase_2d):
            assert_almost_equal(mstats.kurtosis(row, bias=False), correct_2d_bias_corrected[i])

        # Check consistency between stats and mstats implementations
        assert_array_almost_equal_nulp(mstats.kurtosis(self.testcase_2d[2, :]), stats.kurtosis(self.testcase_2d[2, :]))

コード例 #3

0

ファイルを表示

ファイル: features.py プロジェクト: Anushakpb/caffe-windows

def extractFeatV2(image_file):
    """Build the V2 feature vector for one image file.

    The returned 1-D array is the concatenation, in this order, of:

    * five global intensity statistics (mean, standard deviation, kurtosis,
      skewness, Gini) over the pixels whose value differs from 255,
    * the features returned by ``extractFeatV1`` for the same file,
    * the mahotas Haralick, LBP, PFTAS and Zernike-moment descriptors.
    """
    region_feat = extractFeatV1(image_file)

    grey = imread(image_file, as_grey=True).copy()

    # Global statistics: keep only pixels for which ``255 - pixel`` is nonzero.
    foreground = grey[np.nonzero(255 - grey)]
    global_feat = np.asarray(
        [
            np.mean(foreground),
            np.std(foreground),
            kurtosis(foreground),
            skew(foreground),
            gini(foreground, image_file),
        ],
        dtype='float32',
    )

    # Texture and shape descriptors; the file is read again through mahotas.
    mh_image = mh.imread(image_file, as_grey=True)
    haralick = mh.features.haralick(
        mh_image,
        ignore_zeros=False,
        preserve_haralick_bug=False,
        compute_14th_feature=False,
    )
    lbp = mh.features.lbp(mh_image, radius=20, points=7, ignore_zeros=False)
    pftas = mh.features.pftas(mh_image)
    zernike_moments = mh.features.zernike_moments(mh_image, radius=20, degree=8)

    # Flatten the Haralick result so it can be stacked with the 1-D vectors.
    haralick_flat = np.reshape(haralick, (np.prod(haralick.shape)))
    mh_feat = np.hstack((haralick_flat, lbp, pftas, zernike_moments))

    return np.hstack((global_feat, region_feat, mh_feat))

コード例 #4

0

ファイルを表示

ファイル: region_properties.py プロジェクト: junge82/NiftyNet-dev

    def kurtosis_(self):
        """
        Compute the kurtosis of ``self.masked_img`` along axis 0.

        :return: the value returned by ``mstats.kurtosis``
        """
        masked_values = self.masked_img
        return mstats.kurtosis(masked_values, axis=0)

コード例 #5

0

ファイルを表示

ファイル: features.py プロジェクト: Keesiu/meta-kaggle

def extractFeatV2(image_file):
    """Return the combined global, region and mahotas features of an image.

    Output layout (one flat array): global statistics of the pixels that are
    not equal to 255 (mean, std, kurtosis, skew, Gini), then the output of
    ``extractFeatV1``, then Haralick, LBP, PFTAS and Zernike descriptors
    computed with mahotas.
    """
    region_feat = extractFeatV1(image_file)

    pixels = imread(image_file, as_grey=True)
    pixels = pixels.copy()

    # Select every pixel where ``255 - value`` is nonzero.
    selected = pixels[np.nonzero(255 - pixels)]
    stats_list = [
        np.mean(selected),
        np.std(selected),
        kurtosis(selected),
        skew(selected),
        gini(selected, image_file),
    ]
    global_feat = np.asarray(stats_list, dtype='float32')

    # mahotas descriptors, computed on mahotas' own greyscale load.
    grey_mh = mh.imread(image_file, as_grey=True)
    texture = mh.features.haralick(grey_mh, ignore_zeros=False,
                                   preserve_haralick_bug=False,
                                   compute_14th_feature=False)
    lbp = mh.features.lbp(grey_mh, radius=20, points=7, ignore_zeros=False)
    pftas = mh.features.pftas(grey_mh)
    zernike_moments = mh.features.zernike_moments(grey_mh, radius=20, degree=8)

    # Collapse the Haralick array to one dimension before stacking.
    texture = np.reshape(texture, (np.prod(texture.shape)))
    mh_feat = np.hstack((texture, lbp, pftas, zernike_moments))

    feat = np.hstack((global_feat, region_feat, mh_feat))
    return feat

コード例 #6

0

ファイルを表示

def basicstats_calc(data):
    """Compute a table of summary statistics, one row per item of ``data``.

    Each item must expose ``data`` (a 2-D masked array; only unmasked values
    enter the statistics) and ``dataid`` (used as the row label).

    Returns ``(bands, cols, dattmp)``: the band list, the column headings
    and a one-element list holding the table as an object array.
    """
    table = []
    for band in data:
        values = band.data.compressed()

        # Minimum, maximum, mean, std dev, median ...
        row = [
            values.min(),
            values.max(),
            values.mean(),
            values.std(),
            np.median(values),
        ]
        # ... median absolute deviation, measured from the median just stored
        row.append(np.median(abs(values - row[-1])))
        # ... sample counts (total, x direction, y direction), skew, kurtosis
        row += [
            band.data.size,
            band.data.shape[1],
            band.data.shape[0],
            st.skew(values),
            st.kurtosis(values),
        ]

        # Round-trip through an array to turn NumPy scalars into plain numbers.
        table.append([band.dataid] + np.array(row).tolist())

    bands = ['Data Column']
    cols = ['Band', 'Minimum', 'Maximum', 'Mean', 'Std Dev', 'Median',
            'Median Abs Dev', 'No Samples', 'No cols (samples in x-dir)',
            'No rows (samples in y-dir)', 'Skewness', 'Kurtosis']
    return bands, cols, [np.array(table, dtype=object)]

コード例 #7

0

ファイルを表示

ファイル: show_table.py プロジェクト: Patrick-Cole/pygmi

def basicstats_calc(data):
    """Calculate per-band statistics for display in a table.

    ``data`` is iterated; every element needs a ``data`` attribute (2-D
    masked array) and a ``dataid`` attribute. Masked values are dropped
    before the statistics are computed.

    Returns the tuple ``(bands, cols, dattmp)`` where ``bands`` is
    ``['Data Column']``, ``cols`` lists the column names and ``dattmp`` is a
    list containing a single object array with one row per band.
    """
    cols = ['Band', 'Minimum', 'Maximum', 'Mean', 'Std Dev', 'Median',
            'Median Abs Dev', 'No Samples', 'No cols (samples in x-dir)',
            'No rows (samples in y-dir)', 'Skewness', 'Kurtosis']
    bands = ['Data Column']

    rows = []
    for item in data:
        raster = item.data
        valid = raster.compressed()

        numbers = [valid.min(), valid.max(), valid.mean(), valid.std()]
        numbers.append(np.median(valid))
        # Median absolute deviation around the median appended just above.
        numbers.append(np.median(abs(valid - numbers[-1])))
        numbers.extend([raster.size, raster.shape[1], raster.shape[0]])
        numbers.append(st.skew(valid))
        numbers.append(st.kurtosis(valid))

        # tolist() yields plain Python numbers instead of NumPy scalars.
        numbers = np.array(numbers).tolist()
        rows.append([item.dataid] + numbers)

    dattmp = [np.array(rows, dtype=object)]
    return bands, cols, dattmp

コード例 #8

0

ファイルを表示

ファイル: test_mstats_basic.py プロジェクト: decarlin/stuartlab-scripts

 def test_kurtosis(self):
     """Check ``mstats.kurtosis`` against stored reference values.

     Reference expressions noted by the original author (axis 0, Pearson's
     definition ``fisher=0`` for compatibility with Matlab)::

         sum((testcase-mean(testcase,axis=0))**4,axis=0)/((sqrt(var(testcase)*3/4))**4)/4
         sum((test2-mean(testmathworks,axis=0))**4,axis=0)/((sqrt(var(testmathworks)*4/5))**4)/5
     """
     pearson_biased = mstats.kurtosis(self.testmathworks, axis=0,
                                      fisher=False, bias=True)
     assert_almost_equal(pearson_biased, 2.1658856802973, decimal=10)

     # Note that MATLAB has confusing docs for the following case
     #  kurtosis(x,0) gives an unbiased estimate of Pearson's kurtosis
     #  kurtosis(x)  gives a biased estimate of Fisher's kurtosis (Pearson-3)
     #  The MATLAB docs imply that both should give Fisher's
     pearson_unbiased = mstats.kurtosis(self.testmathworks,
                                        fisher=False, bias=False)
     assert_almost_equal(pearson_unbiased, 3.663542721189047, decimal=10)

     # Small case: Pearson definition along axis 0.
     small_case = mstats.kurtosis(self.testcase, axis=0, fisher=False)
     assert_almost_equal(small_case, 1.64)

コード例 #9

0

ファイルを表示

ファイル: test_mstats_basic.py プロジェクト: minrk/scipy

 def test_kurtosis(self):
     """Verify ``mstats.kurtosis`` on the ``testmathworks`` and ``testcase`` fixtures.

     All three checks use Pearson's definition (``fisher`` switched off) for
     compatibility with Matlab. The original author's reference expressions
     were::

         sum((testcase-mean(testcase,axis=0))**4,axis=0)/((sqrt(var(testcase)*3/4))**4)/4
         sum((test2-mean(testmathworks,axis=0))**4,axis=0)/((sqrt(var(testmathworks)*4/5))**4)/5
     """
     kurt = mstats.kurtosis

     # Biased estimate along axis 0.
     assert_almost_equal(
         kurt(self.testmathworks, axis=0, fisher=False, bias=True),
         2.1658856802973, decimal=10)

     # Note that MATLAB has confusing docs for the following case
     #  kurtosis(x,0) gives an unbiased estimate of Pearson's kurtosis
     #  kurtosis(x)  gives a biased estimate of Fisher's kurtosis (Pearson-3)
     #  The MATLAB docs imply that both should give Fisher's
     assert_almost_equal(
         kurt(self.testmathworks, fisher=False, bias=False),
         3.663542721189047, decimal=10)

     # Second and third positional arguments in the original were axis and fisher.
     assert_almost_equal(kurt(self.testcase, axis=0, fisher=False), 1.64)

コード例 #10

0

ファイルを表示

ファイル: txtgrey.py プロジェクト: vladpopovici/WSItk

 def __init__(self, stats=None):
     """Set up the table of summary statistics and select the active ones.

     ``stats`` is an iterable of statistic names (matched case-insensitively
     against 'mean', 'std', 'kurtosis' and 'skewness'); ``None`` selects
     'mean' and 'std'. A name outside that set raises ``ValueError``.
     """
     # Name -> callable taking an array-like ``x_`` with .mean()/.std().
     self._statsfn = dict(
         mean=lambda x_: x_.mean(),
         std=lambda x_: x_.std(),
         kurtosis=lambda x_: kurtosis(x_, axis=None, fisher=True),
         skewness=lambda x_: skew(x_, axis=None, bias=True),
     )

     if stats is None:
         self.stats = ['mean', 'std']
         return

     # Store the normalised names first, then validate them.
     self.stats = [name.lower() for name in stats]
     if any(name not in self._statsfn for name in self.stats):
         raise ValueError('Unknown summary statistic')

コード例 #11

0

ファイルを表示

ファイル: txtgrey.py プロジェクト: gitter-badger/WSItk

 def __init__(self, stats=None):
     """Register the available summary statistics and choose which to use.

     Parameters
     ----------
     stats : iterable of str, optional
         Names of the statistics to use; they are lower-cased before being
         checked. Defaults to ``['mean', 'std']`` when ``None``.

     Raises
     ------
     ValueError
         If a requested name is not one of the registered statistics.
     """
     registry = {}
     registry['mean'] = lambda x_: x_.mean()
     registry['std'] = lambda x_: x_.std()
     registry['kurtosis'] = lambda x_: kurtosis(x_, axis=None, fisher=True)
     registry['skewness'] = lambda x_: skew(x_, axis=None, bias=True)
     self._statsfn = registry

     if stats is not None:
         # The lower-cased selection is stored before validation runs.
         self.stats = [requested.lower() for requested in stats]
         for requested in self.stats:
             if requested in self._statsfn:
                 continue
             raise ValueError('Unknown summary statistic')
     else:
         self.stats = ['mean', 'std']

コード例 #12

0

ファイルを表示

def basicstats_calc(data):
    """
    Build the statistics table for a PyGMI raster dataset.

    Parameters
    ----------
    data : iterable
        PyGMI raster dataset. Each element must provide ``data`` (a 2-D
        masked array) and ``dataid`` (the label placed in the first column).

    Returns
    -------
    bands : list
        Band list, currently only 'Data Column'.
    cols : list
        Column headings for the table.
    dattmp : list
        One-element list holding an object array with one row per band.

    """
    def _band_row(band):
        """Return ``[dataid, stat, stat, ...]`` for a single band."""
        values = band.data.compressed()
        # Minimum, maximum, mean, std dev, median
        row = [values.min(), values.max(), values.mean(), values.std(),
               np.median(values)]
        # Median abs deviation, taken around the median stored last
        row.append(np.median(abs(values - row[-1])))
        # No samples, no samples in x dir, no samples in y dir
        row.extend((band.data.size, band.data.shape[1], band.data.shape[0]))
        row.extend((st.skew(values), st.kurtosis(values)))
        # Convert NumPy scalars to plain Python numbers.
        return [band.dataid] + np.array(row).tolist()

    stats = [_band_row(band) for band in data]

    bands = ['Data Column']
    cols = [
        'Band', 'Minimum', 'Maximum', 'Mean', 'Std Dev', 'Median',
        'Median Abs Dev', 'No Samples', 'No cols (samples in x-dir)',
        'No rows (samples in y-dir)', 'Skewness', 'Kurtosis'
    ]
    return bands, cols, [np.array(stats, dtype=object)]

コード例 #13

0

ファイルを表示

    def test_kurtosis(self):
        """Check ``mstats.kurtosis`` against stored reference values.

        Exercises the ``testmathworks`` and ``testcase`` fixtures with
        Pearson's definition, the 2-D masked fixture along axis 1 and row by
        row (biased and bias-corrected), and consistency with
        ``stats.kurtosis`` on row 2 of the 2-D fixture.
        """
        #    sum((testcase-mean(testcase,axis=0))**4,axis=0)/((sqrt(var(testcase)*3/4))**4)/4
        #    sum((test2-mean(testmathworks,axis=0))**4,axis=0)/((sqrt(var(testmathworks)*4/5))**4)/5
        #    Set flags for axis = 0 and
        #    fisher=0 (Pearson's definition of kurtosis for compatibility with Matlab)
        y = mstats.kurtosis(self.testmathworks, 0, fisher=0, bias=1)
        assert_almost_equal(y, 2.1658856802973, 10)
        # Note that MATLAB has confusing docs for the following case
        #  kurtosis(x,0) gives an unbiased estimate of Pearson's kurtosis
        #  kurtosis(x)  gives a biased estimate of Fisher's kurtosis (Pearson-3)
        #  The MATLAB docs imply that both should give Fisher's
        y = mstats.kurtosis(self.testmathworks, fisher=0, bias=0)
        assert_almost_equal(y, 3.663542721189047, 10)
        y = mstats.kurtosis(self.testcase, 0, 0)
        assert_almost_equal(y, 1.64)

        # test that kurtosis works on multidimensional masked arrays
        # NOTE: dtype is the builtin ``bool``; the ``np.bool`` alias was
        # deprecated in NumPy 1.20 and removed in NumPy 1.24.
        correct_2d = ma.array(np.array([-1.5, -3., -1.47247052385,  0.,
                                        -1.26979517952]),
                              mask=np.array([False, False, False,  True,
                                             False], dtype=bool))
        assert_array_almost_equal(mstats.kurtosis(self.testcase_2d, 1),
                                  correct_2d)
        for i, row in enumerate(self.testcase_2d):
            assert_almost_equal(mstats.kurtosis(row), correct_2d[i])

        correct_2d_bias_corrected = ma.array(
            np.array([-1.5, -3., -1.88988209538,  0., -0.5234638463918877]),
            mask=np.array([False, False, False,  True, False], dtype=bool))
        assert_array_almost_equal(mstats.kurtosis(self.testcase_2d, 1,
                                                  bias=False),
                                  correct_2d_bias_corrected)
        for i, row in enumerate(self.testcase_2d):
            assert_almost_equal(mstats.kurtosis(row, bias=False),
                                correct_2d_bias_corrected[i])

        # Check consistency between stats and mstats implementations
        assert_array_almost_equal_nulp(mstats.kurtosis(self.testcase_2d[2, :]),
                                       stats.kurtosis(self.testcase_2d[2, :]))

コード例 #14

0

ファイルを表示

ファイル: region_properties.py プロジェクト: yf817/NiftyNet

 def kurtosis_(self):
     """Return the kurtosis of ``self.masked_img`` along axis 0."""
     values = self.masked_img
     return mstats.kurtosis(values, axis=0)

コード例 #15

0

ファイルを表示

ファイル: galaxy.py プロジェクト: ruchirgarg05/kaggle-galaxy

def get_features(f,
                 dsift_learn=False,
                 debug=False,
                 tiny_img=False,
                 tiny_grad=False,
                 image_statistics=False,
                 color_histogram=False,
                 orientation_histogram=False,
                 transform=0):
    """Extract features (or one intermediate representation) from an image.

    The image at path ``f`` is loaded with ``cv2.imread``, optionally
    flipped, blurred, clamped from below at a background level, recentred,
    rotated according to a fitted ellipse and cropped. Depending on the
    flags the function returns early with one specific representation;
    otherwise it returns the full feature vector.

    NOTE: this function uses Python 2 syntax (``print`` statements,
    ``xrange``).

    Parameters
    ----------
    f : path of the image, passed to ``cv2.imread``; also forwarded to
        ``gini`` and ``fit_ellipse`` and printed in diagnostics.
    dsift_learn : not referenced anywhere in this function body.
    debug : when true, intermediate images are written to PNG files
        (relative paths), the fitted ellipse is printed, and the
        classifier-based features at the end are skipped.
    tiny_img : when true, return the flattened 64x64 grey thumbnail.
    tiny_grad : when true, return the flattened 64x64 thumbnail of
        ``grad_img``.
    image_statistics : when true, return the statistics vector built so
        far, before any classifier output is appended.
    color_histogram : when true, return ``get_color_histogram(img_color)``.
    orientation_histogram : when true, return
        ``get_orientation_histogram(img_color)``.
    transform : 1 reverses the row order of the loaded image, 2 reverses
        the column order; any other value leaves it as loaded.

    Returns
    -------
    A 1-D feature array, or one of the early-return values listed above.
    The early-return flags are tested in this order: ``color_histogram``,
    ``orientation_histogram``, ``tiny_img``, ``tiny_grad``,
    ``image_statistics``.

    Notes
    -----
    Calls ``exit(1)`` when the fitted ellipse has zero width or height, or
    when the cropped image has a zero-sized dimension.
    """
    img_color = cv2.imread(f).astype('float')
    if transform == 1:
        img_color = img_color[::-1, :].copy()
    if transform == 2:
        img_color = img_color[:, ::-1].copy()

    # Keep an unblurred copy; the first block of statistics is taken from it.
    original_img = img_color.copy()
    # denoise
    img_color = cv2.GaussianBlur(img_color, (0, 0), 2.0)

    img = cv2.cvtColor(img_color.astype('uint8'),
                       cv2.COLOR_BGR2GRAY).astype('float')

    # TEDYouth 2013 - Filmed November 2013 - 6:43
    # Henry Lin: What we can learn from galaxies far, far away
    # "I subtract away all of the starlight"
    #t = img[np.nonzero(img)].mean()
    #t = img.mean()
    #t = np.max(img_color[np.nonzero(img)].mean(axis=0))
    # Background level: the largest per-channel median over pixels whose grey
    # value is nonzero.
    t = np.max(np.median(img_color[np.nonzero(img)], axis=0))

    # Clamp everything below the background level, then rescale.
    img_color[img_color < t] = t
    img_color = rescale(img_color).astype('uint8')

    if debug:
        cv2.imwrite("start.png", img_color)

    img = cv2.cvtColor(img_color.astype('uint8'), cv2.COLOR_BGR2GRAY)

    saturated = saturate(img, q1=0.75)
    # Label connected regions where the saturated image reaches its maximum.
    labels, nb_maxima = nd.label(saturated == saturated.max(), output='int64')

    if debug:
        cv2.imwrite("adaptive_labels.png", random_colors(labels))

    thresholded = largest_connected_component(img, labels, nb_maxima)
    center = nd.center_of_mass(img, thresholded)

    # features from original image
    original_size = img_color.shape[0] * img_color.shape[1]
    original_shape = img_color.shape[:2]

    # Pixels of the unblurred image whose channel mean is nonzero.
    idx = np.nonzero(original_img.mean(axis=2))
    nonzero = original_img[idx]
    mean = nonzero.mean(axis=0)
    std = nonzero.std(axis=0)
    mean_center = original_img[thresholded > 0].mean(axis=0)
    features = np.array([
        mean_center[0], mean_center[1], mean_center[2], mean[0], mean[1],
        mean[2], std[0], std[1], std[2],
        kurtosis(nonzero[:, 0]),
        kurtosis(nonzero[:, 1]),
        kurtosis(nonzero[:, 2]),
        skew(nonzero[:, 0]),
        skew(nonzero[:, 1]),
        skew(nonzero[:, 2]),
        gini(nonzero[:, 0], f),
        gini(nonzero[:, 1], f),
        # NOTE(review): channel index 1 is repeated here, whereas the
        # kurtosis/skew triples above use 0, 1, 2 - possibly a copy-paste
        # slip. Confirm before changing: the saved models loaded below were
        # presumably trained on this layout.
        gini(nonzero[:, 1], f)
    ],
                        dtype='float')
    # features = np.empty( 0, dtype='float' )

    # center is (row, col); recenter receives it in swapped order.
    img_color = recenter(img_color, (center[1], center[0]),
                         interpolation=cv2.INTER_LINEAR)

    img = cv2.cvtColor(img_color.astype('uint8'), cv2.COLOR_BGR2GRAY)

    # offset from center
    center_offset = np.linalg.norm(
        np.array([center[0], center[1]], dtype='float') -
        np.array(original_shape, dtype='float') / 2)

    # adaptive thresholding
    thresholded = cv2.adaptiveThreshold(
        img,
        maxValue=255,
        adaptiveMethod=cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        thresholdType=cv2.THRESH_BINARY,
        blockSize=301,
        C=0)

    # select largest connected component
    # which is closest to the center
    # use a weighting size/distance**2
    labels, nb_labels = nd.label(thresholded, output='int64')

    if debug:
        cv2.imwrite("debug.png", random_colors(labels))

    thresholded = largest_connected_component(img, labels, nb_labels)

    if debug:
        cv2.imwrite("debug_tresholded.png", thresholded)

    # ellipse fitting
    # cv2.fitEllipse returns a tuple.
    # Tuples are immutable, they can't have their values changed.
    # we have the choice between the least-square minimization implemented in
    # OpenCV or a plain PCA.
    # Nonzero coordinates come back as (row, col); [:, ::-1] swaps them.
    XY = np.transpose(np.nonzero(thresholded))[:, ::-1]
    #((cx,cy),(w,h),angle) = cv2.fitEllipse(XY)
    # eccentricity = math.sqrt(1-(np.min((w,h))/np.max((w,h)))**2)
    ((cx, cy), (w, h), angle) = fit_ellipse(XY, f=f)
    #eccentricity = math.sqrt(1-(h/w)**2)
    # NOTE(review): this division runs before the w == 0 check just below.
    eccentricity = h / w

    if w == 0 or h == 0:
        print "bad ellipse:", ((cx, cy), (w, h), angle), f
        exit(1)

    if debug:
        print((cx, cy), (w, h), angle)

    # rotate image
    img_color = rotate(img_color, (cx, cy),
                       angle,
                       interpolation=cv2.INTER_LINEAR)

    thresholded = rotate(thresholded, (cx, cy),
                         angle,
                         interpolation=cv2.INTER_NEAREST)

    # From here on (cx, cy) is the centre of the image frame, not the
    # fitted ellipse centre.
    cx = float(img.shape[1]) / 2
    cy = float(img.shape[0]) / 2

    if debug:
        cv2.imwrite("rotated.png", img_color)

    # crop
    # Window of width w and height h around (cx, cy), clipped to the image.
    img_color = img_color[
        max(0, int(cy - h / 2)):min(img.shape[0], int(cy + h / 2 + 1)),
        max(0, int(cx - w / 2)):min(img.shape[1], int(cx + w / 2 + 1))]
    thresholded = thresholded[
        max(0, int(cy - h / 2)):min(img.shape[0], int(cy + h / 2 + 1)),
        max(0, int(cx - w / 2)):min(img.shape[1], int(cx + w / 2 + 1))]

    if debug:
        cv2.imwrite("cropped_thresholded.png", thresholded)

    # The colour histogram is always computed; it is reused by the
    # classifier section at the end of the function.
    color_hist = get_color_histogram(img_color)
    if color_histogram:
        return color_hist

    if orientation_histogram:
        return get_orientation_histogram(img_color)

    img = cv2.cvtColor(img_color, cv2.COLOR_BGR2GRAY).astype('float')
    img = rescale(img)

    saturated = saturate(img, q1=0.95)
    # nb_maxima is recomputed on the cropped image and later stored as a feature.
    labels, nb_maxima = nd.label(saturated == saturated.max(), output='int64')

    if debug:
        cv2.imwrite("labels.png", random_colors(labels))

    if img_color.shape[0] == 0 or img_color.shape[1] == 0:
        print "bad size", img_color.shape, f
        exit(1)

    img_thumbnail = cv2.resize(img.astype('uint8'), (64, 64),
                               interpolation=cv2.INTER_AREA)

    if tiny_img:
        return img_thumbnail.flatten()

    if debug:
        cv2.imwrite("tiny_img.png", img_thumbnail)

    # NOTE(review): grad_color is not referenced again in this function;
    # grad_img below is built from channels 0 and 2 of img_color instead.
    grad_color = nd.gaussian_gradient_magnitude(img_color, 1.0)
    grad_img = rescale(img_color[:, :, 0] + img_color[:, :, 2])

    if debug:
        cv2.imwrite("channel.png", grad_img)

    grad_thumbnail = cv2.resize(grad_img, (64, 64),
                                interpolation=cv2.INTER_AREA)

    if debug:
        cv2.imwrite("tiny_grad.png", grad_thumbnail)

    if tiny_grad == True:
        # return np.append( [eccentricity*100],
        #                   grad_thumbnail.flatten() )
        return grad_thumbnail.flatten()

    if debug:
        cv2.imwrite("cropped.png", img_color)

    # chirality
    # http://en.wikipedia.org/wiki/Chirality
    # An object is chiral if it is not identical to its mirror image.
    #mirror_spiral_img = labels[:,::-1]
    # Sum of absolute differences between grad_img and its left-right mirror,
    # divided by the sum of grad_img.
    mirror_grad_img = grad_img[:, ::-1]
    chirality = np.sum(np.sqrt(
        (grad_img - mirror_grad_img)**2)) / (grad_img.sum())

    # compare size of the thresholded area to the size of the fitted ellipse
    # and to the size of the whole image
    size_to_ellipse = float(thresholded.sum()) / (math.pi * w * h / 4)
    box_to_image = float(img.shape[0] * img.shape[1]) / original_size

    if size_to_ellipse < 0.1:
        print "SIZE_TO_ELLIPSE debug", f

    if box_to_image > 0.5:
        print "BOX_TO_IMAGE debug", f

    # color features
    # central pixel and mean channel values
    idx = np.nonzero(thresholded)
    mean = img_color[idx].mean(axis=0)
    grey_mean = img[idx].mean()
    # Central window spanning the middle half of each axis. The slice bounds
    # rely on Python 2 integer division of the shape values.
    img_center = img[img.shape[0] / 2 - img.shape[0] / 4:img.shape[0] / 2 +
                     img.shape[0] / 4, img.shape[1] / 2 -
                     img.shape[1] / 4:img.shape[1] / 2 + img.shape[1] / 4]
    img_center_color = img_color[img.shape[0] / 2 -
                                 img.shape[0] / 4:img.shape[0] / 2 +
                                 img.shape[0] / 4, img.shape[1] / 2 -
                                 img.shape[1] / 4:img.shape[1] / 2 +
                                 img.shape[1] / 4]
    center_mean = img_center[np.nonzero(img_center)].mean()
    center_mean_color = img_center_color[np.nonzero(img_center)].mean(axis=0)
    # Central pixel per channel, mean per channel over the thresholded area,
    # mean per channel over the central window, then two grey-level ratios.
    color_features = [
        img_color[img_color.shape[0] / 2, img_color.shape[1] / 2, 0],
        img_color[img_color.shape[0] / 2, img_color.shape[1] / 2,
                  1], img_color[img_color.shape[0] / 2, img_color.shape[1] / 2,
                                2], mean[0], mean[1], mean[2],
        center_mean_color[0], center_mean_color[1], center_mean_color[2],
        float(img[img.shape[0] / 2, img.shape[1] / 2]) / grey_mean,
        float(center_mean) / grey_mean
    ]

    entropy = get_entropy(img.astype('uint8'))

    light_radius = get_light_radius(img)
    features = np.append(features, [
        eccentricity, w, h,
        thresholded.sum(), entropy, chirality, size_to_ellipse, box_to_image,
        center_offset, light_radius[0], light_radius[1], nb_maxima,
        kurtosis(img_color[idx][:, 0]),
        kurtosis(img_color[idx][:, 1]),
        kurtosis(img_color[idx][:, 2]),
        skew(img_color[idx][:, 0]),
        skew(img_color[idx][:, 1]),
        skew(img_color[idx][:, 2]),
        gini(img_color[idx][:, 0], f),
        gini(img_color[idx][:, 1], f),
        gini(img_color[idx][:, 2], f),
        kurtosis(img[idx]),
        skew(img[idx]),
        gini(img[idx], f)
    ])
    features = np.append(features, color_features)

    # Hu moments from segmentation
    m = cv2.moments(thresholded.astype('uint8'), binaryImage=True)
    hu1 = cv2.HuMoments(m)

    # Hu moments from taking pixel intensities into account
    m = cv2.moments(img, binaryImage=False)
    hu2 = cv2.HuMoments(m)

    m = cv2.moments(grad_img, binaryImage=False)
    hu3 = cv2.HuMoments(m)

    hu = np.append(hu1.flatten(), hu2.flatten())
    hu = np.append(hu.flatten(), hu3.flatten())
    features = np.append(features, hu.flatten())

    # NOTE(review): hu is appended a second time here, so the Hu moments
    # appear twice in the vector. Possibly unintentional, but the saved
    # scalers/classifiers presumably expect this length - confirm before
    # changing.
    features = np.append(features, hu)

    if image_statistics:
        return features

    # features = np.empty( 0, dtype='float' )

    # Running sum of the classifier outputs appended below.
    # presumably the 11 per-class models together emit 37 probabilities -
    # TODO confirm against the saved models.
    average_prediction = np.zeros(37, dtype='float')

    # PCA features
    if not debug:
        # The image_statistics flag is rebound here to the statistics vector
        # that feeds the first group of classifiers.
        image_statistics = features
        for Class in xrange(1, 12):
            scaler = joblib.load(get_data_folder() +
                                 "/scaler_statistics_Class" + str(Class) + "_")
            clf = joblib.load(get_data_folder() + "/svm_statistics_Class" +
                              str(Class) + "_")
            features = np.append(
                features,
                clf.predict_proba(scaler.transform(image_statistics)))

        # The last 37 entries are taken as the outputs just appended.
        average_prediction += features[-37:]

        # Same scheme on the PCA projection of the gradient thumbnail.
        grad_thumbnail = grad_thumbnail.flatten()
        for Class in xrange(1, 12):
            pca = joblib.load(get_data_folder() + "/pca_Class" + str(Class) +
                              "_")
            thumbnail_pca = pca.transform(grad_thumbnail)

            clf = joblib.load(get_data_folder() + "/pca_SVM_Class" +
                              str(Class) + "_")
            features = np.append(features,
                                 clf.predict_proba(thumbnail_pca).flatten())

        average_prediction += features[-37:]

        # Same scheme on the PCA projection of the grey thumbnail.
        img_thumbnail = img_thumbnail.flatten()
        for Class in xrange(1, 12):
            pca = joblib.load(get_data_folder() + "/pca_img_Class" +
                              str(Class) + "_")
            thumbnail_pca = pca.transform(img_thumbnail)

            clf = joblib.load(get_data_folder() + "/pca_img_SVM_Class" +
                              str(Class) + "_")
            features = np.append(features,
                                 clf.predict_proba(thumbnail_pca).flatten())

        average_prediction += features[-37:]

        # Same scheme on the PCA projection of the colour histogram.
        for Class in xrange(1, 12):
            pca = joblib.load(get_data_folder() + "/pca_color_Class" +
                              str(Class) + "_")
            hist_pca = pca.transform(color_hist)

            clf = joblib.load(get_data_folder() + "/pca_color_SVM_Class" +
                              str(Class) + "_")
            features = np.append(features,
                                 clf.predict_proba(hist_pca).flatten())

        average_prediction += features[-37:]
        # Average over the four classifier groups and append it as well.
        average_prediction /= 4
        features = np.append(features, average_prediction)

    return features

コード例 #16

0

ファイルを表示

ファイル: galaxy.py プロジェクト: kevin-keraudren/kaggle-galaxy

def _append_pca_svm_probabilities(features, vector, pca_stem, svm_stem):
    """Append per-class PCA+SVM probabilities for ``vector`` to ``features``.

    For every class index 1..11 a PCA model and a classifier are loaded with
    joblib from ``get_data_folder() + <stem> + <index> + "_"``; ``vector`` is
    projected with the PCA and the flattened ``predict_proba`` output of the
    classifier is appended.

    Returns the extended array (``np.append`` returns a new array, the input
    is not modified).
    """
    for class_id in range(1, 12):
        suffix = str(class_id) + "_"
        pca = joblib.load(get_data_folder() + pca_stem + suffix)
        projected = pca.transform(vector)

        clf = joblib.load(get_data_folder() + svm_stem + suffix)
        features = np.append(features, clf.predict_proba(projected).flatten())
    return features


def get_features( f,
                  dsift_learn=False,
                  debug=False,
                  tiny_img=False,
                  tiny_grad=False,
                  image_statistics=False,
                  color_histogram=False,
                  orientation_histogram=False,
                  transform=0 ):
    """Extract a feature vector from the image file ``f``.

    The image is denoised, background-clipped, recentred on its brightest
    connected component, rotated according to a fitted ellipse and cropped
    to the ellipse's bounding box. Statistics are then computed on the
    original and on the normalised image.

    Parameters
    ----------
    f : path of the image, passed to ``cv2.imread``; also forwarded to
        ``gini`` / ``fit_ellipse`` and used in diagnostic messages.
    dsift_learn : not used by this function (kept for call compatibility).
    debug : write intermediate images to the current directory, print the
        fitted ellipse, and skip the stacked-model predictions at the end.
    tiny_img : return the flattened 64x64 grayscale thumbnail.
    tiny_grad : return the flattened 64x64 thumbnail of ``grad_img``.
    image_statistics : return only the hand-crafted statistics vector.
    color_histogram : return the result of ``get_color_histogram``.
    orientation_histogram : return the result of
        ``get_orientation_histogram``.
    transform : 1 flips the image along axis 0, 2 flips it along axis 1,
        any other value leaves it untouched.

    When several output flags are set, the first one in this order wins:
    ``color_histogram``, ``orientation_histogram``, ``tiny_img``,
    ``tiny_grad``, ``image_statistics``.

    Returns
    -------
    One of the early results listed above, or a 1-D float array holding the
    statistics vector followed (unless ``debug``) by the per-class
    probabilities of four groups of persisted models and the mean of the
    four trailing 37-element prediction slices.

    The process is terminated with ``exit(1)`` when the fitted ellipse has a
    zero axis or the cropped image is empty.
    """
    img_color = cv2.imread(f).astype('float')
    if transform == 1:
        img_color = img_color[::-1, :].copy()
    if transform == 2:
        img_color = img_color[:, ::-1].copy()

    original_img = img_color.copy()

    # denoise
    img_color = cv2.GaussianBlur(img_color, (0, 0), 2.0)

    img = cv2.cvtColor(img_color.astype('uint8'),
                       cv2.COLOR_BGR2GRAY).astype('float')

    # TEDYouth 2013 - Filmed November 2013 - 6:43
    # Henry Lin: What we can learn from galaxies far, far away
    # "I subtract away all of the starlight"
    # Background level: the largest per-channel median over the pixels whose
    # grayscale value is non-zero. Everything darker is clipped up to it.
    t = np.max(np.median(img_color[np.nonzero(img)], axis=0))

    img_color[img_color < t] = t
    img_color = rescale(img_color).astype('uint8')

    if debug:
        cv2.imwrite("start.png", img_color)

    img = cv2.cvtColor(img_color.astype('uint8'),
                       cv2.COLOR_BGR2GRAY)

    # Label the connected regions that reach the maximum of the saturated
    # image, then keep one of them to locate the object centre.
    saturated = saturate(img, q1=0.75)
    labels, nb_maxima = nd.label(saturated == saturated.max(), output='int64')

    if debug:
        cv2.imwrite("adaptive_labels.png", random_colors(labels))

    thresholded = largest_connected_component(img, labels, nb_maxima)
    center = nd.center_of_mass(img, thresholded)

    # features from original image
    original_size = img_color.shape[0] * img_color.shape[1]
    original_shape = img_color.shape[:2]

    idx = np.nonzero(original_img.mean(axis=2))
    nonzero = original_img[idx]
    mean = nonzero.mean(axis=0)
    std = nonzero.std(axis=0)
    mean_center = original_img[thresholded > 0].mean(axis=0)
    features = np.array([mean_center[0], mean_center[1], mean_center[2],
                         mean[0], mean[1], mean[2],
                         std[0], std[1], std[2],
                         kurtosis(nonzero[:, 0]),
                         kurtosis(nonzero[:, 1]),
                         kurtosis(nonzero[:, 2]),
                         skew(nonzero[:, 0]),
                         skew(nonzero[:, 1]),
                         skew(nonzero[:, 2]),
                         gini(nonzero[:, 0], f),
                         gini(nonzero[:, 1], f),
                         # Fixed: this entry used channel 1 a second time, so
                         # channel 2 was never measured.
                         # NOTE(review): models fitted on the old vector saw
                         # the channel-1 value here - refit if they are reused.
                         gini(nonzero[:, 2], f)
                         ], dtype='float')

    # center_of_mass yields (row, col); recenter is called with (x, y).
    img_color = recenter(img_color, (center[1], center[0]),
                         interpolation=cv2.INTER_LINEAR)

    img = cv2.cvtColor(img_color.astype('uint8'),
                       cv2.COLOR_BGR2GRAY)

    # offset from center
    center_offset = np.linalg.norm(
        np.array([center[0], center[1]], dtype='float')
        - np.array(original_shape, dtype='float') / 2)

    # adaptive thresholding
    thresholded = cv2.adaptiveThreshold(
        img,
        maxValue=255,
        adaptiveMethod=cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        thresholdType=cv2.THRESH_BINARY,
        blockSize=301,
        C=0)

    # select largest connected component
    # which is closest to the center
    # use a weighting size/distance**2
    labels, nb_labels = nd.label(thresholded, output='int64')

    if debug:
        cv2.imwrite("debug.png", random_colors(labels))

    thresholded = largest_connected_component(img, labels, nb_labels)

    if debug:
        cv2.imwrite("debug_tresholded.png", thresholded)

    # ellipse fitting
    # The project's fit_ellipse is used instead of cv2.fitEllipse; both
    # yield ((cx, cy), (w, h), angle). Points are given as (x, y), hence the
    # column reversal of the (row, col) indices.
    XY = np.transpose(np.nonzero(thresholded))[:, ::-1]
    ((cx, cy), (w, h), angle) = fit_ellipse(XY, f=f)

    # Validate the axes BEFORE dividing by them, so that a degenerate fit
    # produces the diagnostic below rather than a division error.
    if w == 0 or h == 0:
        print("bad ellipse: %s %s" % (((cx, cy), (w, h), angle), f))
        exit(1)

    # Axis ratio, used as a cheap stand-in for the true eccentricity.
    eccentricity = h / w

    if debug:
        print(((cx, cy), (w, h), angle))

    # rotate image
    img_color = rotate(img_color, (cx, cy), angle,
                       interpolation=cv2.INTER_LINEAR)

    thresholded = rotate(thresholded, (cx, cy), angle,
                         interpolation=cv2.INTER_NEAREST)

    # From here on the crop is taken around the middle of the image.
    cx = float(img.shape[1]) / 2
    cy = float(img.shape[0]) / 2

    if debug:
        cv2.imwrite("rotated.png", img_color)

    # crop to the w x h box around the image centre, clamped to the image
    row_start = max(0, int(cy - h / 2))
    row_stop = min(img.shape[0], int(cy + h / 2 + 1))
    col_start = max(0, int(cx - w / 2))
    col_stop = min(img.shape[1], int(cx + w / 2 + 1))
    img_color = img_color[row_start:row_stop, col_start:col_stop]
    thresholded = thresholded[row_start:row_stop, col_start:col_stop]

    if debug:
        cv2.imwrite("cropped_thresholded.png", thresholded)

    color_hist = get_color_histogram(img_color)
    if color_histogram:
        return color_hist

    if orientation_histogram:
        return get_orientation_histogram(img_color)

    img = cv2.cvtColor(img_color, cv2.COLOR_BGR2GRAY).astype('float')
    img = rescale(img)

    saturated = saturate(img, q1=0.95)
    labels, nb_maxima = nd.label(saturated == saturated.max(), output='int64')

    if debug:
        cv2.imwrite("labels.png", random_colors(labels))

    if img_color.shape[0] == 0 or img_color.shape[1] == 0:
        print("bad size %s %s" % (img_color.shape, f))
        exit(1)

    img_thumbnail = cv2.resize(img.astype('uint8'), (64, 64),
                               interpolation=cv2.INTER_AREA)

    if tiny_img:
        return img_thumbnail.flatten()

    if debug:
        cv2.imwrite("tiny_img.png", img_thumbnail)

    # Despite its name, grad_img is the rescaled sum of channels 0 and 2 of
    # the cropped colour image, not a gradient (the gradient magnitude that
    # used to be computed here was never read and has been dropped).
    # NOTE(review): if img_color is still uint8 at this point the sum wraps
    # modulo 256 - confirm this is intended.
    grad_img = rescale(img_color[:, :, 0] + img_color[:, :, 2])

    if debug:
        cv2.imwrite("channel.png", grad_img)

    grad_thumbnail = cv2.resize(grad_img, (64, 64),
                                interpolation=cv2.INTER_AREA)

    if debug:
        cv2.imwrite("tiny_grad.png", grad_thumbnail)

    if tiny_grad:
        return grad_thumbnail.flatten()

    if debug:
        cv2.imwrite("cropped.png", img_color)

    # chirality
    # http://en.wikipedia.org/wiki/Chirality
    # An object is chiral if it is not identical to its mirror image.
    mirror_grad_img = grad_img[:, ::-1]
    chirality = np.sum(np.sqrt((grad_img - mirror_grad_img)**2)) / (grad_img.sum())

    # compare size of the thresholded area to the size of the fitted ellipse
    # and to the size of the whole image
    size_to_ellipse = float(thresholded.sum()) / (math.pi * w * h / 4)
    box_to_image = float(img.shape[0] * img.shape[1]) / original_size

    if size_to_ellipse < 0.1:
        print("SIZE_TO_ELLIPSE debug %s" % (f,))

    if box_to_image > 0.5:
        print("BOX_TO_IMAGE debug %s" % (f,))

    # color features
    # central pixel and mean channel values
    idx = np.nonzero(thresholded)
    masked_color = img_color[idx]
    masked_gray = img[idx]
    mean = masked_color.mean(axis=0)
    grey_mean = masked_gray.mean()

    # Floor division keeps these usable as indices on Python 2 and 3 alike.
    half_rows, quarter_rows = img.shape[0] // 2, img.shape[0] // 4
    half_cols, quarter_cols = img.shape[1] // 2, img.shape[1] // 4
    # Central window spanning half of each dimension.
    img_center = img[half_rows - quarter_rows:half_rows + quarter_rows,
                     half_cols - quarter_cols:half_cols + quarter_cols]
    img_center_color = img_color[half_rows - quarter_rows:half_rows + quarter_rows,
                                 half_cols - quarter_cols:half_cols + quarter_cols]
    center_mean = img_center[np.nonzero(img_center)].mean()
    center_mean_color = img_center_color[np.nonzero(img_center)].mean(axis=0)

    color_row = img_color.shape[0] // 2
    color_col = img_color.shape[1] // 2
    color_features = [
        img_color[color_row, color_col, 0],
        img_color[color_row, color_col, 1],
        img_color[color_row, color_col, 2],
        mean[0], mean[1], mean[2],
        center_mean_color[0], center_mean_color[1], center_mean_color[2],
        float(img[half_rows, half_cols]) / grey_mean,
        float(center_mean) / grey_mean]

    entropy = get_entropy(img.astype('uint8'))

    light_radius = get_light_radius(img)
    features = np.append(features, [eccentricity,
                                    w, h,
                                    thresholded.sum(),
                                    entropy,
                                    chirality,
                                    size_to_ellipse,
                                    box_to_image,
                                    center_offset,
                                    light_radius[0],
                                    light_radius[1],
                                    nb_maxima,
                                    kurtosis(masked_color[:, 0]),
                                    kurtosis(masked_color[:, 1]),
                                    kurtosis(masked_color[:, 2]),
                                    skew(masked_color[:, 0]),
                                    skew(masked_color[:, 1]),
                                    skew(masked_color[:, 2]),
                                    gini(masked_color[:, 0], f),
                                    gini(masked_color[:, 1], f),
                                    gini(masked_color[:, 2], f),
                                    kurtosis(masked_gray),
                                    skew(masked_gray),
                                    gini(masked_gray, f)
                                    ])
    features = np.append(features, color_features)

    # Hu moments from segmentation
    m = cv2.moments(thresholded.astype('uint8'), binaryImage=True)
    hu1 = cv2.HuMoments(m)

    # Hu moments from taking pixel intensities into account
    m = cv2.moments(img, binaryImage=False)
    hu2 = cv2.HuMoments(m)

    m = cv2.moments(grad_img, binaryImage=False)
    hu3 = cv2.HuMoments(m)

    hu = np.append(hu1.flatten(), hu2.flatten())
    hu = np.append(hu.flatten(), hu3.flatten())
    features = np.append(features, hu.flatten())

    # NOTE(review): the Hu moments are appended a second time. This looks
    # accidental, but it is kept because it determines the length of the
    # vector handed to the persisted scaler/classifiers below, which were
    # presumably fitted on this exact layout - confirm before removing.
    features = np.append(features, hu)

    if image_statistics:
        return features

    average_prediction = np.zeros(37, dtype='float')

    # Stacked model predictions (skipped in debug mode). After each group of
    # models the trailing 37 entries of the vector are accumulated.
    if not debug:
        # Every statistics model sees the same input: the vector as it is
        # before any prediction is appended.
        statistics = features
        for class_id in range(1, 12):
            suffix = str(class_id) + "_"
            scaler = joblib.load(get_data_folder() + "/scaler_statistics_Class" + suffix)
            clf = joblib.load(get_data_folder() + "/svm_statistics_Class" + suffix)
            features = np.append(
                features,
                clf.predict_proba(scaler.transform(statistics)))

        average_prediction += features[-37:]

        features = _append_pca_svm_probabilities(
            features, grad_thumbnail.flatten(),
            "/pca_Class", "/pca_SVM_Class")
        average_prediction += features[-37:]

        features = _append_pca_svm_probabilities(
            features, img_thumbnail.flatten(),
            "/pca_img_Class", "/pca_img_SVM_Class")
        average_prediction += features[-37:]

        features = _append_pca_svm_probabilities(
            features, color_hist,
            "/pca_color_Class", "/pca_color_SVM_Class")
        average_prediction += features[-37:]

        average_prediction /= 4
        features = np.append(features, average_prediction)

    return features