def pointwise_mutual_information_score(X, y):
    """Score features by summed positive pointwise mutual information.

    ``X`` is binarized (a feature is "present" in a sample when its stored
    value is strictly positive), and for every (class, feature) pair the
    pointwise mutual information

        PMI(c, f) = log( N * n(c, f) / (n(c) * n(f)) )

    is computed, where ``N`` is the number of samples, ``n(c, f)`` the number
    of samples of class ``c`` containing feature ``f``, ``n(c)`` the class
    count and ``n(f)`` the feature count. Negative values are clipped to
    zero and the result is summed over classes.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Non-negative feature matrix. Dense input is rejected.
    y : array-like of shape (n_samples,)
        Class labels.

    Returns
    -------
    scores : ndarray of shape (n_features,)
        Summed positive PMI per feature (squeezed, so a single-feature
        input yields a 0-d array). Features that never occur score 0.

    Raises
    ------
    ValueError
        If ``X`` contains negative values or is not sparse.
    """
    X = check_array(X, accept_sparse='csr')
    if np.any((X.data if issparse(X) else X) < 0):
        raise ValueError("Input X must be non-negative.")
    if not issparse(X):
        raise ValueError('Matrix should be sparse')

    # Binarize on a copy so the caller's matrix is never modified. Explicitly
    # stored zeros are dropped first; otherwise they would be counted as
    # occurrences once every stored value is set to 1.
    X_bin = sp.csr_matrix(X, copy=True)
    X_bin.eliminate_zeros()
    X_bin.data = np.ones(X_bin.data.shape)

    Y = LabelBinarizer().fit_transform(y)
    if Y.shape[1] == 1:
        # Binary problems come back as a single column; expand it to one
        # indicator column per class.
        Y = np.append(1 - Y, Y, axis=1)

    # Co-occurrence counts, shape (n_classes, n_features).
    observed = np.asarray(
        safe_sparse_dot(Y.T, X_bin, dense_output=True), dtype=np.float64)

    n_samples = X_bin.shape[0]
    feature_count = np.asarray(X_bin.sum(axis=0), dtype=np.float64).ravel()
    class_count = np.asarray(Y.sum(axis=0), dtype=np.float64).ravel()

    # n(c) * n(f) for every pair, via an outer product instead of tiling.
    marginal_product = np.outer(class_count, feature_count)

    # Only pairs that actually co-occur can have positive PMI. Restricting
    # the log to those entries also avoids log(0) for unseen features or
    # empty classes (observed > 0 implies both marginals are > 0).
    pmi = np.zeros_like(observed)
    present = observed > 0
    pmi[present] = np.log(
        n_samples * observed[present] / marginal_product[present])

    # Keep only positive associations.
    np.maximum(pmi, 0, out=pmi)

    return np.asarray(pmi.sum(axis=0)).squeeze()
# Example #2
# 0
# initialize the image preprocessors
# NOTE(review): `aap` is created here but is not passed to the loader below
# (only `iap` is); confirm whether it is used later or was meant to be in
# the preprocessor list.
aap = AspectAwarePreprocessor(224, 224, gray=True)
iap = ImageToArrayPreprocessor()

# load the dataset from disk then scale the raw pixel intensities
# to the range [0, 1]
# (`imagePaths` is defined outside this excerpt; dividing by 255 presumably
# assumes 8-bit pixel values -- confirm against the loader)
sdl = SimpleDatasetLoader(preprocessor=[iap], gray=True)
data, labels = sdl.load(imagePaths, verbose=500)
data = data / 255

# convert the labels to binarized indicator vectors
labels = LabelBinarizer().fit_transform(labels)

# account for skew in the labeled data: each class is weighted by the ratio
# of the largest per-column label total to its own total
classTotals = labels.sum(axis=0)
classWeight = classTotals.max() / classTotals

# partition the data into training and testing splits using 75% of
# the data for training and the remaining 25% for testing; the split is
# stratified on the binarized labels and seeded for reproducibility
trainX, testX, trainY, testY = train_test_split(data,
                                                labels,
                                                test_size=0.25,
                                                stratify=labels,
                                                random_state=42)

# construct the image generator for data augmentation
aug = ImageDataGenerator(rotation_range=30,
                         width_shift_range=0.1,
                         height_shift_range=0.1,
                         shear_range=0.2,