def pointwise_mutual_information_score(X, y):
    """Score each feature by its summed positive PMI with the class labels.

    Features are binarized (present / absent per sample).  For every
    (class, feature) pair the pointwise mutual information is

        PMI(c, f) = log( N * n_cf / (n_c * n_f) )

    where ``N`` is the number of samples, ``n_cf`` the number of samples of
    class ``c`` that contain feature ``f``, ``n_c`` the number of samples of
    class ``c`` and ``n_f`` the number of samples containing ``f``.
    Negative PMI values are clipped to zero and the remainder is summed over
    classes.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Non-negative feature matrix.  Only whether an entry is non-zero is
        used.
    y : array-like of shape (n_samples,)
        Target labels, binarized with ``LabelBinarizer``.

    Returns
    -------
    numpy.ndarray
        One score per feature (the result is squeezed, so a single-feature
        input yields a 0-d array).

    Raises
    ------
    ValueError
        If ``X`` contains negative values or is not a sparse matrix.

    Notes
    -----
    The earlier implementation left out the ``log(N)`` term, which made every
    PMI value non-positive and therefore every score (almost exactly) zero
    after clipping; it also produced ``inf`` for features that never occur.
    """
    X = check_array(X, accept_sparse='csr')
    if np.any((X.data if issparse(X) else X) < 0):
        raise ValueError("Input X must be non-negative.")
    if not issparse(X):
        raise ValueError('Matrix should be sparse')

    # Work on a private copy: eliminate_zeros() mutates in place and the
    # non-copying constructor may share buffers with the caller's matrix.
    X_bin = sp.csr_matrix(X, copy=True)
    # Explicitly stored zeros must not count as "feature present".
    X_bin.eliminate_zeros()
    X_bin.data = np.ones(X_bin.data.shape)

    Y = LabelBinarizer().fit_transform(y)
    if Y.shape[1] == 1:
        # Binary problems come back as one column; expand to two classes.
        Y = np.append(1 - Y, Y, axis=1)

    n_samples = X_bin.shape[0]

    # Co-occurrence counts, shape (n_classes, n_features).
    observed = np.asarray(
        safe_sparse_dot(Y.T, X_bin, dense_output=True), dtype=np.float64
    )
    feature_count = np.asarray(X_bin.sum(axis=0), dtype=np.float64).ravel()
    class_count = np.asarray(Y.sum(axis=0), dtype=np.float64).ravel()

    # n_c * n_f for every (class, feature) pair.
    marginal_product = np.outer(class_count, feature_count)

    # Pairs that never co-occur have PMI = -inf, which clips to 0, so only
    # evaluate the logarithm where observed > 0.  There both marginals are
    # necessarily positive, so no division by zero or log(0) can occur.
    pmi = np.zeros(observed.shape, dtype=np.float64)
    seen = observed > 0
    pmi[seen] = np.log(n_samples * observed[seen] / marginal_product[seen])

    # Keep only positive associations.
    np.maximum(pmi, 0, out=pmi)

    return np.asarray(pmi.sum(axis=0)).squeeze()
# initialize the image preprocessors aap = AspectAwarePreprocessor(224, 224, gray=True) iap = ImageToArrayPreprocessor() # load the dataset from disk then scale the raw pixel intensities # to the range [0, 1] sdl = SimpleDatasetLoader(preprocessor=[iap], gray=True) data, labels = sdl.load(imagePaths, verbose=500) data = data / 255 # # convert the labels from integers to vectors labels = LabelBinarizer().fit_transform(labels) # account for skew in the labeled data classTotals = labels.sum(axis=0) classWeight = classTotals.max() / classTotals # partition the data into training and testing splits using 75% of # the data for training and the remaining 25% for testing trainX, testX, trainY, testY = train_test_split(data, labels, test_size=0.25, stratify=labels, random_state=42) # construct the image generator for data augmentation aug = ImageDataGenerator(rotation_range=30, width_shift_range=0.1, height_shift_range=0.1, shear_range=0.2,