import itertools
import os

import numpy as np
import scipy.stats
from joblib import Parallel, delayed
from skimage.exposure import rescale_intensity
from skimage.feature import greycomatrix, greycoprops

import PREDICT.addexceptions as ae

# Helpers such as load_genetic_file, load_genetic_XNAT, gabor_filter and
# bbox_2D are defined elsewhere in this package and assumed to be in scope.


def load_mutation_status(genetic_file, mutation_type):
    """Load the mutation data from a genetic file.

    Args:
        genetic_file (string): The path to the genetic file
        mutation_type (list): List of the genetic mutations to load

    Returns:
        dict: A dict containing 'patient_IDs', 'mutation_label' and
            'mutation_name'
    """
    _, extension = os.path.splitext(genetic_file)
    if extension == '.txt':
        mutation_names, patient_IDs, mutation_status = load_genetic_file(
            genetic_file)
    elif extension == '.ini':
        mutation_names, patient_IDs, mutation_status = load_genetic_XNAT(
            genetic_file)
    else:
        raise ae.PREDICTIOError(extension +
                                ' is not a valid genetic file extension.')

    print("Label names to extract: " + str(mutation_type))
    mutation_label = list()
    for i_mutation in mutation_type:
        mutation_index = np.where(mutation_names == i_mutation)[0]
        if mutation_index.size == 0:
            raise ae.PREDICTValueError('Could not find mutation: ' +
                                       i_mutation)
        else:
            mutation_label.append(mutation_status[:, mutation_index])

    mutation_data = dict()
    mutation_data['patient_IDs'] = patient_IDs
    mutation_data['mutation_label'] = mutation_label
    mutation_data['mutation_name'] = mutation_type

    return mutation_data
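
# A minimal usage sketch for load_mutation_status. The file path and the
# mutation names below are hypothetical placeholders for illustration only;
# they do not ship with the package.
def _example_load_mutation_status():
    mutation_data = load_mutation_status(
        genetic_file='/data/genetics/mutations.txt',  # hypothetical path
        mutation_type=['KRAS', 'EGFR'])               # hypothetical labels
    # Each entry of 'mutation_label' is the status column for the
    # corresponding name in 'mutation_name'.
    print(mutation_data['patient_IDs'])
    print(mutation_data['mutation_name'])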
def get_semantic_features(data, patientID):
    """Extract the semantic feature values and labels for a single patient."""
    patient_ID = data['Patient']
    semantics_names = data.keys()

    # Get the index of the current patient: the last entry of the 'Patient'
    # column that occurs as a substring of the requested patientID.
    index = None
    for i, s in enumerate(patient_ID):
        if s in patientID:
            index = i

    if index is None:
        raise ae.PREDICTValueError("No semantic features found for " +
                                   patientID)

    # Extract all labels and the corresponding feature values
    semantics_labels = list()
    semantics_features = list()
    for name in semantics_names:
        if name != 'Patient':
            semantics_labels.append('semf_' + name)
            semantics_features.append(data[name][index])

    return semantics_features, semantics_labels
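
# A minimal usage sketch for get_semantic_features, assuming 'data' is a
# plain dict as parsed from a semantics file: one 'Patient' column plus one
# column per feature. All values below are made up for illustration.
def _example_get_semantic_features():
    data = {'Patient': ['pat001', 'pat002'],
            'Age': [63, 58],
            'Sex': [0, 1]}
    features, labels = get_semantic_features(data, 'pat002')
    # With insertion-ordered dicts (Python 3.7+) this prints
    # ['semf_Age', 'semf_Sex'] and [58, 1]
    print(labels)
    print(features)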
def gabor_filter_parallel(image, mask, parameters=dict(), n_jobs=1,
                          backend='threading'):
    """Apply Gabor filters to an image, computed in parallel per slice.

    Note: on a cluster, where parallelisation over processes may not be
    possible, use backend='threading'.
    """
    if "gabor_frequencies" in parameters.keys():
        gabor_frequencies = parameters["gabor_frequencies"]
    else:
        gabor_frequencies = [0.05, 0.2, 0.5]

    if "gabor_angles" in parameters.keys():
        gabor_angles = parameters["gabor_angles"]
    else:
        gabor_angles = [0, 45, 90, 135]

    # Create one kernel per (frequency, angle) combination
    kernels = list(itertools.product(gabor_frequencies, gabor_angles))

    N_slices = image.shape[2]

    # Filter every slice with every kernel and accumulate the filtered
    # values per kernel over all slices
    full_filtered = list()
    for i_slice in range(0, N_slices):
        filtered = Parallel(n_jobs=n_jobs, backend=backend)(
            delayed(gabor_filter)(image=image[:, :, i_slice],
                                  mask=mask[:, :, i_slice],
                                  kernel=kernel)
            for kernel in kernels)

        for i_index, i_kernel in enumerate(kernels):
            if i_slice == 0:
                full_filtered.append(filtered[i_index])
            else:
                full_filtered[i_index] = np.append(full_filtered[i_index],
                                                   filtered[i_index])

    # Compute first-order statistics over the filtered values per kernel
    mean_gabor = list()
    std_gabor = list()
    min_gabor = list()
    max_gabor = list()
    skew_gabor = list()
    kurt_gabor = list()
    for i_index, i_kernel in enumerate(kernels):
        mean_gabor.append(np.mean(full_filtered[i_index]))
        std_gabor.append(np.std(full_filtered[i_index]))
        min_gabor.append(np.percentile(full_filtered[i_index], 2))
        max_gabor.append(np.percentile(full_filtered[i_index], 98))
        skew_gabor.append(scipy.stats.skew(full_filtered[i_index]))
        kurt_gabor.append(scipy.stats.kurtosis(full_filtered[i_index]))

    gabor_features = mean_gabor + std_gabor + min_gabor + max_gabor +\
        skew_gabor + kurt_gabor

    # Create labels in the same statistic-major order as the features
    # (all means first, then all stds, and so on), so each label matches
    # its feature value
    gabor_labels = list()
    for stat in ['mean', 'std', 'min', 'max', 'skew', 'kurt']:
        for i_kernel in kernels:
            # Round the angle to two decimals to reduce the label length
            gabor_labels.append('tf_Gabor_' + str(i_kernel[0]) + 'A' +
                                str(round(i_kernel[1], 2)) + stat)

    if len(gabor_features) != len(gabor_labels):
        raise ae.PREDICTValueError('Label length does not fit feature length')

    return gabor_features, gabor_labels
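
# A minimal usage sketch for gabor_filter_parallel on a synthetic volume.
# The random image and all-ones mask are placeholders; real inputs come from
# the feature extraction pipeline, and the gabor_filter helper used inside
# must be in scope.
def _example_gabor_filter_parallel():
    rng = np.random.default_rng(0)
    image = rng.standard_normal((32, 32, 4))  # 4 axial slices
    mask = np.ones((32, 32, 4), dtype=bool)   # segment everything
    parameters = {'gabor_frequencies': [0.05, 0.2],
                  'gabor_angles': [0, 90]}
    features, labels = gabor_filter_parallel(image, mask,
                                             parameters=parameters,
                                             n_jobs=1, backend='threading')
    # 2 frequencies x 2 angles x 6 statistics = 24 features
    assert len(features) == len(labels) == 24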
def get_GLCM_features(image, mask, parameters=dict()):
    '''
    Compute Gray Level Co-occurrence Matrix (GLCM) features.

    The image is first discretized to a set number of greyscale values, and
    the GLCM is computed at multiple distances and angles. Pixels outside
    the mask are always set to zero. As the GLCM is defined in 2D, the GLCMs
    of all 2D axial slices are summed to obtain a single matrix for a 3D
    image.

    The output are two lists: the feature values and the labels.
    '''
    if "levels" in parameters.keys():
        levels = parameters["levels"]
    else:
        levels = 16

    if "angles" in parameters.keys():
        angles = parameters["angles"]
    else:
        angles = [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4]

    if "distances" in parameters.keys():
        distances = parameters["distances"]
    else:
        distances = [1, 3]

    N_slices = image.shape[2]

    GLCM_matrix = np.zeros([levels, levels, len(distances), len(angles)])
    for i_slice in range(0, N_slices):
        image_bounded, mask_bounded = bbox_2D(image[:, :, i_slice],
                                              mask[:, :, i_slice])

        image_bounded[~mask_bounded] = 0
        # Shift so the minimum is zero, scale to the 8-bit range, then
        # discretize to the requested number of grey levels
        image_bounded = image_bounded - image_bounded.min()
        image_bounded = image_bounded * 255.0 / image_bounded.max()
        image_bounded = image_bounded.astype(np.uint8)
        image_bounded = rescale_intensity(image_bounded,
                                          out_range=(0, levels - 1))

        GLCM_matrix += greycomatrix(image_bounded, distances, angles,
                                    levels=levels, normed=True)

    contrast = greycoprops(GLCM_matrix, 'contrast').flatten()
    dissimilarity = greycoprops(GLCM_matrix, 'dissimilarity').flatten()
    homogeneity = greycoprops(GLCM_matrix, 'homogeneity').flatten()
    ASM = greycoprops(GLCM_matrix, 'ASM').flatten()
    energy = greycoprops(GLCM_matrix, 'energy').flatten()
    correlation = greycoprops(GLCM_matrix, 'correlation').flatten()

    GLCM_features = contrast.tolist() +\
        dissimilarity.tolist() +\
        homogeneity.tolist() +\
        ASM.tolist() + energy.tolist() +\
        correlation.tolist()

    feature_names = ['tf_GLCM_contrast', 'tf_GLCM_dissimilarity',
                     'tf_GLCM_homogeneity', 'tf_GLCM_ASM', 'tf_GLCM_energy',
                     'tf_GLCM_correlation']

    GLCM_labels = list()
    for i_name, i_dist, i_angle in itertools.product(feature_names,
                                                     distances, angles):
        # Round to reduce the label length
        i_dist = round(i_dist, 2)
        i_angle = round(i_angle, 2)
        label = i_name + 'd' + str(i_dist) + 'A' + str(i_angle)
        GLCM_labels.append(label)

    if len(GLCM_features) != len(GLCM_labels):
        raise ae.PREDICTValueError(
            'Label length ({}) does not fit feature length ({}).'.format(
                len(GLCM_labels), len(GLCM_features)))

    return GLCM_features, GLCM_labels
def get_GLCM_features_multislice(image, mask, parameters=dict()):
    '''
    Compute GLCM features per 2D axial slice, then aggregate the per-slice
    values by taking the mean and standard deviation over the slices.
    '''
    if "levels" in parameters.keys():
        levels = parameters["levels"]
    else:
        levels = 16

    if "angles" in parameters.keys():
        angles = parameters["angles"]
    else:
        angles = [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4]

    if "distances" in parameters.keys():
        distances = parameters["distances"]
    else:
        distances = [1, 3]

    N_slices = image.shape[2]

    contrast = list()
    dissimilarity = list()
    homogeneity = list()
    ASM = list()
    energy = list()
    correlation = list()

    for i_slice in range(0, N_slices):
        image_bounded, mask_bounded = bbox_2D(image[:, :, i_slice],
                                              mask[:, :, i_slice])

        image_bounded[~mask_bounded] = 0
        # Shift so the minimum is zero, scale to the 8-bit range, then
        # discretize to the requested number of grey levels
        image_bounded = image_bounded - image_bounded.min()
        image_bounded = image_bounded * 255.0 / image_bounded.max()
        image_bounded = image_bounded.astype(np.uint8)
        image_bounded = rescale_intensity(image_bounded,
                                          out_range=(0, levels - 1))

        GLCM_matrix = greycomatrix(image_bounded, distances, angles,
                                   levels=levels, normed=True)

        contrast.append(greycoprops(GLCM_matrix, 'contrast').flatten())
        dissimilarity.append(
            greycoprops(GLCM_matrix, 'dissimilarity').flatten())
        homogeneity.append(greycoprops(GLCM_matrix, 'homogeneity').flatten())
        ASM.append(greycoprops(GLCM_matrix, 'ASM').flatten())
        energy.append(greycoprops(GLCM_matrix, 'energy').flatten())
        correlation.append(greycoprops(GLCM_matrix, 'correlation').flatten())

    # Aggregate the per-slice values: mean and std over the slices
    contrast_mean = np.mean(contrast, 0)
    contrast_std = np.std(contrast, 0)
    dissimilarity_mean = np.mean(dissimilarity, 0)
    dissimilarity_std = np.std(dissimilarity, 0)
    homogeneity_mean = np.mean(homogeneity, 0)
    homogeneity_std = np.std(homogeneity, 0)
    ASM_mean = np.mean(ASM, 0)
    ASM_std = np.std(ASM, 0)
    energy_mean = np.mean(energy, 0)
    energy_std = np.std(energy, 0)
    correlation_mean = np.mean(correlation, 0)
    correlation_std = np.std(correlation, 0)

    GLCM_features = contrast_mean.tolist() + contrast_std.tolist() +\
        dissimilarity_mean.tolist() + dissimilarity_std.tolist() +\
        homogeneity_mean.tolist() + homogeneity_std.tolist() +\
        ASM_mean.tolist() + ASM_std.tolist() + energy_mean.tolist() +\
        energy_std.tolist() + correlation_mean.tolist() +\
        correlation_std.tolist()

    feature_names = ['tf_GLCMMS_contrast', 'tf_GLCMMS_dissimilarity',
                     'tf_GLCMMS_homogeneity', 'tf_GLCMMS_ASM',
                     'tf_GLCMMS_energy', 'tf_GLCMMS_correlation']

    # Create labels in the same order as the features: per property, all
    # means over (distance, angle) first, then all stds
    GLCM_labels = list()
    for i_name in feature_names:
        for stat in ['mean', 'std']:
            for i_dist, i_angle in itertools.product(distances, angles):
                # Round to reduce the label length
                label = (i_name + 'd' + str(round(i_dist, 2)) + 'A' +
                         str(round(i_angle, 2)) + stat)
                GLCM_labels.append(label)

    if len(GLCM_features) != len(GLCM_labels):
        raise ae.PREDICTValueError(
            'Label length ({}) does not fit feature length ({}).'.format(
                len(GLCM_labels), len(GLCM_features)))

    return GLCM_features, GLCM_labels
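
# A minimal usage sketch covering both GLCM variants above, on a synthetic
# volume. The image, mask and parameter values are placeholders; bbox_2D and
# the skimage functions must be in scope as in this module.
def _example_get_GLCM_features():
    rng = np.random.default_rng(0)
    image = rng.integers(0, 255, size=(32, 32, 3)).astype(float)
    mask = np.zeros((32, 32, 3), dtype=bool)
    mask[8:24, 8:24, :] = True  # a square ROI in every slice
    parameters = {'levels': 16,
                  'angles': [0, np.pi / 2],
                  'distances': [1]}
    # Aggregated variant: 6 properties x 1 distance x 2 angles = 12 features
    feat, lab = get_GLCM_features(image, mask, parameters=parameters)
    assert len(feat) == len(lab) == 12
    # Multislice variant: mean and std per property, so twice as many
    feat_ms, lab_ms = get_GLCM_features_multislice(image, mask,
                                                   parameters=parameters)
    assert len(feat_ms) == len(lab_ms) == 24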