def create_folds(file_path, n_splits=10):
    """Create stratified cross-validation folds and pickle them to disk.

    Subjects labelled 'ADHD-Hyperactive/Impulsive' are excluded, and only
    subjects whose connectivity-matrix file actually exists under
    ``data/ADHD200_CC200/`` are kept.

    :param file_path: file path to save the folds
    :type file_path: str
    :param n_splits: number of cross-validation folds (default 10,
        preserving the original behavior)
    :type n_splits: int
    :return: None
    """
    data = read_adhd_data()
    data = data[data['class'] != 'ADHD-Hyperactive/Impulsive']
    stats = find_statistics('data/ADHD200_CC200/', 'data/adhd_stats.pkl')
    subject_ids = data.network_name.values.tolist()
    # Use a set for O(1) membership tests instead of scanning a list
    # once per subject.
    subject_graph_files = set(os.listdir('data/ADHD200_CC200/'))
    subject_ids = [
        subject for subject in subject_ids
        if subject + '_connectivity_matrix_file.txt' in subject_graph_files
    ]
    dataset = ADHD('ADHD200_CC200', subject_ids, data, stats)
    # Stratify on the class label, i.e. the argmax of the one-hot
    # encoding returned by the dataset.
    y = np.array([
        np.argmax(dataset.get_brain_class(subject_id))
        for subject_id in subject_ids
    ])
    X = np.array(subject_ids)
    skf = StratifiedKFold(n_splits=n_splits)
    # Each fold is a (train_subject_ids, test_subject_ids) pair.
    folds = [([subject_ids[i] for i in train.tolist()],
              [subject_ids[i] for i in test.tolist()])
             for train, test in skf.split(X, y)]
    with open(file_path, 'wb') as f:
        pickle.dump(folds, f, pickle.HIGHEST_PROTOCOL)
def test_brain_convolution_model(test_subject_ids, trained_model):
    """Predict classes for the given test subjects with a trained model.

    :param test_subject_ids: subject ids to evaluate
    :type test_subject_ids: list(str)
    :param trained_model: trained Keras model accepting
        [node_features, adjacency] batches
    :return: model predictions for every test subject
    """
    data = read_adhd_data()
    stats = find_statistics('data/ADHD200_CC200/', 'data/adhd_stats.pkl')
    subset = data.loc[data['network_name'].isin(test_subject_ids)]
    test_dataset = ADHD('ADHD200_CC200', test_subject_ids, subset, stats)
    adjacency_batch = []
    feature_batch = []
    for subject_id in test_subject_ids:
        adjacency, node_features, _ = test_dataset.get_brain_graph_and_class(
            subject_id)
        feature_batch.append(node_features)
        adjacency_batch.append(adjacency)
    return trained_model.predict(
        [np.array(feature_batch), np.array(adjacency_batch)])
def train_brain_convolution_model(subject_ids, steps_per_epoch=10, epochs=50):
    """Train a BrainConvolutionModel on the given ADHD subjects.

    :param subject_ids: subject ids used for training
    :type subject_ids: list(str)
    :param steps_per_epoch: generator batches per epoch (default 10,
        preserving the original behavior)
    :type steps_per_epoch: int
    :param epochs: number of training epochs (default 50, preserving the
        original behavior)
    :type epochs: int
    :return: the trained Keras model
    """
    data = read_adhd_data()
    stats = find_statistics('data/ADHD200_CC200/', 'data/adhd_stats.pkl')
    train_dataset = ADHD(
        'ADHD200_CC200', subject_ids,
        data.loc[data['network_name'].isin(subject_ids)], stats)
    config = Config(node_dim=3, num_classes=3, batch_size=1)
    train_data_generator = data_generator(train_dataset, config, shuffle=True)
    model = BrainConvolutionModel(config).build()
    model.summary()
    optimizer = Adam(0.001, amsgrad=True, decay=config.WEIGHT_DECAY)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    model.fit_generator(train_data_generator, steps_per_epoch, epochs)
    return model
def get_data(subject_ids):
    """Create dataset for training and testing the classifiers

    :param subject_ids: list of subject ids from the dataset
    :type subject_ids: list(str)
    :return: features (flatten matrix), classes
    :rtype: numpy.array, numpy.array
    """
    data = read_adhd_data()
    data = data[data['class'] != 'ADHD-Hyperactive/Impulsive']
    stats = find_statistics('data/ADHD200_CC200/', 'data/adhd_stats.pkl')
    dataset = ADHD('ADHD200_CC200', subject_ids, data, stats)
    features = []
    labels = []
    for subject_id in subject_ids:
        matrix, _, encoded_class = dataset.get_brain_graph_and_class(
            subject_id)
        # Flatten the strictly upper-triangular part of the symmetric
        # connectivity matrix (diagonal excluded via k=1).
        upper_triangle = matrix[np.triu_indices(matrix.shape[0], k=1)]
        features.append(upper_triangle)
        labels.append(np.argmax(encoded_class))
    return np.array(features), np.array(labels)
:param split: ratio value for validation data :type split: float :return: train subject ids, validation subject ids :rtype: list(str), list(str) """ size = len(subject_ids) split_index = int(round(size * (1 - split))) random.shuffle(subject_ids) train_ids = subject_ids[:split_index] validation_ids = subject_ids[split_index:] return train_ids, validation_ids if __name__ == '__main__': data = read_functional_connectomes_data() stats = find_statistics('data/1000_Functional_Connectomes/', 'data/1000_functional_connectomes_stats.pkl') subject_ids = data.network_name.values.tolist() subject_graph_files = os.listdir('data/1000_Functional_Connectomes/') subject_ids = [ subject for subject in subject_ids if subject + '_connectivity_matrix_file.txt' in subject_graph_files ] train_subject_ids, val_subject_ids = train_validation_split( subject_ids, 0.1) train_dataset = FunctionalConnectomes( '1000_functional_connectomes', train_subject_ids, data.loc[data['network_name'].isin(train_subject_ids)], stats) validation_dataset = FunctionalConnectomes( '1000_functional_connectomes', val_subject_ids, data.loc[data['network_name'].isin(val_subject_ids)], stats) config = Config(node_dim=3, num_classes=2, batch_size=3)
:type split: float :return: train subject ids, validation subject ids :rtype: list(str), list(str) """ size = len(subject_ids) split_index = int(round(size * (1 - split))) random.shuffle(subject_ids) train_ids = subject_ids[:split_index] validation_ids = subject_ids[split_index:] return train_ids, validation_ids if __name__ == '__main__': data = read_adhd_data() data = data[data['class'] != 'ADHD-Hyperactive/Impulsive'] stats = find_statistics('data/ADHD200_CC200/', 'data/adhd_stats.pkl') subject_ids = data.network_name.values.tolist() subject_graph_files = os.listdir('data/ADHD200_CC200/') subject_ids = [ subject for subject in subject_ids if subject + '_connectivity_matrix_file.txt' in subject_graph_files ] train_subject_ids, val_subject_ids = train_validation_split( subject_ids, 0.1) train_dataset = ADHD( 'ADHD200_CC200', train_subject_ids, data.loc[data['network_name'].isin(train_subject_ids)], stats) validation_dataset = ADHD( 'ADHD200_CC200', val_subject_ids, data.loc[data['network_name'].isin(val_subject_ids)], stats) config = Config(node_dim=3, num_classes=3, batch_size=2)