def create_folds(file_path):
    """Build 10 stratified cross-validation folds and pickle them.

    :param file_path: destination path for the pickled list of folds
    :type file_path: str
    :return: None
    """
    data = read_adhd_data()
    # Drop the hyperactive/impulsive subtype from the label set.
    data = data[data['class'] != 'ADHD-Hyperactive/Impulsive']
    stats = find_statistics('data/ADHD200_CC200/', 'data/adhd_stats.pkl')

    # Keep only subjects whose connectivity-matrix file is actually on disk.
    files_on_disk = set(os.listdir('data/ADHD200_CC200/'))
    subject_ids = [
        sid for sid in data.network_name.values.tolist()
        if sid + '_connectivity_matrix_file.txt' in files_on_disk
    ]
    dataset = ADHD('ADHD200_CC200', subject_ids, data, stats)

    # Label for each subject = index of the hot bit in its one-hot class.
    labels = np.array(
        [np.argmax(dataset.get_brain_class(sid)) for sid in subject_ids])
    samples = np.array(subject_ids)

    folds = []
    for train_idx, test_idx in StratifiedKFold(n_splits=10).split(samples, labels):
        train_ids = [subject_ids[i] for i in train_idx.tolist()]
        test_ids = [subject_ids[i] for i in test_idx.tolist()]
        folds.append((train_ids, test_ids))

    with open(file_path, 'wb') as f:
        pickle.dump(folds, f, pickle.HIGHEST_PROTOCOL)
# Beispiel #2  (scrape-separator artifact; commented out so the file stays parseable)
# 0
def test_brain_convolution_model(test_subject_ids, trained_model):
    """Run a trained brain-convolution model over the given test subjects.

    :param test_subject_ids: subject ids to evaluate
    :type test_subject_ids: list(str)
    :param trained_model: a trained Keras model accepting [features, adjacency]
    :return: model predictions for the test subjects
    """
    data = read_adhd_data()
    stats = find_statistics('data/ADHD200_CC200/', 'data/adhd_stats.pkl')
    subset = data.loc[data['network_name'].isin(test_subject_ids)]
    test_dataset = ADHD('ADHD200_CC200', test_subject_ids, subset, stats)

    adjacencies = []
    node_features = []
    for sid in test_subject_ids:
        adj, feats, _ = test_dataset.get_brain_graph_and_class(sid)
        adjacencies.append(adj)
        node_features.append(feats)

    # Model expects two stacked inputs: node features first, adjacency second.
    return trained_model.predict(
        [np.array(node_features), np.array(adjacencies)])
# Beispiel #3  (scrape-separator artifact; commented out so the file stays parseable)
# 0
def train_brain_convolution_model(subject_ids):
    """Train the brain graph-convolution model on the given subjects.

    :param subject_ids: subject ids to train on
    :type subject_ids: list(str)
    :return: the fitted Keras model
    """
    data = read_adhd_data()
    stats = find_statistics('data/ADHD200_CC200/', 'data/adhd_stats.pkl')
    train_subject_ids = subject_ids
    subset = data.loc[data['network_name'].isin(train_subject_ids)]
    train_dataset = ADHD('ADHD200_CC200', train_subject_ids, subset, stats)

    config = Config(node_dim=3, num_classes=3, batch_size=1)
    generator = data_generator(train_dataset, config, shuffle=True)

    model = BrainConvolutionModel(config).build()
    model.summary()

    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(0.001, amsgrad=True, decay=config.WEIGHT_DECAY),
        metrics=['accuracy'])
    # Positional args: presumably 10 steps per epoch for 50 epochs — confirm
    # against the Keras fit_generator signature in use.
    model.fit_generator(generator, 10, 50)

    return model
def get_data(subject_ids):
    """Create dataset for training and testing the classifiers.

    :param subject_ids: list of subject ids from the dataset
    :type subject_ids: list(str)
    :return: features (flattened upper-triangle of each matrix), classes
    :rtype: numpy.array, numpy.array
    """
    data = read_adhd_data()
    # Drop the hyperactive/impulsive subtype from the label set.
    data = data[data['class'] != 'ADHD-Hyperactive/Impulsive']
    stats = find_statistics('data/ADHD200_CC200/', 'data/adhd_stats.pkl')
    dataset = ADHD('ADHD200_CC200', subject_ids, data, stats)

    features = []
    labels = []
    for sid in subject_ids:
        matrix, _, one_hot = dataset.get_brain_graph_and_class(sid)
        # Strict upper triangle (k=1) flattens the matrix without the
        # diagonal; label is the index of the hot bit in the one-hot class.
        features.append(matrix[np.triu_indices(matrix.shape[0], k=1)])
        labels.append(np.argmax(one_hot))

    return np.array(features), np.array(labels)
    :param split: ratio value for validation data
    :type split: float
    :return: train subject ids, validation subject ids
    :rtype: list(str), list(str)
    """
    size = len(subject_ids)
    split_index = int(round(size * (1 - split)))
    random.shuffle(subject_ids)
    train_ids = subject_ids[:split_index]
    validation_ids = subject_ids[split_index:]
    return train_ids, validation_ids


if __name__ == '__main__':
    # Entry point: prepare train/validation datasets for the
    # 1000 Functional Connectomes corpus.
    data = read_functional_connectomes_data()
    # Dataset statistics cached in a pickle — presumably used for
    # normalization inside the dataset class; verify against find_statistics.
    stats = find_statistics('data/1000_Functional_Connectomes/',
                            'data/1000_functional_connectomes_stats.pkl')
    subject_ids = data.network_name.values.tolist()
    subject_graph_files = os.listdir('data/1000_Functional_Connectomes/')
    # Keep only subjects whose connectivity-matrix file is present on disk.
    subject_ids = [
        subject for subject in subject_ids
        if subject + '_connectivity_matrix_file.txt' in subject_graph_files
    ]
    # 90/10 split; 0.1 is the validation fraction.
    train_subject_ids, val_subject_ids = train_validation_split(
        subject_ids, 0.1)
    train_dataset = FunctionalConnectomes(
        '1000_functional_connectomes', train_subject_ids,
        data.loc[data['network_name'].isin(train_subject_ids)], stats)
    validation_dataset = FunctionalConnectomes(
        '1000_functional_connectomes', val_subject_ids,
        data.loc[data['network_name'].isin(val_subject_ids)], stats)
    # Binary classification config. NOTE(review): the script appears
    # truncated after this line in the scraped source — confirm upstream.
    config = Config(node_dim=3, num_classes=2, batch_size=3)
# Beispiel #6  (scrape-separator artifact; commented out so the file stays parseable)
# 0
    :type split: float
    :return: train subject ids, validation subject ids
    :rtype: list(str), list(str)
    """
    size = len(subject_ids)
    split_index = int(round(size * (1 - split)))
    random.shuffle(subject_ids)
    train_ids = subject_ids[:split_index]
    validation_ids = subject_ids[split_index:]
    return train_ids, validation_ids


if __name__ == '__main__':
    # Entry point: prepare train/validation datasets for the ADHD-200
    # CC200-parcellated corpus.
    data = read_adhd_data()
    # Drop the hyperactive/impulsive subtype from the label set.
    data = data[data['class'] != 'ADHD-Hyperactive/Impulsive']
    # Dataset statistics cached in a pickle — presumably used for
    # normalization inside the dataset class; verify against find_statistics.
    stats = find_statistics('data/ADHD200_CC200/', 'data/adhd_stats.pkl')
    subject_ids = data.network_name.values.tolist()
    subject_graph_files = os.listdir('data/ADHD200_CC200/')
    # Keep only subjects whose connectivity-matrix file is present on disk.
    subject_ids = [
        subject for subject in subject_ids
        if subject + '_connectivity_matrix_file.txt' in subject_graph_files
    ]
    # 90/10 split; 0.1 is the validation fraction.
    train_subject_ids, val_subject_ids = train_validation_split(
        subject_ids, 0.1)
    train_dataset = ADHD(
        'ADHD200_CC200', train_subject_ids,
        data.loc[data['network_name'].isin(train_subject_ids)], stats)
    validation_dataset = ADHD(
        'ADHD200_CC200', val_subject_ids,
        data.loc[data['network_name'].isin(val_subject_ids)], stats)
    # 3-class config. NOTE(review): the script appears truncated after this
    # line in the scraped source — confirm upstream.
    config = Config(node_dim=3, num_classes=3, batch_size=2)