def predict_attention_nets(test_path, train_x=None):
    """Run the 10-branch attention ensemble over each saved k-fold checkpoint.

    Builds the attention model once, then for each fold loads that fold's
    best weights and stores the fold's predictions on the test set.

    Args:
        test_path: Directory containing the test segment files.
        train_x: Unused; kept for backward compatibility with callers.

    Returns:
        float32 array of shape (n_test_files, n_fold, n_classes) holding the
        per-fold class probabilities.
    """
    pre_type = "sym"  # wavelet preprocessing variant passed to read_data_seg

    # One input branch per ECG segment; the attention model fuses all 10.
    input_size = (2560, 12)
    net_num = 10
    inputs_list = [Input(shape=input_size) for _ in range(net_num)]
    outputs = attentionmodel.build_network(inputs_list,
                                           0.5,
                                           num_classes=10,
                                           block_size=4,
                                           relu=False)
    model = Model(inputs=inputs_list, outputs=outputs)

    # Only the file count is used below (to size the output array).
    test_files = sorted(os.listdir(test_path))

    print("*********read data for dense nets******")
    test_x = [
        read_data_seg(test_path, preprocess=True, n_index=i, pre_type=pre_type)
        for i in range(net_num)
    ]

    n_fold = 3
    n_classes = 10
    attention_blend_test = np.zeros(
        (len(test_files), n_fold, n_classes)).astype('float32')

    model_path = './official_attention_model/'

    en_amount = 1  # number of ensemble seeds
    for seed in range(en_amount):
        print("*********Start Attention Nets***************")
        for i in range(n_fold):
            print('fold: ', i + 1, ' training')

            model_name = "attention_extend_weights-best_k{}_r{}_0802_30.hdf5".format(
                seed, i)
            # Evaluate the best checkpoint trained for this (seed, fold) pair.
            model.load_weights(model_path + model_name)
            attention_blend_test[:, i, :] = model.predict(test_x)

    # Free the large input tensors before returning.
    del test_x
    gc.collect()

    print(" predict_attention_nets OK !!!!!!!!!")
    return attention_blend_test
# Ejemplo n.º 2
# 0
def predcit_net_kfolds():
    """Blend k-fold attention predictions, tune thresholds, write Val answers.

    For each saved fold checkpoint, the model predicts on the full training
    set and the validation set.  The per-fold training predictions are
    blended with fixed weights (0.1 / 0.1 / 0.8), a per-class decision
    threshold is searched by macro F1, and the thresholded validation
    predictions are written to an answers CSV.

    NOTE(review): the name keeps the original ``predcit`` typo so existing
    callers are unaffected.
    """
    pre_type = "sym"  # wavelet preprocessing variant to evaluate

    labels = pd.read_csv(path + "reference.csv")
    raw_IDs = labels["File_name"].values.tolist()

    # Record IDs per preprocessing variant; only IDs[pre_type] is used below.
    IDs = {
        "sym": raw_IDs,
        "db4": [i + "_db4" for i in raw_IDs],
        "db6": [i + "_db6" for i in raw_IDs],
    }

    # 10 input branches, one per 2560x12 ECG segment.
    input_size = (2560, 12)
    net_num = 10
    inputs_list = [Input(shape=input_size) for _ in range(net_num)]
    outputs = attentionmodel.build_network(inputs_list,
                                           0.5,
                                           num_classes=9,
                                           block_size=4,
                                           relu=False)
    model = Model(inputs=inputs_list, outputs=outputs)

    test_x = [
        read_data_seg(path,
                      split='Val',
                      preprocess=True,
                      n_index=i,
                      pre_type=pre_type) for i in range(net_num)
    ]

    model_path = './official_attention_model/'

    en_amount = 1  # number of ensemble seeds
    for seed in range(en_amount):
        print("************************")
        n_fold = 3
        n_classes = 9

        # Reproduce the fold iteration used during training.
        kfold = StratifiedKFold(n_splits=n_fold,
                                shuffle=True,
                                random_state=seed)
        kf = kfold.split(IDs[pre_type], labels['label1'])

        blend_train = np.zeros(
            (6500, n_fold, n_classes)).astype('float32')  # len(train_x)
        blend_test = np.zeros(
            (500, n_fold, n_classes)).astype('float32')  # len(test_x)

        for i, (index_train, index_valid) in enumerate(kf):
            print('fold: ', i + 1, ' training')

            # Predict on the FULL training set (not this fold's subset) so
            # every fold fills a complete (6500, n_classes) slice.
            tr_IDs = np.array(IDs[pre_type])
            print(tr_IDs.shape)

            X = np.empty((tr_IDs.shape[0], 10, 2560, 12))
            for j, ID in enumerate(tr_IDs):
                X[j, ] = np.load("training_data/" + ID + ".npy")
            # Split the 10 segments into the model's 10 input branches.
            X_tr = [X[:, k] for k in range(10)]
            del X

            # Evaluate the best checkpoint trained for this (seed, fold) pair.
            model.load_weights(
                model_path +
                'attention_extend_weights-best_k{}_r{}_0608.hdf5'.format(
                    seed, i))

            blend_train[:, i, :] = model.predict(X_tr)
            blend_test[:, i, :] = model.predict(test_x)

            del X_tr
            gc.collect()

    index = np.arange(6500)
    y_train = preprocess_y(labels, index)

    # Fixed-weight blend of the three folds' training predictions.
    train_y = (0.1 * blend_train[:, 0, :] + 0.1 * blend_train[:, 1, :] +
               0.8 * blend_train[:, 2, :])

    # Per-class threshold search: pick the threshold maximising macro F1.
    threshold = np.arange(0.1, 0.9, 0.1)
    accuracies = []
    best_threshold = np.zeros(train_y.shape[1])

    for i in range(train_y.shape[1]):
        y_prob = np.array(train_y[:, i])
        acc = np.array([
            f1_score(y_train[:, i],
                     [1 if prob >= j else 0 for prob in y_prob],
                     average='macro') for j in threshold
        ])
        best_index = np.where(acc == acc.max())
        accuracies.append(acc.max())
        best_threshold[i] = threshold[best_index[0][0]]

    print("best_threshold :", best_threshold)

    y_pred = np.array([[
        1 if train_y[i, j] >= best_threshold[j] else 0
        for j in range(train_y.shape[1])
    ] for i in range(len(train_y))])
    print(" train data f1_score  :", f1_score(y_train, y_pred,
                                              average='macro'))

    for i in range(9):
        print("f1 score of ab {} is {}".format(
            i, f1_score(y_train[:, i], y_pred[:, i], average='macro')))

    # Same fixed-weight blend on the validation predictions.
    out = (0.1 * blend_test[:, 0, :] + 0.1 * blend_test[:, 1, :] +
           0.8 * blend_test[:, 2, :])

    y_pred_test = np.array([[
        1 if out[i, j] >= best_threshold[j] else 0 for j in range(out.shape[1])
    ] for i in range(len(out))])

    classes = [0, 1, 2, 3, 4, 5, 6, 7, 8]

    test_y = y_pred_test

    # NOTE: test_y is already binary and every threshold is < 1, so this
    # second thresholding is a no-op; kept for parity with the original flow.
    y_pred = [[
        1 if test_y[i, j] >= best_threshold[j] else 0
        for j in range(test_y.shape[1])
    ] for i in range(len(test_y))]
    pred = []
    for j in range(test_y.shape[0]):
        pred.append([classes[i] for i in range(9) if y_pred[j][i] == 1])

    val_dataset_path = path + "/Val/"
    val_files = os.listdir(val_dataset_path)
    val_files.sort()

    with open('answers_attention_{}_0609.csv'.format(pre_type),
              'w') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow([
            'File_name', 'label1', 'label2', 'label3', 'label4', 'label5',
            'label6', 'label7', 'label8'
        ])
        count = 0
        for file_name in val_files:
            if file_name.endswith('.mat'):
                # BUGFIX: str.strip('.mat') removes any of the characters
                # '.', 'm', 'a', 't' from both ends and corrupts record
                # names ending in those letters; drop the extension instead.
                record_name = os.path.splitext(file_name)[0]
                answer = [record_name]

                result = pred[count]
                answer.extend(result)
                # Pad to the 8 label columns declared in the header.
                for _ in range(8 - len(result)):
                    answer.append('')
                count += 1
                writer.writerow(answer)
# Ejemplo n.º 3
# 0
def training_net_kfolds():
    """Train the 10-branch attention model with stratified 3-fold CV.

    Builds one model, then for each fold trains via data generators on the
    augmented ID list (raw + db4 + db6 wavelet variants), saving the best
    weights per (seed, fold) checkpoint monitored on ``val_fmeasure``.
    """
    train_dataset_path = path + "/Train/"
    val_dataset_path = path + "/Val/"

    # Listed but unused below; kept as a cheap existence check for the
    # dataset directories before training starts.
    train_files = sorted(os.listdir(train_dataset_path))
    val_files = sorted(os.listdir(val_dataset_path))

    labels = pd.read_csv(path + "reference.csv")
    labels_en = pd.read_csv(path + "kfold_labels_en.csv")
    data_info = pd.read_csv(path + "data_info.csv")  # read but unused here

    input_size = (2560, 12)
    net_num = 10
    inputs_list = [Input(shape=input_size) for _ in range(net_num)]
    outputs = attentionmodel.build_network(inputs_list,
                                           0.5,
                                           num_classes=9,
                                           block_size=4,
                                           relu=False)
    model = Model(inputs=inputs_list, outputs=outputs)

    # Augment the ID list with the db4/db6 wavelet variants (3x the data);
    # the stratification labels are repeated to match.
    raw_IDs = labels_en["File_name"].values.tolist()
    extend_db4_IDs = [i + "_db4" for i in raw_IDs]
    extend_db6_IDs = [i + "_db6" for i in raw_IDs]
    all_IDs = raw_IDs + extend_db4_IDs + extend_db6_IDs

    train_labels = labels_en["label1"].values
    all_train_labels = np.hstack((train_labels, train_labels, train_labels))

    # Generator parameters: 10 segments of 2560 samples x 12 leads each.
    params = {
        'dim': (10, 2560),
        'batch_size': 64,
        'n_classes': 9,
        'n_channels': 12,
        'shuffle': True
    }

    en_amount = 1  # number of ensemble seeds
    model_path = './official_attention_model/'

    for seed in range(en_amount):
        print("************************")
        n_fold = 3

        # NOTE: random_state is fixed (1234), so every seed iteration
        # produces the same fold split.
        kfold = StratifiedKFold(n_splits=n_fold,
                                shuffle=True,
                                random_state=1234)
        kf = kfold.split(all_IDs, all_train_labels)

        for i, (index_train, index_valid) in enumerate(kf):
            print('fold: ', i + 1, ' training')

            tr_IDs = np.array(all_IDs)[index_train]
            val_IDs = np.array(all_IDs)[index_valid]
            print(tr_IDs.shape)

            # Generators
            training_generator = DataGenerator(tr_IDs, labels, **params)
            validation_generator = DataGenerator(val_IDs, labels, **params)

            # Save only the best weights per fold, monitored on val_fmeasure.
            checkpointer = ModelCheckpoint(
                filepath=model_path +
                'attention_extend_weights-best_k{}_r{}_0609_30.hdf5'.format(
                    seed, i),
                monitor='val_fmeasure',
                verbose=1,
                save_best_only=True,
                save_weights_only=True,
                mode='max')
            reduce = ReduceLROnPlateau(monitor='val_fmeasure',
                                       factor=0.5,
                                       patience=2,
                                       verbose=1,
                                       min_delta=1e-4,
                                       mode='max')

            config = Config()
            add_compile(model, config)

            # An EarlyStopping callback was constructed but never registered
            # in the original; it is removed — only checkpointing and LR
            # reduction drive training.
            callback_lists = [checkpointer, reduce]

            model.fit_generator(generator=training_generator,
                                validation_data=validation_generator,
                                use_multiprocessing=False,
                                epochs=30,
                                verbose=1,
                                callbacks=callback_lists)
# Ejemplo n.º 4
# 0
def predict_net_one_fold():
    """Predict with the single-fold attention checkpoint and write Val answers.

    Loads the one-fold best weights, tunes per-class decision thresholds on
    the training predictions by macro F1, predicts on the validation set,
    and writes the thresholded multi-label answers to a CSV.
    """
    pre_type = "sym"  # wavelet preprocessing variant to evaluate

    labels = pd.read_csv(path + "reference.csv")
    raw_IDs = labels["File_name"].values.tolist()

    # Record IDs per preprocessing variant; only IDs[pre_type] is used below.
    IDs = {
        "sym": raw_IDs,
        "db4": [i + "_db4" for i in raw_IDs],
        "db6": [i + "_db6" for i in raw_IDs],
    }

    # Load every training record: (6500 records, 10 segments, 2560 x 12).
    X = np.empty((6500, 10, 2560, 12))
    for i, ID in enumerate(IDs[pre_type]):
        X[i, ] = np.load("training_data/" + ID + ".npy")
    # Split the 10 segments into the model's 10 input branches.
    train_x = [X[:, k] for k in range(10)]

    index = np.arange(6500)
    y_train = preprocess_y(labels, index)

    input_size = (2560, 12)
    net_num = 10
    inputs_list = [Input(shape=input_size) for _ in range(net_num)]

    outputs = attentionmodel.build_network(inputs_list,
                                           0.5,
                                           num_classes=9,
                                           block_size=4,
                                           relu=False)
    model = Model(inputs=inputs_list, outputs=outputs)

    model_path = './official_attention_model/'
    model_name = 'attention_extend_weights-best_one_fold_0607.hdf5'

    model.load_weights(model_path + model_name)
    blend_train = model.predict(train_x)

    # Per-class threshold search: pick the threshold maximising macro F1
    # on the training predictions.
    threshold = np.arange(0.1, 0.9, 0.1)
    accuracies = []
    best_threshold = np.zeros(blend_train.shape[1])

    for i in range(blend_train.shape[1]):
        y_prob = np.array(blend_train[:, i])
        acc = np.array([
            f1_score(y_train[:, i],
                     [1 if prob >= j else 0 for prob in y_prob],
                     average='macro') for j in threshold
        ])
        best_index = np.where(acc == acc.max())
        accuracies.append(acc.max())
        best_threshold[i] = threshold[best_index[0][0]]

    print("best_threshold :", best_threshold)

    y_pred = np.array([[
        1 if blend_train[i, j] >= best_threshold[j] else 0
        for j in range(blend_train.shape[1])
    ] for i in range(len(blend_train))])
    print(" train data f1_score  :", f1_score(y_train, y_pred,
                                              average='macro'))

    for i in range(9):
        print("f1 score of ab {} is {}".format(
            i, f1_score(y_train[:, i], y_pred[:, i], average='macro')))

    net_num = 10
    test_x = [
        read_data_seg(path,
                      split='Val',
                      preprocess=True,
                      n_index=i,
                      pre_type=pre_type) for i in range(net_num)
    ]

    out = model.predict(test_x)
    y_pred_test = np.array([[
        1 if out[i, j] >= best_threshold[j] else 0 for j in range(out.shape[1])
    ] for i in range(len(out))])

    classes = [0, 1, 2, 3, 4, 5, 6, 7, 8]

    test_y = y_pred_test

    # NOTE: test_y is already binary and every threshold is < 1, so this
    # second thresholding is a no-op; kept for parity with the original flow.
    y_pred = [[
        1 if test_y[i, j] >= best_threshold[j] else 0
        for j in range(test_y.shape[1])
    ] for i in range(len(test_y))]
    pred = []
    for j in range(test_y.shape[0]):
        pred.append([classes[i] for i in range(9) if y_pred[j][i] == 1])

    val_dataset_path = path + "/Val/"
    val_files = os.listdir(val_dataset_path)
    val_files.sort()

    with open('answers_attention_{}_0607.csv'.format(pre_type),
              'w') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow([
            'File_name', 'label1', 'label2', 'label3', 'label4', 'label5',
            'label6', 'label7', 'label8'
        ])
        count = 0
        for file_name in val_files:
            if file_name.endswith('.mat'):
                # BUGFIX: str.strip('.mat') removes any of the characters
                # '.', 'm', 'a', 't' from both ends and corrupts record
                # names ending in those letters; drop the extension instead.
                record_name = os.path.splitext(file_name)[0]
                answer = [record_name]

                result = pred[count]
                answer.extend(result)
                # Pad to the 8 label columns declared in the header.
                for _ in range(8 - len(result)):
                    answer.append('')
                count += 1
                writer.writerow(answer)
# Ejemplo n.º 5
# 0
def training_net_one_fold():
    """Train the 10-branch attention model on a single random split.

    Uses the augmented ID list (raw + db4 + db6 wavelet variants), a random
    train/validation split, and saves the best weights monitored on
    ``val_fmeasure``.
    """
    train_dataset_path = path + "/Train/"
    val_dataset_path = path + "/Val/"

    # Listed but unused below; kept as a cheap existence check for the
    # dataset directories before training starts.
    train_files = sorted(os.listdir(train_dataset_path))
    val_files = sorted(os.listdir(val_dataset_path))

    labels = pd.read_csv(path + "reference.csv")
    labels_en = pd.read_csv(path + "kfold_labels_en.csv")
    data_info = pd.read_csv(path + "data_info.csv")  # read but unused here

    input_size = (2560, 12)
    net_num = 10
    inputs_list = [Input(shape=input_size) for _ in range(net_num)]
    outputs = attentionmodel.build_network(inputs_list,
                                           0.5,
                                           num_classes=9,
                                           block_size=4,
                                           relu=False)
    model = Model(inputs=inputs_list, outputs=outputs)

    # Augment the ID list with the db4/db6 wavelet variants (3x the data).
    raw_IDs = labels_en["File_name"].values.tolist()
    extend_db4_IDs = [i + "_db4" for i in raw_IDs]
    extend_db6_IDs = [i + "_db6" for i in raw_IDs]
    all_IDs = raw_IDs + extend_db4_IDs + extend_db6_IDs

    # Generator parameters: 10 segments of 2560 samples x 12 leads each.
    params = {
        'dim': (10, 2560),
        'batch_size': 64,
        'n_classes': 9,
        'n_channels': 12,
        'shuffle': True
    }

    model_path = './official_attention_model/'

    # Random ~70/30 split over the augmented records.  The counts are
    # hard-coded (23109 total, 16176 train) — presumably 3 x the raw record
    # count; TODO confirm this matches len(all_IDs).
    index = np.arange(23109)
    np.random.shuffle(index)

    index_train = index[:16176]
    index_valid = index[16176:]

    tr_IDs = np.array(all_IDs)[index_train]
    val_IDs = np.array(all_IDs)[index_valid]

    print(tr_IDs.shape)
    print(val_IDs.shape)

    # Generators
    training_generator = DataGenerator(tr_IDs, labels, **params)
    validation_generator = DataGenerator(val_IDs, labels, **params)

    # Save only the best weights, monitored on val_fmeasure.
    checkpointer = ModelCheckpoint(
        filepath=model_path +
        'attention_extend_weights-best_one_fold_0607.hdf5',
        monitor='val_fmeasure',
        verbose=1,
        save_best_only=True,
        save_weights_only=True,
        mode='max')
    reduce = ReduceLROnPlateau(monitor='val_fmeasure',
                               factor=0.5,
                               patience=2,
                               verbose=1,
                               min_delta=1e-4,
                               mode='max')

    config = Config()
    add_compile(model, config)

    # An EarlyStopping callback was constructed but never registered in the
    # original; it is removed — only checkpointing and LR reduction drive
    # training.
    callback_lists = [checkpointer, reduce]

    model.fit_generator(generator=training_generator,
                        validation_data=validation_generator,
                        use_multiprocessing=False,
                        epochs=20,
                        verbose=1,
                        callbacks=callback_lists)