def run_experiment_test(base_dir):
    data_base_dir = base_dir  # e.g. r'C:\Users\ORI\Documents\Thesis\dataset_all'
    all_subjects = [
        "RSVP_Color116msVPicr.mat", "RSVP_Color116msVPpia.mat",
        "RSVP_Color116msVPfat.mat", "RSVP_Color116msVPgcb.mat",
        "RSVP_Color116msVPgcc.mat", "RSVP_Color116msVPgcd.mat",
        "RSVP_Color116msVPgcf.mat", "RSVP_Color116msVPgcg.mat",
        "RSVP_Color116msVPgch.mat", "RSVP_Color116msVPiay.mat",
        "RSVP_Color116msVPicn.mat"
    ]

    for subject in all_subjects:
        # subject = "RSVP_Color116msVPgcd.mat"

        file_name = os.path.join(data_base_dir, subject)
        all_data_per_char, target_per_char, train_mode_per_block, all_data_per_char_as_matrix, target_per_char_as_matrix = create_data_rep_training(
            file_name, -200, 800, downsampe_params=8)
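        # create_data_rep_training (defined elsewhere in this repo) presumably epochs the
        # recording into [-200, 800] ms windows around each stimulus, downsampled by a factor
        # of 8; the *_as_matrix outputs stack the per-character epochs into dense arrays.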

        data_generator = triplet_data_generator(
            all_data_per_char_as_matrix[train_mode_per_block == 1],
            target_per_char_as_matrix[train_mode_per_block == 1], 80)

        testing_data, testing_tags = get_all_triplet_combinations_testing(
            all_data_per_char_as_matrix, target_per_char_as_matrix,
            train_mode_per_block)

        valid_data = triplet_data_collection(
            all_data_per_char_as_matrix[train_mode_per_block == 2],
            target_per_char_as_matrix[train_mode_per_block == 2], 80)

        total_number_of_char_in_training = all_data_per_char_as_matrix[
            train_mode_per_block == 1].shape[0] / 10

        # region Build the P300Net model
        model = get_graph_lstm(3, 10, 25, 55)
        # endregion

        # region the P300Net identification model
        P300IdentificationModel = get_item_lstm_subgraph(25, 55)
        P300IdentificationModel.compile(loss='binary_crossentropy',
                                        class_mode="binary",
                                        optimizer='rmsprop')
        # endregion

        # region train the P300Net model
        # model.fit_generator(data_generator, 2880, nb_epoch=10, validation_data=valid_data)
        model.fit_generator(data_generator,
                            80 * 40,
                            nb_epoch=2,
                            validation_data=valid_data)
        # endregion

        # all_train_data = dict()
        train_p300_model(
            P300IdentificationModel,
            all_data_per_char_as_matrix[train_mode_per_block == 1],
            target_per_char_as_matrix[train_mode_per_block == 1])

        final_model = get_item_lstm_subgraph(25, 55)
        final_model_original_weights = final_model.get_weights()

        final_model.compile(loss='binary_crossentropy',
                            class_mode="binary",
                            optimizer='sgd')
        final_model.set_weights(
            list(model.nodes['item_latent'].layer.get_weights()))

        all_prediction_P300Net = predict_p300_model(
            final_model,
            all_data_per_char_as_matrix[train_mode_per_block != 1])
        all_prediction_normal = predict_p300_model(
            P300IdentificationModel,
            all_data_per_char_as_matrix[train_mode_per_block != 1])
        # NOTE: the per-subject model predictions are overridden by the P300Net predictions below
        all_prediction_normal = all_prediction_P300Net
        plt.subplot(1, 4, 1)
        # plt.imshow(all_prediction, interpolation='none')
        plt.subplot(1, 4, 2)
        import theano
        import theano.tensor as T

        # softmax over the 30 averaged per-stimulus scores of each character
        x = T.dmatrix('x')
        softmax_res_func = theano.function([x], T.nnet.softmax(x))

        #
        # plt.imshow(softmax_res_func(all_prediction), interpolation='none')
        # plt.subplot(1, 4, 3)
        # plt.imshow(softmax_res_func(np.mean(all_prediction.reshape((-1, 10, 30)), axis=1)).astype(np.int),
        #            interpolation='none')

        plt.subplot(1, 4, 3)
        test_tags = target_per_char_as_matrix[
            train_mode_per_block !=
            1]  # np.array([target_per_char[x][train_mode_per_block != 1] for x in range(30)]).T
        # plt.imshow(np.mean(all_res.reshape((-1, 10, 30)), axis=1), interpolation='none')

        all_res = test_tags

        # plt.imshow(softmax_res_func(all_prediction_normal.reshape((-1, 30))), interpolation='none')

        actual_untrained = np.argmax(softmax_res_func(
            np.mean(all_prediction_normal.reshape((-1, 10, 30)), axis=1)),
                                     axis=1)
        # actual = np.where(np.round(softmax_res_func(np.mean(all_prediction.reshape((-1, 10, 30)), axis=1))) == 1)[0];
        gt = np.argmax(np.mean(all_res.reshape((-1, 10, 30)), axis=1), axis=1)
        # np.intersect1d(actual, gt)
        # accuracy = len(np.intersect1d(actual, gt)) / float(len(gt))

        plt.subplot(1, 2, 1)
        plt.imshow(
            softmax_res_func(
                np.mean(all_prediction_normal.reshape((-1, 10, 30)), axis=1)))
        plt.subplot(1, 2, 2)
        plt.imshow(np.mean(all_res.reshape((-1, 10, 30)), axis=1))
        plt.show()

        accuracy = 0  # placeholder (only accuracy_untrained is computed in this snippet)
        accuracy_untrained = np.sum(actual_untrained == gt) / float(len(gt))
        print "subject:{0} accu:{1} acc_untrained:{2}".format(
            subject, accuracy, accuracy_untrained)
    parser = argparse.ArgumentParser()
    parser.add_argument("-start_sub_idx", help="first sub", type=int, default=0)
    parser.add_argument("-end_sub_idx", help="last sub", type=int, default=len(all_subjects))
    args = parser.parse_args()
    start_idx = args.start_sub_idx
    end_idx = args.end_sub_idx

    train_data_all_subject = []
    test_data_all_subject = []

    train_tags_all_subject = []
    test_tags_all_subject = []
    test_data_all_subject_with_noise = dict()
    for experiment_counter, subject in enumerate(
            all_subjects[start_idx:end_idx]):
        print "start subject:{}".format(subject)

        file_name = os.path.join(data_base_dir, subject)
        _, target_per_char, train_mode_per_block, all_data_per_char_as_matrix, target_per_char_as_matrix = create_data_rep_training(
            file_name, -200, 800, downsampe_params=8)

        noise_data = dict()

        noise_shifts = [-120, -80, -40, 0, 40, 80, 120]

        for time_shift_noise in noise_shifts:
            _, _, _, noise_data[
                time_shift_noise], _ = create_data_rep_training(
                    file_name, (-200 + time_shift_noise),
                    (800 + time_shift_noise),
                    downsampe_params=8)
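        # noise_data[shift] now holds epochs whose window is offset by `shift` ms relative to
        # the stimulus onset; presumably these time-jittered copies are used as "noise" examples.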

        for rep_per_sub, cross_validation_indexes in enumerate(
                list(
                    cross_validation.KFold(len(train_mode_per_block) / 10,
                                           n_folds=4,
                                           random_state=42,
                                           shuffle=True))):

            # separate randomly

            batch_size = 20
            select = 1

            train_as_p300 = False
            train_indexes = train_mode_per_block == 1
def train_on_subjset(all_subjects, model_file_name):
    print "start ----------{}-------".format(model_file_name)

    parser = argparse.ArgumentParser()
    parser.add_argument("-start_sub_idx",
                        help="first sub",
                        type=int,
                        default=0)
    parser.add_argument("-end_sub_idx",
                        help="first sub",
                        type=int,
                        default=len(all_subjects))

    args = parser.parse_args()
    start_idx = args.start_sub_idx
    end_idx = args.end_sub_idx

    from experiments.P300_RSVP.model_left_out.load_left_out_model import load_left_out_model

    only_p300_model_1 = load_left_out_model(model_file_name)
    original_weights = only_p300_model_1.get_weights()

    for experiment_counter, subject in enumerate(
            all_subjects[start_idx:end_idx]):

        print "start subject:{}".format(subject)

        file_name = os.path.join(data_base_dir, subject)
        all_data_per_char, target_per_char, train_mode_per_block, all_data_per_char_as_matrix, target_per_char_as_matrix = create_data_rep_training(
            file_name, -200, 800, downsampe_params=8)

        for rep_per_sub, cross_validation_indexes in enumerate(
                list(
                    cross_validation.KFold(len(train_mode_per_block) / 10,
                                           n_folds=4,
                                           random_state=42,
                                           shuffle=True))):
            train_data_all_subject = []
            test_data_all_subject = []

            train_tags_all_subject = []
            test_tags_all_subject = []
            batch_size = 20
            select = 1
            train_as_p300 = False
            train_indexes = train_mode_per_block == 1
            validation_indexes = train_mode_per_block == 2
            test_indexes = train_mode_per_block != 1

            if train_as_p300:

                data_generator_batch = triplet_data_generator_no_dict(
                    all_data_per_char_as_matrix[train_indexes],
                    target_per_char_as_matrix[train_indexes],
                    batch_size=batch_size,
                    select=select,
                    debug_mode=False)
            else:

                def flatten_repetitions(data_to_flatten):
                    return np.reshape(
                        np.reshape(data_to_flatten.T * 10,
                                   (-1, 1)) + np.arange(10), (-1))
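                # flatten_repetitions (above) expands character-block indexes into the indexes
                # of their 10 repetitions, e.g. blocks [0, 2] -> [0..9, 20..29].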

                train_indexes = flatten_repetitions(
                    cross_validation_indexes[0])
                test_indexes = flatten_repetitions(cross_validation_indexes[1])

                train_data_all_subject.append(
                    np.asarray(
                        all_data_per_char_as_matrix[train_indexes]).astype(
                            np.float32))
                test_data_all_subject.append(
                    np.asarray(
                        all_data_per_char_as_matrix[test_indexes]).astype(
                            np.float32))

                train_tags_all_subject.append(
                    target_per_char_as_matrix[train_indexes])
                test_tags_all_subject.append(
                    target_per_char_as_matrix[test_indexes])

            eeg_sample_shape = (25, 55)
            # keras.models.load_model(model_file_name)

            # reset the left-out model to its original (pre-fine-tuning) weights for this fold
            only_p300_model_1.set_weights(original_weights)

            only_p300_model_1.summary()

            from keras.optimizers import RMSprop

            only_p300_model_1.compile(
                optimizer=RMSprop(),
                loss='binary_crossentropy',
                metrics=['accuracy'],
            )
            model = only_p300_model_1
            print "after compile"

            # model = LDA()
            train_data = stats.zscore(np.vstack(train_data_all_subject),
                                      axis=1)
            train_tags = np.vstack(train_tags_all_subject).flatten()

            test_data = stats.zscore(np.vstack(test_data_all_subject), axis=1)
            test_tags = np.vstack(test_tags_all_subject).flatten()

            accuracy_train, auc_score_train = predict_using_model(
                model,
                test_data.reshape(test_data.shape[0] * test_data.shape[1],
                                  test_data.shape[2], test_data.shape[3]),
                test_tags)
            #
            print "{} before train accuracy_test {}:{}, auc_score_test:{} ".format(
                subject, rep_per_sub, accuracy_train, auc_score_train)

            accuracy_train, auc_score_train = predict_using_model(
                model,
                train_data.reshape(train_data.shape[0] * train_data.shape[1],
                                   train_data.shape[2], train_data.shape[3]),
                train_tags)

            print "{} before train  accuracy_train {}:{}, auc_score_train:{} ".format(
                subject, rep_per_sub, accuracy_train, auc_score_train)

            model.optimizer.lr.set_value(0.0001)  # Keras 1.x / Theano backend: lower the learning rate in place before fine-tuning
            for i in range(1):
                model.fit(train_data.reshape(
                    train_data.shape[0] * train_data.shape[1],
                    train_data.shape[2], train_data.shape[3]),
                          train_tags,
                          verbose=1,
                          nb_epoch=30,
                          batch_size=600,
                          shuffle=True)

                accuracy_train, auc_score_train = predict_using_model(
                    model,
                    test_data.reshape(test_data.shape[0] * test_data.shape[1],
                                      test_data.shape[2], test_data.shape[3]),
                    test_tags)

                print "{} after train accuracy_test {}:{}, auc_score_test:{} ".format(
                    subject, rep_per_sub, accuracy_train, auc_score_train)

                accuracy_train, auc_score_train = predict_using_model(
                    model,
                    train_data.reshape(
                        train_data.shape[0] * train_data.shape[1],
                        train_data.shape[2], train_data.shape[3]), train_tags)

                print "{} after train accuracy_train {}:{}, auc_score_train:{} ".format(
                    subject, rep_per_sub, accuracy_train, auc_score_train)

            # model.save(r"c:\temp\{}.h5".format(model_file_name,overwrite=True))

            print "end ----------{}-------".format(file_name)
def train_on_subjset(all_subjects, file_name):
    # all_subjects = ["RSVP_Color116msVPgcd.mat",
    #                 "RSVP_Color116msVPgcc.mat",
    #                 "RSVP_Color116msVPpia.mat",
    #                 "RSVP_Color116msVPgcb.mat",
    #                 "RSVP_Color116msVPgcf.mat",
    #                 "RSVP_Color116msVPgcg.mat",
    #                 "RSVP_Color116msVPgch.mat",
    #                 # "RSVP_Color116msVPiay.mat",
    #                 "RSVP_Color116msVPicn.mat",
    #                 "RSVP_Color116msVPicr.mat",
    #                 "RSVP_Color116msVPfat.mat",
    #
    #                 ];
    #
    #
    # all_subjects = [
    #     "RSVP_Color116msVPiay.mat",
    # ];

    parser = argparse.ArgumentParser()
    parser.add_argument("-start_sub_idx",
                        help="first sub",
                        type=int,
                        default=0)
    parser.add_argument("-end_sub_idx",
                        help="first sub",
                        type=int,
                        default=len(all_subjects))
    # parser.add_argument("start_sub_idx", help="first sub",
    #                     type=int, default=len(all_subjects))
    # parser.add_argument("last_sub_idx", help="last sub",
    #                 type=int, default=len(all_subjects))

    args = parser.parse_args()
    start_idx = args.start_sub_idx
    end_idx = args.end_sub_idx

    train_data_all_subject = []
    test_data_all_subject = []

    train_tags_all_subject = []
    test_tags_all_subject = []

    for experiment_counter, subject in enumerate(
            all_subjects[start_idx:end_idx]):
        print "start subject:{}".format(subject)

        file_name = os.path.join(data_base_dir, subject)
        all_data_per_char, target_per_char, train_mode_per_block, all_data_per_char_as_matrix, target_per_char_as_matrix = create_data_rep_training(
            file_name, -200, 800, downsampe_params=8)

        for rep_per_sub, cross_validation_indexes in enumerate(
                list(
                    cross_validation.KFold(len(train_mode_per_block) / 10,
                                           n_folds=4,
                                           random_state=42,
                                           shuffle=True))):
            batch_size = 20
            select = 1
            train_as_p300 = False
            train_indexes = train_mode_per_block == 1
            validation_indexes = train_mode_per_block == 2
            test_indexes = train_mode_per_block != 1

            if train_as_p300:

                data_generator_batch = triplet_data_generator_no_dict(
                    all_data_per_char_as_matrix[train_indexes],
                    target_per_char_as_matrix[train_indexes],
                    batch_size=batch_size,
                    select=select,
                    debug_mode=False)
            else:
                # cross_validation_indexes = list(cross_validation.KFold(len(train_mode_per_block)/10, n_folds=4,
                #                                                               random_state=42, shuffle=True))

                def flatten_repetitions(data_to_flatten):
                    return np.reshape(
                        np.reshape(data_to_flatten.T * 10,
                                   (-1, 1)) + np.arange(10), (-1))

                train_indexes = flatten_repetitions(
                    cross_validation_indexes[0])
                test_indexes = flatten_repetitions(cross_validation_indexes[1])

                # data_generator_batch = simple_data_generator_no_dict(all_data_per_char_as_matrix[train_indexes],
                #                                                   target_per_char_as_matrix[train_indexes], shuffle_data=False)
                #
                # test_data_generator_batch = simple_data_generator_no_dict(all_data_per_char_as_matrix[train_indexes],
                #                                                      target_per_char_as_matrix[train_indexes],
                #                                                      shuffle_data=False)

                train_data_all_subject.append(
                    np.asarray(
                        all_data_per_char_as_matrix[train_indexes]).astype(
                            np.float32))
                test_data_all_subject.append(
                    np.asarray(
                        all_data_per_char_as_matrix[test_indexes]).astype(
                            np.float32))

                train_tags_all_subject.append(
                    target_per_char_as_matrix[train_indexes])
                test_tags_all_subject.append(
                    target_per_char_as_matrix[test_indexes])

            break  # only the first cross-validation fold is used per subject
    eeg_sample_shape = (25, 55)
    only_p300_model_1 = get_only_P300_model_LSTM_CNN(eeg_sample_shape)
    only_p300_model_1.summary()

    from keras.optimizers import RMSprop

    only_p300_model_1.compile(
        optimizer=RMSprop(),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    model = only_p300_model_1
    print "after compile"

    # model = LDA()
    train_data = stats.zscore(np.vstack(train_data_all_subject), axis=1)
    train_tags = np.vstack(train_tags_all_subject).flatten()

    test_data = stats.zscore(np.vstack(test_data_all_subject), axis=1)
    test_tags = np.vstack(test_tags_all_subject).flatten()
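    # each appended array holds one subject's repetition blocks of stimulus epochs; the
    # reshape calls below flatten the first two axes so the model sees individual epochs
    # of shape eeg_sample_shape = (25, 55).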

    for i in range(30):
        model.fit(train_data.reshape(train_data.shape[0] * train_data.shape[1],
                                     train_data.shape[2], train_data.shape[3]),
                  train_tags,
                  verbose=1,
                  nb_epoch=1,
                  batch_size=600,
                  shuffle=True)

        accuracy_test, auc_score_test = predict_using_model(
            model,
            test_data.reshape(test_data.shape[0] * test_data.shape[1],
                              test_data.shape[2], test_data.shape[3]),
            test_tags)

        print "accuracy_test {}:{}, auc_score_test:{} ".format(
            i, accuracy_test, auc_score_test)

        accuracy_train, auc_score_train = predict_using_model(
            model,
            train_data.reshape(train_data.shape[0] * train_data.shape[1],
                               train_data.shape[2], train_data.shape[3]),
            train_tags)

        print "accuracy_train {}:{}, auc_score_train:{} ".format(
            i, accuracy_train, auc_score_train)

    model.optimizer.lr.set_value(0.0001)
    for i in range(30):
        model.fit(train_data.reshape(train_data.shape[0] * train_data.shape[1],
                                     train_data.shape[2], train_data.shape[3]),
                  train_tags,
                  verbose=1,
                  nb_epoch=1,
                  batch_size=600,
                  shuffle=True)

        accuracy_test, auc_score_test = predict_using_model(
            model,
            test_data.reshape(test_data.shape[0] * test_data.shape[1],
                              test_data.shape[2], test_data.shape[3]),
            test_tags)

        print "accuracy_test {}:{}, auc_score_test:{} ".format(
            i, accuracy_test, auc_score_test)

        accuracy_train, auc_score_train = predict_using_model(
            model,
            train_data.reshape(train_data.shape[0] * train_data.shape[1],
                               train_data.shape[2], train_data.shape[3]),
            train_tags)

        print "accuracy_train {}:{}, auc_score_train:{} ".format(
            i, accuracy_train, auc_score_train)

    model.save(r"c:\temp\single_subject_model.h5")

    pass
def train_on_subjset(all_subjects, model_file_name):
    print "start ----------{}-------".format(model_file_name)

    # from experiments.P300_RSVP.model_left_out.load_left_out_model import  load_left_out_model

    model_file_name_full = os.path.join(os.path.dirname(LEFT_OUT_MODEL_FOLDER),
                                        model_file_name)

    # only_p300_model_1 = load_left_out_model(model_file_name)
    # original_wights = only_p300_model_1.get_weights()

    for experiment_counter, subject in enumerate(all_subjects):

        print "start subject:{}".format(subject)

        file_name = os.path.join(data_base_dir, subject)
        all_data_per_char, target_per_char, train_mode_per_block, all_data_per_char_as_matrix, target_per_char_as_matrix = create_data_rep_training(
            file_name, -200, 800, downsampe_params=8)

        for rep_per_sub, cross_validation_indexes in enumerate(
                list(
                    cross_validation.KFold(len(train_mode_per_block) / 10,
                                           n_folds=4,
                                           random_state=42,
                                           shuffle=True))):
            train_data_all_subject = []
            test_data_all_subject = []

            train_tags_all_subject = []
            test_tags_all_subject = []

            def flatten_repetitions(data_to_flatten):
                return np.reshape(
                    np.reshape(data_to_flatten.T * 10,
                               (-1, 1)) + np.arange(10), (-1))

            train_indexes = flatten_repetitions(cross_validation_indexes[0])
            test_indexes = flatten_repetitions(cross_validation_indexes[1])

            train_data_all_subject.append(
                np.asarray(all_data_per_char_as_matrix[train_indexes]).astype(
                    np.float32))
            test_data_all_subject.append(
                np.asarray(all_data_per_char_as_matrix[test_indexes]).astype(
                    np.float32))

            train_tags_all_subject.append(
                target_per_char_as_matrix[train_indexes])
            test_tags_all_subject.append(
                target_per_char_as_matrix[test_indexes])

            from keras.optimizers import RMSprop

            print "after compile"
            from sklearn.externals import joblib
            model = joblib.load(model_file_name_full)

            train_data = stats.zscore(np.vstack(train_data_all_subject),
                                      axis=1)
            train_tags = np.vstack(train_tags_all_subject).flatten()

            test_data = stats.zscore(np.vstack(test_data_all_subject), axis=1)
            test_tags = np.vstack(test_tags_all_subject).flatten()

            accuracy_train, auc_score_train = predict_using_model(
                model,
                test_data.reshape(test_data.shape[0] * test_data.shape[1], -1),
                test_tags)
            #
            print "{} before train accuracy_test {}:{}, auc_score_test:{} ".format(
                subject, rep_per_sub, accuracy_train, auc_score_train)

            accuracy_train, auc_score_train = predict_using_model(
                model,
                train_data.reshape(train_data.shape[0] * train_data.shape[1],
                                   -1), train_tags)

            print "{} before train  accuracy_train {}:{}, auc_score_train:{} ".format(
                subject, rep_per_sub, accuracy_train, auc_score_train)

            print "end ----------{}-------".format(file_name)
Example #7
def train_on_subjset(all_subjects, model_file_name):

    train_data_all_subject = []
    test_data_all_subject = []

    train_tags_all_subject = []
    test_tags_all_subject = []

    for experiment_counter, subject in enumerate(all_subjects):
        print "start subject:{}".format(subject)

        file_name = os.path.join(data_base_dir, subject)
        all_data_per_char, target_per_char, train_mode_per_block, all_data_per_char_as_matrix, target_per_char_as_matrix = create_data_rep_training(
            file_name, -200, 800, downsampe_params=8)

        for rep_per_sub, cross_validation_indexes in enumerate(
                list(
                    cross_validation.KFold(len(train_mode_per_block) / 10,
                                           n_folds=4,
                                           random_state=42,
                                           shuffle=True))):
            batch_size = 20
            select = 1
            train_as_p300 = False
            train_indexes = train_mode_per_block == 1
            validation_indexes = train_mode_per_block == 2
            test_indexes = train_mode_per_block != 1

            if train_as_p300:

                data_generator_batch = triplet_data_generator_no_dict(
                    all_data_per_char_as_matrix[train_indexes],
                    target_per_char_as_matrix[train_indexes],
                    batch_size=batch_size,
                    select=select,
                    debug_mode=False)
            else:
                # cross_validation_indexes = list(cross_validation.KFold(len(train_mode_per_block)/10, n_folds=4,
                #                                                               random_state=42, shuffle=True))

                def flatten_repetitions(data_to_flatten):
                    return np.reshape(
                        np.reshape(data_to_flatten.T * 10,
                                   (-1, 1)) + np.arange(10), (-1))

                train_indexes = flatten_repetitions(
                    cross_validation_indexes[0])
                test_indexes = flatten_repetitions(cross_validation_indexes[1])

                # data_generator_batch = simple_data_generator_no_dict(all_data_per_char_as_matrix[train_indexes],
                #                                                   target_per_char_as_matrix[train_indexes], shuffle_data=False)
                #
                # test_data_generator_batch = simple_data_generator_no_dict(all_data_per_char_as_matrix[train_indexes],
                #                                                      target_per_char_as_matrix[train_indexes],
                #                                                      shuffle_data=False)

                train_data_all_subject.append(
                    np.asarray(
                        all_data_per_char_as_matrix[train_indexes]).astype(
                            np.float32))
                test_data_all_subject.append(
                    np.asarray(
                        all_data_per_char_as_matrix[test_indexes]).astype(
                            np.float32))

                train_tags_all_subject.append(
                    target_per_char_as_matrix[train_indexes])
                test_tags_all_subject.append(
                    target_per_char_as_matrix[test_indexes])

            break

    model = LDA()  # LDA is assumed to be imported at module level (scikit-learn's linear discriminant analysis)

    from keras.optimizers import RMSprop

    print "after compile"

    train_data = stats.zscore(np.vstack(train_data_all_subject), axis=1)
    train_tags = np.vstack(train_tags_all_subject).flatten()

    test_data = stats.zscore(np.vstack(test_data_all_subject), axis=1)
    test_tags = np.vstack(test_tags_all_subject).flatten()
    model.fit(
        train_data.reshape(train_data.shape[0] * train_data.shape[1], -1),
        train_tags)

    for i in range(1):
        model.fit(
            train_data.reshape(train_data.shape[0] * train_data.shape[1], -1),
            train_tags)

        accuracy_test, auc_score_test = predict_using_model(
            model,
            test_data.reshape(test_data.shape[0] * test_data.shape[1], -1),
            test_tags)

        print "accuracy_test {}:{}, auc_score_test:{} ".format(
            i, accuracy_test, auc_score_test)

        accuracy_train, auc_score_train = predict_using_model(
            model,
            train_data.reshape(train_data.shape[0] * train_data.shape[1], -1),
            train_tags)

        print "accuracy_train {}:{}, auc_score_train:{} ".format(
            i, accuracy_train, auc_score_train)

    from sklearn.externals import joblib
    joblib.dump(model, os.path.join(r"c:\temp", model_file_name + "_lda.plk"))
    print "temp"
def train_on_subjset(all_subjects, model_file_name):
    print "start ----------{}-------".format(model_file_name)
    # all_subjects = ["RSVP_Color116msVPgcd.mat",
    #                 "RSVP_Color116msVPgcc.mat",
    #                 "RSVP_Color116msVPpia.mat",
    #                 "RSVP_Color116msVPgcb.mat",
    #                 "RSVP_Color116msVPgcf.mat",
    #                 "RSVP_Color116msVPgcg.mat",
    #                 "RSVP_Color116msVPgch.mat",
    #                 # "RSVP_Color116msVPiay.mat",
    #                 "RSVP_Color116msVPicn.mat",
    #                 "RSVP_Color116msVPicr.mat",
    #                 "RSVP_Color116msVPfat.mat",
    #
    #                 ];
    #
    #
    # all_subjects = [
    #     "RSVP_Color116msVPiay.mat",
    # ];

    parser = argparse.ArgumentParser()
    parser.add_argument("-start_sub_idx", help="first sub",
                        type=int, default=0)
    parser.add_argument("-end_sub_idx", help="first sub",
                        type=int, default=len(all_subjects))
    # parser.add_argument("start_sub_idx", help="first sub",
    #                     type=int, default=len(all_subjects))
    # parser.add_argument("last_sub_idx", help="last sub",
    #                 type=int, default=len(all_subjects))

    args = parser.parse_args()
    start_idx = args.start_sub_idx
    end_idx = args.end_sub_idx

    train_data_all_subject = []
    test_data_all_subject = []

    train_tags_all_subject = []
    test_tags_all_subject = []

    for experiment_counter, subject in enumerate(all_subjects[start_idx:end_idx]):
        print "start subject:{}".format(subject)

        file_name = os.path.join(data_base_dir, subject)
        all_data_per_char, target_per_char, train_mode_per_block, all_data_per_char_as_matrix, target_per_char_as_matrix = create_data_rep_training(
            file_name, -200, 800, downsampe_params=8)



        # build time-shifted ("noise") epochs in 40 ms steps (same idea as the noise_shifts loop above)
        for noise_jump in range(-40 * 3, 40 * 3, 40):
            all_data_per_char_noise, _, _, all_data_per_char_as_matrix_noise, _ = create_data_rep_training(
                file_name, -200 + noise_jump, 800 + noise_jump, downsampe_params=8)

        for rep_per_sub, cross_validation_indexes in enumerate(
                list(cross_validation.KFold(len(train_mode_per_block) / 10, n_folds=4,
                                            random_state=42, shuffle=True))):
            batch_size = 20
            select = 1
            train_as_p300 = False
            train_indexes = train_mode_per_block == 1
            validation_indexes = train_mode_per_block == 2
            test_indexes = train_mode_per_block != 1

            if train_as_p300:

                data_generator_batch = triplet_data_generator_no_dict(all_data_per_char_as_matrix[train_indexes],
                                                                      target_per_char_as_matrix[train_indexes],
                                                                      batch_size=batch_size, select=select,
                                                                      debug_mode=False)
            else:
                # cross_validation_indexes = list(cross_validation.KFold(len(train_mode_per_block)/10, n_folds=4,
                #                                                               random_state=42, shuffle=True))

                def flatten_repetitions(data_to_flatten):
                    return np.reshape(np.reshape(data_to_flatten.T * 10, (-1, 1)) + np.arange(10), (-1))


                train_indexes = flatten_repetitions(cross_validation_indexes[0])
                test_indexes = flatten_repetitions(cross_validation_indexes[1])

                # data_generator_batch = simple_data_generator_no_dict(all_data_per_char_as_matrix[train_indexes],
                #                                                   target_per_char_as_matrix[train_indexes], shuffle_data=False)
                #
                # test_data_generator_batch = simple_data_generator_no_dict(all_data_per_char_as_matrix[train_indexes],
                #                                                      target_per_char_as_matrix[train_indexes],
                #                                                      shuffle_data=False)

                train_data_all_subject.append(np.asarray(all_data_per_char_as_matrix[train_indexes]).astype(np.float32))
                test_data_all_subject.append(np.asarray(all_data_per_char_as_matrix[test_indexes]).astype(np.float32))

                train_tags_all_subject.append(target_per_char_as_matrix[train_indexes])
                test_tags_all_subject.append(target_per_char_as_matrix[test_indexes])

            break
    eeg_sample_shape = (25, 55)
    only_p300_model_1 = get_only_P300_model_LSTM_CNN(eeg_sample_shape)
    only_p300_model_1.summary()

    from keras.optimizers import RMSprop

    only_p300_model_1.compile(optimizer=RMSprop(), loss='binary_crossentropy', metrics=['accuracy'], )
    model = only_p300_model_1
    print "after compile"

    # model = LDA()
    train_data = stats.zscore(np.vstack(train_data_all_subject), axis=1)
    train_tags = np.vstack(train_tags_all_subject).flatten()

    test_data = stats.zscore(np.vstack(test_data_all_subject), axis=1)
    test_tags = np.vstack(test_tags_all_subject).flatten()

    for i in range(1):
        model.fit(train_data.reshape(train_data.shape[0] * train_data.shape[1],
                                     train_data.shape[2], train_data.shape[3]), train_tags,
                  verbose=1, nb_epoch=30, batch_size=600, shuffle=True)

        accuracy_test, auc_score_test = predict_using_model(model,
                                                            test_data.reshape(test_data.shape[0] * test_data.shape[1],
                                                                              test_data.shape[2], test_data.shape[3]),
                                                            test_tags)

        print "accuracy_test {}:{}, auc_score_test:{} ".format(i, accuracy_test, auc_score_test)

        accuracy_train, auc_score_train = predict_using_model(model,
                                                              train_data.reshape(train_data.shape[0] * train_data.shape[1],
                                                                                 train_data.shape[2], train_data.shape[3]),
                                                              train_tags)

        print "accuracy_train {}:{}, auc_score_train:{} ".format(i, accuracy_train, auc_score_train)

    model.optimizer.lr.set_value(0.0001)
    for i in range(1):
        model.fit(train_data.reshape(train_data.shape[0] * train_data.shape[1],
                                     train_data.shape[2], train_data.shape[3]), train_tags,
                  verbose=1, nb_epoch=30, batch_size=600, shuffle=True)

        accuracy_test, auc_score_test = predict_using_model(model,
                                                            test_data.reshape(test_data.shape[0] * test_data.shape[1],
                                                                              test_data.shape[2], test_data.shape[3]),
                                                            test_tags)

        print "accuracy_test {}:{}, auc_score_test:{} ".format(i, accuracy_test, auc_score_test)

        accuracy_train, auc_score_train = predict_using_model(model,
                                                              train_data.reshape(
                                                                  train_data.shape[0] * train_data.shape[1],
                                                                  train_data.shape[2], train_data.shape[3]), train_tags)

        print "accuracy_train {}:{}, auc_score_train:{} ".format(i, accuracy_train, auc_score_train)

    model.save(r"c:\temp\{}.h5".format(model_file_name,overwrite=True))
    print "end ----------{}-------".format(file_name)

    pass

    # from keras.models import Sequential
    #
    # from keras.layers import merge, Input, Dense, Flatten, Activation, Lambda, LSTM, noise
    #
    # eeg_sample_shape = (25, 55)
    # only_p300_model_1 = get_only_P300_model_LSTM(eeg_sample_shape)
    #
    # use_p300net = False
    # if use_p300net:
    #     model = get_P300_model(only_p300_model_1, select=select)
    # else:
    #
    #     only_p300_model_1.compile(optimizer = 'rmsprop',loss = 'binary_crossentropy', metrics=['accuracy'], )
    #     model= only_p300_model_1
    # print "after compile"
    #
    #
    #
    #
    # test_tags = target_per_char_as_matrix[test_indexes]
    #
    # test_data = all_data_per_char_as_matrix[test_indexes].reshape(-1,all_data_per_char_as_matrix.shape[2],all_data_per_char_as_matrix.shape[3])
    #
    # validation_tags = target_per_char_as_matrix[validation_indexes]
    # vaidation_data = all_data_per_char_as_matrix[validation_indexes].reshape(-1,all_data_per_char_as_matrix.shape[2],all_data_per_char_as_matrix.shape[3])
    #
    #
    # train_for_inspecting_tag = target_per_char_as_matrix[train_indexes]
    # train_for_inspecting_data = all_data_per_char_as_matrix[train_indexes].reshape(-1,
    #                                                                            all_data_per_char_as_matrix.shape[2],
    #                                                                            all_data_per_char_as_matrix.shape[3])


    # np.save(os.path.join(experiments_dir, RESULTS_DIR,
    #                          subject[-7:-4] + "test_data_{}_".format(rep_per_sub) + ".npy"),test_data)
    #
    # np.save(os.path.join(experiments_dir, RESULTS_DIR,
    #                      subject[-7:-4] + "train_for_inspecting_data_{}_".format(rep_per_sub) + ".npy"), train_for_inspecting_data)
    #
    # np.save(os.path.join(experiments_dir, RESULTS_DIR,
    #                      subject[-7:-4] + "train_for_inspecting_tag_{}_".format(rep_per_sub) + ".npy"),
    #         train_for_inspecting_tag)
    #
    # np.save(os.path.join(experiments_dir, RESULTS_DIR,
    #                      subject[-7:-4] + "test_tags_{}_".format(rep_per_sub) + ".npy"),
    #         test_tags)



    # class LossHistory(keras.callbacks.Callback):
    #
    #     def on_epoch_end(self, epoch, logs={}):
    #         from sklearn.metrics import roc_auc_score
    #         if epoch  in  [0,11, 12, 13]:
    #             save_path = os.path.join(experiments_dir, RESULTS_DIR,
    #                          subject[-7:-4] + "weight_{}_{}_".format(rep_per_sub, epoch) + ".h5")
    #             self.model.save(save_path)
    #             # os.path.join( experiments_dir, RESULTS_DIR, subject[-7:-4] + "_{}_".format(rep_per_sub) + ".npy")
    #             # self.save('')
    #             all_prediction_P300Net = model.predict(stats.zscore(test_data, axis=1).astype(np.float32))
    #             actual = np.argmax(np.mean(all_prediction_P300Net.reshape((-1, 10, 30)), axis=1), axis=1);
    #             gt = np.argmax(np.mean(test_tags.reshape((-1, 10, 30)), axis=1), axis=1)
    #             tests_accuracy = np.sum(actual == gt) / float(len(gt))
    #             auc_score_test = roc_auc_score(test_tags.flatten(), all_prediction_P300Net)
    #
    #
    #             # all_prediction_P300Net = model.predict(stats.zscore(vaidation_data, axis=1).astype(np.float32))
    #             # actual = np.argmax(np.mean(all_prediction_P300Net.reshape((-1, 10, 30)), axis=1), axis=1);
    #             # gt = np.argmax(np.mean(validation_tags.reshape((-1, 10, 30)), axis=1), axis=1)
    #             # validation_accuracy = np.sum(actual == gt) / float(len(gt))
    #             # auc_score_validation = roc_auc_score(validation_tags.flatten(), all_prediction_P300Net)
    #
    #             all_prediction_P300Net = model.predict(stats.zscore(train_for_inspecting_data, axis=1).astype(np.float32))
    #             actual = np.argmax(np.mean(all_prediction_P300Net.reshape((-1, 10, 30)), axis=1), axis=1);
    #             gt = np.argmax(np.mean(train_for_inspecting_tag.reshape((-1, 10, 30)), axis=1), axis=1)
    #             train_accuracy = np.sum(actual == gt) / float(len(gt))
    #             auc_score_train = roc_auc_score(train_for_inspecting_tag.flatten(), all_prediction_P300Net)
    #             from keras.callbacks import ModelCheckpoint
    #
    #             logs['tests_accuracy'] = tests_accuracy
    #             logs['accuracy_train'] = train_accuracy
    #             logs['validation_accuracy'] = train_accuracy
    #             logs['auc_score_train'] = auc_score_train
    #             logs['auc_score_test'] = auc_score_test
    #             logs['auc_score_validation'] = auc_score_test
    #             logs['subject'] = subject
    #
    #
    #
    #
    #             print "\n*{} epoch:{} mid****accuracy*: {} *accuracy_train*:{}  auc_score_train:{} auc_score_test:{} \n"\
    #                 .format(subject,epoch,tests_accuracy,train_accuracy, auc_score_train, auc_score_test)
    #
    #
    #
    # history = LossHistory()
    #
    # # model.fit_generator(data_generator_batch, 7200, 20, callbacks=[history],nb_worker=1,max_q_size=1)
    #
    # use_generator = False
    # if use_generator:
    #     log_history = model.fit_generator(data_generator_batch, 7200, 20, callbacks=[history], nb_worker=1, max_q_size=1)
    # else:
    #     samples_weight = np.ones_like(data_generator_batch[1])
    #     samples_weight[samples_weight  == 1] = 30
    #     log_history = model.fit(data_generator_batch[0], data_generator_batch[1], nb_epoch=21, batch_size=900,verbose=1,
    #                             callbacks=[history], shuffle=False, validation_split=0.1,sample_weight=samples_weight)
    #
    # results_directory =os.path.join(experiments_dir, RESULTS_DIR)
    # if not os.path.exists(results_directory):
    #     os.makedirs(results_directory)
    #
    # #np.save(os.path.join(experiments_dir, RESULTS_DIR, subject[-7:-4]+"_{}_".format(rep_per_sub)+".npy"), log_history.history)
    #
    # all_prediction_P300Net = model.predict(stats.zscore(test_data,axis=1).astype(np.float32))
    # import theano
    # import theano.tensor as T
    #
    # x = T.dmatrix('x')
    # softmax_res_func = theano.function([x], T.nnet.softmax(x))
    #
    #
    # actual = np.argmax(np.mean(all_prediction_P300Net.reshape((-1, 10, 30)), axis=1), axis=1);
    # gt = np.argmax(np.mean(test_tags.reshape((-1, 10, 30)), axis=1), axis=1)
    # accuracy = np.sum(actual == gt) / float(len(gt))
    # print "subject:{},  accuracy: {}".format(subject, accuracy)
    # break

    # count False positive





    # print ("temp")


if __name__ == "__main__":
    all_subjects = ["RSVP_Color116msVPgcd.mat",
                    "RSVP_Color116msVPgcc.mat",
                    "RSVP_Color116msVPpia.mat",
                     "RSVP_Color116msVPgcb.mat",
                    "RSVP_Color116msVPgcf.mat",
                    "RSVP_Color116msVPgcg.mat",
                    "RSVP_Color116msVPgch.mat",
                    "RSVP_Color116msVPiay.mat",
                    "RSVP_Color116msVPicn.mat",
                    "RSVP_Color116msVPicr.mat",
                    "RSVP_Color116msVPfat.mat",
                    ];

    for left_out_subject in all_subjects[3:]:
        training_set = list(set(all_subjects).difference(set([left_out_subject])))
        model_file_name = os.path.basename(left_out_subject).split(".")[0]
        train_on_subjset(training_set, model_file_name)
        print "stam"