Example #1
# Assumed imports; `utils` and `CORAL` are project-local modules from the original repo.
import random
import time

import numpy as np
from sklearn.utils import shuffle

import utils
from CORAL import CORAL


def testCORAL(dataset_name='seed4', FOIT_type='cross-all'):
    data, label = utils.load_source_data(dataset_name=dataset_name,
                                         FOIT_type=FOIT_type)
    if dataset_name not in ('seed3', 'seed4'):
        raise ValueError('Wrong dataset_name: ' + str(dataset_name))
    cd_count = 16 if dataset_name == 'seed4' else 9
    iteration_number = 3 if FOIT_type == 'cross-subject' else 15
    accs = []
    times = []
    for ite in range(iteration_number):
        session_id = -1
        sub_id = -1
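        # choose which session and subject supply the labelled (cd) and test (ud) target data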
        if FOIT_type == 'cross-subject':
            session_id = ite
            sub_id = 14
        elif FOIT_type == 'cross-session':
            session_id = 2
            sub_id = ite
        elif FOIT_type == 'cross-all':
            session_id = 1
            sub_id = ite
        else:
            raise ValueError('Wrong FOIT type: ' + str(FOIT_type))
        cd_data, cd_label, ud_data, ud_label = utils.pick_one_data(
            dataset_name,
            session_id=session_id,
            cd_count=cd_count,
            sub_id=sub_id)
        cd_data, cd_label = shuffle(cd_data, cd_label, random_state=0)
        ud_data, ud_label = shuffle(ud_data, ud_label, random_state=0)
        # cd_data_min, cd_data_max = np.min(cd_data), np.max(cd_data)
        cd_data = utils.normalization(cd_data)  # labelled data
        ud_data = utils.normalization(ud_data)  # test data
        if FOIT_type == 'cross-all':
            data_ite, label_ite = data.copy(), label.copy()
            for i in range(len(data)):
                data_ite[i], label_ite[i] = shuffle(data_ite[i],
                                                    label_ite[i],
                                                    random_state=0)
            # data_ite, label_ite = shuffle(data, label, random_state=0)
            for i in range(len(data)):
                data_ite[i] = utils.normalization(data_ite[i])
            # data_ite = utils.normalization(data_ite)
        elif FOIT_type == 'cross-session':
            data_ite, label_ite = data[ite], label[ite]
            for i in range(len(data_ite)):
                data_ite[i], label_ite[i] = shuffle(data_ite[i],
                                                    label_ite[i],
                                                    random_state=0)
                data_ite[i] = utils.normalization(data_ite[i])
                # data_ite[i] = utils.norm_with_range(data_ite[i], cd_data_min, cd_data_max)
            # data_ite = utils.normalization(data_ite)
        else:
            data_ite, label_ite = data[ite], label[ite]
            for i in range(len(data_ite)):
                data_ite[i], label_ite[i] = shuffle(data_ite[i],
                                                    label_ite[i],
                                                    random_state=0)
            # data_ite, label_ite = shuffle(data_ite, label_ite, random_state=0)
            for i in range(len(data_ite)):
                data_ite[i] = utils.normalization(data_ite[i])
                # data_ite[i] = utils.norm_with_range(data_ite[i], cd_data_min, cd_data_max)
        s_data_all, s_label_all = utils.stack_list(data_ite, label_ite)
        number_of_data = s_label_all.shape[0]
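        # randomly subsample the pooled source data (presumably to bound the CORAL computation)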
        temp_array = list(range(number_of_data))
        number_to_sample = 1500 if len(temp_array) < 2000 else 2500
        # number_to_sample = 1000
        temp_index = random.sample(temp_array, number_to_sample)
        new_data_all = np.array([s_data_all[i] for i in temp_index])
        new_label_all = np.array([s_label_all[i] for i in temp_index])

        start_time = time.time()
        coral = CORAL()
        acc, ypre = coral.fit_predict(new_data_all, new_label_all.squeeze(),
                                      cd_data, cd_label.squeeze(), ud_data,
                                      ud_label.squeeze())
        coral_time = time.time() - start_time
        times.append(coral_time)
        accs.append(acc)
    print("Time: ", np.mean(times))
    print("Accs: ", np.mean(accs), np.std(accs))
Example #2
# Assumed imports; `utils` is a project-local module.
import time

import numpy as np
from sklearn import svm
from sklearn.calibration import CalibratedClassifierCV
from sklearn.utils import shuffle

import utils


def al(dataset_name='seed4', FOIT_type='cross-all', rounds=10, batch_size=50):
    data, label = utils.load_source_data(dataset_name=dataset_name,
                                         FOIT_type=FOIT_type)
    _, number_label, _ = utils.get_number_of_label_n_trial(dataset_name)
    # data, label = utils.load_session_data_label(dataset_name, 0) # as unlabelled data

    if dataset_name not in ('seed3', 'seed4'):
        raise ValueError('Wrong dataset_name: ' + str(dataset_name))
    cd_count = 16 if dataset_name == 'seed4' else 9
    iteration_number = 3 if FOIT_type == 'cross-subject' else 15
    accs = [[] for _ in range(iteration_number)]
    times = [[] for _ in range(iteration_number)]

    for ite in range(iteration_number):
        session_id = -1
        sub_id = -1
        if FOIT_type == 'cross-subject':
            session_id = ite
            sub_id = 14
        elif FOIT_type == 'cross-session':
            session_id = 2
            sub_id = ite
        elif FOIT_type == 'cross-all':
            session_id = 1
            sub_id = ite
        else:
            raise ValueError('Wrong FOIT type: ' + str(FOIT_type))
        # print("Ite: ", ite)
        cd_data, cd_label, ud_data, ud_label = utils.pick_one_data(
            dataset_name,
            session_id=session_id,
            cd_count=cd_count,
            sub_id=sub_id)
        cd_data, cd_label = shuffle(cd_data, cd_label, random_state=0)
        ud_data, ud_label = shuffle(ud_data, ud_label, random_state=0)
        cd_data_min, cd_data_max = np.min(cd_data), np.max(cd_data)
        cd_data = utils.normalization(cd_data)  # labelled data
        ud_data = utils.normalization(ud_data)  # test data
        if FOIT_type == 'cross-all':
            data_ite, label_ite = data.copy(), label.copy()
            for i in range(len(data)):
                data_ite[i], label_ite[i] = shuffle(data_ite[i],
                                                    label_ite[i],
                                                    random_state=0)
            # data_ite, label_ite = shuffle(data, label, random_state=0)
            for i in range(len(data)):
                data_ite[i] = utils.norm_with_range(data_ite[i], cd_data_min,
                                                    cd_data_max)
            # data_ite = utils.normalization(data_ite)
        elif FOIT_type == 'cross-session':
            data_ite, label_ite = data[ite], label[ite]
            for i in range(len(data_ite)):
                data_ite[i], label_ite[i] = shuffle(data_ite[i],
                                                    label_ite[i],
                                                    random_state=0)
                # data_ite[i] = utils.normalization(data_ite[i])
                data_ite[i] = utils.norm_with_range(data_ite[i], cd_data_min,
                                                    cd_data_max)
            # data_ite = utils.normalization(data_ite)
        else:
            data_ite, label_ite = data[ite], label[ite]
            for i in range(len(data_ite)):
                data_ite[i], label_ite[i] = shuffle(data_ite[i],
                                                    label_ite[i],
                                                    random_state=0)
            # data_ite, label_ite = shuffle(data_ite, label_ite, random_state=0)
            for i in range(len(data_ite)):
                # data_ite[i] = utils.normalization(data_ite[i])
                data_ite[i] = utils.norm_with_range(data_ite[i], cd_data_min,
                                                    cd_data_max)
        # data_ite, label_ite = data.copy(), label.copy()
        # for i in range(len(data)):
        #     data_ite[i], label_ite[i] = shuffle(data_ite[i], label_ite[i], random_state=0)
        # for i in range(len(data)):
        #     data_ite[i] = utils.norm_with_range(data_ite[i], cd_data_min, cd_data_max)

        # baseline
        clf = svm.LinearSVC(max_iter=30000)
        clf = CalibratedClassifierCV(clf, cv=5)
        since = time.time()
        clf.fit(cd_data, cd_label.squeeze())
        time_baseline = time.time() - since
        scoreA = utils.test(clf, ud_data, ud_label.squeeze())
        accs[ite].append(scoreA)
        times[ite].append(time_baseline)

        # select the data from the reservoir iteratively
        s_data_all, s_label_all = utils.stack_list(data_ite, label_ite)
        L_S_data = None
        L_S_label = None
        for r in range(rounds):
            # print("Rounds: ", r)
            # print(type(s_data_all))
            # print(s_data_all.shape)
            s_data_all_predict_proba = clf.predict_proba(s_data_all)
            s_label_all_proba = utils.get_one_hot(s_label_all.squeeze(),
                                                  number_label)
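            # confidence[i] = the classifier's predicted probability for sample i's true class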
            confidence = np.zeros((s_label_all_proba.shape[0], 1))
            for i in range(s_label_all_proba.shape[0]):
                confidence[i] = s_label_all_proba[i].dot(
                    s_data_all_predict_proba[i].T)
                # confidence[i] = log_loss(s_label_all_proba[i], s_data_all_predict_proba[i])
            indices = np.argsort(confidence,
                                 axis=0)  # take the minimum topK indices
            topK_indices = indices[:batch_size]
            S_data = None
            S_label = None
            for i in topK_indices:
                one_data = s_data_all[i]
                one_label = s_label_all[i]
                if S_data is not None:
                    S_data = np.vstack((S_data, one_data))
                    S_label = np.vstack((S_label, one_label))
                else:
                    S_data = one_data
                    S_label = one_label
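            # drop the selected samples from the pool, iterating backwards so indices stay valid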
            for i in range(len(s_data_all) - 1, -1, -1):
                if i in topK_indices:
                    s_data_all = np.delete(s_data_all, i, axis=0)
                    s_label_all = np.delete(s_label_all, i, axis=0)
            if L_S_data is None:
                L_S_data = cd_data.copy()
                L_S_label = cd_label.copy()
            L_S_data = np.vstack((L_S_data, S_data))
            L_S_label = np.vstack((L_S_label, S_label))
            L_S_data, L_S_label = shuffle(L_S_data, L_S_label, random_state=0)
            clf.fit(L_S_data, L_S_label.squeeze())
            time_updated_time = time.time() - since
            times[ite].append(time_updated_time)
            scoreTMP = utils.test(clf, ud_data, ud_label.squeeze())
            accs[ite].append(scoreTMP)
    ResultTime = []
    ResultAcc = []
    ResultStd = []
    for i in range(rounds + 1):
        tmpTime = []
        tmpAcc = []
        for j in range(iteration_number):
            tmpTime.append(times[j][i])
            tmpAcc.append(accs[j][i])
        ResultTime.append(np.mean(tmpTime))
        ResultAcc.append(np.mean(tmpAcc))
        ResultStd.append(np.std(tmpAcc))
    print("Time: ", ResultTime)
    print("Accs: ", ResultAcc)
    print("Stds: ", ResultStd)
Example #3
# Assumed imports; `utils` is a project-local module.
from sklearn.utils import shuffle

import utils

dataset_name = 'seed4'
if dataset_name == 'seed3':
    cd_count = 9
elif dataset_name == 'seed4':
    cd_count = 16
else:
    raise ValueError("Unexpected dataset name!")
number_trial, number_label, labels = utils.get_number_of_label_n_trial(
    dataset_name)
subs_data, subs_label = utils.load_session_data_label(dataset_name, 0)

for sub_number in range(15):
    print("Sub id: ", sub_number)
    session_id = 1
    cd_data, cd_label, ud_data, ud_label = utils.pick_one_data(
        dataset_name, session_id, cd_count, sub_id=sub_number)

    subs_data, subs_label = shuffle(subs_data, subs_label, random_state=0)
    # subs_data = utils.normalization(subs_data)
    ## Currently everything is normalized at once; try normalizing each subject separately (done in the loop below).
    for i in range(len(subs_data)):
        subs_data[i] = utils.normalization(subs_data[i])

    cd_data, cd_label = shuffle(cd_data, cd_label, random_state=0)
    cd_data = utils.normalization(cd_data)
    ud_data, ud_label = shuffle(ud_data, ud_label, random_state=0)
    ud_data = utils.normalization(ud_data)
    '''
    a)
    '''
    # clf = svm.LinearSVC(max_iter=10000)
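
utils.normalization and utils.norm_with_range are project-local and not shown. Judging from their call sites across these examples, plausible min-max implementations would be (an assumption, not the repo's code):

import numpy as np

def normalization(data):
    """Min-max scale `data` to [0, 1] using its own extrema (assumed behavior)."""
    d_min, d_max = np.min(data), np.max(data)
    return (data - d_min) / (d_max - d_min)

def norm_with_range(data, d_min, d_max):
    """Min-max scale `data` with externally supplied extrema, e.g. those of cd_data."""
    return (data - d_min) / (d_max - d_min)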
Example #4
# Assumed imports; `utils` is a project-local module.
import time

from sklearn import svm
from sklearn.calibration import CalibratedClassifierCV
from sklearn.utils import shuffle

import utils

dataset_name = 'seed4'
if dataset_name == 'seed3':
    cd_count = 9
elif dataset_name == 'seed4':
    cd_count = 16
else:
    raise ValueError("Unexpected dataset name!")
number_trial, number_label, labels = utils.get_number_of_label_n_trial(
    dataset_name)

sub_data, sub_label = utils.load_by_session(dataset_name)  # 3*14*(m*310)

for ses_number in range(3):
    print("Session id: ", ses_number)
    # cross-subject: take subject 15 (sub_id=14) as the target
    cd_data, cd_label, ud_data, ud_label = utils.pick_one_data(dataset_name,
                                                               ses_number,
                                                               cd_count,
                                                               sub_id=14)
    sub_data_ses, sub_label_ses = sub_data[ses_number], sub_label[
        ses_number]  # 14*(m*310)
    sub_data_ses, sub_label_ses = shuffle(sub_data_ses,
                                          sub_label_ses,
                                          random_state=0)
    cd_data, cd_label = shuffle(cd_data, cd_label, random_state=0)
    ud_data, ud_label = shuffle(ud_data, ud_label, random_state=0)
    '''
    a)
    '''
    clf = svm.LinearSVC(max_iter=10000)
    # clf = LogisticRegression(max_iter=10000)
    clf = CalibratedClassifierCV(clf, cv=5)
    since = time.time()
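    # Plausible continuation, mirroring the baseline block in Example #2 (not the original code):
    clf.fit(cd_data, cd_label.squeeze())
    time_baseline = time.time() - since
    scoreA = utils.test(clf, ud_data, ud_label.squeeze())
    print("Baseline acc: {:.4f}, time: {:.2f}s".format(scoreA, time_baseline))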
Example #5
# Assumed imports; `utils` is a project-local module.
import time

import joblib
import numpy as np
from sklearn import svm
from sklearn.calibration import CalibratedClassifierCV
from sklearn.linear_model import LogisticRegression
from sklearn.utils import shuffle

import utils


def FOIT(dataset_name='seed4',
         rho=1,
         clf_name='lr',
         threshold=0.6,
         with_balance=True,
         FOIT_type='cross-all'):
    # time and accs
    c0_acc = []
    c0u_acc = []
    foit_acc = []
    time_c0 = []
    time_c0u = []
    time_foit = []

    if dataset_name not in ('seed3', 'seed4'):
        raise ValueError('Wrong dataset_name: ' + str(dataset_name))
    cd_count = 16 if dataset_name == 'seed4' else 9
    iteration_number = 3 if FOIT_type == 'cross-subject' else 15

    number_trial, number_label, labels = utils.get_number_of_label_n_trial(
        dataset_name)
    data, label = utils.load_source_data(dataset_name=dataset_name,
                                         FOIT_type=FOIT_type)

    for ite in range(iteration_number):
        # cross-subject: subject 15's data is the cd/ud data for each session (iteration_number = 3)
        # cross-session: each subject's 3rd-session data is the cd/ud data (iteration_number = 15)
        # cross-all: for each iteration, one subject's session-2 data is the cd/ud data (iteration_number = 15)
        # print("Iteration: {}".format(ite))
        '''
        Parameters
        '''
        session_id = -1
        sub_id = -1
        accs_number = -1
        if FOIT_type == 'cross-subject':
            session_id = ite
            sub_id = 14
            accs_number = 14
        elif FOIT_type == 'cross-session':
            session_id = 2
            sub_id = ite
            accs_number = 2
        elif FOIT_type == 'cross-all':
            session_id = 1
            sub_id = ite
            accs_number = 15
        else:
            raise ValueError('Wrong FOIT type: ' + str(FOIT_type))
        '''
        Data
        '''
        # data
        cd_data, cd_label, ud_data, ud_label = utils.pick_one_data(
            dataset_name,
            session_id=session_id,
            cd_count=cd_count,
            sub_id=sub_id)
        cd_data, cd_label = shuffle(cd_data, cd_label, random_state=0)
        ud_data, ud_label = shuffle(ud_data, ud_label, random_state=0)
        cd_data_min, cd_data_max = np.min(cd_data), np.max(cd_data)
        cd_data = utils.normalization(cd_data)
        ud_data = utils.normalization(ud_data)
        if FOIT_type == 'cross-all':
            data_ite, label_ite = data.copy(), label.copy()
            for i in range(len(data)):
                data_ite[i], label_ite[i] = shuffle(data_ite[i],
                                                    label_ite[i],
                                                    random_state=0)
            # data_ite, label_ite = shuffle(data, label, random_state=0)
            for i in range(len(data)):
                data_ite[i] = utils.norm_with_range(data_ite[i], cd_data_min,
                                                    cd_data_max)
            # data_ite = utils.normalization(data_ite)
        elif FOIT_type == 'cross-session':
            data_ite, label_ite = data[ite], label[ite]
            for i in range(len(data_ite)):
                data_ite[i], label_ite[i] = shuffle(data_ite[i],
                                                    label_ite[i],
                                                    random_state=0)
                # data_ite[i] = utils.normalization(data_ite[i])
                data_ite[i] = utils.norm_with_range(data_ite[i], cd_data_min,
                                                    cd_data_max)
            # data_ite = utils.normalization(data_ite)
        else:
            data_ite, label_ite = data[ite], label[ite]
            for i in range(len(data_ite)):
                data_ite[i], label_ite[i] = shuffle(data_ite[i],
                                                    label_ite[i],
                                                    random_state=0)
            # data_ite, label_ite = shuffle(data_ite, label_ite, random_state=0)
            for i in range(len(data_ite)):
                # data_ite[i] = utils.normalization(data_ite[i])
                data_ite[i] = utils.norm_with_range(data_ite[i], cd_data_min,
                                                    cd_data_max)
            # data_ite = utils.normalization(data_ite)
        # print(len(data_ite), len(label_ite[0]), len(label_ite[0][0]))
        '''
        A)
        '''
        if clf_name == 'svm':
            clf = svm.LinearSVC(max_iter=30000)
            # clf = svm.SVC(probability=True, max_iter=10000)
        elif clf_name == 'lr':
            clf = LogisticRegression(max_iter=30000)
        else:
            print('Unexpected clf name; falling back to LR as the baseline.')
            clf = LogisticRegression(max_iter=30000)
        clf = CalibratedClassifierCV(clf, cv=5)
        since = time.time()
        clf.fit(cd_data, cd_label.squeeze())
        time_baseline = time.time() - since
        # print('Baseline training complete in {:.4f}'.format(time_baseline))
        scoreA = utils.test(clf, ud_data, ud_label.squeeze())
        # print('Baseline score: {}'.format(scoreA))
        time_c0.append(time_baseline)
        c0_acc.append(scoreA)
        '''
        B)
        '''
        accs = []
        clf_sources = []
        for i in range(accs_number):
            if FOIT_type == 'cross-subject':
                path = 'models/' + dataset_name + '/csu/sesn' + str(
                    ite) + '/lr' + str(i) + '.m'
            elif FOIT_type == 'cross-session':
                path = 'models/' + dataset_name + '/csn/sub' + str(
                    ite) + '/lr' + str(i) + '.m'
            else:
                path = 'models/' + dataset_name + '/csun/lr' + str(i) + '.m'
            temp_clf = joblib.load(path)
            clf_sources.append(temp_clf)
            score = utils.test(temp_clf, ud_data, ud_label.squeeze())
            accs.append(score)
        if FOIT_type == 'cross-session':
            pass
        else:
            accs = utils.normalization(accs)
        # print('Accs of classifiers, normalized: {}'.format(accs))
        '''
        C)
        '''
        s_data_all, s_label_all = utils.stack_list(data_ite, label_ite)
        s_data_all_predict_proba = clf.predict_proba(s_data_all)
        s_label_all_proba = utils.get_one_hot(s_label_all.squeeze(),
                                              number_label)
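        # As in Example #2: confidence[i] is the predicted probability for sample i's true class.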
        confidence = np.zeros((s_label_all_proba.shape[0], 1))
        for i in range(s_label_all_proba.shape[0]):
            confidence[i] = s_label_all_proba[i].dot(
                s_data_all_predict_proba[i].T)

        if with_balance:
            ## split the pool by class so each label contributes equally
            data_ite_divided = []
            label_ite_divided = []
            conf_divided = []
            for i in range(number_label):
                data_ite_divided.append([])
                label_ite_divided.append([])
                conf_divided.append([])
            for i in range(len(s_data_all)):
                temp_label = s_label_all[i][0]
                data_ite_divided[temp_label].append(s_data_all[i])
                label_ite_divided[temp_label].append(s_label_all[i])
                conf_divided[temp_label].append(confidence[i])
            indices = []
            for i in range(number_label):
                indices.append(np.argsort(conf_divided[i], axis=0)[::-1])
            topK_indices = [
                indices[i][:int(rho * len(cd_label) / number_label)]
                for i in range(len(indices))
            ]
            S_data = None
            S_label = None
            for i in range(len(topK_indices)):
                for j in topK_indices[i]:
                    # temp_conf = conf_divided[i][j[0]]
                    one_data = data_ite_divided[i][j[0]]
                    one_label = label_ite_divided[i][j[0]]
                    if S_data is not None:
                        S_data = np.vstack((S_data, one_data))
                        S_label = np.vstack((S_label, one_label))
                    else:
                        S_data = one_data
                        S_label = one_label
        elif with_balance is False:
            indices = np.argsort(confidence, axis=0)[::-1]
            topK_indices = indices[:int(rho * len(cd_label))]
            S_data = None
            S_label = None
            for i in topK_indices:
                one_data = s_data_all[i]
                one_label = s_label_all[i]
                if S_data is not None:
                    S_data = np.vstack((S_data, one_data))
                    S_label = np.vstack((S_label, one_label))
                else:
                    S_data = one_data
                    S_label = one_label
        else:
            raise ValueError('Unexpected value of with_balance!')
        '''
        D)
        '''
        L_S_data = cd_data.copy()
        L_S_label = cd_label.copy()
        L_S_data = np.vstack((L_S_data, S_data))
        L_S_label = np.vstack((L_S_label, S_label))
        L_S_data, L_S_label = shuffle(L_S_data, L_S_label, random_state=0)
        # L_S_data = utils.normalization(L_S_data) # to decide
        clf.fit(L_S_data, L_S_label.squeeze())
        time_updated_baseline = time.time() - since
        # print('Updated baseline training complete in {:.4f}s'.format(time_updated_baseline))
        time_c0u.append(time_updated_baseline)
        scoreD = utils.test(clf, ud_data, ud_label.squeeze())
        # print('Updated model score: {}'.format(scoreD))
        c0u_acc.append(scoreD)
        '''
        E)
        '''
        weight = (len(accs) + 1) / 2
        proba_result_all = clf.predict_proba(ud_data) * weight

        if FOIT_type == 'cross-session':
            weight_for_clfs = utils.decide_which_clf_to_use(scoreD, accs)
            for j in range(len(weight_for_clfs)):
                proba_result_all += clf_sources[j].predict_proba(
                    utils.normalization(ud_data)) * weight_for_clfs[j]
        else:
            for i in range(len(clf_sources)):
                if accs[i] > threshold:
                    proba_result_all += clf_sources[i].predict_proba(
                        ud_data) * accs[i]
        corrects = np.sum(
            np.argmax(proba_result_all, axis=1) == ud_label.squeeze())
        time_ensemble = time.time() - since
        time_foit.append(time_ensemble)
        scoreE = corrects / len(ud_label)
        # print('Ensembled model score: {}'.format(scoreE))
        foit_acc.append(scoreE)
    # print(c0_acc)
    # print(c0u_acc)
    # print(foit_acc)
    print('Mean acc and std of A: {} {}'.format(np.mean(c0_acc),
                                                np.std(c0_acc)))
    print('Mean acc and std of D: {} {}'.format(np.mean(c0u_acc),
                                                np.std(c0u_acc)))
    print('Mean acc and std of E: {} {}'.format(np.mean(foit_acc),
                                                np.std(foit_acc)))
    print("Time cost for training baseline: ", np.mean(time_c0))
    print("Time cost for training updated baseline: ", np.mean(time_c0u))
    print("Time cost for training FOIT: ", np.mean(time_foit))