Example #1
0
    def learn(self):
        """Run committee-based active learning with random pool sampling.

        Seeds the training set with one sample per grade class, trains a
        committee of ``self.learners``, then randomly queries the pool
        until ``self.percent`` percent of the data has been labelled.

        Returns:
            tuple: ``(accuracy_list, model_f1)`` — pool accuracy after each
            query, and the weighted F1 score on the remaining pool.
        """
        # seeding: first occurrence of every grade class, so the initial
        # training set covers all labels
        classes = self.short_df['grades_round'].unique()
        seed_index = [
            self.short_df['grades_round'][self.short_df['grades_round'] == cls].index[0]
            for cls in classes
        ]

        act_data = self.short_df.copy()
        accuracy_list = []

        # initialising the labelled training set
        train_idx = seed_index
        X_train = self.X[train_idx]
        y_train = self.Y[train_idx]

        # generating the unlabelled pool
        X_pool = np.delete(self.X, train_idx, axis=0)
        y_pool = np.delete(self.Y, train_idx)

        act_data = act_data.drop(axis=0, index=train_idx)
        act_data.reset_index(drop=True, inplace=True)

        initiated_committee = [
            ActiveLearner(estimator=model, X_training=X_train, y_training=y_train)
            for model in self.learners
        ]
        # committee creation
        committee = Committee(
            learner_list=initiated_committee,
            # query_strategy=vote_entropy_sampling
        )

        committee.teach(X_train, y_train)

        # pool-based sampling; fixed: was `len(X)` (undefined name) — self.X intended
        n_queries = int(len(self.X) / (100 / self.percent))
        for _ in range(n_queries):
            # random sampling baseline (no disagreement-based strategy)
            query_idx = np.random.choice(range(len(X_pool)))
            committee.teach(
                X=X_pool[query_idx].reshape(1, -1),
                y=y_pool[query_idx].reshape(1, )
            )

            # remove queried instance from pool
            X_pool = np.delete(X_pool, query_idx, axis=0)
            y_pool = np.delete(y_pool, query_idx)

            act_data = act_data.drop(axis=0, index=query_idx)
            act_data.reset_index(drop=True, inplace=True)

            accuracy_list.append(accuracy_score(committee.predict(X_pool), y_pool))
        print("By just labelling ", round(n_queries * 100.0 / len(self.X), 2),
              "% of total data accuracy of ",
              round(accuracy_score(committee.predict(X_pool), y_pool), 3),
              " % is achieved on the unseen data")
        model_pred = committee.predict(X_pool)
        model_f1 = f1_score(y_pool, model_pred, average='weighted')
        return accuracy_list, model_f1
class Committee2Level:
    """Two-stage committee classifier.

    Stage 1 (``osCommittee``) separates class 2 from everything else
    (all non-2 labels are collapsed to 1); stage 2 (``pnCommittee``)
    learns the fine-grained labels of the non-2 samples only.
    """

    def __init__(self, estimators_1, estimators_2, X, Y):
        """Train both committees from the initial labelled set (X, Y)."""
        indexs2 = [i for i in range(len(Y)) if Y[i] != 2]
        # stage-1 targets: 2 stays 2, everything else collapses to 1
        Y1 = np.array([label if label == 2 else 1 for label in Y])
        learners1 = [ActiveLearner(e, X_training=X, y_training=Y1)
                     for e in estimators_1]
        learners2 = [ActiveLearner(e, X_training=X[indexs2], y_training=Y[indexs2])
                     for e in estimators_2]
        self.osCommittee = Committee(learners1)
        self.pnCommittee = Committee(learners2)
        self.choice_indx = 0  # alternates which committee gets to query next

    def teach(self, X, Y):
        """Teach both committees; stage 2 only sees the non-2 samples."""
        indexs2 = [i for i in range(len(Y)) if Y[i] != 2]
        Y1 = np.array([label if label == 2 else 1 for label in Y])
        self.osCommittee.teach(X, Y1)
        if indexs2:
            self.pnCommittee.teach(X[indexs2], Y[indexs2])

    def predict(self, X):
        """Return 2 where stage 1 predicts 2, otherwise stage 2's label."""
        stage1 = self.osCommittee.predict(X)
        stage2 = self.pnCommittee.predict(X)
        return [p1 if p1 == 2 else p2 for p1, p2 in zip(stage1, stage2)]

    def query(self, X, Y):
        """Alternate queries between the two committees.

        Bug fix: indices returned by ``pnCommittee.query`` were relative
        to the non-2 subset, but callers index the full pool with them.
        They are now mapped back to positions in ``X`` via ``indexs2``.
        """
        indexs2 = np.array([i for i in range(len(Y)) if Y[i] != 2])
        q2 = ([], 2)
        if len(indexs2) > 0:
            sub_idx, sub_inst = self.pnCommittee.query(X[indexs2])
            q2 = (indexs2[sub_idx], sub_inst)  # subset indices -> pool indices
        alternatives = [self.osCommittee.query(X), q2]
        r = alternatives[self.choice_indx % 2]
        self.choice_indx += 1
        # fall back to the other committee when the chosen one has nothing
        if len(r[0]) == 0:
            return alternatives[self.choice_indx % 2]
        return r
Example #3
0
def run(X, y, n_samples_for_intial, n_queries, n_comittee_members, estimator):
    """Query-by-committee loop that trains until micro-F1 reaches 0.65.

    Args:
        X, y: full data set (X is expected to be a sparse CSR matrix,
            given the `delete_rows_csr` pool update — TODO confirm).
        n_samples_for_intial: size of the random initial training set.
        n_queries: upper bound on the number of queries. Fix: this
            parameter was previously accepted but ignored, leaving the
            loop unbounded (infinite when the F1 threshold is unreachable).
        n_comittee_members: number of learners in the committee.
        estimator: scikit-learn style estimator used for every member.

    Returns:
        int: number of queries actually issued.
    """
    # start timer
    start_time = time.time()

    X_train, y_train, X_pool, y_pool = create_random_pool_and_initial_sets(
        X, y, n_samples_for_intial)

    # committee of identically-configured learners
    learners = [
        ActiveLearner(estimator=estimator, X_training=X_train, y_training=y_train)
        for _ in range(n_comittee_members)
    ]

    committee = Committee(learner_list=learners,
                          query_strategy=max_disagreement_sampling)

    unqueried_score = committee.score(X, y)
    print('Score over unqueried samples {:0.4f}'.format(unqueried_score))

    performance_history = []

    # local renamed from `f1_score` to avoid shadowing the common
    # sklearn import name
    f1 = 0.0
    index = 0
    # stop at the target F1, but never exceed n_queries iterations
    while f1 < 0.65 and index < n_queries:
        index += 1

        # get the most disagreed-upon sample from the pool
        query_idx, query_instance = committee.query(X_pool)

        # retrain committee with the new sample
        committee.teach(
            X=X_pool[query_idx].reshape(1, -1),
            y=y_pool[query_idx].reshape(1, )
        )

        # remove queried instance from pool
        X_pool = delete_rows_csr(X_pool, query_idx)
        y_pool = np.delete(y_pool, query_idx)

        y_pred = committee.predict(X)
        f1 = metrics.f1_score(y, y_pred, average='micro')

        if index % 100 == 0:
            print('F1 score after {n} training samples: {f1:0.4f}'.format(n=index, f1=f1))

        # record the F1 after this query
        performance_history.append(f1)
    print("--- %s seconds ---" % (time.time() - start_time))

    print(performance_history)
    return index
# per-learner view of the committee's initial predictions
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(7 * n_members, 7))
    for member_no, member in enumerate(committee):
        plt.subplot(1, n_members, member_no + 1)
        plt.scatter(x=pca[:, 0], y=pca[:, 1],
                    c=member.predict(iris['data']),
                    cmap='viridis', s=50)
        plt.title('Learner no. %d initial predictions' % (member_no + 1))
    plt.show()

# visualizing the initial predictions
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(7, 7))
    prediction = committee.predict(iris['data'])
    plt.scatter(x=pca[:, 0], y=pca[:, 1], c=prediction, cmap='viridis', s=50)
    plt.title('Committee initial predictions, accuracy = %1.3f' %
              committee.score(iris['data'], iris['target']))
    plt.show()

    # query by committee
    n_queries = 10
    for _ in range(n_queries):
        query_idx, query_instance = committee.query(X_pool)
        committee.teach(X=X_pool[query_idx].reshape(1, -1),
                        y=y_pool[query_idx].reshape(1, ))
        # drop the labelled sample from the pool
        X_pool = np.delete(X_pool, query_idx, axis=0)
        y_pool = np.delete(y_pool, query_idx)
Example #5
0
# assembling the Committee
committee = Committee(learner_list)

# visualizing every learner in the Committee
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(n_learners * 7, 7))
    for member_no, member in enumerate(committee):
        plt.subplot(1, n_learners, member_no + 1)
        plt.imshow(member.predict(X_pool).reshape(im_height, im_width))
        plt.title('Learner no. %d' % (member_no + 1))
    plt.show()

# consensus prediction of the whole Committee
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(7, 7))
    plt.imshow(committee.predict(X_pool).reshape(im_height, im_width))
    plt.title('Committee consensus predictions')
    plt.show()

# rebagging the data
committee.rebag()

# visualizing the learners in the retrained Committee
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(n_learners * 7, 7))
    for member_no, member in enumerate(committee):
        plt.subplot(1, n_learners, member_no + 1)
        plt.imshow(member.predict(X_pool).reshape(im_height, im_width))
        plt.title('Learner no. %d after rebagging' % (member_no + 1))
    plt.show()
Example #6
0
    def learn(self):
        """Committee-based active learning with random pool sampling.

        Seeds the training set with one sample per grade class, then
        randomly queries the pool until ``self.percent`` percent of the
        data has been labelled, tracking three metrics per query.

        Returns:
            tuple: ``(accuracy_list, f1_total_list, kappa_total_list)``,
            each holding the metric computed on the remaining pool after
            every query.
        """
        # seeding: one sample per grade class so every label is present
        classes = self.short_df['grades_round'].unique()
        seed_index = [
            self.short_df['grades_round'][self.short_df['grades_round'] == cls].index[0]
            for cls in classes
        ]

        act_data = self.short_df.copy()
        accuracy_list = []
        f1_total_list = []
        kappa_total_list = []

        # initialising the labelled training set
        train_idx = seed_index
        X_train = self.X[train_idx]
        y_train = self.Y[train_idx]

        # generating the unlabelled pool
        X_pool = np.delete(self.X, train_idx, axis=0)
        y_pool = np.delete(self.Y, train_idx)

        act_data = act_data.drop(axis=0, index=train_idx)
        act_data.reset_index(drop=True, inplace=True)

        initiated_committee = [
            ActiveLearner(estimator=model, X_training=X_train, y_training=y_train)
            for model in self.learners
        ]
        # committee creation
        committee = Committee(
            learner_list=initiated_committee,
            # query_strategy=vote_entropy_sampling
        )

        committee.teach(X_train, y_train)

        # pool-based sampling; fixed: was `len(X)` (undefined name) — self.X intended
        n_queries = int(len(self.X) / (100 / self.percent))
        for _ in range(n_queries):
            query_idx = np.random.choice(range(len(X_pool)))
            committee.teach(X=X_pool[query_idx].reshape(1, -1),
                            y=y_pool[query_idx].reshape(1, ))

            # remove queried instance from pool
            X_pool = np.delete(X_pool, query_idx, axis=0)
            y_pool = np.delete(y_pool, query_idx)

            act_data = act_data.drop(axis=0, index=query_idx)
            act_data.reset_index(drop=True, inplace=True)

            # predict once per iteration and reuse for all three metrics
            model_pred = committee.predict(X_pool)
            accuracy_list.append(accuracy_score(model_pred, y_pool))
            f1_total_list.append(
                f1_score(y_pool,
                         model_pred,
                         average="weighted",
                         labels=np.unique(model_pred)))
            kappa_total_list.append(cohen_kappa_score(y_pool, model_pred))
        return accuracy_list, f1_total_list, kappa_total_list
# assembling the committee
committee = Committee(learner_list=learner_list)

# visualizing the initial predictions
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(7 * n_members, 7))
    for member_no, member in enumerate(committee):
        plt.subplot(1, n_members, member_no + 1)
        plt.scatter(x=pca[:, 0], y=pca[:, 1],
                    c=member.predict(iris['data']),
                    cmap='viridis', s=50)
        plt.title('Learner no. %d initial predictions' % (member_no + 1))
    plt.show()

# visualizing the Committee's predictions per learner
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(7, 7))
    prediction = committee.predict(iris['data'])
    plt.scatter(x=pca[:, 0], y=pca[:, 1], c=prediction, cmap='viridis', s=50)
    plt.title('Committee initial predictions')
    plt.show()

# query by committee
n_queries = 10
for _ in range(n_queries):
    query_idx, query_instance = committee.query(X_pool)
    committee.teach(X=X_pool[query_idx].reshape(1, -1),
                    y=y_pool[query_idx].reshape(1, ))
    # drop the queried sample from the pool
    X_pool = np.delete(X_pool, query_idx, axis=0)
    y_pool = np.delete(y_pool, query_idx)
# assembling the Committee
committee = Committee(learner_list)

# visualizing every learner in the Committee
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(n_learners * 7, 7))
    for i, lrn in enumerate(committee):
        plt.subplot(1, n_learners, i + 1)
        plt.imshow(lrn.predict(X_pool).reshape(im_height, im_width))
        plt.title('Learner no. %d' % (i + 1))
    plt.show()

# single consensus map for the whole Committee
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(7, 7))
    plt.imshow(committee.predict(X_pool).reshape(im_height, im_width))
    plt.title('Committee consensus predictions')
    plt.show()

# rebagging the data
committee.rebag()

# visualizing the learners in the retrained Committee
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(n_learners * 7, 7))
    for i, lrn in enumerate(committee):
        plt.subplot(1, n_learners, i + 1)
        plt.imshow(lrn.predict(X_pool).reshape(im_height, im_width))
        plt.title('Learner no. %d after rebagging' % (i + 1))
    plt.show()
Example #9
0
# per-learner scatter of the committee's predictions
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(7 * n_members, 7))
    for i, lrn in enumerate(committee):
        plt.subplot(1, n_members, i + 1)
        plt.scatter(x=pca[:, 0], y=pca[:, 1], c=lrn.predict(data),
                    cmap='viridis', s=50)
        plt.title('Learner no. %d initial predictions' % (i + 1))
    plt.show()

# visualizing the initial predictions
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(7, 7))
    prediction = committee.predict(data)
    plt.scatter(x=pca[:, 0], y=pca[:, 1], c=prediction, cmap='viridis', s=50)
    plt.title('Committee initial predictions, accuracy = %1.3f' %
              committee.score(data, target))
    plt.show()

# query by committee
n_queries = 10
for _ in range(n_queries):
    query_idx, query_instance = committee.query(X_pool)
    committee.teach(X=X_pool[query_idx].reshape(1, -1),
                    y=y_pool[query_idx].reshape(1, ))
    # remove queried instance from pool
    X_pool = np.delete(X_pool, query_idx, axis=0)
    y_pool = np.delete(y_pool, query_idx)
Example #10
0
        # NOTE(review): this fragment is the body of an enclosing loop (over `i`,
        # presumably a run index) whose header is outside this view — left
        # byte-identical; comments only.
        X_initial, y_initial, X_pool, y_pool = init_data_sampling(xtrain, ytrain)

        # creating learners. if committee flag == True then create a committee with a subset with the returned learners.
        # else: use only the first learner
        learner1, learner2, learner3 = create_learners(qs, X_initial, y_initial)
        if committee_flag:
            # vote-entropy query-by-committee over learners 1 and 3 only
            # (learner2 is deliberately excluded here — TODO confirm intent)
            learner = Committee(
                learner_list=[learner1, learner3],
                query_strategy=vote_entropy_sampling
            )
        else:
            learner = learner1

        # the active learning loop
        update_results(ytest, learner.predict(xtest), i)  # baseline score before any query
        n_queries = 10
        for idx in range(n_queries):
            # batch query: up to 300 instances per round
            query_idx, query_instance = learner.query(X_pool, n_instances=300)
            learner.teach(
                X=X_pool[query_idx], y=y_pool[query_idx]
            )
            # remove queried instance from pool
            X_pool = np.delete(X_pool, query_idx, axis=0)
            y_pool = np.delete(y_pool, query_idx, axis=0)
            print(idx)
            update_results(ytest, learner.predict(xtest), i)
        plots(i)
        reset_tables()
    f1_plot()