def test_logistic_regression(self):
        """Check that the LogisticRegression wrapper agrees with scikit-learn.

        Both models are fitted on the training split; their predictions and
        their scores on the training and test splits must then be equal.
        """
        reference = sklearn.linear_model.LogisticRegression()
        reference.fit(self.X_train, self.y_train)
        wrapper = LogisticRegression()
        wrapper.train(Dataset(self.X_train, self.y_train))

        # Predictions first, then scores, each on train followed by test.
        for features in (self.X_train, self.X_test):
            assert_array_equal(reference.predict(features),
                               wrapper.predict(features))

        splits = ((self.X_train, self.y_train), (self.X_test, self.y_test))
        for features, labels in splits:
            self.assertEqual(reference.score(features, labels),
                             wrapper.score(Dataset(features, labels)))
    def test_LogisticRegression(self):
        """Verify the wrapped model reproduces scikit-learn's results.

        The scikit-learn estimator and the wrapper are trained on the same
        training data, then compared on predictions and on scores for both
        the training and the test split.
        """
        sk_model = sklearn.linear_model.LogisticRegression()
        sk_model.fit(self.X_train, self.y_train)
        wrapped_model = LogisticRegression()
        wrapped_model.train(Dataset(self.X_train, self.y_train))

        # Predictions on the training split.
        expected = sk_model.predict(self.X_train)
        actual = wrapped_model.predict(self.X_train)
        assert_array_equal(expected, actual)

        # Predictions on the test split.
        expected = sk_model.predict(self.X_test)
        actual = wrapped_model.predict(self.X_test)
        assert_array_equal(expected, actual)

        # Score on the training split.
        expected = sk_model.score(self.X_train, self.y_train)
        actual = wrapped_model.score(Dataset(self.X_train, self.y_train))
        self.assertEqual(expected, actual)

        # Score on the test split.
        expected = sk_model.score(self.X_test, self.y_test)
        actual = wrapped_model.score(Dataset(self.X_test, self.y_test))
        self.assertEqual(expected, actual)
    def get_state_score(self):
        # type: () -> float
        """Score this state by the probability-weighted F1 change it produces.

        A LogisticRegression model is trained on the dataset built from
        ``self.sent_df`` and used to obtain two values (``p0``, ``p1``) for the
        entry at ``self.state_idx``. For each of five KFold splits of the rows
        of ``self.sent_df`` whose tag column is not null, the state's row is
        evaluated once with tag 0 and once with tag 1; the two F1 differences
        are combined as ``p0 * change0 + p1 * change1``.

        Returns:
            0 when ``self.prev_state`` is None (initial state), otherwise the
            mean of the five per-fold weighted gains.

        NOTE(review): ``self.state_df`` is modified in place; after this call
        the tag at row label 0 is left set to 1.
        """
        if self.prev_state is None:
            return 0  # initial state score
        # Imported locally rather than at module level (presumably to avoid a
        # circular import -- TODO confirm).
        from ResearchNLP.z_experiments.ex_insertion_order import scores_per_add_default
        from ResearchNLP.text_synthesis.heuristic_functions.heuristics.al_heuristics import SynStateUncertainty
        ds = SynStateUncertainty.build_query_strategy(self.sent_df,
                                                      self.col_names)._dataset
        clf = LogisticRegression()
        clf.train(ds)
        # Unpacking requires predict_proba to yield exactly two values for the
        # state's feature vector; presumably P(tag=0) and P(tag=1) -- verify
        # the class order of the model.
        p0, p1 = clf.predict_proba(
            np.array(ds.data[self.state_idx][0].reshape(1, -1)))[0]
        # NOTE(review): labeled_df is only referenced by the commented-out
        # total_gain line below; it is otherwise unused.
        labeled_df = self.sent_df[self.sent_df[self.col_names.text].notnull()]

        def kfold_gain(train_set, dev_set, state_df, col_names):
            """Return the (p0, p1)-weighted F1 change for one train/dev split."""
            def depth1_gain(labeled_state_df):
                """Return f1_list[1] - f1_list[0] for the given labeled state."""
                ex_added_list, res_list = scores_per_add_default(
                    labeled_state_df, train_set, dev_set)
                f1_list = ExprScores.list_to_f1(res_list)
                return f1_list[1] - f1_list[
                    0]  # difference in f1 score. NOT NORMALIZED, but its supposed to be OK

            # Evaluate the same state row under each candidate tag in turn;
            # state_df is mutated in place, so the order of these lines matters.
            state_df.loc[0, col_names.tag] = 0
            change0 = depth1_gain(state_df)
            state_df.loc[0, col_names.tag] = 1
            change1 = depth1_gain(state_df)
            # Called once per fold (five times per get_state_score call).
            cn.add_experiment_param("5_spits_with_prob_kfold_gain")
            return p0 * change0 + p1 * change1

        # total_gain = kfold_gain(labeled_df, labeled_df, self.state_df, self.col_names)
        from sklearn.model_selection import KFold
        total_gains = []
        kf = KFold(n_splits=5)
        # Only rows that already carry a tag take part in the folds; the index
        # is reset so positional fold indices line up with iloc.
        labeled_train_df = self.sent_df[self.sent_df[
            self.col_names.tag].notnull()].reset_index(drop=True)
        for train, dev in kf.split(range(len(labeled_train_df))):
            train_df = labeled_train_df.iloc[train]
            dev_df = labeled_train_df.iloc[dev]
            total_gains.append(
                kfold_gain(train_df, dev_df, self.state_df, self.col_names))
        # Average gain over the folds.
        inst_gain = sum(total_gains) / len(total_gains)

        return inst_gain
Exemple #4
0
def main():
    """Interactively compare uncertainty sampling against random sampling.

    Two copies of the same training set are labeled by a human through an
    InteractiveLabeler: one is queried by least-confident uncertainty
    sampling, the other by random sampling. After every pair of queries a
    single LogisticRegression model is retrained on each copy and its test
    error is appended to the corresponding curve, which is then redrawn.
    The function waits for user input at the end so the figure stays visible.
    """
    quota = 10  # number of samples the human labels for each strategy
    n_classes = 5
    E_out1, E_out2 = [], []

    # The third return value is not needed here.
    trn_ds, tst_ds, _ = split_train_test(n_classes)
    trn_ds2 = copy.deepcopy(trn_ds)

    qs = UncertaintySampling(trn_ds, method='lc', model=LogisticRegression())
    qs2 = RandomSampling(trn_ds2)

    model = LogisticRegression()

    fig = plt.figure()
    ax = fig.add_subplot(2, 1, 1)
    ax.set_xlabel('Number of Queries')
    ax.set_ylabel('Error')

    # Error before any query has been made.
    model.train(trn_ds)
    E_out1 = np.append(E_out1, 1 - model.score(tst_ds))
    model.train(trn_ds2)
    E_out2 = np.append(E_out2, 1 - model.score(tst_ds))

    query_num = np.arange(0, 1)
    p1, = ax.plot(query_num, E_out1, 'g', label='qs Eout')
    p2, = ax.plot(query_num, E_out2, 'k', label='random Eout')
    plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05), fancybox=True,
               shadow=True, ncol=5)
    plt.show(block=False)

    # Second subplot; the labeler presumably draws the queried image on the
    # current axes -- verify against InteractiveLabeler.
    img_ax = fig.add_subplot(2, 1, 2)
    box = img_ax.get_position()
    img_ax.set_position([box.x0, box.y0 - box.height * 0.1, box.width,
                         box.height * 0.9])
    # Give each label its name (labels are from 0 to n_classes-1)
    lbr = InteractiveLabeler(label_name=[str(lbl) for lbl in range(n_classes)])

    for i in range(quota):
        ask_id = qs.make_query()
        print("asking sample from Uncertainty Sampling")
        # reshape the image to its width and height
        lb = lbr.label(trn_ds.data[ask_id][0].reshape(8, 8))
        trn_ds.update(ask_id, lb)
        model.train(trn_ds)
        E_out1 = np.append(E_out1, 1 - model.score(tst_ds))

        ask_id = qs2.make_query()
        print("asking sample from Random Sample")
        lb = lbr.label(trn_ds2.data[ask_id][0].reshape(8, 8))
        trn_ds2.update(ask_id, lb)
        model.train(trn_ds2)
        E_out2 = np.append(E_out2, 1 - model.score(tst_ds))

        # Refresh both curves; without this the plot created above would
        # keep showing only the initial point.
        ax.set_xlim((0, i + 1))
        ax.set_ylim((0, max(max(E_out1), max(E_out2)) + 0.2))
        query_num = np.arange(0, i + 2)
        p1.set_xdata(query_num)
        p1.set_ydata(E_out1)
        p2.set_xdata(query_num)
        p2.set_ydata(E_out2)

        plt.draw()

    # The figure is shown non-blocking, so hold the process open until the
    # user has had a chance to look at the final curves.
    input("Press any key to continue...")
Exemple #5
0
def main():
    """Run an interactive labeling session comparing two query strategies.

    Least-confident uncertainty sampling and random sampling each work on
    their own copy of the training set. For every query the human labels the
    requested 8x8 sample, the shared model is retrained on that copy and the
    resulting test error is appended to the strategy's curve, which is then
    redrawn. The function blocks on user input before returning.
    """
    quota = 10  # ask human to label 10 samples
    n_classes = 5

    trn_ds, tst_ds, ds = split_train_test(n_classes)
    trn_ds2 = copy.deepcopy(trn_ds)
    qs = UncertaintySampling(trn_ds, method='lc', model=LogisticRegression())
    qs2 = RandomSampling(trn_ds2)

    model = LogisticRegression()

    fig = plt.figure()
    ax = fig.add_subplot(2, 1, 1)
    ax.set_xlabel('Number of Queries')
    ax.set_ylabel('Error')

    def test_error(train_set):
        """Retrain the shared model on train_set and return its test error."""
        model.train(train_set)
        return 1 - model.score(tst_ds)

    # Starting point of both curves, before any query.
    E_out1 = np.append([], test_error(trn_ds))
    E_out2 = np.append([], test_error(trn_ds2))

    steps = np.arange(0, 1)
    p1, = ax.plot(steps, E_out1, 'g', label='qs Eout')
    p2, = ax.plot(steps, E_out2, 'k', label='random Eout')
    legend_options = dict(loc='upper center', bbox_to_anchor=(0.5, -0.05),
                          fancybox=True, shadow=True, ncol=5)
    plt.legend(**legend_options)
    plt.show(block=False)

    img_ax = fig.add_subplot(2, 1, 2)
    box = img_ax.get_position()
    shrunk = [box.x0, box.y0 - box.height * 0.1, box.width, box.height * 0.9]
    img_ax.set_position(shrunk)
    # One label name per class, "0" .. str(n_classes - 1).
    label_names = [str(lbl) for lbl in range(n_classes)]
    lbr = InteractiveLabeler(label_name=label_names)

    def ask_and_learn(strategy, train_set, announcement):
        """Query one sample, have it labeled, and return the new test error."""
        ask_id = strategy.make_query()
        print(announcement)
        # The stored feature vector is reshaped to the 8x8 image it encodes.
        picture = train_set.data[ask_id][0].reshape(8, 8)
        train_set.update(ask_id, lbr.label(picture))
        return test_error(train_set)

    for step in range(quota):
        E_out1 = np.append(
            E_out1,
            ask_and_learn(qs, trn_ds,
                          "asking sample from Uncertainty Sampling"))
        E_out2 = np.append(
            E_out2,
            ask_and_learn(qs2, trn_ds2, "asking sample from Random Sample"))

        highest = max(max(E_out1), max(E_out2))
        ax.set_xlim((0, step + 1))
        ax.set_ylim((0, highest + 0.2))
        steps = np.arange(0, step + 2)
        for curve, errors in ((p1, E_out1), (p2, E_out2)):
            curve.set_xdata(steps)
            curve.set_ydata(errors)

        plt.draw()

    input("Press any key to continue...")
def main(args):
    """Run an interactive uncertainty-sampling session and plot accuracies.

    After the initial training and after every query, three accuracies are
    recorded and plotted: the reviewer's labels against ``y_train`` for the
    entries labeled so far, the model on the full training features against
    ``y_train``, and the model on the test set.

    Args:
        args: object with a ``quota`` attribute giving the number of queries
            to ask the human labeler.
    """
    acc_reviewer, acc_train, acc_test = [], [], []

    trn_ds, tst_ds, y_train = split_train_test()

    # query strategy
    # https://libact.readthedocs.io/en/latest/libact.query_strategies.html
    # #libact-query-strategies-uncertainty-sampling-module
    qs = UncertaintySampling(trn_ds, method='lc', model=LogisticRegression())

    # The passive learning model. It is a separate instance from the model
    # handed to the query strategy above.
    model = LogisticRegression()

    fig = plt.figure()
    ax = fig.add_subplot(2, 1, 1)
    ax.set_xlabel('Number of Queries')
    # All three curves are accuracies, not errors.
    ax.set_ylabel('Accuracy')

    def record_accuracies():
        """Retrain on trn_ds and append the three current accuracies."""
        model.train(trn_ds)

        # Reviewer accuracy: labels given so far versus the true labels.
        oracle = y_train[get_indices_labeled_entries(trn_ds)]
        review = [label for _, label in trn_ds.get_labeled_entries()]
        acc_reviewer.append(accuracy_score(oracle, review))

        # Train accuracy is measured on every training entry, labeled or not,
        # against the true labels.
        features = [entry[0] for entry in trn_ds.get_entries()]
        acc_train.append(model.model.score(features, y_train))
        acc_test.append(model.score(tst_ds))

    record_accuracies()

    query_num = np.arange(0, 1)
    p0, = ax.plot(query_num, acc_reviewer, 'g', label='Acc reviewer')
    # Plot the train accuracy here; the reviewer accuracy belongs to p0 only.
    p1, = ax.plot(query_num, acc_train, 'b', label='Acc train')
    p2, = ax.plot(query_num, acc_test, 'r', label='Acc test')

    plt.legend(loc='upper center',
               bbox_to_anchor=(0.5, -0.05),
               fancybox=True,
               shadow=True,
               ncol=5)
    plt.show(block=False)

    img_ax = fig.add_subplot(2, 1, 2)
    box = img_ax.get_position()
    img_ax.set_position(
        [box.x0, box.y0 - box.height * 0.1, box.width, box.height * 0.9])

    # Give each label its name.
    lbr = InteractiveLabeler(label_name=["0", "1"])

    for i in range(args.quota):

        # make a query from the pool
        ask_id = qs.make_query()
        print("asking sample from Uncertainty Sampling")

        # reshape the image to its width and height
        data_point = trn_ds.data[ask_id][0].reshape(8, 8)
        lb = lbr.label(data_point)

        # update the label in the train dataset
        trn_ds.update(ask_id, lb)

        # retrain and append the new accuracies
        record_accuracies()

        # adjust the limits of the axes so that none of the curves is clipped
        ax.set_xlim((0, i + 1))
        top = max(max(acc_reviewer), max(acc_train), max(acc_test))
        ax.set_ylim((0, top + 0.2))

        query_num = np.arange(0, i + 2)
        for line, values in ((p0, acc_reviewer), (p1, acc_train),
                             (p2, acc_test)):
            line.set_xdata(query_num)
            line.set_ydata(values)

        plt.draw()

    input("Press any key to continue...")
Exemple #7
0
def main():
    """Compare seven query strategies with simulated human answers.

    Each strategy works on its own copy of the training set (the first one
    uses the original). Before any query and after each of ``quota`` queries,
    a shared LogisticRegression model is retrained on the strategy's dataset
    and its test error (``1 - score``) is appended to that strategy's curve.
    Progress is echoed to the console and to a timestamped ``task_*.txt``
    file; the final plot is saved as ``task_*.png`` after user confirmation.

    The module-level names ``pos_filepath``, ``dataset_filepath``,
    ``csv_filepath``, ``vectors_list`` and ``ids_list`` are (re)assigned.
    """
    global pos_filepath, dataset_filepath, csv_filepath, vectors_list, ids_list
    dataset_filepath = "/Users/dndesign/Desktop/active_learning/vecteurs_et_infos/vectors_2015.txt"
    csv_filepath = "/Users/dndesign/Desktop/active_learning/donnees/corpus_2015_id-time-text.csv"
    pos_filepath = "/Users/dndesign/Desktop/active_learning/donnees/oriane_pos_id-time-text.csv"
    vectors_list, ids_list = get_vectors_list(dataset_filepath)

    timestr = time.strftime("%Y%m%d_%H%M%S")
    rule = "#################################################"

    # Number of queries to ask human to label
    quota = 10

    def committee():
        """Return a fresh set of committee members for QueryByCommittee."""
        return [
            LogisticRegression(C=1.0),
            LogisticRegression(C=0.01),
            LogisticRegression(C=100)
        ]

    # (legend entry, name used in the log, line colour, strategy factory).
    # The order fixes both the legend order and the per-query processing order.
    strategies = [
        # Least Confident: queries the instance whose posterior probability
        # of being positive is nearest 0.5.
        ('Least Confident', 'Uncertainty Sampling (Least confident)', 'red',
         lambda ds: UncertaintySampling(
             ds, method='lc', model=LogisticRegression(C=.01))),
        # Smallest Margin: queries the instance whose posterior probability
        # gap between the most and the second probable labels is minimal.
        ('Smallest Margin', 'Uncertainty Sampling (Smallest Margin)', 'blue',
         lambda ds: UncertaintySampling(
             ds, method='sm', model=LogisticRegression(C=.01))),
        # Entropy: reduces to the margin and least confident strategies for
        # binary classification.
        ('Entropy', 'Uncertainty Sampling (Entropy)', 'green',
         lambda ds: UncertaintySampling(
             ds, method='entropy', model=LogisticRegression(C=.01))),
        # Random: chooses a query at random.
        ('Random Sampling', 'Random Sampling', 'orange',
         lambda ds: RandomSampling(ds, random_state=1126)),
        ('QUIRE', 'QUIRE', 'yellow', QUIRE),
        # Vote Entropy: measures committee disagreement by votes.
        ('Vote Entropy', 'QueryByCommittee (Vote Entropy)', 'black',
         lambda ds: QueryByCommittee(
             ds, disagreement='vote', models=committee(),
             random_state=1126)),
        # KL divergence: measures disagreement between the members' class
        # distributions.
        ('KL Divergence', 'QueryByCommittee (KL Divergence)', 'purple',
         lambda ds: QueryByCommittee(
             ds, disagreement='kl_divergence', models=committee(),
             random_state=1126)),
    ]

    # The context manager guarantees the log file is closed even when a
    # strategy or the model raises part-way through the run.
    with codecs.open("task_" + str(timestr) + ".txt", "w",
                     "utf-8") as text_file:
        print("Loading data...")
        text_file.write("Loading data...\n")
        t0 = time.time()

        # Count the entries, then release the handle if the helper returned
        # something closable (its exact return type is not visible here).
        entries = openfile_txt(dataset_filepath)
        try:
            num_lines = sum(1 for _ in entries)
        finally:
            close = getattr(entries, "close", None)
            if callable(close):
                close()
        print("Treating " + str(num_lines) + " entries...")
        text_file.write("Treating : %s entries...\n" % str(num_lines))

        trn_ds, tst_ds = split_train_test(csv_filepath)

        # model = SVM(kernel='linear')
        model = LogisticRegression()

        def test_error(dataset):
            """Retrain the shared model on dataset; return 1 - test score."""
            model.train(dataset)
            return 1 - model.score(tst_ds)

        # Build every strategy before the first query so that all copies
        # start from the same, unmodified training set.
        runs = []
        for legend, title, color, make_strategy in strategies:
            dataset = trn_ds if not runs else copy.deepcopy(trn_ds)
            strategy = make_strategy(dataset)
            runs.append({
                'legend': legend,
                'title': title,
                'color': color,
                'dataset': dataset,
                'strategy': strategy,
                'errors': [test_error(dataset)],
            })

        with sns.axes_style("darkgrid"):
            fig = plt.figure()
            ax = fig.add_subplot(1, 1, 1)

        query_num = np.arange(0, 1)
        for run in runs:
            run['line'], = ax.plot(query_num, run['errors'], run['color'])
        plt.legend(tuple(run['legend'] for run in runs), loc=1)
        # The curves hold 1 - score, i.e. the test error.
        plt.ylabel('Error')
        plt.xlabel('Number of Queries')
        plt.title('Active Learning - Query choice strategies')
        plt.ylim([0, 1])
        plt.show(block=False)

        for i in range(quota):
            print("\n" + rule)
            print("Query number " + str(i) + " : ")
            print(rule + "\n")
            text_file.write("\n" + rule + "\n")
            text_file.write("Query number %s : " % str(i))
            text_file.write("\n" + rule + "\n")

            for run in runs:
                ask_id = run['strategy'].make_query()
                # Look both values up once so the console, the log file and
                # the dataset update are guaranteed to agree.
                tweet = define_tweet_by_id(ask_id)
                answer = simulate_human_decision(ask_id)

                print("\033[4mUsing " + run['title'] + " :\033[0m")
                print("Tweet :" + tweet, end='', flush=True)
                print("Simulating human response : " + str(answer) + " \n")
                text_file.write("Using %s :\n" % run['title'])
                text_file.write("Tweet : %s \n" % str(tweet))
                text_file.write("Simulating human response : %s \n\n" %
                                str(answer))

                run['dataset'].update(ask_id, answer)
                run['errors'].append(test_error(run['dataset']))

            ax.set_xlim((0, i + 1))
            ax.set_ylim((0, max(max(run['errors']) for run in runs) + 0.2))
            query_num = np.arange(0, i + 2)
            for run in runs:
                run['line'].set_xdata(query_num)
                run['line'].set_ydata(run['errors'])

            plt.draw()

        time_total = time.time() - t0
        print("\n\n\n" + rule + "\n")
        print("Execution time : %fs \n\n" % time_total)
        text_file.write("\n\n\n" + rule + "\n")
        text_file.write("Execution time : %fs \n" % time_total)

    input("Press any key to save the plot...")
    plt.savefig('task_' + str(timestr) + '.png')

    print("Done")