Python SVM.train Examples

Programming Language: Python

Namespace/Package Name: libact.models

Class/Type: SVM

Method/Function: train

Examples at hotexamples.com: 5

Python SVM.train - 5 examples found. These are the top-rated real-world Python examples of libact.models.SVM.train, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

SVM(16)

train(4)

score(2)

predict(1)

Frequently Used Methods

SVM (16)

train (4)

score (2)

predict (1)

Example #1

Show file

    def test_svm(self):
        """Check that ``SVM`` reproduces ``SVC`` predictions and scores.

        Both models are fitted on the training split with ``gamma="auto"``;
        predictions are compared first, then scores, on the training and
        test splits.
        """
        reference = SVC(gamma="auto")
        reference.fit(self.X_train, self.y_train)
        wrapped = SVM(gamma="auto")
        wrapped.train(Dataset(self.X_train, self.y_train))

        splits = (
            (self.X_train, self.y_train),
            (self.X_test, self.y_test),
        )

        # Predictions must match element-wise on every split.
        for features, _ in splits:
            assert_array_equal(reference.predict(features),
                               wrapped.predict(features))

        # Scores must be exactly equal; SVM.score takes a Dataset.
        for features, labels in splits:
            self.assertEqual(reference.score(features, labels),
                             wrapped.score(Dataset(features, labels)))

Example #2

Show file

File: test_svm.py Project: Edgar324/libact

    def test_svm(self):
        """Check that a default ``SVM`` agrees with a default ``SVC``.

        Both are fitted on the training split; predictions and then scores
        are compared on the training and test splits.
        """
        expected_clf = SVC()
        expected_clf.fit(self.X_train, self.y_train)
        actual_clf = SVM()
        actual_clf.train(Dataset(self.X_train, self.y_train))

        X_trn, y_trn = self.X_train, self.y_train
        X_tst, y_tst = self.X_test, self.y_test

        # Element-wise agreement of the predicted labels.
        assert_array_equal(expected_clf.predict(X_trn),
                           actual_clf.predict(X_trn))
        assert_array_equal(expected_clf.predict(X_tst),
                           actual_clf.predict(X_tst))

        # Exact agreement of the scores; SVM.score takes a Dataset.
        expected_train_score = expected_clf.score(X_trn, y_trn)
        self.assertEqual(expected_train_score,
                         actual_clf.score(Dataset(X_trn, y_trn)))
        expected_test_score = expected_clf.score(X_tst, y_tst)
        self.assertEqual(expected_test_score,
                         actual_clf.score(Dataset(X_tst, y_tst)))

Example #3

Show file

def main():
    """Compare six query strategies and plot ``1 - model.score`` per query.

    A single linear ``SVM`` is trained on each strategy's own training set
    (the first strategy uses ``trn_ds`` itself, the others a deep copy) and
    ``1 - model.score(tst_ds)`` is recorded once before any query and once
    after each of the ``quota`` simulated labelling rounds.  Progress is
    echoed to stdout and to ``task_<timestamp>.txt``; after the user presses
    enter the figure is saved as ``task_<timestamp>.png``.

    Side effects: assigns the module globals ``pos_filepath``,
    ``dataset_filepath``, ``csv_filepath``, ``vectors_list`` and
    ``ids_list``.
    """
    global pos_filepath, dataset_filepath, csv_filepath, vectors_list, ids_list
    dataset_filepath = "/Users/dndesign/Desktop/active_learning/vecteurs_et_infos/vectors_2015.txt"
    csv_filepath = "/Users/dndesign/Desktop/active_learning/donnees/corpus_2015_id-time-text.csv"
    pos_filepath = "/Users/dndesign/Desktop/active_learning/donnees/oriane_pos_id-time-text.csv"
    vectors_list, ids_list = get_vectors_list(dataset_filepath)

    timestr = time.strftime("%Y%m%d_%H%M%S")

    # Number of queries to ask the (simulated) human to label.
    quota = 10

    def committee():
        # Each QueryByCommittee gets its own, freshly built members.
        return [LogisticRegression(C=1.0),
                LogisticRegression(C=0.01),
                LogisticRegression(C=100)]

    # (name used in console/transcript, legend label, line colour, factory)
    # The same name is used on the console and in the transcript so the two
    # can no longer disagree about which strategy produced a query.
    strategies = (
        # Uncertainty sampling queries the instances the model is least
        # certain how to label; "least confident" picks the instance whose
        # posterior probability of being positive is nearest 0.5.
        ("Uncertainty Sampling (Least confident)", 'Least Confident', 'red',
         lambda ds: UncertaintySampling(
             ds, method='lc', model=LogisticRegression(C=.01))),
        # Uncertainty sampling, max-margin variant.
        ("Uncertainty Sampling (Max Margin)", 'Max Margin', 'blue',
         lambda ds: USampling(ds, method='mm', model=SVM(kernel='linear'))),
        # CMB: combination of distance-based (DIST) and diversity-based
        # (DIV) active learning.
        ("CMB Distance-Diversity Sampling", 'Distance Diversity CMB', 'green',
         lambda ds: CMBSampling(ds, model=SVM(kernel='linear'))),
        # Random sampling chooses a query at random.
        ("Random Sampling", 'Random Sampling', 'orange',
         lambda ds: RandomSampling(ds, random_state=1126)),
        # Query-by-committee keeps a committee of classifiers and queries
        # the instance its members disagree on most.  Vote entropy measures
        # that disagreement but ignores the members' class distributions,
        # so it can miss some informative unlabeled examples.
        ("QueryByCommittee (Vote Entropy)", 'Vote Entropy', 'black',
         lambda ds: QueryByCommittee(ds, disagreement='vote',
                                     models=committee(),
                                     random_state=1126)),
        # Query-by-committee with KL divergence as the disagreement
        # measure; it can miss some examples the members disagree on.
        ("QueryByCommittee (KL Divergence)", 'KL Divergence', 'purple',
         lambda ds: QueryByCommittee(ds, disagreement='kl_divergence',
                                     models=committee(),
                                     random_state=1126)),
    )

    # The context manager closes the transcript even if a step below raises.
    with codecs.open("task_" + str(timestr) + ".txt", "w", "utf-8") as text_file:
        print("Loading data...")
        text_file.write("Loading data...\n")
        t0 = time.time()

        # Count the entries, then release the handle if the helper returned
        # something closable (the original never closed it).
        entries = openfile_txt(dataset_filepath)
        try:
            num_lines = sum(1 for _ in entries)
        finally:
            close_entries = getattr(entries, "close", None)
            if callable(close_entries):
                close_entries()
        print("Treating " + str(num_lines) + " entries...")
        text_file.write("Treating : %s entries...\n" % str(num_lines))

        trn_ds, tst_ds = split_train_test(csv_filepath)

        model = SVM(kernel='linear')
        # model = LogisticRegression()

        # Build the runs in the original order: dataset, query strategy,
        # initial training, initial measurement.
        runs = []
        for position, (name, legend, color, make_qs) in enumerate(strategies):
            dataset = trn_ds if position == 0 else copy.deepcopy(trn_ds)
            query_strategy = make_qs(dataset)
            model.train(dataset)
            runs.append({
                "name": name,
                "legend": legend,
                "color": color,
                "dataset": dataset,
                "qs": query_strategy,
                "errors": np.append([], 1 - model.score(tst_ds)),
            })

        with sns.axes_style("darkgrid"):
            fig = plt.figure()
            ax = fig.add_subplot(1, 1, 1)

        query_num = np.arange(0, 1)
        for run in runs:
            run["line"], = ax.plot(query_num, run["errors"], run["color"])
        plt.legend(tuple(run["legend"] for run in runs), loc=1)
        # The curves hold 1 - model.score(...), so labelling the axis
        # 'Accuracy' was misleading (assuming score() is an accuracy).
        plt.ylabel('Error (1 - score)')
        plt.xlabel('Number of Queries')
        plt.title('Active Learning - Query choice strategies')
        plt.ylim([0, 1])
        plt.show(block=False)

        for i in range(quota):
            print("\n#################################################")
            print("Query number " + str(i) + " : ")
            print("#################################################\n")
            text_file.write("\n#################################################\n")
            text_file.write("Query number %s : " % str(i))
            text_file.write("\n#################################################\n")

            for run in runs:
                ask_id = run["qs"].make_query()
                # Look both up once so the label that is printed and logged
                # is exactly the one fed back into the dataset.
                tweet = define_tweet_by_id(ask_id)
                label = simulate_human_decision(ask_id)

                print("\033[4mUsing " + run["name"] + " :\033[0m")
                print("Tweet :" + tweet, end='', flush=True)
                print("Simulating human response : " + str(label) + " \n")
                text_file.write("Using %s :\n" % run["name"])
                text_file.write("Tweet : %s \n" % str(tweet))
                text_file.write("Simulating human response : %s \n\n" % str(label))

                run["dataset"].update(ask_id, label)
                model.train(run["dataset"])
                run["errors"] = np.append(run["errors"],
                                          1 - model.score(tst_ds))

            # Grow the axes and refresh every curve with the new point.
            ax.set_xlim((0, i + 1))
            ax.set_ylim((0, max(max(run["errors"]) for run in runs) + 0.2))
            query_num = np.arange(0, i + 2)
            for run in runs:
                run["line"].set_xdata(query_num)
                run["line"].set_ydata(run["errors"])

            plt.draw()

        t2 = time.time()
        time_total = t2 - t0
        print("\n\n\n#################################################\n")
        print("Execution time : %fs \n\n" % time_total)
        text_file.write("\n\n\n#################################################\n")
        text_file.write("Execution time : %fs \n" % time_total)

    input("Press any key to save the plot...")
    plt.savefig('task_' + str(timestr) + '.png')

    print("Done")

Example #4

Show file

def main():
    """Run ROUNDS single-query rounds per strategy and print ACC/MCC rows.

    Every strategy in STRATEGIES owns a copy of the same initial dataset.
    One shared SVM is retrained on the strategy's dataset before a query is
    chosen and again after the queried label is added; the values returned
    by ``compute_acc_mcc`` on the test split are collected after each
    training and finally printed as comma-separated rows.
    """
    X_train, y_train = load_data(DATA_TRAIN)
    X_test, y_test = load_data(DATA_TEST)
    X_all, y_all = load_data(DATA_ALL)

    eal_pool = make_active_learning_dataset(len(y_train), X_all, y_all)
    al_pool = copy.deepcopy(eal_pool)
    pl_pool = copy.deepcopy(eal_pool)
    classifier = SVM(kernel=KERNEL, probability=True)

    # All three lists are indexed by the strategy constant.
    pools = [al_pool, eal_pool, pl_pool]
    accuracies = [[], [], []]
    correlations = [[], [], []]

    def fit_and_record(strategy):
        # Retrain on this strategy's dataset and store the test metrics.
        classifier.train(pools[strategy])
        acc, mcc = compute_acc_mcc(classifier.model, X_test, y_test)
        accuracies[strategy].append(acc)
        correlations[strategy].append(mcc)

    def lies_in_q2_or_q4(index):
        # Rows of X_all are unpacked as exactly two coordinates.
        x1, x2 = X_all[index]
        return quadrant(x1, x2) in ["Q2", "Q4"]

    # Baseline measurement before any query.
    for strategy in STRATEGIES:
        fit_and_record(strategy)

    for _ in range(ROUNDS):
        for strategy in STRATEGIES:
            pool = pools[strategy]
            # The classifier is shared, so refit it on this pool first.
            classifier.train(pool)
            pool_indices, X_pool = zip(*pool.get_unlabeled_entries())
            pool_indices = list(pool_indices)
            certainties = get_certainties(classifier.model, X_pool)

            if strategy == AL:
                best = select_batch(1, pool_indices, X_pool,
                                    certainties, "q-best")
                query_index = best[0]
                # Values unused; the unpack is kept from the original.
                x1, x2 = X_all[query_index]

            elif strategy == EAL:
                candidates = select_batch(CANDIDATES, pool_indices, X_pool,
                                          certainties, "k-means-uncertain")
                # Prefer candidates in Q2/Q4 when there are any.
                preferred = [q for q in candidates if lies_in_q2_or_q4(q)]
                if preferred:
                    candidates = preferred

                query_index = candidates[randint(0, len(candidates) - 1)]

            elif strategy == PL:
                query_index = choice(pool_indices)
                # Values unused; the unpack is kept from the original.
                x1, x2 = X_all[query_index]

            pool.update(query_index, y_all[query_index])
            fit_and_record(strategy)

    for strategy in STRATEGIES:
        label = STRATEGIY_NAMES[strategy]
        accuracies[strategy] = [pretty_float(value)
                                for value in accuracies[strategy]]
        correlations[strategy] = [pretty_float(value)
                                  for value in correlations[strategy]]
        # A single parenthesised argument prints the same on Python 2 and 3.
        print("{0}_ACC,".format(label) + ",".join(accuracies[strategy]))
        print("{0}_MCC,".format(label) + ",".join(correlations[strategy]))

Example #5

Show file

File: drp_eal.py Project: darkreactions/explaining-active-learning-queries

def run_active_learning():
    """Run AL_ROUNDS batch-labelling rounds per strategy and print an MCC summary.

    A single SVM (``drp_model``) is shared by all strategies.  It is trained
    on each strategy's dataset for a baseline measurement, and then, in
    every round, retrained on the strategy's dataset before certainties are
    computed and again after the selected batch has been labelled.

    For the EAL strategy a second classifier (``lime_model``) is fitted on
    discretized certainty labels and explained with LIME; the expert is
    asked (``ask_expert``) whether each proposed batch should be labelled.
    AL selects a batch with "k-means-uncertain" and PL samples the batch at
    random from the unlabeled indices.

    Raises:
        AssertionError: if AL_ROUNDS exceeds the number of training
            instances minus INITIAL_INSTANCES.
    """
    logger = SimpleLogger(LOG_FILE)
    dm = DataManager()
    im = InterpretableDataManager()
    drp_model = SVM(kernel=KERNEL, probability=True)
    lime_model = svm.SVC(kernel=KERNEL, probability=True)
    accs = [[], [], []]
    mccs = [[], [], []]

    labeled_indices = dm.get_labeled_indices()
    logger.log(0, labeled_indices)

    # Baseline (round 0) measurement for every strategy.
    for strategy in STRATEGIES:
        trn_ds = dm.trn_ds_list[strategy]
        drp_model.train(trn_ds)
        update_accs_mccs(accs, mccs, dm, drp_model.model.predict, strategy)

    print_last_round_mcc(0, accs, mccs)
    assert (AL_ROUNDS <= len(dm.y_train) - INITIAL_INSTANCES)

    # `round_no` rather than `round`, to avoid shadowing the builtin.
    for round_no in range(1, AL_ROUNDS + 1):
        print("=================================================")
        print("Round {}".format(round_no))
        print("=================================================")
        for strategy in STRATEGIES:
            trn_ds = dm.trn_ds_list[strategy]
            exclusion = set()
            batch = set()

            # drp_model is shared: without this refit the certainties below
            # would come from whichever strategy's dataset was trained last.
            drp_model.train(trn_ds)

            unlabeled_indices, _ = zip(*trn_ds.get_unlabeled_entries())
            # Certainties are computed for the whole training matrix, so
            # they can be looked up by dataset index.
            certainties = get_certainties(drp_model.model, dm.X_train_scaled)
            if strategy == EAL:
                threshold = get_certainty_threshold(drp_model.model,
                                                    dm.X_train_scaled,
                                                    THRESHOLD)
                y_certainty = discretize_certainties(certainties, threshold)

                # Surrogate model predicting the discretized certainty.
                lime_model.fit(dm.X_train_scaled_e, y_certainty)
                if SHOW_LIME:
                    certainties_test = get_certainties(drp_model.model,
                                                       dm.X_test_scaled)
                    y_certainty_test = discretize_certainties(
                        certainties_test, threshold)
                    print_lime_model_performance(lime_model, dm,
                                                 y_certainty_test)

                while len(batch) < BATCH_SIZE:
                    query_id = query_least_confident(unlabeled_indices,
                                                     certainties, exclusion)
                    query = dm.X_train_scaled[query_id]
                    query_unscaled = dm.X_train_e[query_id]
                    instance_certainty = get_certainty(drp_model.model, query)
                    print("Explaining Query with id #{:d}".format(query_id))
                    print("Certainty {:.3f}".format(instance_certainty))

                    # Deliberately rebuilt per query, as in the original.
                    explainer = LimeTabularExplainer(
                        dm.X_train_e,
                        training_labels=y_certainty,
                        feature_names=dm.feature_names_e,
                        class_names=["uncertain", "certain"],
                        discretize_continuous=True,
                        discretizer="entropy")

                    def predict_fn(x):
                        # LIME passes unscaled rows; scale before predicting.
                        return lime_model.predict_proba(
                            dm.scaler_e.transform(x)).astype(float)

                    # Widen the explanation two features at a time until it
                    # yields at least NUM_FEATURES - 2 "uncertain" items.
                    for i in range(0, MAX_EXP_FEATURE, 2):
                        exp = explainer.explain_instance(
                            query_unscaled,
                            predict_fn,
                            num_features=NUM_FEATURES + i)
                        uncertain_exp_list = get_uncertain_exps(exp)
                        if len(uncertain_exp_list) >= NUM_FEATURES - 2:
                            break
                        print("INFO: looping")

                    if SHOW_LIME:
                        print_lime_model_prediction(predict_fn, query_unscaled)

                    exp_indices = get_indices_exp_region(
                        exp, dm, unlabeled_indices, y_certainty)
                    exp_instances = get_values_of_indices(
                        exp_indices, dm.X_train_scaled)
                    exp_certainties = get_values_of_indices(
                        exp_indices, certainties)
                    # Only ask for as many instances as the batch still
                    # needs (the original min() with BATCH_SIZE was a no-op).
                    batch_indices = select_batch(
                        BATCH_SIZE - len(batch), exp_indices,
                        exp_instances, exp_certainties, "k-means-uncertain")

                    if len(batch_indices) == 0:
                        # Nothing selectable here: skip this query next time.
                        exclusion.add(query_id)
                        continue

                    print("")
                    print_explanation_drp(uncertain_exp_list, False)
                    print("")
                    print("Instances in the batch: {}".format(
                        len(batch_indices)))
                    im.describe_instances(batch_indices)
                    print("")
                    im.describe_instance(query_id)
                    print("")

                    # The whole explained region is excluded from later
                    # queries, whether or not the expert accepts the batch.
                    exclusion.update(set(exp_indices))
                    if ask_expert():
                        batch.update(set(batch_indices))
                    else:
                        print("INFO: Not including in the batch")

                logger.log(round_no, batch)
                print("INFO: Labeling the batch")
                label_batch(trn_ds, dm.y_train, batch)

            elif strategy == AL:  # AL + k-means-uncertain
                unlabeled_X_scaled = get_values_of_indices(
                    unlabeled_indices, dm.X_train_scaled)
                unlabeled_certainties = get_values_of_indices(
                    unlabeled_indices, certainties)
                batch_indices = select_batch(BATCH_SIZE, unlabeled_indices,
                                             unlabeled_X_scaled,
                                             unlabeled_certainties,
                                             "k-means-uncertain")
                label_batch(trn_ds, dm.y_train, batch_indices)

            elif strategy == PL:  # Passive Learning
                batch_indices = random.sample(unlabeled_indices, BATCH_SIZE)
                label_batch(trn_ds, dm.y_train, batch_indices)

            # Refit on the enlarged dataset and record this round's metrics.
            drp_model.train(trn_ds)
            update_accs_mccs(accs, mccs, dm, drp_model.model.predict, strategy)

    print_mcc_summary(mccs)