Example #1
def QBoost(X_train, y_train, X_test, y_test):
    from dwave.system.samplers import DWaveSampler
    from dwave.system.composites import EmbeddingComposite
    from sklearn.metrics import accuracy_score

    from qboost import QBoostClassifier

    NUM_READS = 1000
    DW_PARAMS = {'num_reads': NUM_READS,  # samples per QPU call
                 'auto_scale': True,      # scale the problem to the hardware range
                 'num_spin_reversal_transforms': 10,  # mitigate hardware bias
                 'postprocess': 'optimization',
                 }

    # QPU solver, with minor-embedding handled by the composite
    dwave_sampler = DWaveSampler(solver={'qpu': True})
    emb_sampler = EmbeddingComposite(dwave_sampler)

    # Train QBoost on the D-Wave sampler; lmd weights the regularization term
    clf4 = QBoostClassifier(n_estimators=30, max_depth=2)
    clf4.fit(X_train, y_train, emb_sampler, lmd=1.0, **DW_PARAMS)
    y_train4 = clf4.predict(X_train)
    y_test4 = clf4.predict(X_test)

    print('Accuracy for training data: \t', accuracy_score(y_train, y_train4))
    print('Accuracy for test data: \t', accuracy_score(y_test, y_test4))

    return clf4
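
A minimal way to exercise QBoost() above, assuming access to a D-Wave QPU; the synthetic dataset below is an illustrative assumption, with labels mapped to the {+1, -1} convention the other examples use:

# Hypothetical driver for QBoost() above; make_classification and the
# 80/20 split are illustrative choices, not part of the original example.
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=200, n_features=10, random_state=0)
y = 2 * y - 1  # map labels from {0, 1} to {-1, +1}

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
clf = QBoost(X_train, y_train, X_test, y_test)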
Example #2
from dwave.system.samplers import DWaveSampler
from dwave.system.composites import EmbeddingComposite
from sklearn import preprocessing
from sklearn.ensemble import AdaBoostClassifier
# `metric` is assumed to be classification accuracy, matching the printed labels below
from sklearn.metrics import accuracy_score as metric

from qboost import WeakClassifiers, QBoostClassifier, QboostPlus


def train_model(X_train, y_train, X_test, y_test, lmd):
    """
    Train and compare AdaBoost, decision-tree ensemble, QBoost, and QBoostPlus models.

    :param X_train: training inputs
    :param y_train: training labels (in {+1, -1})
    :param X_test: test inputs
    :param y_test: test labels (in {+1, -1})
    :param lmd: lambda, the weight of the regularization term
    :return: None
    """
    NUM_READS = 3000
    NUM_WEAK_CLASSIFIERS = 35
    TREE_DEPTH = 3

    # define sampler
    dwave_sampler = DWaveSampler(solver={'qpu': True})
    # sa_sampler = dimod.SimulatedAnnealingSampler()  # classical alternative
    emb_sampler = EmbeddingComposite(dwave_sampler)

    N_train = len(X_train)
    N_test = len(X_test)

    print("\n======================================")
    print("Train#: %d, Test: %d" % (N_train, N_test))
    print('Num weak classifiers:', NUM_WEAK_CLASSIFIERS)
    print('Tree depth:', TREE_DEPTH)

    # input: dataset X and labels y (in {+1, -1})

    # Preprocessing data
    # imputer = SimpleImputer()
    scaler = preprocessing.StandardScaler()  # standardize features
    normalizer = preprocessing.Normalizer()  # normalize samples

    # X = imputer.fit_transform(X)
    X_train = scaler.fit_transform(X_train)
    X_train = normalizer.fit_transform(X_train)

    # Apply the transforms fitted on the training data; calling fit_transform
    # on the test set would leak test statistics into the preprocessing.
    X_test = scaler.transform(X_test)
    X_test = normalizer.transform(X_test)

    ## Adaboost
    print('\nAdaboost')

    clf = AdaBoostClassifier(n_estimators=NUM_WEAK_CLASSIFIERS)

    # scores = cross_val_score(clf, X, y, cv=5, scoring='accuracy')
    print('fitting...')
    clf.fit(X_train, y_train)

    hypotheses_ada = clf.estimators_
    # clf.estimator_weights_ = np.random.uniform(0,1,size=NUM_WEAK_CLASSIFIERS)
    print('testing...')
    y_train_pred = clf.predict(X_train)
    y_test_pred = clf.predict(X_test)

    print('accu (train): %5.2f' % (metric(y_train, y_train_pred)))
    print('accu (test): %5.2f' % (metric(y_test, y_test_pred)))

    # Ensembles of Decision Tree
    print('\nDecision tree')

    clf2 = WeakClassifiers(n_estimators=NUM_WEAK_CLASSIFIERS,
                           max_depth=TREE_DEPTH)
    clf2.fit(X_train, y_train)

    y_train_pred2 = clf2.predict(X_train)
    y_test_pred2 = clf2.predict(X_test)
    print(clf2.estimator_weights)

    print('accu (train): %5.2f' % (metric(y_train, y_train_pred2)))
    print('accu (test): %5.2f' % (metric(y_test, y_test_pred2)))

    # QBoost
    print('\nQBoost')

    DW_PARAMS = {
        'num_reads': NUM_READS,
        'auto_scale': True,
        # "answer_mode": "histogram",
        'num_spin_reversal_transforms': 10,
        # 'annealing_time': 10,
        'postprocess': 'optimization',
    }

    clf3 = QBoostClassifier(n_estimators=NUM_WEAK_CLASSIFIERS,
                            max_depth=TREE_DEPTH)
    clf3.fit(X_train, y_train, emb_sampler, lmd=lmd, **DW_PARAMS)

    y_train_dw = clf3.predict(X_train)
    y_test_dw = clf3.predict(X_test)

    print(clf3.estimator_weights)

    print('accu (train): %5.2f' % (metric(y_train, y_train_dw)))
    print('accu (test): %5.2f' % (metric(y_test, y_test_dw)))

    # QBoostPlus
    print('\nQBoostPlus')
    clf4 = QboostPlus([clf, clf2, clf3])
    clf4.fit(X_train, y_train, emb_sampler, lmd=lmd, **DW_PARAMS)
    y_train4 = clf4.predict(X_train)
    y_test4 = clf4.predict(X_test)
    print(clf4.estimator_weights)

    print('accu (train): %5.2f' % (metric(y_train, y_train4)))
    print('accu (test): %5.2f' % (metric(y_test, y_test4)))

    print("=============================================")
    print("Method \t Adaboost \t DecisionTree \t Qboost \t QboostIt")
    print("Train\t %5.2f \t\t %5.2f \t\t\t %5.2f \t\t %5.2f" %
          (metric(y_train, y_train_pred), metric(y_train, y_train_pred2),
           metric(y_train, y_train_dw), metric(y_train, y_train4)))
    print("Test\t %5.2f \t\t %5.2f \t\t\t %5.2f \t\t %5.2f" %
          (metric(y_test, y_test_pred), metric(y_test, y_test_pred2),
           metric(y_test, y_test_dw), metric(y_test, y_test4)))
    print("=============================================")

    # plt.subplot(211)
    # plt.bar(range(len(y_test)), y_test)
    # plt.subplot(212)
    # plt.bar(range(len(y_test)), y_test_dw)
    # plt.show()

    return
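
A hypothetical driver for train_model() above; the blob dataset, split size, and lambda value are illustrative assumptions:

# Hypothetical invocation of train_model(); the dataset and lambda value
# here are illustrative, not taken from the original demo.
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split

X, y = make_blobs(n_samples=300, centers=2, n_features=8, random_state=0)
y = 2 * y - 1  # QBoost expects labels in {+1, -1}

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
train_model(X_train, y_train, X_test, y_test, lmd=0.5)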
Example #3
    if args.dataset == 'blobs':
        X, y = make_blob_data(
            n_samples=n_samples, n_features=n_features, n_informative=n_informative)

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.4)

        if args.cross_validation:
            # See Boyda et al. (2017), Eq. (17) regarding normalization
            normalized_lambdas = np.linspace(0.0, 0.5, 10)
            lambdas = normalized_lambdas / n_features
            print('Performing cross-validation using {} values of lambda, this may take several minutes...'.format(len(lambdas)))
            qboost, lam = qboost_lambda_sweep(
                X_train, y_train, lambdas, verbose=args.verbose)
        else:
            qboost = QBoostClassifier(X_train, y_train, args.lam)

        if args.verbose:
            qboost.report_baseline(X_test, y_test)

        print('Informative features:', list(range(n_informative)))
        print('Selected features:', qboost.get_selected_features())

        print('Score on test set: {:.3f}'.format(qboost.score(X_test, y_test)))

    elif args.dataset == 'digits':
        if args.digit1 == args.digit2:
            raise ValueError("must use two different digits")

        X, y = get_handwritten_digits_data(args.digit1, args.digit2)
        n_features = np.size(X, 1)
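
The excerpt above runs inside a command-line demo; the following argparse sketch shows flags consistent with the `args.*` attributes it reads. The flag names and defaults are inferred, not taken from the actual demo:

# Sketch of the CLI the excerpt assumes; flag names are inferred from the
# `args.*` attributes used above and may differ from the real demo.
import argparse

parser = argparse.ArgumentParser(description='Run the QBoost demo')
parser.add_argument('--dataset', choices=['blobs', 'digits'], default='blobs')
parser.add_argument('--cross-validation', action='store_true',
                    help='sweep lambda values instead of using --lam')
parser.add_argument('--lam', type=float, default=0.01,
                    help='regularization strength (when not cross-validating)')
parser.add_argument('--digit1', type=int, default=0)
parser.add_argument('--digit2', type=int, default=1)
parser.add_argument('--verbose', action='store_true')
args = parser.parse_args()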
Example #4
    @classmethod
    def setUpClass(cls):
        # Build a shared dataset and an unregularized classifier once for all tests
        cls.X, cls.y = make_blob_data()
        cls.clf = QBoostClassifier(cls.X, cls.y, 0.0)
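
For context, a sketch of how the test class around setUpClass might look when completed; the class name, the import of make_blob_data from demo, and the 0.9 accuracy threshold are assumptions, not part of the original:

# Hypothetical completion of the test class above; names and the 0.9
# threshold are illustrative assumptions.
import unittest

from qboost import QBoostClassifier
from demo import make_blob_data  # assumed location of the helper


class TestQBoost(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.X, cls.y = make_blob_data()
        cls.clf = QBoostClassifier(cls.X, cls.y, 0.0)

    def test_training_accuracy(self):
        # An unregularized classifier should fit separable blobs well
        self.assertGreater(self.clf.score(self.X, self.y), 0.9)


if __name__ == '__main__':
    unittest.main()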
Example #5
from dwave.system.samplers import DWaveSampler
from dwave.system.composites import EmbeddingComposite
from sklearn import preprocessing
from sklearn.ensemble import AdaBoostClassifier

from qboost import WeakClassifiers, QBoostClassifier

# print_accuracy is a helper assumed to be defined elsewhere in the demo
# (a sketch follows this example).


def train_models(X_train, y_train, X_test, y_test, lmd, verbose=False):
    NUM_READS = 3000
    NUM_WEAK_CLASSIFIERS = 35
    TREE_DEPTH = 3

    # define sampler
    dwave_sampler = DWaveSampler()
    emb_sampler = EmbeddingComposite(dwave_sampler)

    N_train = len(X_train)
    N_test = len(X_test)

    print('Size of training set:', N_train)
    print('Size of test set:    ', N_test)
    print('Number of weak classifiers:', NUM_WEAK_CLASSIFIERS)
    print('Tree depth:', TREE_DEPTH)

    # input: dataset X and labels y (in {+1, -1})

    # Preprocessing data
    scaler = preprocessing.StandardScaler()  # standardize features
    normalizer = preprocessing.Normalizer()  # normalize samples

    X_train = scaler.fit_transform(X_train)
    X_train = normalizer.fit_transform(X_train)

    # Reuse the training-set statistics; fitting on the test set would leak
    X_test = scaler.transform(X_test)
    X_test = normalizer.transform(X_test)

    # ===============================================
    print('\nAdaboost:')

    clf = AdaBoostClassifier(n_estimators=NUM_WEAK_CLASSIFIERS)

    clf.fit(X_train, y_train)

    hypotheses_ada = clf.estimators_
    y_train_pred = clf.predict(X_train)
    y_test_pred = clf.predict(X_test)

    print_accuracy(y_train, y_train_pred, y_test, y_test_pred)

    # ===============================================
    print('\nDecision tree:')

    clf2 = WeakClassifiers(n_estimators=NUM_WEAK_CLASSIFIERS,
                           max_depth=TREE_DEPTH)
    clf2.fit(X_train, y_train)

    y_train_pred2 = clf2.predict(X_train)
    y_test_pred2 = clf2.predict(X_test)

    if verbose:
        print('weights:\n', clf2.estimator_weights)

    print_accuracy(y_train, y_train_pred2, y_test, y_test_pred2)

    # ===============================================
    print('\nQBoost:')

    DW_PARAMS = {
        'num_reads': NUM_READS,
        'auto_scale': True,
        'num_spin_reversal_transforms': 10,
    }

    clf3 = QBoostClassifier(n_estimators=NUM_WEAK_CLASSIFIERS,
                            max_depth=TREE_DEPTH)
    clf3.fit(X_train, y_train, emb_sampler, lmd=lmd, **DW_PARAMS)

    y_train_dw = clf3.predict(X_train)
    y_test_dw = clf3.predict(X_test)

    if verbose:
        print('weights:\n', clf3.estimator_weights)

    print_accuracy(y_train, y_train_dw, y_test, y_test_dw)
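
print_accuracy is referenced but not defined in this excerpt. A plausible implementation, consistent with how it is called above (an assumption, not the demo's actual helper):

# Assumed implementation of the print_accuracy helper used above; it is not
# part of this excerpt, and the real demo's version may differ.
from sklearn import metrics

def print_accuracy(y_train, y_train_pred, y_test, y_test_pred):
    print('Accuracy on training set: {:.3f}'.format(
        metrics.accuracy_score(y_train, y_train_pred)))
    print('Accuracy on test set:     {:.3f}'.format(
        metrics.accuracy_score(y_test, y_test_pred)))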