Example 1
def test_sparse_and_verbose():
    """
    Make sure RBM works with sparse input when verbose=True
    """
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    from scipy.sparse import csc_matrix
    X = csc_matrix([[0.], [1.]])
    rbm = BernoulliRBM(n_components=2,
                       batch_size=2,
                       n_iter=1,
                       random_state=42,
                       verbose=True)
    try:
        rbm.fit(X)
        s = sys.stdout.getvalue()
        # make sure output is sound
        assert_true(
            re.match(
                r"\[BernoulliRBM\] Iteration 1,"
                r" pseudo-likelihood = -?(\d)+(\.\d+)?,"
                r" time = (\d|\.)+s", s))
    finally:
        sys.stdout = old_stdout
Example 2
def do_train(
        hdf='/home/yacc/packages/btc-trade-result-history/btc_all_in_one.h5',
        dataset='a'):
    """
    TODO add some comments
    """
    h = pd.HDFStore(hdf, 'r')
    df = h[dataset]
    h.close()
    X, y = gen_dataset_from_price(df, step=200, ahead=20, percent=0.01)
    print('\n data generated.')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
    print('train test split done.')
    #params = {'learning_rate': 0.1,'n_iter':20}
    #reg_clf = GradientBoostingRegressor(verbose=True,**params)
    reg_clf = SGDRegressor(verbose=True, n_iter=100)
    clf_rbm1 = BernoulliRBM(n_components=1024, verbose=True)
    clf_rbm2 = BernoulliRBM(n_components=512, verbose=True)
    clf_rbm3 = BernoulliRBM(n_components=256, verbose=True)
    clf = Pipeline(
        steps=[('clf1', clf_rbm1), ('clf2',
                                    clf_rbm2), ('clf3',
                                                clf_rbm3), ('clf_last',
                                                            reg_clf)])
    print('start training')
    clf.fit(X_train, y_train)
    with open('clf_pipeline_pick.pkl', 'wb') as f:  # pickle requires binary mode
        pickle.dump(clf, f)
        print('pickle done.')
Example 3
    def pretrain(self, save=True):

        visual_layer = self.data

        for i in range(len(self.hidden_sizes)):
            print("[DBN] Layer {} Pre-Training".format(i + 1))

            rbm = BernoulliRBM(n_components=self.hidden_sizes[i],
                               n_iter=self.rbm_iters[i],
                               learning_rate=self.rbm_learning_rate[i],
                               verbose=True,
                               batch_size=32)
            rbm.fit(visual_layer)
            self.rbm_weights.append(rbm.components_)
            self.rbm_biases.append(rbm.intercept_hidden_)
            self.rbm_h_act.append(rbm.transform(visual_layer))

            visual_layer = self.rbm_h_act[-1]

        if save:
            with open(self.outdir + "rbm_weights.p", 'wb') as f:
                pickle.dump(self.rbm_weights, f)

            with open(self.outdir + "rbm_biases.p", 'wb') as f:
                pickle.dump(self.rbm_biases, f)

            with open(self.outdir + "rbm_hidden.p", 'wb') as f:
                pickle.dump(self.rbm_h_act, f)
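
A minimal reload sketch for the arrays pretrain() pickles above; the file names and outdir follow the method, while the helper name load_pretrained is hypothetical.

import pickle

def load_pretrained(outdir):
    # load the per-layer weights, hidden biases and activations saved above
    with open(outdir + "rbm_weights.p", "rb") as f:
        weights = pickle.load(f)
    with open(outdir + "rbm_biases.p", "rb") as f:
        biases = pickle.load(f)
    with open(outdir + "rbm_hidden.p", "rb") as f:
        hidden = pickle.load(f)
    return weights, biases, hidden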
Example 4
def restrictedBoltzmannMachine(trainData, trainLabels, testData):
	logistic = linear_model.LogisticRegression(solver='lbfgs', max_iter=10000, multi_class='multinomial')
	rbm = BernoulliRBM(random_state=0, batch_size = 2000, verbose=True)

	rbm_features_classifier = Pipeline(steps=[('rbm', rbm), ('logistic', logistic)])

	# #############################################################################
	# Training

	# Hyper-parameters. These were set by cross-validation,
	# using a GridSearchCV. Here we are not performing cross-validation to
	# save time.
	rbm.learning_rate = 0.06
	rbm.n_iter = 20
	# More components tend to give better prediction performance, but larger
	# fitting time
	rbm.n_components = 100
	logistic.C = 6000

	# Training RBM-Logistic Pipeline
	rbm_features_classifier.fit(trainData, trainLabels)
	labels = rbm_features_classifier.predict(testData)

	#labels = list(labels)
	return labels

Example 5
 def fit(self, X, y=None):
     num = self.patch_num // X.size
     data = []
     for item in X:
         img = imread(str(item[0]))
         img = img_as_ubyte(rgb2gray(img))
         #img = self.binary(img)  # binarize
         tmp = extract_patches_2d(img, self.patch_size, max_patches=num,
                                  random_state=np.random.RandomState())
         data.append(tmp)
     
     data = np.vstack(data)
     data = data.reshape(data.shape[0], -1)
     data = np.asarray(data, 'float32')
     
      # after binarization, 0-1 scaling would not be needed
     data = data - np.min(data, 0)
     data = data/(np.max(data, 0) + 0.0001)  # 0-1 scaling
     
      self.rbm = BernoulliRBM(n_components=self.n_components,
                              learning_rate=self.learning_rate,
                              n_iter=self.n_iter,
                              batch_size=self.batch_size,
                              verbose=True)
     self.rbm.fit(data)
     return self
Example 6
def test_nn(folder='data_270_json'):
    all_data = put_together(folder)
    vec = DictVectorizer()
    all_detects_vec = vec.fit_transform(all_data['defects'])
    model = BernoulliRBM()
    model.fit(all_detects_vec)
    ready = []
    for fn in os.listdir(folder):
        data = None
        fullname = os.path.join(folder, fn)
        if os.path.isfile(fullname):
            with open(fullname) as f:
                try:
                    data = json.load(f)
                except ValueError:
                    pass
        if data:
            fe = get_features(data)
            if len(fe['defects']) > 0:
                defects_vec = vec.transform(fe['defects'])  # don't shadow the vectorizer
                p = model.transform(defects_vec)
                data['vd'] = p.tolist()
                r = {}
                r['vzw'] = data['vzw']
                r['defects'] = p.tolist()
                r['measurement'] = fe['measurement']
                ready.append(r)  # only append when r was actually built
Example 7
def init_coefs_(X, y):
    model = BernoulliRBM(random_state=0,
                         verbose=True,
                         learning_rate=0.1,
                         n_iter=20)
    model.fit(X, y)
    return model.intercept_visible_
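
A small usage sketch for init_coefs_ on synthetic binary data; the shapes are illustrative, and BernoulliRBM ignores the y argument.

import numpy as np

rng = np.random.RandomState(0)
X = rng.binomial(1, 0.5, size=(100, 30)).astype(float)
visible_bias = init_coefs_(X, None)
print(visible_bias.shape)  # (30,): one intercept per visible unit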
Example 8
def train_rbm_stack(data,
                    network_structure,
                    batch_size=10,
                    learning_rate=0.1,
                    n_iter=10,
                    random_state=None,
                    verbose=0):
    weights = []
    visible_unit_samples = data
    for layer in network_structure:

        model = BernoulliRBM(n_components=layer,
                             batch_size=batch_size,
                             learning_rate=learning_rate,
                             n_iter=n_iter,
                             random_state=random_state,
                             verbose=verbose)

        hidden_unit_samples = model.fit_transform(visible_unit_samples)

        weights.append(model.components_)

        visible_unit_samples = hidden_unit_samples

    return weights
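
A quick usage sketch for train_rbm_stack on synthetic binary data; the layer sizes and iteration count are illustrative.

import numpy as np

rng = np.random.RandomState(0)
data = rng.binomial(1, 0.5, size=(100, 64)).astype(float)
weights = train_rbm_stack(data, network_structure=[32, 16],
                          n_iter=2, random_state=0)
print([w.shape for w in weights])  # [(32, 64), (16, 32)]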
Example 9
def train_nn(data, expected_values):
    data, expected_values = preprocess_data(data,
                                            expected_values,
                                            remove_high_rr=False)
    logger.info("Starting feature reduction.")
    X = np.asarray(data[1:], 'float64')
    logger.info("Done with feature reduction.")
    Y = expected_values
    X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                        Y,
                                                        test_size=0.2,
                                                        random_state=0)

    logger.info("Starting NeuralNetwork training.")

    logistic = linear_model.LogisticRegression()
    rbm = BernoulliRBM(random_state=0, verbose=True)
    clf = Pipeline(steps=[('rbm', rbm), ('logistic', logistic)])

    rbm.learning_rate = 0.06
    rbm.n_iter = 20
    rbm.n_components = 100
    logistic.C = 1.0

    clf.fit(X_train, Y_train)

    # Evaluation
    #TODO: Make unified evaluation
    logger.info("Logistic regression using RBM features:\n%s\n" %
                (metrics.classification_report(Y_test, clf.predict(X_test))))

    logger.info("Done with NeuralNetwork training.")
    return lambda x: wrap_threshold_distribtuion(
        np.array(clf.predict(x)).astype(float))
Example 10
def RBM_SVM(trainfeatures, testfeatures, trainlabels, testlabels):
    # ******************* Scikit-learning RBM + SVM *******************
    print "train RBM+SVM model"

    ##    trainfeatures = (trainfeatures - np.min(trainfeatures, 0)) / (np.max(trainfeatures, 0) + 0.0001)  # 0-1 scaling
    min_max_scaler = preprocessing.MinMaxScaler()
    trainfeatures_fs = min_max_scaler.fit_transform(trainfeatures)
    testfeatures_fs = min_max_scaler.transform(testfeatures)

    # SVM parameters
    clf = svm.SVC(C=5.0, kernel='sigmoid', degree=3, gamma=0.5, coef0=10.0,
                  shrinking=True, probability=False, tol=0.001, cache_size=200,
                  class_weight=None, verbose=False, max_iter=-1, random_state=None)

    # RBM parameters
    rbm = BernoulliRBM(random_state=0, verbose=True)
    rbm.learning_rate = 0.06
    rbm.n_iter = 20

    # Machine learning pipeline
    classifier = Pipeline(steps=[('rbm', rbm), ('svm', clf)])

    # More components tend to give better prediction performance, but larger
    # fitting time
    rbm.n_components = 400
    classifier.fit(trainfeatures_fs, trainlabels)
    results = classifier.predict(testfeatures_fs)

    results = results.ravel()
    testerror = float(len(testlabels)
                      - np.sum(testlabels == results))/float(len(testlabels))
    # print"error rate with SVM  is %.4f" %testerror

    return testerror
Example 11
def SGD():
    SGD = linear_model.SGDClassifier(loss='hinge',penalty='l2',random_state=42,n_jobs=-1,epsilon=0.001)
    rbm = BernoulliRBM(random_state=0, verbose=True)
    classifier = Pipeline(steps=[('rbm', rbm), ('SGD', SGD)])
    # RBM parameters obtained after cross-validation
    rbm.learning_rate = 0.01
    rbm.n_iter = 15
    rbm.n_components = 50
    # note: SGDClassifier has no C parameter; regularization is set via alpha
    SGD.alpha = 0.0001
    # Training SGD
    SGD_classifier = linear_model.SGDClassifier(loss='hinge',penalty='l2',random_state=42,n_jobs=-1,alpha=0.0001, epsilon=0.001)
    SGD_classifier.fit(data_train,target_train)
    # Training RBM-SGD Pipeline    
    classifier.fit(data_train,target_train)
    print("printing_results")
    
    print("SGD using RBM features:\n%s\n" % (metrics.classification_report(target_test,classifier.predict(data_test))))
    cm = confusion_matrix(target_test,classifier.predict(data_test))
    plt.matshow(cm)
    plt.title('Confusion Matrix SVM with SGD with RBM Features')
    plt.colorbar()
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.savefig('confusion_matrix1.jpg')
    print("SGD using raw pixel features:\n%s\n" % (metrics.classification_report(target_test,SGD_classifier.predict(data_test))))
    cm1 = confusion_matrix(target_test,SGD_classifier.predict(data_test))
    plt.matshow(cm1)
    plt.title('Confusion Matrix SVM with SGD Raw Features')
    plt.colorbar()
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.savefig('confusion_matrix2.jpg')
Example 12
def train_rbm_pcd(x_train, x_val, n_hidden, lr, inftype, n_iter=1000):
    assert n_iter > 100  # we checkpoint every 20 iterations (see below)
    rbm = BernoulliRBM(
        n_components=n_hidden,
        learning_rate=lr,
        batch_size=x_train.shape[0],
        n_iter=n_iter,
        verbose=0,
    )
    best_score, best_rbm = np.inf, None
    for it in range(n_iter):
        rbm.partial_fit(x_train)
        if (it + 1) % 20 == 0:  # checkpoint every 20
            score = test_rbm_pcd(
                x_val,
                rbm.components_,
                rbm.intercept_hidden_,
                rbm.intercept_visible_,
                inftype,
            )
            if score < best_score:
                best_score = score
                best_rbm = (
                    rbm.components_.copy(),
                    rbm.intercept_hidden_.copy(),
                    rbm.intercept_visible_.copy(),
                )
    return best_rbm, best_score
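
A usage sketch on synthetic binary data, assuming the scoring helper test_rbm_pcd called above is in scope (Example 16 below shows a Gibbs-based variant); all sizes are illustrative.

import numpy as np

rng = np.random.RandomState(0)
x = rng.binomial(1, 0.5, size=(60, 16)).astype(float)
best_rbm, best_score = train_rbm_pcd(x[:50], x[50:], n_hidden=8,
                                     lr=0.05, inftype='gibbs', n_iter=200)
weights, hidden_bias, visible_bias = best_rbm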
Example 13
def build_classifier(clf_name):

    clf = None
    parameters = {}

    if clf_name == "svm":
        clf = svm.SVC(kernel='linear', C=10)
        parameters = {}

    elif clf_name == "knn":
        clf = neighbors.KNeighborsClassifier(n_neighbors=5, weights='uniform', algorithm='brute', leaf_size=30,
                                             metric='cosine', metric_params=None)

    elif clf_name == "rmb":
        logistic = linear_model.LogisticRegression()
        rbm = BernoulliRBM(random_state=0, verbose=True)
        rbm.learning_rate = 0.01
        rbm.n_iter = 20
        rbm.n_components = 100
        logistic.C = 6000
        clf = Pipeline(steps=[('rbm', rbm), ('logistic', logistic)])
        #parameters = {'clf__C': (1, 10)}

    elif clf_name == "tsne":
        clf = TSNE(n_components=2, init='random', metric='cosine')

    return clf, parameters
Example 14
def rbm_001():
    s = 15
    crop = 150
    n_patches = 400000
    rf_size = 5

    train_x_crop_scale = CropScaleImageTransformer(training=True,
                                                   result_path='data/data_train_crop_{}_scale_{}.npy'.format(crop, s),
                                                   crop_size=crop,
                                                   scaled_size=s,
                                                   n_jobs=-1,
                                                   memmap=True)

    patch_extractor = models.KMeansFeatures.PatchSampler(n_patches=n_patches,
                                                         patch_size=rf_size,
                                                         n_jobs=-1)
    images = train_x_crop_scale.transform()
    images = images.reshape((images.shape[0], 15 * 15 * 3))

    # rbm needs inputs to be between 0 and 1
    scaler = MinMaxScaler()
    images = scaler.fit_transform(images)

    # Training takes a long time: reportedly about 80 seconds per iteration,
    # though it seems longer in practice, and this is only with 256 components
    rbm = BernoulliRBM(verbose=1)
    rbm.fit(images)

    train_x = rbm.transform(images)
    train_y = classes.train_solutions.data

    # 0.138 CV on 50% of the dataset
    wrapper = ModelWrapper(models.Ridge.RidgeRFEstimator, {'alpha': 500, 'n_estimators': 500}, n_jobs=-1)
    wrapper.cross_validation(train_x, train_y, sample=0.5, parallel_estimator=True)
Example 15
def SGD_cross_validation():
    SGD = linear_model.SGDClassifier(loss='hinge',
                                     penalty='l2',
                                     random_state=42,
                                     n_jobs=-1,
                                     epsilon=0.001)
    # cross-validation for the SGD classifier
    rbm = BernoulliRBM(random_state=0, verbose=True)
    classifier = Pipeline(steps=[('rbm', rbm), ('SGD', SGD)])
    rbm.n_iter = 100
    cv = cross_validation.StratifiedKFold(output, 3)
    score_func = metrics.f1_score
    parameters = {
        "rbm__learning_rate": [0.1, 0.01, 0.001, 0.0001],
        "rbm__n_components": [100, 200, 300, 400, 500, 600, 700, 800],
        "SGD__alpha": [0.1, 0.01, 0.001, 0.0001],
        "SGD__C": [1, 100, 1000, 10000]
    }
    grid_search = GridSearchCV(classifier,
                               parameters,
                               score_func=score_func,
                               cv=cv)
    grid_search.fit(input, output)
    print "Best %s: %0.3f" % (score_func.__name__, grid_search.best_score_)
    print "Best parameters set:"
    best_parameters = grid_search.best_estimator_.get_params()
    for param_name in sorted(parameters.keys()):
        print "\t%s: %r" % (param_name, best_parameters[param_name])
Example 16
def test_rbm_pcd_gibbs(x_test,
                       Whv,
                       bh,
                       bv,
                       p_target=0.5,
                       n_gibbs_steps=5000,
                       thinning=10,
                       burnin=20):
    rbm = BernoulliRBM(n_components=Whv.shape[0], learning_rate=0.0)
    rbm.components_, rbm.intercept_hidden_, rbm.intercept_visible_ = Whv, bh, bv
    evidence_mask = np.random.binomial(
        1, p_target, x_test.shape)  # 0: target node, 1: evidence node

    V = np.random.binomial(1, p_target, x_test.shape)
    V = x_test * evidence_mask + V * (1 - evidence_mask)
    prob1 = np.zeros_like(V)
    count = 0
    for it in range(n_gibbs_steps):
        V = rbm.gibbs(V)
        V = x_test * evidence_mask + V * (1 - evidence_mask)
        if (it + 1) % thinning == 0 and it > burnin:
            prob1 += V
            count += 1
    prob1 /= count
    prob1_clipped = prob1.clip(1e-15, 1 - 1e-15)
    target_mask = 1 - evidence_mask
    logp = x_test * np.log(prob1_clipped) + (
        1 - x_test) * np.log(1 - prob1_clipped)
    logp *= target_mask
    return -logp.sum() / target_mask.sum() / np.log(2)
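
A self-contained smoke test for test_rbm_pcd_gibbs with random parameters; the shapes follow the function's use of Whv as (n_hidden, n_visible), and the values are illustrative.

import numpy as np

rng = np.random.RandomState(0)
n_visible, n_hidden = 16, 8
Whv = rng.normal(scale=0.1, size=(n_hidden, n_visible))
bh, bv = np.zeros(n_hidden), np.zeros(n_visible)
x_test = rng.binomial(1, 0.5, size=(20, n_visible)).astype(float)
bits = test_rbm_pcd_gibbs(x_test, Whv, bh, bv, n_gibbs_steps=200)
print("negative log-likelihood per target bit:", bits)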
Example 17
 def train(self, train_set, train_labels):
     if self.supervised and train_labels is not None:
         return self._train_unsupervised_methods_per_class(train_set, train_labels)
     else:
         model = BernoulliRBM(**self.classifier_kwargs)
         model.fit(train_set)
         return model
Example 18
def rbm():
    X_train, Y_train, X_test, Y_test = train_test_data(is_feature=False)

    rbm = BernoulliRBM(random_state=0, verbose=True)
    logistic = linear_model.LogisticRegression(solver='newton-cg', tol=1)
    rbm_features_classifier = Pipeline(steps=[('rbm',
                                               rbm), ('logistic', logistic)])

    rbm.learning_rate = 0.06
    rbm.n_iter = 10
    # More components tend to give better prediction performance, but larger
    # fitting time
    rbm.n_components = 100
    logistic.C = 50

    X_train = X_train.reshape(X_train.shape[0], -1)
    # Training RBM-Logistic Pipeline
    rbm_features_classifier.fit(X_train, Y_train)

    # # Training the Logistic regression classifier directly on the pixel
    # raw_pixel_classifier = clone(logistic)
    # raw_pixel_classifier.C = 100.
    # raw_pixel_classifier.fit(X_train, Y_train)

    X_test = X_test.reshape(X_test.shape[0], -1)
    Y_pred = rbm_features_classifier.predict(X_test)

    # print("Logistic regression using RBM features:\n%s\n" % (
    #     metrics.classification_report(Y_test, Y_pred)))

    # Y_pred = raw_pixel_classifier.predict(X_test)

    result_analysis(Y_pred, Y_test, 'BernoulliRBM')
Example 19
def process_machine_learning(symbol, i, path):
    
    params['path'] = path
    label, feature = load_data(params['path'])

    #scales values in features so that they range from 0 to 1
    minmaxScaler = MinMaxScaler()
    feature = minmaxScaler.fit_transform(feature)
    
    print("Dimensions")
    print("label", label.shape)
    print("feature", feature.shape)

    #feature selection using RBM

    start_time = time.time()

    rbm = BernoulliRBM(n_components=params['reduced_feature'], learning_rate=params['learning_rate'], batch_size=params['batchsize'], n_iter=params['n_iter'])
    feature = rbm.fit_transform(feature)

    print("RBM--- %s seconds ---" % (time.time() - start_time))

    print("Dimensions after RBM")
    print("label", label.shape)
    print("feature", feature.shape)

    x_train, x_test, y_train, y_test = train_test_split(feature, label, i)
    y_pred = random_forest(x_train, x_test, y_train)
    signal_pd=pd.DataFrame({'y_test':y_test[:,0],'y_pred':y_pred})
    signal_pd.to_csv(os.path.join('..', 'data', 'rbm_random_forest',symbol,symbol+'_'+str(i)+'.csv'))
Example 20
def run_test(params, model):
    
    if model == "rf":
        n_tree, mtry = params
        print "# Trees: ", n_tree
        print "mtry: ", mtry
        rf = RandomForestClassifier(n_estimators= int(n_tree), verbose = True, 
                                n_jobs = -1, max_features= int(mtry))
        rf.fit(X, y)
        modelPred = rf.predict(X)
    elif model == "svm":
        C, kernel = params
        print "# Cost: ", C
        print "kernel: ", kernel
        svmod = SVC(int(C), kernel)
        svmod.fit(X, y)
        modelPred = svmod.predict(X)
    elif model == "knn":
        k = params
        print "# k: ", k
        knnmod = KNeighborsClassifier(int(k))
        knnmod.fit(X, y)
        modelPred =knnmod.predict(X)
    elif model == "NeuralNetwork":
        n_components, learning_rate, batch_size, n_iter = params
        print "# n_components: ", n_components
        print "# learning_rate: ", learning_rate
        print "# batch_size: ", batch_size
        print "# n_iter: ", n_iter 
        nnmod = BernoulliRBM(int(n_components), learning_rate, int(batch_size), int(n_iter))
        nnmod.fit(X, y)
        modelPred =nnmod.score_samples(X)
    
    accuError = AccuracyErrorCalc(y, modelPred)
    return accuError
Example 21
def Logistic():
    logistic = linear_model.LogisticRegression()
    rbm = BernoulliRBM(random_state=0, verbose=True)
    classifier = Pipeline(steps=[('rbm', rbm), ('logistic', logistic)])
    # RBM parameters obtained after cross-validation
    rbm.learning_rate = 0.01
    rbm.n_iter = 121
    rbm.n_components = 700
    logistic.C= 1.0  
    # Training RBM-Logistic Pipeline
    classifier.fit(data_train,target_train)
    # Training Logistic regression
    logistic_classifier = linear_model.LogisticRegression(C=1.0)
    logistic_classifier.fit(data_train,target_train)    
    print("printing_results")
    print("Logistic regression using RBM features:\n%s\n" % (metrics.classification_report(target_test,classifier.predict(data_test))))
    cm3 = confusion_matrix(target_test,classifier.predict(data_test))
    plt.matshow(cm3)
    plt.title('Confusion Matrix Logistic Regression with RBM Features')
    plt.colorbar()
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.savefig('confusion_matrix3.jpg')
    print("Logistic regression using raw pixel features:\n%s\n" % (metrics.classification_report(target_test,logistic_classifier.predict(data_test))))
    cm4 = confusion_matrix(target_test,logistic_classifier.predict(data_test))
    plt.matshow(cm4)
    plt.title('Confusion Matrix Logistic Regression')
    plt.colorbar()
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.savefig('confusion_matrix4.jpg')
#Logistic()
Example 22
def words_to_vec(df):
    print("Method: words_to_vec. Working on words to vecs....")

    buzzCount = CountVectorizer(stop_words='english', max_features=50, ngram_range=(1, 1), token_pattern=u'.*_.*')
    buzzCount_te_sparse = buzzCount.fit_transform(df["buzzers"])

    buzzTFid = TfidfVectorizer(stop_words='english', max_features=500, ngram_range=(2, 9))
    buzzTFid_te_sparse = buzzTFid.fit_transform(df["description"])
    _boltzman = BernoulliRBM(n_components=35)
    _boltzman.fit(buzzTFid_te_sparse)
    buzzTFid_boltzman = _boltzman.transform(buzzTFid_te_sparse)

    buzzCount_df = pd.DataFrame(buzzCount_te_sparse.toarray(), columns=buzzCount.get_feature_names())
    buzzTFid_boltzman_cols = ['buzz_boltz_' + str(ag) for ag in range(1, buzzTFid_boltzman.shape[1] + 1)]
    buzzTFid_boltzman_df = pd.DataFrame(buzzTFid_boltzman, columns=buzzTFid_boltzman_cols)
    df = pd.concat([df, buzzCount_df, buzzTFid_boltzman_df], axis=1)

    #fagg = FeatureAgglomeration(n_clusters=100)
    #fagg.fit(buzzTFid_te_sparse.toarray())
    #buzzTFid_fagg = fagg.transform(buzzTFid_te_sparse.toarray())
    #buzzCount_df = pd.DataFrame(buzzCount_te_sparse.toarray(), columns=buzzCount.get_feature_names())
    #buzzTFid_fagg_cols = ['buzz_fagg' + str(ag) for ag in range(1, buzzTFid_fagg.shape[1] + 1)]
    #buzzTFid_fagg_df = pd.DataFrame(buzzTFid_fagg, columns=buzzTFid_fagg_cols)
    #df = pd.concat([df, buzzTFid_fagg_df], axis=1)

    print("Method: words_to_vec. Returning words to vecs....")
    return df
Example 23
    def fit(self, X, y):
        self.rbm_1 = BernoulliRBM(verbose=self.verbose,
                                  n_components=self.n_components_first,
                                  n_iter=self.n_iter_first,
                                  learning_rate=self.learning_rate_first)
        self.rbm_2 = BernoulliRBM(verbose=self.verbose,
                                  n_components=self.n_components_second,
                                  n_iter=self.n_iter_second,
                                  learning_rate=self.learning_rate_second)
        self.first_pipeline = Pipeline(
            steps=[('rbm_1', self.rbm_1), ('rbm_2', self.rbm_2)])
        self.first_pipeline.fit(X, y)

        # TODO improve. Look at how it is done in classify
        new_features = []
        for example, label in zip(X, y):
            transformed = self.first_pipeline.transform(example)[0]
            new_features.append(
                np.concatenate((transformed, self.label_to_feature(label))))

        self.rbm_3 = BernoulliRBM(verbose=self.verbose,
                                  n_components=self.n_components_third,
                                  n_iter=self.n_iter_third,
                                  learning_rate=self.learning_rate_third)
        self.rbm_3.fit(new_features, y)
Example 24
def combine(data):

    # unpack data
    (numerical, categorical, other, nan) = data

    # create numlog (add a little bit to prevent values <= 0)
    numlog = np.log(numerical + 0.01)
    numlog = (numlog - numlog.mean()) / (numlog.max() - numlog.min())
    numlog = numlog.fillna(0)

    # normalize and impute numerical
    numerical = (numerical - numerical.mean()) / (numerical.max() -
                                                  numerical.min())
    numerical = numerical.fillna(0)

    # RBM categorical
    rbmcat = pd.get_dummies(categorical)

    # RBM other
    rbmother = pd.get_dummies(pd.DataFrame(splitcomplex(np.array(other))))

    # factorize categorical
    for column in categorical:
        categorical[column], _ = pd.factorize(categorical[column])
    categorical = (categorical - categorical.mean()) / (categorical.max() -
                                                        categorical.min())

    # factorize other
    for column in other:
        other[column], _ = pd.factorize(other[column])
    other = (other - other.mean()) / (other.max() - other.min())

    ### CONVERT TO NUMPY ###
    numerical = np.array(numerical)
    numlog = np.array(numlog)
    categorical = np.array(categorical)
    rbmcat = np.array(rbmcat)
    other = np.array(other)
    rbmother = np.array(rbmother)
    nan = np.array(nan)
    ########################

    # rbm over rbmcat and rbmother
    rbm = BernoulliRBM(n_components=100,
                       batch_size=100,
                       n_iter=50,
                       learning_rate=0.02,
                       verbose=1,
                       random_state=1)
    rbmdata = rbm.fit_transform(np.concatenate((rbmcat, rbmother), axis=1))
    rbmdata = (rbmdata - rbmdata.mean()) / (rbmdata.max() - rbmdata.min())

    # normalize nan
    nan = (nan - nan.mean()) / (nan.max() - nan.min())

    # concat and return
    data = np.concatenate(
        (numerical, numlog, categorical, other, rbmdata, nan), axis=1)
    return data
Example 25
def neural_network_classify(train_data,train_label,test_data):

    # BernoulliRBM alone has no predict(); chain it with a classifier
    nnc = Pipeline(steps=[('rbm', BernoulliRBM(random_state=0, verbose=True)),
                          ('logistic', linear_model.LogisticRegression())])
    nnc.fit(train_data, ravel(train_label))
    test_label = nnc.predict(test_data)

    save_result(test_label,'sklearn_neural_network_classify_Result.csv')  
    return test_label
Example 26
def build_model(training_data):
    """
    build and train the rbm.
    """
    rbm = BernoulliRBM(random_state=0, verbose=True, n_components=100,
                       n_iter=50)
    rbm.fit(training_data)
    return rbm
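
A usage sketch for build_model with synthetic data already scaled to [0, 1]; the array shape is illustrative.

import numpy as np

rng = np.random.RandomState(0)
training_data = rng.rand(200, 30)
rbm = build_model(training_data)
hidden_repr = rbm.transform(training_data)  # (200, 100) hidden-unit probabilities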
Example 27
def boltzmann_machine(train_matrix, n_comp, learning_rate=0.06, n_iter=20):
    from sklearn.neural_network import BernoulliRBM
    rbm = BernoulliRBM(n_components=n_comp,
                       learning_rate=learning_rate,
                       n_iter=n_iter)
    rbm_transformed = rbm.fit_transform(train_matrix)
    print("successful RBM transform", rbm_transformed.shape)
    return rbm_transformed
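
A usage sketch for boltzmann_machine on synthetic data in [0, 1]; the sizes are illustrative.

import numpy as np

rng = np.random.RandomState(0)
train_matrix = rng.rand(100, 32)  # inputs already scaled to [0, 1]
reduced = boltzmann_machine(train_matrix, n_comp=8, n_iter=5)
print(reduced.shape)  # (100, 8)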
Example 28
def neural_network_classify(train_data,train_label,test_data):
    # nnc=MLPClassifier(algorithm='l-bfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
    # BernoulliRBM alone has no predict(); chain it with a classifier
    nnc = Pipeline(steps=[('rbm', BernoulliRBM(random_state=0, verbose=True)),
                          ('logistic', linear_model.LogisticRegression())])
    nnc.fit(train_data, ravel(train_label))
    test_label = nnc.predict(test_data)

    save_result(test_label,'sklearn_neural_network_classify_Result.csv')  
    return test_label 
Example 29
def test_rbm_verbose():
    rbm = BernoulliRBM(n_iter=2, verbose=10)
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        rbm.fit(Xdigits)
    finally:
        sys.stdout = old_stdout
Example 30
 def _after_publish(self, documents):
     self._tfidf_matrix = self._vectorizer.fit_transform(documents)
     self._bernoulliRBM = BernoulliRBM(learning_rate=1)
     self._rbm_matrix = self._bernoulliRBM.fit_transform(self._tfidf_matrix)
     self._bernoulliRBM_index = NearestNeighbors(len(self._service_array),
                                                 algorithm='brute',
                                                 metric='euclidean')
     self._bernoulliRBM_index.fit(self._rbm_matrix)
Example 31
def test_rbm_verbose():
    rbm = BernoulliRBM(n_iter=2, verbose=10)
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        rbm.fit(Xdigits)
    finally:
        sys.stdout = old_stdout
Example 32
def Bernoulli(X_train, X_test, y_train, y_test):
    mod = BernoulliRBM(random_state=0, verbose=True)
    mod.fit(X_train, y_train)
    print("Done training")
    # BernoulliRBM is unsupervised: it has no predict()/score(), so use
    # transform() for hidden features and score_samples() for pseudo-likelihoods
    bernoulli_features = mod.transform(X_test)
    print("Done testing")
    bernoulli_score = mod.score_samples(X_test).mean()
    return bernoulli_score, bernoulli_features
Example 33
def runRBM(arr, clsfr):#iters, lrn_rate, logistic_c_val, logistic_c_val2, n_comp, filename):
    global file_dir, nEvents, solutionFile
    iters = int(arr[0]*10)
    lrn_rate = arr[1]
    logistic_c_val = arr[2]*1000.0
    logistic_c_val2 = arr[3]*100.0
    n_comp = int(arr[4]*100)
    filename = 'rbm_iter'+str(iters)+'_logc'+str(logistic_c_val)+'_logcc'+str(logistic_c_val2)+'_lrn'+str(lrn_rate)+'_nc'+str(n_comp)# low
    logistic = linear_model.LogisticRegression()
    rbm = BernoulliRBM(random_state=0, verbose=True)
    
    classifier = Pipeline(steps=[('rbm', rbm), ('logistic', logistic)])

    ###############################################################################
    # Training

    # Hyper-parameters. These were set by cross-validation,
    # using a GridSearchCV. Here we are not performing cross-validation to
    # save time.
    rbm.learning_rate = lrn_rate #0.10#0.06
    rbm.n_iter = iters #20
    # More components tend to give better prediction performance, but larger
    # fitting time
    rbm.n_components = n_comp # 250
    logistic.C = logistic_c_val #6000.0

    # Training RBM-Logistic Pipeline
    classifier.fit(sigtr[train_input].values, sigtr['Label'].values)

    # Training Logistic regression
    logistic_classifier = linear_model.LogisticRegression(C=logistic_c_val2)#100.0
    logistic_classifier.fit(sigtr[train_input].values, sigtr['Label'].values)

    ###############################################################################
    # Evaluation
    if clsfr == 0:
        clsnn_pred=classifier.predict(sigtest[train_input].values)
        solnFile('clsnn_'+filename,clsnn_pred,sigtest['EventId'].values)#,bkgtest)
        ams_score = ams.AMS_metric(solutionFile, file_dir+filename+'.out', nEvents)
        print(ams_score)
        logfile.write(filename+': ' + str(ams_score)+'\n')
    
    elif clsfr == 1:
        log_cls_pred = logistic_classifier.predict(sigtest[train_input].values)
        solnFile('lognn_'+filename,log_cls_pred,sigtest['EventId'].values)#,bkgtest)
        ams_score = ams.AMS_metric(solutionFile, file_dir+'lognn_'+filename+'.out', nEvents)
        print(ams_score)
        logfile.write('lognn ' + filename+': ' + str(ams_score)+'\n')
    else:
        logistic_classifier_tx = linear_model.LogisticRegression(C=logistic_c_val2)
        logistic_classifier_tx.fit_transform(sigtr[train_input].values, sigtr['Label'].values)
        log_cls_tx_pred = logistic_classifier_tx.predict(sigtest[train_input].values)
        solnFile('lognntx_'+filename,log_cls_tx_pred,sigtest['EventId'].values)#,bkgtest)
        ams_score = ams.AMS_metric(solutionFile, file_dir+filename+'.out', nEvents)
        print(ams_score)
        logfile.write('lognntx '+ filename+': ' + str(ams_score)+'\n')

    return -1.0*float(ams_score)
Example 34
    def transform(self, X):
        brbm = BernoulliRBM(n_components=256,
                            learning_rate=0.1,
                            batch_size=10,
                            n_iter=10,
                            verbose=0,
                            random_state=None)

        return pd.DataFrame(brbm.fit_transform(X))
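
Note that the transform above refits a fresh RBM on every call, so repeated calls yield different features. A sketch of a fit/transform split that keeps the learned components (the class name here is illustrative):

import pandas as pd
from sklearn.neural_network import BernoulliRBM

class RBMFeatures:
    def fit(self, X, y=None):
        self.brbm_ = BernoulliRBM(n_components=256, learning_rate=0.1,
                                  batch_size=10, n_iter=10, random_state=0)
        self.brbm_.fit(X)
        return self

    def transform(self, X):
        # reuse the RBM fitted in fit() instead of refitting on each call
        return pd.DataFrame(self.brbm_.transform(X))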
Example 35
def test_feature_names_out(method):
    """Check `get_feature_names_out` for `BernoulliRBM`."""
    n_components = 10
    rbm = BernoulliRBM(n_components=n_components)
    getattr(rbm, method)(Xdigits)

    names = rbm.get_feature_names_out()
    expected_names = [f"bernoullirbm{i}" for i in range(n_components)]
    assert_array_equal(expected_names, names)
Example 36
def test_transformer_dtypes_casting(dtype_in, dtype_out):
    X = Xdigits[:100].astype(dtype_in)
    rbm = BernoulliRBM(n_components=16, batch_size=5, n_iter=5, random_state=42)
    Xt = rbm.fit_transform(X)

    # dtype_in and dtype_out should be consistent
    assert Xt.dtype == dtype_out, "transform dtype: {} - original dtype: {}".format(
        Xt.dtype, X.dtype
    )
Example 37
def test_transform():
    X = Xdigits[:100]
    rbm1 = BernoulliRBM(n_components=16, batch_size=5, n_iter=5, random_state=42)
    rbm1.fit(X)

    Xt1 = rbm1.transform(X)
    Xt2 = rbm1._mean_hiddens(X)

    assert_array_equal(Xt1, Xt2)
Example 38
def BernoulliRBM_classifier(best_parameters={}):
    from sklearn.neural_network import BernoulliRBM
    if len(best_parameters) > 0:
        model = BernoulliRBM(n_components=best_parameters['Model__n_components'], learning_rate=best_parameters['Model__learning_rate'],
                             batch_size=best_parameters['Model__batch_size'], n_iter=best_parameters['Model__n_iter'],
                             verbose=best_parameters['Model__verbose'], random_state=best_parameters['Model__random_state'])
    else:
        model = BernoulliRBM()
    return model
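
A hypothetical best_parameters dict matching the keys the function reads; the values are illustrative.

best_parameters = {
    'Model__n_components': 64,
    'Model__learning_rate': 0.05,
    'Model__batch_size': 32,
    'Model__n_iter': 15,
    'Model__verbose': 0,
    'Model__random_state': 0,
}
model = BernoulliRBM_classifier(best_parameters)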
Example 39
class DeepRbmMnistClassifier:
    def __init__(self):
        self.n_components_first = 500
        self.n_components_second = 500
        self.n_components_third = 2000
        self.n_iter_first = 20
        self.n_iter_second = 20
        self.n_iter_third = 20
        self.learning_rate_first = 0.06
        self.learning_rate_second = 0.06
        self.learning_rate_third = 0.06
        self.verbose = True

    def label_to_feature(self, y):
        feature = [0] * 10
        feature[y] = 1
        return feature

    def fit(self, X, y):
        self.rbm_1 = BernoulliRBM(verbose=self.verbose,
                                  n_components=self.n_components_first,
                                  n_iter=self.n_iter_first,
                                  learning_rate=self.learning_rate_first)
        self.rbm_2 = BernoulliRBM(verbose=self.verbose,
                                  n_components=self.n_components_second,
                                  n_iter=self.n_iter_second,
                                  learning_rate=self.learning_rate_second)
        self.first_pipeline = Pipeline(
            steps=[('rbm_1', self.rbm_1), ('rbm_2', self.rbm_2)])
        self.first_pipeline.fit(X, y)

        # TODO improve. Look at how it is done in classify
        new_features = []
        for example, label in zip(X, y):
            transformed = self.first_pipeline.transform(example)[0]
            new_features.append(
                np.concatenate((transformed, self.label_to_feature(label))))

        self.rbm_3 = BernoulliRBM(verbose=self.verbose,
                                  n_components=self.n_components_third,
                                  n_iter=self.n_iter_third,
                                  learning_rate=self.learning_rate_third)
        self.rbm_3.fit(new_features, y)

    def classify(self, X):
        transformed = self.first_pipeline.transform(X)
        transformed = np.concatenate(
            (transformed, [[0] * 10] * len(transformed)), axis=1)

        # The inverse of rbm_3 to go from hidden layer to visible layer
        rbm_aux = BernoulliRBM()
        rbm_aux.intercept_hidden_ = self.rbm_3.intercept_visible_
        rbm_aux.intercept_visible_ = self.rbm_3.intercept_hidden_
        rbm_aux.components_ = np.transpose(self.rbm_3.components_)
        results = rbm_aux.transform(self.rbm_3.transform(transformed))
        results = results[:, -10:]
        return np.argmax(results, axis=1)
Example 40
def test_gibbs_smoke():
    """ just seek if we don't get NaNs sampling the full digits dataset """
    rng = np.random.RandomState(42)
    X = Xdigits
    rbm1 = BernoulliRBM(n_components=42, batch_size=10,
                        n_iter=20, random_state=rng)
    rbm1.fit(X)
    X_sampled = rbm1.gibbs(X)
    assert_all_finite(X_sampled)
Example 41
def test_fit():
    X = Xdigits.copy()

    rbm = BernoulliRBM(n_components=64, learning_rate=0.1, batch_size=10, n_iter=7, random_state=9)
    rbm.fit(X)

    assert_almost_equal(rbm.score_samples(X).mean(), -21.0, decimal=0)

    # in-place tricks shouldn't have modified X
    assert_array_equal(X, Xdigits)
Example 42
def brbm_rf(Xtr, ytr, Xte=None, yte=None):
    randomforest = ensemble.RandomForestClassifier(n_jobs=-1, n_estimators=100)
    rbm = BernoulliRBM(random_state=0)
    classifier = Pipeline(steps=[('rbm', rbm), ('randomforest', randomforest)])

    rbm.learning_rate = 0.025
    rbm.n_iter = 250
    rbm.n_components = 100

    return simple_classification(classifier, Xtr, ytr, Xte, yte)
Example 43
def test_sample_hiddens():
    rng = np.random.RandomState(0)
    X = Xdigits[:100]
    rbm1 = BernoulliRBM(n_components=2, batch_size=5, n_iter=5, random_state=42)
    rbm1.fit(X)

    h = rbm1._mean_hiddens(X[0])
    hs = np.mean([rbm1._sample_hiddens(X[0], rng) for i in range(100)], 0)

    assert_almost_equal(h, hs, decimal=1)
Example 44
def rbm_knn_train_and_predict(train_set_x,train_set_y,test_set_x,test_set_y):
    knn = KNeighborsClassifier(n_neighbors=5)
    rbm = BernoulliRBM(random_state=0, verbose=True)
    rbm.learning_rate = 0.06
    rbm.n_iter = 20
    rbm.n_components = 100
    classifier = Pipeline(steps=[('rbm', rbm), ('knn', knn)])
    classifier.fit(train_set_x,train_set_y)
    PRED = classifier.predict(test_set_x)
    return PRED
Example 45
def rbm_dbn_train_and_predict(train_set_x,train_set_y,test_set_x,test_set_y):
    dbn = DBN(epochs=200,learn_rates=0.01)
    rbm = BernoulliRBM(random_state=0, verbose=True)
    rbm.learning_rate = 0.06
    rbm.n_iter = 20
    rbm.n_components = 100
    classifier = Pipeline(steps=[('rbm', rbm), ('dbn', dbn)])
    classifier.fit(train_set_x,train_set_y)
    PRED = classifier.predict(test_set_x)
    return PRED
Example 46
def rbm_logistic_train_and_predict(train_set_x,train_set_y,test_set_x,test_set_y):
    logistic = linear_model.LogisticRegression(C=6000)
    rbm = BernoulliRBM(random_state=0, verbose=True)
    rbm.learning_rate = 0.06
    rbm.n_iter = 20
    rbm.n_components = 100
    classifier = Pipeline(steps=[('rbm', rbm), ('logistic', logistic)])
    classifier.fit(train_set_x,train_set_y)
    PRED = classifier.predict(test_set_x)
    return PRED
Example 47
def test_gibbs_smoke():
    """Check if we don't get NaNs sampling the full digits dataset.
    Also check that sampling again will yield different results."""
    X = Xdigits
    rbm1 = BernoulliRBM(n_components=42, batch_size=40, n_iter=20, random_state=42)
    rbm1.fit(X)
    X_sampled = rbm1.gibbs(X)
    assert_all_finite(X_sampled)
    X_sampled2 = rbm1.gibbs(X)
    assert_true(np.all((X_sampled != X_sampled2).max(axis=1)))
Example 48
class BernoulliRBMSearchEngine(SmartSearchEngine):
    #
    # Registry implementation using a brute-force NearestNeighbors index

    def __init__(self):
        super(BernoulliRBMSearchEngine, self).__init__()
        self._service_array = []
        self._bernoulliRBM_index = None
        self._tfidf_matrix = None

    def load_configuration(self, configuration_file):
        super(BernoulliRBMSearchEngine,
              self).load_configuration(configuration_file)

        self._vectorizer = TfidfVectorizer(
            sublinear_tf=False,
            analyzer='word',
            lowercase=False,
            use_bm25idf=self._use_bm25idf,
            bm25_tf=self._use_bm25tf,
            k=self._bm25_k,
            preprocessor=StringPreprocessorAdapter())

    def unpublish(self, service):
        pass

    def _preprocess(self, bag_of_words):
        return bag_of_words.get_words_str()

    def _after_publish(self, documents):
        self._tfidf_matrix = self._vectorizer.fit_transform(documents)
        self._bernoulliRBM = BernoulliRBM(learning_rate=1)
        self._rbm_matrix = self._bernoulliRBM.fit_transform(self._tfidf_matrix)
        self._bernoulliRBM_index = NearestNeighbors(len(self._service_array),
                                                    algorithm='brute',
                                                    metric='euclidean')
        self._bernoulliRBM_index.fit(self._rbm_matrix)

    def publish(self, service):
        pass

    def find(self, query):
        query = StringTransformer().transform(query)
        query_array = self._vectorizer.transform(
            [self._query_transformer.transform(query).get_words_str()])
        query_array = self._bernoulliRBM.transform(query_array.toarray())
        result = self._bernoulliRBM_index.kneighbors(query_array,
                                                     return_distance=False)[0]
        result_list = []
        for index in result:
            result_list.append(self._service_array[index])
        return result_list

    def number_of_services(self):
        pass
Example 49
class DeepRbmMnistClassifier:

    def __init__(self):
        self.n_components_first = 500
        self.n_components_second = 500
        self.n_components_third = 2000
        self.n_iter_first = 20
        self.n_iter_second = 20
        self.n_iter_third = 20
        self.learning_rate_first = 0.06
        self.learning_rate_second = 0.06
        self.learning_rate_third = 0.06
        self.verbose = True

    def label_to_feature(self,y):
        feature = [0]*10
        feature[y] = 1
        return feature

    def fit(self,X,y):
        self.rbm_1 = BernoulliRBM(verbose=self.verbose,
                            n_components=self.n_components_first,
                            n_iter=self.n_iter_first,
                            learning_rate=self.learning_rate_first)
        self.rbm_2 = BernoulliRBM(verbose=self.verbose,
                            n_components=self.n_components_second,
                            n_iter=self.n_iter_second,
                            learning_rate=self.learning_rate_second)
        self.first_pipeline = Pipeline(steps=[('rbm_1',self.rbm_1), ('rbm_2',self.rbm_2)])
        self.first_pipeline.fit(X,y)

        # TODO improve. Look at how it is done in classify
        new_features = []
        for example,label in zip(X,y):
            transformed = self.first_pipeline.transform(example)[0]
            new_features.append(np.concatenate((transformed,self.label_to_feature(label))))

        self.rbm_3 = BernoulliRBM(verbose=self.verbose,
                            n_components=self.n_components_third,
                            n_iter=self.n_iter_third,
                            learning_rate=self.learning_rate_third)
        self.rbm_3.fit(new_features,y)

    def classify(self,X):
        transformed = self.first_pipeline.transform(X)
        transformed = np.concatenate((transformed,[[0]*10]*len(transformed)),axis=1)

        # The inverse of rbm_3 to go from hidden layer to visible layer
        rbm_aux = BernoulliRBM()
        rbm_aux.intercept_hidden_ = self.rbm_3.intercept_visible_
        rbm_aux.intercept_visible_ = self.rbm_3.intercept_hidden_
        rbm_aux.components_ = np.transpose(self.rbm_3.components_)
        results = rbm_aux.transform(self.rbm_3.transform(transformed))
        results = results[:,-10:]
        return np.argmax(results,axis=1)
Example 50
def test_fit_transform():
    """Check proper implementation of fit_transform"""
    X = Xdigits[:100]
    rbm1 = BernoulliRBM(n_components=16, batch_size=5,
                        n_iter=5, random_state=42)
    rbm2 = clone(rbm1)

    Xt1 = rbm1.fit(X).transform(X)
    Xt2 = rbm2.fit_transform(X)

    assert_array_equal(Xt1, Xt2)
Example 51
def test_score_samples():
    """Check that the pseudo likelihood is computed without clipping.

    http://fa.bianp.net/blog/2013/numerical-optimizers-for-logistic-regression/
    """
    rng = np.random.RandomState(42)
    X = np.vstack([np.zeros(1000), np.ones(1000)])
    rbm1 = BernoulliRBM(n_components=10, batch_size=2,
                        n_iter=10, random_state=rng)
    rbm1.fit(X)
    assert((rbm1.score_samples(X) < -300).all())
Example 52
    def classify(self,X):
        transformed = self.first_pipeline.transform(X)
        transformed = np.concatenate((transformed,[[0]*10]*len(transformed)),axis=1)

        # The inverse of rbm_3 to go from hidden layer to visible layer
        rbm_aux = BernoulliRBM()
        rbm_aux.intercept_hidden_ = self.rbm_3.intercept_visible_
        rbm_aux.intercept_visible_ = self.rbm_3.intercept_hidden_
        rbm_aux.components_ = np.transpose(self.rbm_3.components_)
        results = rbm_aux.transform(self.rbm_3.transform(transformed))
        results = results[:,-10:]
        return np.argmax(results,axis=1)
Example 53
def test_fit_gibbs():
    # Gibbs on the RBM hidden layer should be able to recreate [[0], [1]]
    # from the same input
    rng = np.random.RandomState(42)
    X = np.array([[0.], [1.]])
    rbm1 = BernoulliRBM(n_components=2, batch_size=2,
                        n_iter=42, random_state=rng)
    # this many iterations are needed for the fit to converge
    rbm1.fit(X)
    assert_almost_equal(rbm1.components_,
                        np.array([[0.02649814], [0.02009084]]), decimal=4)
    assert_almost_equal(rbm1.gibbs(X), X)
    return rbm1
Example 54
def bernoulli_rbm(data, labels):

    print('> running rbm')
    print('visible units: %d' % len(data))
    print('hidden units: %d' % hidden_units)
    print('epochs size: %d' % epochs_size)
    print('-------------')

    training_data = np.array(data)
    # fit() already runs the n_iter training epochs;
    # BernoulliRBM has no separate train() method
    rbm = BernoulliRBM(batch_size=32, learning_rate=0.1, n_components=5,
                       n_iter=10, random_state=0, verbose=True)
    rbm.fit(training_data, labels)
Example 55
    def getNeuralModel(self,X,Y):

            logistic = linear_model.LogisticRegression()
            rbm = BernoulliRBM(verbose=True)

            classifier = linear_model.LogisticRegression(penalty='l2', tol=.0001)#Pipeline(steps = [('rbm', rbm),('logistic',logistic)])
            rbm.learning_rate = 0.0001
            rbm.n_iter = 1000
            rbm.n_components = 1000

            classifier.fit(X, Y)

            return classifier
Example 56
def test_partial_fit():
    X = Xdigits.copy()
    rbm = BernoulliRBM(n_components=64, learning_rate=0.1, batch_size=20, random_state=9)
    n_samples = X.shape[0]
    n_batches = int(np.ceil(float(n_samples) / rbm.batch_size))
    batch_slices = np.array_split(X, n_batches)

    for i in range(7):
        for batch in batch_slices:
            rbm.partial_fit(batch)

    assert_almost_equal(rbm.score_samples(X).mean(), -21.0, decimal=0)
    assert_array_equal(X, Xdigits)
Example 57
def test_fit_gibbs_sparse():
    # Gibbs on the RBM hidden layer should be able to recreate [[0], [1]] from
    # the same input even when the input is sparse, and test against non-sparse
    rbm1 = test_fit_gibbs()
    rng = np.random.RandomState(42)
    from scipy.sparse import csc_matrix
    X = csc_matrix([[0.], [1.]])
    rbm2 = BernoulliRBM(n_components=2, batch_size=2,
                        n_iter=42, random_state=rng)
    rbm2.fit(X)
    assert_almost_equal(rbm2.components_,
                        np.array([[0.02649814], [0.02009084]]), decimal=4)
    assert_almost_equal(rbm2.gibbs(X), X.toarray())
    assert_almost_equal(rbm1.components_, rbm2.components_)
Example 58
    def _RBM(self, X, y):

        from sklearn.neural_network import BernoulliRBM

        # RBM model creation, number of components
        # feature extraction method. Used here (after sampling) because we are
        # creating a universal model and not this_dataset-specific.
        neural_network = BernoulliRBM(n_components=self.k_features)

        neural_network.fit(X, y)
        X = neural_network.transform(X)

        self.feature_reduction_method = neural_network

        return X
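
Because the fitted RBM is kept on self.feature_reduction_method, held-out data can be reduced with the same learned features. A standalone sketch of that pattern; data and sizes are illustrative.

import numpy as np
from sklearn.neural_network import BernoulliRBM

rng = np.random.RandomState(0)
X_train, X_test = rng.rand(50, 20), rng.rand(10, 20)
rbm = BernoulliRBM(n_components=5, random_state=0).fit(X_train)
X_train_reduced = rbm.transform(X_train)
X_test_reduced = rbm.transform(X_test)  # same learned features applied to new data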