Code example #1
	def RBM_ensemble(predict_all):
		'''
		RBM Ensemble method
		'''
		#Convert '-1's to '0'
		predict_all[predict_all==-1]=0
		
		if ENSEMBLE_TRAIN:
			#Train RBM 
			rbm = BernoulliRBM(n_components = N_COMPONENTS, n_iter=N_ITERATIONS, learning_rate=LEARNING_RATE, batch_size=BATCH_SIZE)
			print(f'\nStarting RBM training.... {datetime.datetime.now().time()}')
			rbm.fit(predict_all)
			print(f'\nRBM training complete.... - {datetime.datetime.now().time()}')
			#dump(rbm, 'rbm_ensemble_OLAK.joblib', compress=True)	#Save RBM model
			anomalous_test_predict_all = predict_all[5855:,:]
			normal_test_predict_all = predict_all[0:5855,:]
		else:
			rbm = load('rbm_ensemble_OLAK.joblib')					#Load stored RBM model
			anomalous_test_predict_all = predict_all[52692:,:]
			normal_test_predict_all = predict_all[0:52692,:]
			print(f'Sizes - {anomalous_test_predict_all.shape}')
		
		anomalous_test_probability = rbm.transform(anomalous_test_predict_all)
		normal_test_probability = rbm.transform(normal_test_predict_all)
		print(f'\nRBM complete.... - {datetime.datetime.now().time()}')
		
		true_positives = int((anomalous_test_probability[anomalous_test_probability<=T1].shape[0])/N_COMPONENTS)
		false_negatives = int((anomalous_test_probability[T1<anomalous_test_probability].shape[0])/N_COMPONENTS)
		false_positives = int((normal_test_probability[normal_test_probability<=T1].shape[0])/N_COMPONENTS)
		true_negatives = int((normal_test_probability[T1<normal_test_probability].shape[0])/N_COMPONENTS)
		MLmodel_evaluation(true_positives, false_positives, false_negatives, true_negatives)
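Note: transform() returns one probability per hidden unit, so the counts above flatten an (n_samples, N_COMPONENTS) matrix and divide by N_COMPONENTS to recover approximate sample counts. A minimal, self-contained sketch of that trick on toy data (T1 and N_COMPONENTS are stand-in values here, not the constants from the original script):

import numpy as np
from sklearn.neural_network import BernoulliRBM

T1, N_COMPONENTS = 0.5, 4
Z = np.random.RandomState(0).randint(0, 2, size=(100, 8))
rbm = BernoulliRBM(n_components=N_COMPONENTS, n_iter=5, random_state=0)
proba = rbm.fit_transform(Z)  # shape (100, N_COMPONENTS), values in (0, 1)
flagged = int(proba[proba <= T1].shape[0] / N_COMPONENTS)
print(f'{flagged} of {Z.shape[0]} samples flagged on average')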
Code example #3
def words_to_vec(df):
    print("Method: words_to_vec. Working on words to vecs....")

    buzzCount = CountVectorizer(stop_words='english', max_features=50, ngram_range=(1, 1), token_pattern=u'.*_.*')
    buzzCount_te_sparse = buzzCount.fit_transform(df["buzzers"])

    buzzTFid = TfidfVectorizer(stop_words='english', max_features=500, ngram_range=(2, 9))
    buzzTFid_te_sparse = buzzTFid.fit_transform(df["description"])
    _boltzman = BernoulliRBM(n_components=35)
    _boltzman.fit(buzzTFid_te_sparse)
    buzzTFid_boltzman = _boltzman.transform(buzzTFid_te_sparse)

    buzzCount_df = pd.DataFrame(buzzCount_te_sparse.toarray(), columns=buzzCount.get_feature_names_out())
    buzzTFid_boltzman_cols = ['buzz_boltz_' + str(ag) for ag in range(1, buzzTFid_boltzman.shape[1] + 1)]
    buzzTFid_boltzman_df = pd.DataFrame(buzzTFid_boltzman, columns=buzzTFid_boltzman_cols)
    df = pd.concat([df, buzzCount_df, buzzTFid_boltzman_df], axis=1)

    #fagg = FeatureAgglomeration(n_clusters=100)
    #fagg.fit(buzzTFid_te_sparse.toarray())
    #buzzTFid_fagg = fagg.transform(buzzTFid_te_sparse.toarray())
    #buzzCount_df = pd.DataFrame(buzzCount_te_sparse.toarray(), columns=buzzCount.get_feature_names())
    #buzzTFid_fagg_cols = ['buzz_fagg' + str(ag) for ag in range(1, buzzTFid_fagg.shape[1] + 1)]
    #buzzTFid_fagg_df = pd.DataFrame(buzzTFid_fagg, columns=buzzTFid_fagg_cols)
    #df = pd.concat([df, buzzTFid_fagg_df], axis=1)

    print("Method: words_to_vec. Returning words to vecs....")
    return df
Code example #4
File: DBN.py Project: zawecha1/RBM_DBN
    def pretrain(self, save=True):

        visual_layer = self.data

        for i in range(len(self.hidden_sizes)):
            print("[DBN] Layer {} Pre-Training".format(i + 1))

            rbm = BernoulliRBM(n_components=self.hidden_sizes[i],
                               n_iter=self.rbm_iters[i],
                               learning_rate=self.rbm_learning_rate[i],
                               verbose=True,
                               batch_size=32)
            rbm.fit(visual_layer)
            self.rbm_weights.append(rbm.components_)
            self.rbm_biases.append(rbm.intercept_hidden_)
            self.rbm_h_act.append(rbm.transform(visual_layer))

            visual_layer = self.rbm_h_act[-1]

        if save:
            with open(self.outdir + "rbm_weights.p", 'wb') as f:
                pickle.dump(self.rbm_weights, f)

            with open(self.outdir + "rbm_biases.p", 'wb') as f:
                pickle.dump(self.rbm_biases, f)

            with open(self.outdir + "rbm_hidden.p", 'wb') as f:
                pickle.dump(self.rbm_h_act, f)
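A hedged companion sketch showing how the pickled layers above could be reloaded; the './' output directory is an assumption (the real value comes from self.outdir):

import pickle

with open('./' + 'rbm_weights.p', 'rb') as f:  # assumes outdir='./'
    rbm_weights = pickle.load(f)
print([w.shape for w in rbm_weights])  # one (n_hidden, n_visible) matrix per layer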
Code example #5
File: avia.py Project: chencen2000/aviapy
def test_nn(folder='data_270_json'):
    all_data = put_together(folder)
    vec = DictVectorizer()
    all_detects_vec = vec.fit_transform(all_data['defects'])
    model = BernoulliRBM()
    model.fit(all_detects_vec)
    ready = []
    for fn in os.listdir(folder):
        data = None
        fullname = os.path.join(folder, fn)
        if os.path.isfile(fullname):
            with open(fullname) as f:
                try:
                    data = json.load(f)
                except ValueError:  # skip files that are not valid JSON
                    pass
        if data:
            fe = get_features(data)
            if len(fe['defects']) > 0:
                defects_vec = vec.transform(fe['defects'])  # do not overwrite the fitted vectorizer
                p = model.transform(defects_vec)
                data['vd'] = p.tolist()
                r = {}
                r['vzw'] = data['vzw']
                r['defects'] = p.tolist()
                r['measurement'] = fe['measurement']
                ready.append(r)  # append only when a record was built
Code example #7
File: test_rbm.py Project: amitmse/scikit-learn
def test_transform():
    X = Xdigits[:100]
    rbm1 = BernoulliRBM(n_components=16, batch_size=5, n_iter=5, random_state=42)
    rbm1.fit(X)

    Xt1 = rbm1.transform(X)
    Xt2 = rbm1._mean_hiddens(X)

    assert_array_equal(Xt1, Xt2)
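The equality being tested holds because transform() returns the conditional mean of the hidden units. A sketch of the same check written against the public attributes, assuming the standard logistic form P(h=1|v) = sigmoid(v W^T + b_h) used by scikit-learn:

import numpy as np
from scipy.special import expit  # logistic sigmoid
from sklearn.neural_network import BernoulliRBM

X = np.random.RandomState(42).randint(0, 2, size=(20, 6)).astype(float)
rbm = BernoulliRBM(n_components=3, n_iter=5, random_state=42).fit(X)
manual = expit(X @ rbm.components_.T + rbm.intercept_hidden_)
assert np.allclose(rbm.transform(X), manual)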
Code example #8
File: plankton.py Project: chrissly31415/amimanera
def testRBM():
  X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
  print(X)
  model = BernoulliRBM(n_components=2)
  model.fit(X)
  print(dir(model))
  print(model.transform(X))
  print(model.score_samples(X))
  print(model.gibbs)
Code example #10
class DeepRbmMnistClassifier:
    def __init__(self):
        self.n_components_first = 500
        self.n_components_second = 500
        self.n_components_third = 2000
        self.n_iter_first = 20
        self.n_iter_second = 20
        self.n_iter_third = 20
        self.learning_rate_first = 0.06
        self.learning_rate_second = 0.06
        self.learning_rate_third = 0.06
        self.verbose = True

    def label_to_feature(self, y):
        feature = [0] * 10
        feature[y] = 1
        return feature

    def fit(self, X, y):
        self.rbm_1 = BernoulliRBM(verbose=self.verbose,
                                  n_components=self.n_components_first,
                                  n_iter=self.n_iter_first,
                                  learning_rate=self.learning_rate_first)
        self.rbm_2 = BernoulliRBM(verbose=self.verbose,
                                  n_components=self.n_components_second,
                                  n_iter=self.n_iter_second,
                                  learning_rate=self.learning_rate_second)
        self.first_pipeline = Pipeline(
            steps=[('rbm_1', self.rbm_1), ('rbm_2', self.rbm_2)])
        self.first_pipeline.fit(X, y)

        # TODO improve. Look at how it is done in classify
        new_features = []
        for example, label in zip(X, y):
            transformed = self.first_pipeline.transform(example)[0]
            new_features.append(
                np.concatenate((transformed, self.label_to_feature(label))))

        self.rbm_3 = BernoulliRBM(verbose=self.verbose,
                                  n_components=self.n_components_third,
                                  n_iter=self.n_iter_third,
                                  learning_rate=self.learning_rate_third)
        self.rbm_3.fit(new_features, y)

    def classify(self, X):
        transformed = self.first_pipeline.transform(X)
        transformed = np.concatenate(
            (transformed, [[0] * 10] * len(transformed)), axis=1)

        # The inverse of rbm_3 to go from hidden layer to visible layer
        rbm_aux = BernoulliRBM()
        rbm_aux.intercept_hidden_ = self.rbm_3.intercept_visible_
        rbm_aux.intercept_visible_ = self.rbm_3.intercept_hidden_
        rbm_aux.components_ = np.transpose(self.rbm_3.components_)
        results = rbm_aux.transform(self.rbm_3.transform(transformed))
        results = results[:, -10:]
        return np.argmax(results, axis=1)
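The rbm_aux construction above works because transform() only reads components_ and intercept_hidden_, so swapping the fitted attributes turns P(h|v) into P(v|h). A toy sketch of that identity (assumes scikit-learn's attribute shapes; not part of the original project):

import numpy as np
from scipy.special import expit
from sklearn.neural_network import BernoulliRBM

X = np.random.RandomState(0).randint(0, 2, size=(30, 8)).astype(float)
rbm = BernoulliRBM(n_components=5, n_iter=5, random_state=0).fit(X)
H = rbm.transform(X)  # P(h|v)
rbm_aux = BernoulliRBM()
rbm_aux.components_ = rbm.components_.T  # (n_features, n_components)
rbm_aux.intercept_hidden_ = rbm.intercept_visible_
rbm_aux.intercept_visible_ = rbm.intercept_hidden_
assert np.allclose(rbm_aux.transform(H),
                   expit(H @ rbm.components_ + rbm.intercept_visible_))  # P(v|h)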
Code example #12
class BernoulliRBMSearchEngine(SmartSearchEngine):
    #
    # Registry implementation using ball-tree

    def __init__(self):
        super(BernoulliRBMSearchEngine, self).__init__()
        self._service_array = []
        self._bernoulliRBM_index = None
        self._tfidf_matrix = None

    def load_configuration(self, configuration_file):
        super(BernoulliRBMSearchEngine,
              self).load_configuration(configuration_file)

        self._vectorizer = TfidfVectorizer(
            sublinear_tf=False,
            analyzer='word',
            lowercase=False,
            use_bm25idf=self._use_bm25idf,
            bm25_tf=self._use_bm25tf,
            k=self._bm25_k,
            preprocessor=StringPreprocessorAdapter())

    def unpublish(self, service):
        pass

    def _preprocess(self, bag_of_words):
        return bag_of_words.get_words_str()

    def _after_publish(self, documents):
        self._tfidf_matrix = self._vectorizer.fit_transform(documents)
        self._bernoulliRBM = BernoulliRBM(learning_rate=1)
        self._rbm_matrix = self._bernoulliRBM.fit_transform(self._tfidf_matrix)
        self._bernoulliRBM_index = NearestNeighbors(len(self._service_array),
                                                    algorithm='brute',
                                                    metric='euclidean')
        self._bernoulliRBM_index.fit(self._rbm_matrix)

    def publish(self, service):
        pass

    def find(self, query):
        query = StringTransformer().transform(query)
        query_array = self._vectorizer.transform(
            [self._query_transformer.transform(query).get_words_str()])
        query_array = self._bernoulliRBM.transform(query_array.toarray())
        result = self._bernoulliRBM_index.kneighbors(query_array,
                                                     return_distance=False)[0]
        result_list = []
        for index in result:
            result_list.append(self._service_array[index])
        return result_list

    def number_of_services(self):
        pass
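Stripped of the registry plumbing, the engine above is TF-IDF, then RBM features, then exact nearest-neighbour lookup. A standalone sketch of that core using only stock scikit-learn classes (toy documents; the custom BM25 vectorizer options are omitted):

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from sklearn.neural_network import BernoulliRBM

docs = ['weather forecast service', 'currency exchange rates', 'weather map tiles']
vec = TfidfVectorizer()
tfidf = vec.fit_transform(docs)
rbm = BernoulliRBM(n_components=4, learning_rate=1, random_state=0)
index = NearestNeighbors(n_neighbors=2, algorithm='brute', metric='euclidean')
index.fit(rbm.fit_transform(tfidf))
query = rbm.transform(vec.transform(['weather today']).toarray())
print(index.kneighbors(query, return_distance=False)[0])  # indices of the closest documents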
Code example #13
File: neural.py Project: ismailezzaki96/HiggsMl
def scale(params):
    xTrain = params[0]
    yTrain = params[1]
    xValid = params[2]
    print('neuralizing')
    global neural
    neural = Bernoulli()
    xTrain = neural.fit_transform(xTrain, yTrain)
    xValid = neural.transform(xValid)

    return [xTrain, yTrain, xValid]
Code example #16
File: bernoulli_rbm.py Project: tdoublep/lale
class BernoulliRBMImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def transform(self, X):
        return self._wrapped_model.transform(X)
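A hedged usage sketch for the wrapper above, assuming Op is bound to sklearn.neural_network.BernoulliRBM as in the lale bindings:

import numpy as np

X = np.random.RandomState(0).rand(50, 16)
impl = BernoulliRBMImpl(n_components=8, n_iter=5, random_state=0)
print(impl.fit(X).transform(X).shape)  # (50, 8)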
Code example #17
File: train.py Project: seifed/pyImageClassification
def temp(features):
    [featuresNorm, MAX, MIN] = normalizeFeatures(features)
    [X, Y] = listOfFeatures2Matrix(featuresNorm)
    rbm = BernoulliRBM(n_components = 10, n_iter = 1000, learning_rate = 0.01,  verbose = False)
    X1 = X[0::2]
    X2 = X[1::2]
    Y1 = Y[0::2]
    Y2 = Y[1::2]    
    rbm.fit(X1,Y1)
    YY = rbm.transform(X1)

    for i in range(10):plt.plot(YY[i,:],'r')
    for i in range(10):plt.plot(YY[i+10,:],'g')
    for i in range(10):plt.plot(YY[i+20,:],'b')
    plt.show()
Code example #18
File: datasets.py Project: mikbuch/pymri
    def _RBM(self, X, y):

        from sklearn.neural_network import BernoulliRBM

        # RBM model creation, number of components.
        # Feature extraction method, used here (after sampling) because we are
        # creating a universal model and not a this_dataset-specific one.
        neural_network = BernoulliRBM(n_components=self.k_features)

        neural_network.fit(X, y)
        X = neural_network.transform(X)

        self.feature_reduction_method = neural_network

        return X
Code example #21
    def pretraining(self):
        input_layer = self.x_train
        for i in range(len(self.hidden_layer)):
            print("DBN Layer {0} Pre-training".format(i + 1))
            rbm = BernoulliRBM(n_components=self.hidden_layer[i],
                               learning_rate=self.learning_rate_rbm,
                               batch_size=self.batch_size_rbm,
                               n_iter=self.n_epochs_rbm,
                               verbose=self.verbose_rbm,
                               random_state=self.verbose_rbm)  # note: reuses verbose_rbm as the seed; a dedicated seed attribute was probably intended
            rbm.fit(input_layer)
            # size of weight matrix is [input_layer, hidden_layer]
            self.weight_rbm.append(rbm.components_.T)
            self.bias_rbm.append(rbm.intercept_hidden_)
            input_layer = rbm.transform(input_layer)
        print('Pre-training finish.')
Code example #22
def train_ca_cd(type, X_train, y_train, X_test, y_test):
    input_layer = X_train
    hidden_layer = [250, 500, 200]
    weight_rbm = []
    bias_rbm = []
    for i in range(len(hidden_layer)):
        print("DBN Layer {0} Pre-training".format(i + 1))
        rbm = BernoulliRBM(n_components=hidden_layer[i],
                           learning_rate=0.0005,
                           batch_size=512,
                           n_iter=200,
                           verbose=2,
                           random_state=1)
        rbm.fit(input_layer)
        # size of weight matrix is [input_layer, hidden_layer]
        weight_rbm.append(rbm.components_.T)
        bias_rbm.append(rbm.intercept_hidden_)
        input_layer = rbm.transform(input_layer)
    print('Pre-training finish.', np.shape(weight_rbm[0]),
          np.shape(bias_rbm[0]))
    test_rms = 0
    result = []
    model = Sequential()
    print('Fine-tuning start.')
    for i in range(0, len(hidden_layer)):
        print('i:', i)
        if i == 0:
            model.add(
                Dense(hidden_layer[i],
                      activation='sigmoid',
                      input_dim=np.shape(X_train)[1]))
        elif i >= 1:
            model.add(Dense(hidden_layer[i], activation='sigmoid'))
        else:
            pass
        layer = model.layers[i]
        layer.set_weights([weight_rbm[i], bias_rbm[i]])
    # model.add(Dense(np.shape(yTrain)[1], activation='linear'))
    model.add(
        Dense(1, activation='linear',
              kernel_regularizer=regularizers.l2(0.01)))
    # sgd = SGD(lr=0.005, decay=0)
    model.compile(loss='mse', optimizer="rmsprop")  # sgd
    model.fit(X_train, y_train, batch_size=150, epochs=100, verbose=5)
    model.save('../model/dwt_dbn_' + type + '_100.h5')
    print('Fine-tuning finish.')
    return model
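Why the transposes above line up: rbm.components_ has shape (n_hidden, n_visible), while a Keras Dense layer stores its kernel as (input_dim, units). A minimal shape check, sketched under the assumption that tensorflow.keras is the Keras in use:

import numpy as np
from sklearn.neural_network import BernoulliRBM
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

X = np.random.RandomState(1).rand(64, 10)
rbm = BernoulliRBM(n_components=4, n_iter=2, random_state=1).fit(X)
model = Sequential([Dense(4, activation='sigmoid', input_dim=10)])
model.layers[0].set_weights([rbm.components_.T, rbm.intercept_hidden_])
print(model.layers[0].get_weights()[0].shape)  # (10, 4) == (input_dim, units)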
Code example #23
File: train.py Project: seifed/pyImageClassification
def trainRBM_SVM(features, Cparam, nComponents):
    [X, Y] = listOfFeatures2Matrix(features)
    rbm = BernoulliRBM(n_components = nComponents, n_iter = 30, learning_rate = 0.2,  verbose = True)
    rbm.fit(X,Y)
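    # Note: BernoulliRBM is unsupervised; the Y argument is accepted but ignored.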
    newX = rbm.transform(X)
#    colors = ["r","g","b"]
#    for i in range(1,Y.shape[0],5):
#        plt.plot(newX[i,:], colors[int(Y[i])])
#    plt.show()

    classifier = {}
    classifier["rbm"] = rbm    
    svm = sklearn.svm.SVC(C = Cparam, kernel = 'linear',  probability = True)        
    svm.fit(newX,Y)

    classifier["svm"] = svm

    return classifier    
Code example #25
def RBM_new():
	'''
	RBM to evaluate on artificially generated data
	'''
	#Define datasize
	(val_length, test_length) = (11710, 105384)
	(train_half_length, test_half_length) = (int(val_length/2), int(test_length/2))
	
	#Generate artificial Z
	print('\n##############VALIDATION DATA#################')
	(TL_val, Z_val) = Z_new_generator(val_length, classifier_stats_val)
	print('\n##############TEST DATA#################')
	(TL_test, Z_test) = Z_new_generator(test_length, classifier_stats_test)
	
	#Convert '-1's to '0'
	Z_val[Z_val==-1]=0
	Z_test[Z_test==-1]=0
	
	#Train RBM
	rbm = BernoulliRBM(n_components = N_COMPONENTS, n_iter=N_ITERATIONS, learning_rate=LEARNING_RATE, batch_size=BATCH_SIZE)
	print(f'\nStarting RBM training.... {datetime.datetime.now().time()}')
	Z_val_probability = rbm.fit_transform(Z_val)
	Z_test_probability = rbm.transform(Z_test)
	print(f'\nRBM complete.... - {datetime.datetime.now().time()}')
	
	#Convert probability to values
	Z_val_final = np.sign(Z_val_probability - T1)
	Z_test_final = np.sign(Z_test_probability - T1)
	
	#RBM on validation data
	print(f'\n\n****VALIDATION RBM RESULTS*****')
	true_positives = sum(Z_val_final[train_half_length:]==-1)
	false_negatives = sum(Z_val_final[train_half_length:]==1)
	false_positives = sum(Z_val_final[0:train_half_length]==-1)
	true_negatives = sum(Z_val_final[0:train_half_length]==1)
	MLmodel_evaluation(true_positives, false_positives, false_negatives, true_negatives)
	
	#RBM on test data
	print(f'\n\n****TEST RBM RESULTS*****')
	true_positives = sum(Z_test_final[test_half_length:]==-1)
	false_negatives = sum(Z_test_final[test_half_length:]==1)
	false_positives = sum(Z_test_final[0:test_half_length]==-1)
	true_negatives = sum(Z_test_final[0:test_half_length]==1)
	MLmodel_evaluation(true_positives, false_positives, false_negatives, true_negatives)
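A toy check of the sign-threshold mapping used above (T1 is assumed to lie strictly inside (0, 1)):

import numpy as np

T1 = 0.5
p = np.array([0.10, 0.49, 0.51, 0.90])
print(np.sign(p - T1))  # [-1. -1.  1.  1.]: below T1 maps to -1 (anomalous), above to +1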
Code example #26
def RBM():
    filename = "../data/smaller.dta"
    with open(filename, 'rt') as raw_data:
        data = np.loadtxt(raw_data, delimiter=" ")
    X = data[:, :3]
    Y = data[:, 3]
    print(X)
    print(Y)
    print("training on RBM")
    rbm = BernoulliRBM(random_state=0, verbose=True)
    rbm.learning_rate = 0.06
    rbm.n_iter = 20
    rbm.n_components = 100
    rbm.fit(X, Y)
    predictions = rbm.transform(X)
    params = rbm.get_params()
    print("predictions = ", predictions)
    print("rbm = ", rbm)
    print("params = ", params)
Code example #27
def add_Brbm(Visible,
             components,
             rs,
             learning_rate,
             verbose=False,
             n_iter=50):

    rbm = BernoulliRBM(n_components=components,
                       random_state=rs,
                       learning_rate=learning_rate,
                       verbose=verbose,
                       n_iter=n_iter)  # pass the arguments through instead of hard-coding them
    rbm.fit(Visible)
    rbm_data = {
        'coefs': np.transpose(np.array(rbm.components_)),
        'bias': np.array(rbm.intercept_hidden_),
        'hidden': rbm.transform(Visible)
    }
    return rbm_data
Code example #28
File: dbn.py Project: tanlangqie/Deep-Learning
    def pretrain(self, save=True):

        visual_layer = self.train_x  # training set

        for i in range(len(self.hidden_sizes)):
            print("[DBN] Layer {} Pre-Training".format(i + 1))

            rbm = BernoulliRBM(n_components=self.hidden_sizes[i],
                               n_iter=self.rbm_iters,
                               learning_rate=self.rbm_learning_rate,
                               random_state=16,
                               verbose=0,
                               batch_size=2048)
            rbm.fit(visual_layer)  # train this layer's RBM
            self.rbm_weights.append(rbm.components_)  # weight matrix
            self.rbm_biases.append(rbm.intercept_hidden_)
            self.rbm_h_act.append(rbm.transform(visual_layer))

            visual_layer = self.rbm_h_act[-1]
Code example #29
File: rbm_001.py Project: lixunlove/galaxy-zoo
def rbm_001():
    s = 15
    crop = 150
    n_patches = 400000
    rf_size = 5

    train_x_crop_scale = CropScaleImageTransformer(
        training=True,
        result_path='data/data_train_crop_{}_scale_{}.npy'.format(crop, s),
        crop_size=crop,
        scaled_size=s,
        n_jobs=-1,
        memmap=True)

    patch_extractor = models.KMeansFeatures.PatchSampler(n_patches=n_patches,
                                                         patch_size=rf_size,
                                                         n_jobs=-1)
    images = train_x_crop_scale.transform()
    images = images.reshape((images.shape[0], 15 * 15 * 3))

    # rbm needs inputs to be between 0 and 1
    scaler = MinMaxScaler()
    images = scaler.fit_transform(images)

    # Training takes a long time, says 80 seconds per iteration, but seems like longer
    # And this is only with 256 components
    rbm = BernoulliRBM(verbose=1)
    rbm.fit(images)

    train_x = rbm.transform(images)
    train_y = classes.train_solutions.data

    # 0.138 CV on 50% of the dataset
    wrapper = ModelWrapper(models.Ridge.RidgeRFEstimator, {
        'alpha': 500,
        'n_estimators': 500
    },
                           n_jobs=-1)
    wrapper.cross_validation(train_x,
                             train_y,
                             sample=0.5,
                             parallel_estimator=True)
Code example #30
File: ModelDBN.py Project: yanzhaochang/ELSO
    def pretrain(self):
        self.weight_rbm = []
        self.bias_rbm = []

        x_train = self.x_train
        y_train = self.y_train

        hidden_layer_structure = self.get_hidden_layer_structure()

        input_layer = x_train
        for i in range(len(hidden_layer_structure)):
            rbm = BernoulliRBM(n_components=hidden_layer_structure[i],
                               learning_rate=self.learning_rate_rbm,
                               batch_size=self.batch_size_rbm,
                               n_iter=self.n_epochs_rbm,
                               verbose=1,
                               random_state=self.random_seed)
            rbm.fit(input_layer)
            self.weight_rbm.append(rbm.components_.T)
            self.bias_rbm.append(rbm.intercept_hidden_)
            input_layer = rbm.transform(input_layer)
        return
Code example #31
    def pretrain(self):

        visual_layer = self.data
        print(self.data)
        for i in range(len(self.hidden_sizes)):
            print(visual_layer.shape)
            print("[DBN] Layer {} Pre-Training".format(i + 1))

            rbm = BernoulliRBM(n_components=self.hidden_sizes[i],
                               n_iter=self.rbm_iters,
                               learning_rate=self.rbm_learning_rate,
                               verbose=2,
                               batch_size=64)
            rbm.fit(visual_layer)
            self.rbm_weights.append(rbm.components_)

            self.rbm_biases.append(rbm.intercept_hidden_)

            self.rbm_h_act.append(rbm.transform(visual_layer))

            visual_layer = self.rbm_h_act[-1]
            print(visual_layer.shape)
        print(visual_layer)
Code example #32
File: CatsDogsBernoulli.py Project: wacax/DogsVsCats
#bigMatrixTrain = (bigMatrixTrain - np.min(bigMatrixTrain, 0)) / (np.max(bigMatrixTrain, 0) + 0.0001)  # 0-1 scaling
#Divide dataset for cross validation purposes
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    bigMatrixTrain, y, test_size = 0.4, random_state = 0) #fix this

# specify parameters and distributions to sample from
# Models we will use
rbm = BernoulliRBM(random_state=0, verbose=True)

#classifier = Pipeline(steps=[('rbm', rbm), ('logistic', logistic)])
rbm.learning_rate = 0.04
rbm.n_iter = 30
# More components tend to give better prediction performance, but larger fitting time
rbm.n_components = 300
X_train = rbm.fit_transform(X_train)
X_test = rbm.transform(X_test)

# Train a logistic model
print("Fitting the classifier to the training set")
logisticModel = linear_model.LogisticRegression()
t0 = time()
param_grid = {'C': [10, 30, 100, 300, 1000]}
logisticModel = GridSearchCV(logisticModel, param_grid = param_grid)
logisticModel = logisticModel.fit(X_train, y_train)
print("done in %0.3fs" % (time() - t0))
print("Best estimator found by grid search:")
print(logisticModel.best_estimator_)

#logistic.C = 6000.0

# Train a SVM classification model
Code example #33
File: rbm_reg.py Project: lavizhao/sentiment
    x = x_all[:length_train]
    t = x_all[length_train:]

    label = np.array(label)

    length_test = len(test)

    n = label.shape[1]

    print "x shape",x.shape
    print "t shape",t.shape

    print "rbm"
    rbm  = BernoulliRBM(n_components=2000,n_iter=20,batch_size=66)
    rbm.fit(x)
    x = rbm.transform(x)
    t = rbm.transform(t)

    print "rbm x shape",x.shape
    print "rbm t shape",t.shape

    #构造结果的矩阵
    answer = []

    

    print "开始回归"

    for i in range(n):
        print "第%s个"%(i)
        clf = linear_model.Ridge(alpha=2,fit_intercept=True,normalize=True,tol=1e-9)
Code example #35

readCsvData()
#print Train_X[0]
#print final
Test_X = Train_X[:15]
Test_Y = Train_X[15:]
print(len(Test_X))
#print Train_X
#Remove all stopwords since all characters are taken
#vectorizer=TfidfVectorizer(stop_words=None)
#Train_X=vectorizer.fit_transform(documents)
X = np.array(Test_X)
#print type(X)
#print Train_X
"""num_Of_clusters=3
model= KMeans(n_clusters=num_Of_clusters,init='random',max_iter=1000,n_init=2)
model.fit_transform(X)
labels=model.labels_
order_centroids=model.cluster_centers_.argsort()[:, ::-1]"""
"""model=GMM(n_components=2)
model.fit(X)"""

model = BernoulliRBM(n_components=2)
model.fit(X)

#Predict the test label for new data.
testLabels = model.transform(Test_Y)

print(testLabels)
Code example #36
File: RBM.py Project: ElJB/agora
del rows

folds = StratifiedKFold(n_splits=3).split(sentences, information)  # modern scikit-learn API

result = []


for train, test in folds:
	data_train = sentences[train]
	result_train = information[train]

	data_test = sentences[test]
	result_test = information[test]

	vectorizer = TfidfVectorizer(binary=True, norm=False, use_idf=False)
	rbm = BernoulliRBM()
	classifier = RandomForestClassifier()

	data_train = vectorizer.fit_transform(data_train)
	data_test = vectorizer.transform(data_test)

	data_train = rbm.fit_transform(data_train)
	data_test = rbm.transform(data_test)

	classifier.fit(data_train, result_train)

	print(classificationError(classifier.predict(data_test), result_test))
	result.append(classifier.score(data_test, result_test))

print(sum(result) / len(result))
Code example #37
File: RBMseq.py Project: PurinLord/DBM-s_Proteinas
#'MVDREQLVQKARLAEQAERYDDMAAAMKNVTELNEPLSNEERNLLSVAYKNVVGARRSSWRVISSIEQKTSADGNEKKIEMVRAYREKIEKELEAVCQDVLSLLDNYLIKNCSETQYESKVFYLKMKGDYYRYLAEVATGEKRATVVESSEKAYSEAHEISKEHMQPTHPIRLGLALNYSVFYYEIQNAPEQACHLAKTAFDDAIAELDTLNEDSYKDSTLIMQLLRDNLTLWTSDQQDD',
#'MAVMAPRTLVLLLSGALALTQTWAGSHSMRYFFTSVSRPGRGEPRFIAVGYVDDTQFVRFDSDAASQRMEPRAPWIEQEGPEYWDGETRKVKAHSQTHRVDLGTLRGYYNQSEAGSHTVQRMYGCDVGSDWRFLRGYHQYAYDGKDYIALKEDLRSWTAADMAAQTTKHKWEAAHVAEQLRAYLEGTCVEWLRRYLENGKETLQRTDAPKTHMTHHAVSDHEATLRCWALSFYPAEITLTWQRDGEDQTQDTELVETRPAGDGTFQKWAAVVVPSGQEQRYTCHVQHEGLPKPLTLRWEPSSQPTIPIVGIIAGLVLFGAVITGAVVAAVMWRRKSSDRKGGSYSQAASSDSAQGSDVSL',
#'MTMDKSELVQKAKLAEQAERYDDMAAAMKAVTEQGHELSNEERNLLSVAYKNVVGARRSSWRVISSIEQKTERNEKKQQMGKEYREKIEAELQDICNDVLELLDKYLIPNATQPESKVFYLKMKGDYFRYLSEVASGDNKQTTVSNSQQAYQEAFEISKKEMQPTHPIRLGLALNFSVFYYEILNSPEKACSLAKTAFDEAIAELDTLNEESYKDSTLIMQLLRDNLTLWTSENQGDEGD',
#]

comblength = 7

X = [np.array(createAAFreqVector(s, Lmap, comblength)) for s in seqs]  # a list, not a lazy map, so fit() can consume it
#print X

#X = (X - np.min(X, 0)) / (np.max(X, 0) + 0.0001)  # 0-1 scaling
#print X.shape

rbm.fit(X)
ssss ='MAVMAPRTLVLLLSGALALTQTWAGSHSMRYFFTSVSRPGRGEPRFIAVGYVDDTQFVRFDSDAASQRMEPRAPWIEQEGPEYWDGETRKVKAHSQTHRVDLGTLRGYYNQSEAGSHTVQRMYGCDVGSDWRFLRGYHQYAYDGKDYIALKEDLRSWTAADMAAQTTKHKWEAAHVAEQLRAYLEGTCVEWLRRYLENGKETLQRTDAPKTHMTHHAVSDHEATLRCWALSFYPAEITLTWQRDGEDQTQDTELVETRPAGDGTFQKWAAVVVPSGQEQRYTCHVQHEGLPKPLTLRWEPSSQPTIPIVGIIAGLVLFGAVITGAVVAAVMWRRKSSDRKGGSYSQAASSDSAQGSDVSL'
transformedSeq = rbm.transform(np.array([createAAFreqVector(ssss, Lmap, comblength)]))  # transform expects a 2-D array
print(transformedSeq)
print('len', len(transformedSeq))
# Training RBM-Logistic Pipeline
#classifier.fit(X_train, Y_train)

# Training Logistic regression
#logistic_classifier = linear_model.LogisticRegression(C=100.0)
#logistic_classifier.fit(X_train, Y_train)

###############################################################################
# Evaluation

print()

###############################################################################
Code example #38
train_X = train_X.reshape((train_X.shape[0], before*77))
test_X = test_X.reshape((test_X.shape[0], before*77))
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)
# dbn
input_layer = train_X
hidden_layer=[250,500,200]
weight_rbm = []
bias_rbm = []
for i in range(len(hidden_layer)):
    print("DBN Layer {0} Pre-training".format(i + 1))
    rbm = BernoulliRBM(n_components=hidden_layer[i],learning_rate=0.0005,batch_size=512,n_iter=200,verbose=2,random_state=1)
    rbm.fit(input_layer)
    # size of weight matrix is [input_layer, hidden_layer]
    weight_rbm.append(rbm.components_.T)
    bias_rbm.append(rbm.intercept_hidden_)
    input_layer = rbm.transform(input_layer)
print('Pre-training finish.',np.shape(weight_rbm[0]),np.shape(bias_rbm[0]))
test_rms = 0
result = []
model = Sequential()
print('Fine-tuning start.')
for i in range(0, len(hidden_layer)):
    print('i:',i)
    if i == 0:
        model.add(Dense(hidden_layer[i], activation='sigmoid',input_dim=np.shape(train_X)[1]))
    elif i >= 1:
        model.add(Dense(hidden_layer[i], activation='sigmoid'))
    else:
        pass
    layer = model.layers[i]
    layer.set_weights([weight_rbm[i], bias_rbm[i]])
Code example #39
def RBMtest01():
	# Use an RBM for non-linear feature extraction.
	# Compared with plain logistic regression, RBM features can improve classification accuracy.

	import numpy as np
	import matplotlib.pyplot as plt

	from scipy.ndimage import convolve
	from sklearn import linear_model, datasets, metrics
	from sklearn.model_selection import train_test_split
	from sklearn.neural_network import BernoulliRBM
	from sklearn.pipeline import Pipeline

	def nudge_dataset(X, Y):
		direction_vectors = [
			[[0, 1, 0],
			 [0, 0, 0],
			 [0, 0, 0]],

			[[0, 0, 0],
			 [1, 0, 0],
			 [0, 0, 0]],

			[[0, 0, 0],
			 [0, 0, 1],
			 [0, 0, 0]],

			[[0, 0, 0],
			 [0, 0, 0],
			 [0, 1, 0]]
		]

		shift = lambda x, w: convolve(x.reshape((8, 8)), mode = 'constant', weights = w).ravel()

		X = np.concatenate([X] + [np.apply_along_axis(shift, 1, X, vector) for vector in direction_vectors])
		Y = np.concatenate([Y for _ in range(5)], axis = 0)

		return X, Y

	digits = datasets.load_digits()
	X = np.asarray(digits.data, 'float32')  # convert to a float32 array

	X, Y = nudge_dataset(X, digits.target)  # expands X, Y to 5x the original size (original + 4 shifted copies)

	#print(np.max(X, 0))
	#print(np.min(X, 0))
	X = (X - np.min(X, 0)) / (np.max(X, 0) - np.min(X, 0) + 0.0001)  # 0-1 scaling, per feature

	X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 0)


	print(set(Y_train))

	# Build the models.
	logistic = linear_model.LogisticRegression()
	rbm = BernoulliRBM(random_state = 0, verbose = True)

	# The pipeline simply chains fit/transform steps;
	# the RBM's transform() output is the latent representation of the data.

	classifier = Pipeline(steps = [('rbm', rbm), ('logistic', logistic)])

	#Training
	# These hyperparameters were selected by cross-validation (GridSearchCV).
	rbm.learning_rate = 0.06
	rbm.n_iter = 20
	rbm.n_components = 100  # extract 100 RBM features
	logistic.C = 6000


	#rbm.fit(X_train, Y_train)
	rbm.fit(X_train)


	# Dimensionally, the RBM is an unsupervised step: it learns N representative
	# vectors from X_train and projects X_train onto them, producing a new
	# N-dimensional feature representation, similar in spirit to PCA.

	predicted_Y = rbm.transform(X_train)

	print(rbm.components_)  # rbm.components_ is a 100 x 64 matrix
	print(len(rbm.components_))
	print(len(rbm.components_[0]))

	print(predicted_Y)
	print(len(predicted_Y))
	print(len(predicted_Y[0]))
	print(len(X_train))
	print(len(X_train[0]))


	# Training the RBM-Logistic pipeline.
	# The input is still the per-feature normalized X_train;
	# the corresponding Y_train holds the 0-9 labels.
	print("Start Training RBM-Logistic Pipeline")
	classifier.fit(X_train, Y_train)

	# Training Logistic regression for comparison.
	logistic_classifier = linear_model.LogisticRegression(C = 100.0)
	logistic_classifier.fit(X_train, Y_train)

	#Evaluation

	print "Logistic regression using RBM features: \n%s\n" %(metrics.classification_report(Y_test, classifier.predict(X_test)))
	print "Logistic regression using raw features: \n%s\n" %(metrics.classification_report(Y_test, logistic_classifier.predict(X_test)))


	#Plotting

	plt.figure(figsize = (4.2, 4))

	for i, comp in enumerate(rbm.components_):
		plt.subplot(10, 10, i + 1)
		# each of the 100 components is a 64-d vector; reshape each to 8x8 for display
		plt.imshow(comp.reshape(8,8), cmap=plt.cm.gray_r)
		plt.xticks(())
		plt.yticks(())

	plt.suptitle('100 components extracted by RBM', fontsize = 16)
	plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.23)

	plt.show()
Code example #40
File: main.py Project: boocheck/santander
# X_test = X_test[test_permut, :]
# y_test = y_test[test_permut]


# rbm learning
# TODO: try to search better parametrs with grid search
rbm = BernoulliRBM(random_state=0, verbose=True)
rbm.learning_rate = 0.1
rbm.n_iter = 30
rbm.n_components = 16

print(X_train)
print(X_train.shape)
rbm.fit(all_feats)
X_train = np.concatenate((rbm.transform(X_train), X_train_preserved), 1)
X_test = np.concatenate((rbm.transform(X_test), X_test_preserved), 1)
print(X_train)
print(X_train.shape)


ens_lbls = []
ens_probs = []
# iterate over classifiers
for name, clf in zip(names, classifiers):
    print "[{}] learning starting ...".format(name)
    clf.fit(X_train, y_train)
    print "[{}] learning finished".format(name)
    probs = clf.predict_proba(X_test)[:, [1]]
    dump_to_file(name+"_res_probs", ids, probs)
Code example #41
class BernoulliRBMComponent(AutoSklearnPreprocessingAlgorithm):
    def __init__(self,
                 n_components: int = 256,
                 learning_rate: float = 0.1,
                 batch_size: int = 10,
                 n_iter: int = 10,
                 random_state=None):
        super().__init__()
        self.n_components = n_components
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.n_iter = n_iter
        self.random_state = random_state

    def fit(self, X, Y=None):
        from sklearn.neural_network import BernoulliRBM
        self.n_components = int(self.n_components)
        self.learning_rate = float(self.learning_rate)
        self.batch_size = int(self.batch_size)
        self.n_iter = int(self.n_iter)

        self.preprocessor = BernoulliRBM(n_components=self.n_components,
                                         learning_rate=self.learning_rate,
                                         batch_size=self.batch_size,
                                         n_iter=self.n_iter,
                                         random_state=self.random_state)
        self.preprocessor.fit(X)  # actually train the RBM so transform() can run
        return self

    def transform(self, X):
        if self.preprocessor is None:
            raise NotImplementedError()
        return self.preprocessor.transform(X)

    @staticmethod
    def get_properties(dataset_properties=None):
        return {
            'shortname': 'BernoulliRBM',
            'name': 'Bernoulli Restricted Boltzmann Machine',
            'handles_regression': True,
            'handles_classification': True,
            'handles_multiclass': True,
            'handles_multilabel': True,
            'handles_multioutput': True,
            'is_deterministic': False,
            'input': (DENSE, SPARSE, UNSIGNED_DATA),
            'output': (DENSE, UNSIGNED_DATA)
        }

    @staticmethod
    def get_hyperparameter_search_space(dataset_properties=None):
        n_components = UniformIntegerHyperparameter("n_components",
                                                    1,
                                                    512,
                                                    default_value=256)
        learning_rate = UniformFloatHyperparameter("learning_rate",
                                                   1e-5,
                                                   1.,
                                                   default_value=0.1)
        batch_size = UniformIntegerHyperparameter("batch_size",
                                                  1,
                                                  100,
                                                  default_value=10)
        n_iter = UniformIntegerHyperparameter("n_iter",
                                              2,
                                              200,
                                              default_value=10)

        cs = ConfigurationSpace()
        cs.add_hyperparameters(
            [n_components, n_iter, learning_rate, batch_size])
        return cs
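A hedged usage sketch for the component above (toy data; note that fit() must actually train the wrapped RBM before transform() can run):

import numpy as np

X = np.random.RandomState(0).rand(20, 5)
comp = BernoulliRBMComponent(n_components=4, n_iter=5, random_state=0)
print(comp.fit(X).transform(X).shape)  # (20, 4)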
Code example #42
class BoWFeature(BaseEstimator, TransformerMixin):
    def __init__(self, patch_num=10000, patch_size=(8, 8), sample_num = 300,\
                n_components=256, learning_rate=0.03, n_iter=100, batch_size=100):
        self.patch_num = patch_num
        self.patch_size = patch_size
        self.sample_num = sample_num
        
        self.n_components = n_components
        self.learning_rate = learning_rate
        self.n_iter = n_iter
        self.batch_size = batch_size

    
    def fit(self, X, y=None):
        num = self.patch_num // X.size
        data = []
        for item in X:
            img = imread(str(item[0]))
            img = img_as_ubyte(rgb2gray(img))
            #img = self.binary(img)  # binarize
            tmp = extract_patches_2d(img, self.patch_size, max_patches = num,\
                                    random_state=np.random.RandomState())
            data.append(tmp)
        
        data = np.vstack(data)
        data = data.reshape(data.shape[0], -1)
        data = np.asarray(data, 'float32')
        
        # 0-1 scaling (if the patches had been binarized above, this would be unnecessary)
        data = data - np.min(data, 0)
        data = data/(np.max(data, 0) + 0.0001)
        
        self.rbm = BernoulliRBM(n_components=self.n_components,\
                        learning_rate=self.learning_rate, \
                        n_iter=self.n_iter,\
                        batch_size=self.batch_size,\
                        verbose=True)
        self.rbm.fit(data)
        return self
    
    def transform(self, X):
        results = []
        for sample in X:
            img = imread(str(sample[0]))
            img = img_as_ubyte(rgb2gray(img))
            #img = self.binary(img)
            patches = extract_patches_2d(img, self.patch_size,\
                                         max_patches = self.sample_num,\
                                         random_state=np.random.RandomState())
            
            patches = patches.reshape(patches.shape[0], -1)
            patches = np.asarray(patches, 'float32')
            
            patches = patches-np.min(patches, 0)
            patches = patches/(np.max(patches, 0) + 0.0001)

            patches = self.rbm.transform(patches)
            results.append(patches.sum(axis=0))
        return np.vstack(results)
    
    def get_params(self, deep=True):
        return {"patch_num": self.patch_num,
                "sample_num":self.sample_num,
                "patch_size":self.patch_size,
                "learning_rate":self.learning_rate,
                "n_components":self.n_components,
                "n_iter":self.n_iter,
                "batch_size":self.batch_size}
    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            self.__setattr__(parameter, value)
        return self
        
    def binary(self, img):
        edge = sobel(img)
        thresh = threshold_otsu(edge)
        edge = edge>=thresh
        return edge.astype(int)
Code example #43
    X_train_unlab = X_train0[N_LABEL:N_UNLAB]
    X_validation = mnist.validation.images[:N_CV]
    y_validation = mnist.validation.labels[:N_CV]
    X_test = mnist.test.images
    y_test = mnist.test.labels

    rbm = BernoulliRBM(random_state=0, verbose=True)
    rbm.learning_rate = 0.03
    rbm.n_iter = 10
    # More components tend to give better prediction performance, but larger
    # fitting time
    rbm.n_components = 500
    print('\nRBM Training...')
    rbm.fit(X_train_unlab)  # train by unlabelled data

    X_train_rbmfitted = rbm.transform(X_train_lab)
    X_validation_rbmfitted = rbm.transform(X_validation)
    X_test_rbmfitted = rbm.transform(X_test)

    gbm = lgb.LGBMClassifier(objective='multiclass',
                             num_leaves=63,
                             learning_rate=0.01,
                             n_estimators=1000)

    gbm.fit(
        X_train_rbmfitted,
        y_train_lab,  # train by labbelled data
        eval_set=[(X_validation_rbmfitted, y_validation)],
        eval_metric='multi_logloss',
        early_stopping_rounds=10)
    y_pred = gbm.predict(X_test_rbmfitted, num_iteration=gbm.best_iteration)
Code example #44
import numpy as np
import pandas
from sklearn import svm
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.neural_network import BernoulliRBM
# Massager below is project-specific and defined elsewhere in the repository.

train = pandas.read_csv("train.csv")
target = train["Survived"]

m = Massager()
train_array = m.transform(train, True)

brbm = BernoulliRBM(n_components=3, learning_rate=0.01)

trantrain = brbm.fit_transform(train_array)
param_grid = dict(C=np.logspace(-10, 2, 13), gamma=np.logspace(-9, 3, 13))
grid = GridSearchCV(svm.SVC(), param_grid=param_grid)
grid.fit(trantrain, target)
C = grid.best_params_['C']
gamma = grid.best_params_['gamma']
classifier = svm.SVC(C=C, gamma=gamma)
classifier.fit(trantrain, target)

vscore = cross_val_score(classifier, train_array, target)
print "Validation score: {0} sd: {1}".format(vscore.mean(), vscore.std())

test = pandas.read_csv("test.csv")
answers = pandas.DataFrame(test["PassengerId"])
test_array = m.transform(test)
trantest = brbm.transform(test_array)
predictions = classifier.predict(trantest)
print(classifier.score(trantrain, target))
answers['Survived'] = pandas.Series(predictions.astype(int))

answers.to_csv("solution_rbm_svm.csv", index=False)
Code example #45
File: iris_latent_space.py Project: imito/odin
# ====== plda ====== #
plda = PLDA(n_phi=NUM_DIM, random_state=SEED)
plda.fit(X_train, y_train)
X_train_plda = plda.predict_log_proba(X_train)
X_score_plda = plda.predict_log_proba(X_score)
# ====== gmm ====== #
gmm = GaussianMixture(n_components=NUM_DIM, max_iter=100, covariance_type='full',
                      random_state=SEED)
gmm.fit(X_train)
X_train_gmm = gmm._estimate_weighted_log_prob(X_train)
X_score_gmm = gmm._estimate_weighted_log_prob(X_score)
# ====== rbm ====== #
rbm = BernoulliRBM(n_components=NUM_DIM, batch_size=8, learning_rate=0.0008,
                   n_iter=8, verbose=2, random_state=SEED)
rbm.fit(X_train)
X_train_rbm = rbm.transform(X_train)
X_score_rbm = rbm.transform(X_score)
# ===========================================================================
# Deep Learning
# ===========================================================================

# ===========================================================================
# Visualize
# ===========================================================================
def plot(train, score, title, applying_pca=False):
  if applying_pca:
    pca = PCA(n_components=NUM_DIM)
    pca.fit(train)
    train = pca.transform(train)
    score = pca.transform(score)
  plot_figure(nrow=6, ncol=12)