# Imports assumed for this fragment; the pydbm module paths follow the library's README.
from pydbm.activation.logistic_function import LogisticFunction
from pydbm.approximation.contrastive_divergence import ContrastiveDivergence
from pydbm.dbm.builders.dbm_multi_layer_builder import DBMMultiLayerBuilder
from pydbm.dbm.deepboltzmannmachine.stacked_auto_encoder import StackedAutoEncoder

# `observed_arr` is assumed to be a 2D `np.ndarray` of observed data points
# (rows: samples, columns: features).

# Setting the activation function for each layer.
activation_list = [
    LogisticFunction(binary_flag=False,
                     normalization_mode="min_max",
                     normalize_flag=True),
    LogisticFunction(binary_flag=False,
                     normalization_mode="min_max",
                     normalize_flag=True),
    LogisticFunction(binary_flag=False,
                     normalization_mode="min_max",
                     normalize_flag=True)
]

# Setting the object for function approximation.
approximation_list = [ContrastiveDivergence(), ContrastiveDivergence()]

dbm = StackedAutoEncoder(
    DBMMultiLayerBuilder(),
    [observed_arr.shape[1], 10, observed_arr.shape[1]],
    activation_list,
    approximation_list,
    1e-05,  # Setting learning rate.
    0.5  # Setting dropout rate.
)

# Execute learning.
dbm.learn(
    observed_arr,
    1,  # If approximation is the Contrastive Divergence, this parameter is `k` in CD method.
    batch_size=100,  # Batch size in mini-batch training.
    r_batch_size=-1,  # If `r_batch_size` > 0, `dbm.learn` performs a kind of recursive learning.
    sgd_flag=True)
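
# After learning, the extracted feature points can be inspected, as the later examples
# in this listing do; `feature_points_arr` is the attribute they use.
import pandas as pd

feature_points_df = pd.DataFrame(dbm.feature_points_arr)
print(feature_points_df.shape)
print(feature_points_df.head())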
Example #2
        Feature points:
            0.190599  0.183594  0.482996  0.911710  0.939766  0.202852  0.042163
            0.470003  0.104970  0.602966  0.927917  0.134440  0.600353  0.264248
            0.419805  0.158642  0.328253  0.163071  0.017190  0.982587  0.779166
            0.656428  0.947666  0.409032  0.959559  0.397501  0.353150  0.614216
            0.167008  0.424654  0.204616  0.573720  0.147871  0.722278  0.068951
            .....

        Reconstruct error:
            [ 0.08297197  0.07091231  0.0823424  ...,  0.0721624   0.08404181  0.06981017]
    '''

    target_arr = np.random.uniform(size=(10000, 10000))

    dbm = StackedAutoEncoder(
        DBMMultiLayerBuilder(), [target_arr.shape[1], 10, target_arr.shape[1]],
        [SoftmaxFunction(),
         SoftmaxFunction(),
         SoftmaxFunction()],
        [ContrastiveDivergence(),
         ContrastiveDivergence()],
        0.05,
        0.5,
        inferencing_flag=True,
        inferencing_plan="each")
    dbm.learn(target_arr, traning_count=1, batch_size=100, r_batch_size=-1)

    import pandas as pd
    feature_points_df = pd.DataFrame(dbm.feature_points_arr)
    print(feature_points_df.shape)
    print(feature_points_df.head())
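
    # A possible follow-up (not part of the original example): summarize the extracted
    # feature points and persist them for reuse; the file name is only illustrative.
    print(feature_points_df.describe())
    feature_points_df.to_csv("feature_points.csv", index=False)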
Example #3
from pydbm.activation.logistic_function import LogisticFunction
# Imports assumed for the other pydbm classes used below; module paths follow the library's README.
from pydbm.dbm.deep_boltzmann_machine import DeepBoltzmannMachine
from pydbm.dbm.builders.dbm_multi_layer_builder import DBMMultiLayerBuilder
from pydbm.approximation.contrastive_divergence import ContrastiveDivergence
import numpy as np
import random
import pandas as pd
from pprint import pprint
from sklearn.datasets import make_classification
# `sklearn.cross_validation` was removed in scikit-learn 0.20; `model_selection` provides train_test_split.
from sklearn.model_selection import train_test_split

if __name__ == "__main__":
    '''
    '''

    data_tuple = make_classification(n_samples=20000,
                                     n_features=1000,
                                     n_informative=5,
                                     n_classes=5,
                                     class_sep=1.0,
                                     scale=0.1)
    data_tuple_x, data_tuple_y = data_tuple
    training_x, test_x, training_y, test_y = train_test_split(data_tuple_x,
                                                               data_tuple_y,
                                                               test_size=0.5,
                                                               random_state=888)

    dbm = DeepBoltzmannMachine(DBMMultiLayerBuilder(),
                               [training_x.shape[1], 10, training_x.shape[1]],
                               LogisticFunction(), ContrastiveDivergence(),
                               0.05)
    # `traning_count` (sic) is the keyword spelling used throughout these examples.
    dbm.learn(training_x, traning_count=1)
    print(dbm.get_feature_point_list(0))
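
    # A possible follow-up (not in the original example): wrap the layer-0 feature points
    # returned by `get_feature_point_list(0)` in a pandas Series to summarize them.
    feature_point_series = pd.Series(dbm.get_feature_point_list(0))
    print(feature_point_series.describe())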
Example #4
# Imports assumed for the pydbm classes used below; module paths follow the library's README.
from pydbm.dbm.deepboltzmannmachine.stacked_auto_encoder import StackedAutoEncoder
from pydbm.dbm.builders.dbm_multi_layer_builder import DBMMultiLayerBuilder
from pydbm.activation.logistic_function import LogisticFunction
from pydbm.approximation.contrastive_divergence import ContrastiveDivergence
from pprint import pprint
from sklearn.datasets import make_classification
# `sklearn.cross_validation` was removed in scikit-learn 0.20; `model_selection` provides train_test_split.
from sklearn.model_selection import train_test_split

if __name__ == "__main__":
    '''
    '''

    data_tuple = make_classification(n_samples=20000,
                                     n_features=1000,
                                     n_informative=5,
                                     n_classes=5,
                                     class_sep=1.0,
                                     scale=0.1)
    data_tuple_x, data_tuple_y = data_tuple
    training_x, test_x, training_y, test_y = train_test_split(data_tuple_x,
                                                               data_tuple_y,
                                                               test_size=0.5,
                                                               random_state=888)

    dbm = StackedAutoEncoder(DBMMultiLayerBuilder(),
                             [training_x.shape[1], 10, training_x.shape[1]],
                             LogisticFunction(), ContrastiveDivergence(), 0.05)
    dbm.learn(training_x, traning_count=1)
    import pandas as pd
    feature_points_df = pd.DataFrame(dbm.feature_points_arr)
    print(feature_points_df.shape)
    print(feature_points_df.head())
    print("-" * 100)
    print(feature_points_df.tail())
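
    # A possible downstream use (not in the original example): `make_classification` also
    # produced labels, so the extracted feature points can feed a simple classifier. This
    # assumes `feature_points_arr` has one row per sample of `training_x`, in the same order.
    from sklearn.linear_model import LogisticRegression

    clf = LogisticRegression(max_iter=1000)
    clf.fit(dbm.feature_points_arr, training_y)
    print(clf.score(dbm.feature_points_arr, training_y))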
Example #5
    def __init__(self,
                 token_list,
                 document_list=[],
                 traning_count=100,
                 batch_size=20,
                 learning_rate=1e-05,
                 feature_dim=100):
        '''
        Initialize.
        
        Args:
            token_list:         The list of all tokens in all sentences.
                                If the input value is a two-dimensional list, 
                                the first-dimensional key represents a sentence number, 
                                and the second-dimensional key represents a token number.

            document_list:      The list of documents, each composed of tokens.
            traning_count:      The number of training epochs.
            batch_size:         Batch size.
            learning_rate:      Learning rate.
            feature_dim:        The dimension of feature points.
        '''
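        # Illustrative (hypothetical) inputs: a two-dimensional `token_list` has one row per
        # sentence, e.g. [["the", "cat", "sat"], ["the", "dog", "ran"]], while a one-dimensional
        # `token_list` is a single flat token sequence, e.g. ["the", "cat", "sat", "on", "mats"].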
        pair_dict = {}
        document_dict = {}

        self.__token_arr = np.array(token_list)
        if self.__token_arr.ndim == 2:
            for i in range(self.__token_arr.shape[0]):
                for j in range(1, self.__token_arr[i].shape[0] - 1):
                    pair_dict.setdefault(
                        (self.__token_arr[i, j], self.__token_arr[i, j - 1]),
                        0)
                    pair_dict[(self.__token_arr[i, j],
                               self.__token_arr[i, j - 1])] += 1
                    pair_dict.setdefault(
                        (self.__token_arr[i, j], self.__token_arr[i, j + 1]),
                        0)
                    pair_dict[(self.__token_arr[i, j],
                               self.__token_arr[i, j + 1])] += 1
                    document_dict.setdefault(self.__token_arr[i, j], [])
                    for d in range(len(document_list)):
                        if self.__token_arr[i, j] in document_list[d]:
                            document_dict[self.__token_arr[i, j]].append(d)

        elif self.__token_arr.ndim == 1:
            for i in range(1, self.__token_arr.shape[0] - 1):
                pair_dict.setdefault(
                    (self.__token_arr[i], self.__token_arr[i - 1]), 0)
                pair_dict[(self.__token_arr[i], self.__token_arr[i - 1])] += 1
                pair_dict.setdefault(
                    (self.__token_arr[i], self.__token_arr[i + 1]), 0)
                pair_dict[(self.__token_arr[i], self.__token_arr[i + 1])] += 1

                document_dict.setdefault(self.__token_arr[i], [])
                for d in range(len(document_list)):
                    if self.__token_arr[i] in document_list[d]:
                        document_dict[self.__token_arr[i]].append(d)
        else:
            raise ValueError("`token_list` must be a one- or two-dimensional list of tokens.")

        token_list = list(set(self.__token_arr.ravel().tolist()))

        token_arr = np.zeros((len(token_list), len(token_list)))
        pair_arr = np.zeros((len(token_list), len(token_list)))
        document_arr = np.zeros((len(token_list), len(document_list)))
        for i in range(token_arr.shape[0]):
            for j in range(token_arr.shape[0]):
                try:
                    pair_arr[i, j] = pair_dict[(token_list[i], token_list[j])]
                    token_arr[i, j] = 1.0
                except KeyError:
                    # The token pair never co-occurred; leave this cell as zero.
                    pass

            if len(document_list) > 0:
                if token_list[i] in document_dict:
                    for d in document_dict[token_list[i]]:
                        document_arr[i, d] = 1.0

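        # Rescale the co-occurrence counts with a global softmax, then z-score standardize.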
        pair_arr = np.exp(pair_arr - pair_arr.max())
        pair_arr = pair_arr / pair_arr.sum()
        pair_arr = (pair_arr - pair_arr.mean()) / (pair_arr.std() + 1e-08)
        if len(document_list) > 0:
            document_arr = (document_arr -
                            document_arr.mean()) / (document_arr.std() + 1e-08)

            token_arr = np.c_[pair_arr, document_arr]
            token_arr = (token_arr - token_arr.mean()) / (token_arr.std() +
                                                          1e-08)

        self.__dbm = StackedAutoEncoder(
            DBMMultiLayerBuilder(),
            [token_arr.shape[1], feature_dim, token_arr.shape[1]],
            [TanhFunction(), TanhFunction(),
             TanhFunction()],
            [ContrastiveDivergence(),
             ContrastiveDivergence()],
            learning_rate=learning_rate)
        self.__dbm.learn(token_arr,
                         traning_count=traning_count,
                         batch_size=batch_size,
                         sgd_flag=True)
        feature_points_arr = self.__dbm.feature_points_arr
        self.__token_arr = token_arr
        self.__token_list = token_list
        self.__feature_points_arr = feature_points_arr
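
    # Hypothetical helper (not part of the original snippet): one natural way to expose the
    # learned representations is a lookup from a token to its feature-point vector, using the
    # attributes set above. This assumes `feature_points_arr` preserves the row order of
    # `token_arr`, i.e. row i corresponds to `token_list[i]`.
    def vectorize(self, token):
        '''
        Return the feature-point vector learned for `token`.

        Args:
            token:    A token that appeared in `token_list`.

        Returns:
            A 1D `np.ndarray` of length `feature_dim`.
        '''
        key = self.__token_list.index(token)
        return self.__feature_points_arr[key]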