Example #1
 def __createDBN__(self):
     self.FM = UnsupervisedDBN(hidden_layers_structure=self.dbn_layers,
                               batch_size=512,
                               learning_rate_rbm=0.3,
                               n_epochs_rbm=64,
                               activation_function='sigmoid',
                               verbose=False)
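For context, a minimal usage sketch of the same feature mapper, relying only on the sklearn-style fit/transform API that UnsupervisedDBN exposes in the other examples below; the toy data is hypothetical:

import numpy as np

fm = UnsupervisedDBN(hidden_layers_structure=[100, 16],
                     batch_size=32,
                     learning_rate_rbm=0.3,
                     n_epochs_rbm=8,
                     activation_function='sigmoid',
                     verbose=False)

X = np.random.rand(256, 100)   # hypothetical feature vectors scaled to [0, 1]
fm.fit(X)                      # greedy layer-wise RBM pre-training
embeddings = fm.transform(X)   # final hidden layer: shape (256, 16)
print(embeddings.shape)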
Example #2
def main():
    # 0. read the data and split it into 80% for training and 20% for testing
    items = pd.read_csv('input/items.csv', sep=';', encoding='ISO-8859-1')
    
    print(items.shape)
    
    items_train, items_test = train_test_split(items, train_size=0.8, random_state=0)
    
    print(items_train.shape, items_test.shape)


    # 1. train the tf-idf model and save it under model/tfidf_model.pickle together with its result
    if not os.path.isfile('model/tfidf_model.pickle'):
        print('training tf-idf model ...')
        tfidf_model = TfidfVectorizer(norm='l2', min_df=0, use_idf=True,
                                      max_features=5000, smooth_idf=False,
                                      sublinear_tf=True, tokenizer=tokeniser)
        tfidf_model.fit(items_train['movie desription'].values.astype('U'))
        # 1.1. save the tf-idf model
        print('Saving tf-idf model ...')
        save_model('model/tfidf_model.pickle', tfidf_model)
    else:
        tfidf_model = load_model('model/tfidf_model.pickle')

    if not os.path.isfile('result/item_feature_matrix.pickle'):
        # 1.2. compute and save the tf-idf matrix (tfidf_model is now defined on both paths)
        item_feature_matrix = tfidf_model.transform(
            items_train['movie desription'].values.astype('U'))
        print('#1. dimension of the item-feature matrix', item_feature_matrix.shape)
        print('Saving tf-idf matrix result ...')
        save_model('result/item_feature_matrix.pickle', item_feature_matrix)

    # 2. train dbn model and save the model into model/dbn.pickle
    # 2.1. load tf-idf result
    print('loading item feature matrix ...')
    item_feature_matrix = load_model('result/item_feature_matrix.pickle')
    
    if not os.path.isfile('model/dbn-model.pkl'):
        dbn = UnsupervisedDBN(hidden_layers_structure=[5000, 400],
                              batch_size=10,
                              learning_rate_rbm=0.06,
                              n_epochs_rbm=20,
                              activation_function='sigmoid')
        # 2.2. fit dbn model
        dbn.fit(item_feature_matrix.A)
        # 2.3. save dbn model
        print('saving DBN model ...')
        dbn.save('model/dbn-model.pkl')
        
    print('Loading DBN model ...')
    dbn = UnsupervisedDBN.load('model/dbn-model.pkl')

    # 3. Clustering with k-means and save model and results
    if not os.path.isfile('model/kmeans-model.pkl'):
        kmeans = KMeans(n_clusters=5, random_state=0).fit(dbn.transform(item_feature_matrix.A))
        print('saving k-means model ...')
        save_model('model/kmeans-model.pkl', kmeans)
    else:
        kmeans = load_model('model/kmeans-model.pkl')
    
    print(kmeans.labels_)
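The save_model and load_model helpers are used throughout this example but never defined in the snippet; a minimal sketch of what they might look like, assuming plain pickle serialization:

import pickle

def save_model(path, obj):
    # serialize any picklable object (vectorizer, sparse matrix, estimator)
    with open(path, 'wb') as f:
        pickle.dump(obj, f)

def load_model(path):
    # restore a previously pickled object
    with open(path, 'rb') as f:
        return pickle.load(f)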
Example #3
    plt.ylabel('Loss')
    plt.legend()
    plt.savefig('foo.png')

# reducing the dimension of the feature vector space from (12*186) to (12*8)
X_with_reduced_dimension = loaded_encoder.predict(X)
print('Now the dimensions of the encoded input feature space is -- ',
      X_with_reduced_dimension.shape)

if not LOAD_MODEL:
    # Create the classifier for the reduced-dimension input: a hybrid model
    # comprising an unsupervised DBN followed by an SVM classifier to predict the label
    svm = SVC()
    dbn = UnsupervisedDBN(hidden_layers_structure=[512, 256, 256, 512, 256],
                          batch_size=10,
                          learning_rate_rbm=0.06,
                          n_epochs_rbm=20,
                          activation_function='relu')
    classifier = Pipeline(steps=[('dbn', dbn), ('svm', svm)])

    classifier.fit(X_with_reduced_dimension, y)

    f = open("pickled/DBNClassifier.pkl", "wb")
    pickle.dump(classifier, f)
    f.close()

f = open("pickled/DBNClassifier.pkl", "rb")
classifier = pickle.load(f)
f.close()

# X_train, X_test, Y_train, Y_test = train_test_split(X, y, train_size=0.8)
X, Y = nudge_dataset(X, digits.target)
X = (X - np.min(X, 0)) / (np.max(X, 0) + 0.0001)  # 0-1 scaling

X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                    Y,
                                                    test_size=0.2,
                                                    random_state=0)

print('X_train size : {0} \n'.format(X_train.shape))
print('X_test size : {0} \n'.format(X_test.shape))

# Models we will use
dbn = UnsupervisedDBN(hidden_layers_structure=[256, 512],
                      batch_size=32,
                      learning_rate_rbm=0.06,
                      learning_rate_backprop=1e-3,
                      n_epochs_rbm=50,
                      n_epochs_fine_tune=500,
                      activation_function='sigmoid',
                      contrastive_divergence_iter=1)

###############################################################################

# Training RBM-Logistic Pipeline
dbn.fit(X_train)
# Save the training metrics
for index, layer_wise_error in enumerate(dbn.layer_wise_error):
    with open("layer_" + str(index), 'wb') as f:
        pickle.dump(layer_wise_error, f)

# Fine tune the DBN using the reconstruction MSE (over pixels)
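To inspect the metrics saved above, a minimal sketch that reads the pickles back and plots them, assuming each layer_<i> file holds a list of per-epoch reconstruction errors (an assumption based on this snippet, not on documented library behavior):

import pickle
import matplotlib.pyplot as plt

for index in range(len(dbn.layer_wise_error)):
    with open("layer_" + str(index), 'rb') as f:
        errors = pickle.load(f)        # assumed: one error value per epoch
    plt.plot(errors, label='layer ' + str(index))
plt.xlabel('Epoch')
plt.ylabel('Reconstruction error')
plt.legend()
plt.savefig('layer_wise_error.png')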
Example #5
X_train = mnist_data.train.images
X_test = mnist_data.test.images
Y_train = mnist_data.train.labels
Y_test = mnist_data.test.labels
plt.imshow(X_train[200].reshape(28, 28))
n_epochs_rbm = 10

logistic_inverse_reg = 50.0  # inverse regularization strength: the smaller, the stronger the regularization
logistic_inverse_reg_2 = 1

##Models we will use
logistic = linear_model.LogisticRegression(
    solver='newton-cg', multi_class='auto',
    C=logistic_inverse_reg)  #logistic regression
dbn = UnsupervisedDBN(hidden_layers_structure=[256, 512],
                      batch_size=100,
                      learning_rate_rbm=0.01,
                      n_epochs_rbm=n_epochs_rbm,
                      activation_function='sigmoid')

classifier = Pipeline(steps=[('dbn', dbn), ('logistic', logistic)])

##Training RBM-logistic pipeline
classifier.fit(X_train, Y_train)

##Training logistic regression
logistic_classifier = linear_model.LogisticRegression(solver='newton-cg',
                                                      multi_class='auto',
                                                      C=logistic_inverse_reg_2)
logistic_classifier.fit(X_train, Y_train)

##Save model
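The snippet ends at the save step; a minimal sketch of one way to persist both classifiers, assuming pickle and hypothetical file names:

import pickle

with open('dbn_logistic_pipeline.pkl', 'wb') as f:   # hypothetical file name
    pickle.dump(classifier, f)
with open('logistic_baseline.pkl', 'wb') as f:       # hypothetical file name
    pickle.dump(logistic_classifier, f)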
Example #6
class DBN_AE:
    def __init__(self,
                 n,
                 max_autoencoder_size=10,
                 FM_grace_period=None,
                 AD_grace_period=10000,
                 dbn_layers=[100, 75, 50, 35, 16],
                 learning_rate=0.1,
                 hidden_ratio=0.75,
                 feature_map=None):
        self.AD_grace_period = AD_grace_period
        self.FM_grace_period = FM_grace_period
        self.lr = learning_rate
        self.hr = hidden_ratio
        self.n = n

        self.n_trained = 0  # the number of training instances so far
        self.n_executed = 0  # the number of executed instances so far
        self.dbn_batch = 10000
        self.dbn_layers = dbn_layers
        self.AE_dim = dbn_layers[-1]
        self.__createDBN__()
        self.__createAE__()
        self.fvs = []
        print("Deep Belief Network: train-mode, Auto-Encoder: off-mode")

    def __createAE__(self):
        params = AE.dA_params(self.AE_dim,
                              n_hidden=0,
                              lr=self.lr,
                              corruption_level=0,
                              gracePeriod=0,
                              hiddenRatio=self.hr)
        self.AE = AE.dA(params)

    def __createDBN__(self):
        self.FM = UnsupervisedDBN(hidden_layers_structure=self.dbn_layers,
                                  batch_size=512,
                                  learning_rate_rbm=0.3,
                                  n_epochs_rbm=64,
                                  activation_function='sigmoid',
                                  verbose=False)

    def process(self, x):
        # train during the grace periods, then switch to anomaly scoring
        if self.n_trained < self.FM_grace_period + self.AD_grace_period:
            self.train(x)
            return 0.0
        else:
            return self.execute(x)

    def train_FM(self, x):
        # buffer instances; the DBN is fitted once a full batch has accumulated
        self.fvs.append(x)
        if len(self.fvs) == self.dbn_batch:
            xx = np.array(self.fvs)
            self.FM.fit(xx)
            self.fvs.clear()

    def train(self, x):
        if self.n_trained < self.FM_grace_period:
            self.train_FM(x)
        else:
            S_l1 = self.FM.transform(x)
            self.AE.train(S_l1)
        self.n_trained += 1
        if self.n_trained == self.AD_grace_period + self.FM_grace_period:
            print(
                "Deep Belief Network: execute-mode, Auto-Encoder: train-mode")

    def execute(self, x):
        self.n_executed += 1
        S_l1 = self.FM.transform(x)
        return self.AE.execute(S_l1)
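A minimal usage sketch for DBN_AE on a synthetic stream, assuming the dbn and AE modules it depends on are importable; note that FM_grace_period must be passed explicitly, since its default of None would break the arithmetic in process():

import numpy as np

n_features = 100
detector = DBN_AE(n=n_features,
                  FM_grace_period=20000,   # instances used to train the DBN feature mapper
                  AD_grace_period=10000)   # instances used to train the Auto-Encoder

for _ in range(35000):
    x = np.random.rand(n_features)   # synthetic instance; real code would feed extracted features
    score = detector.process(x)      # 0.0 during the grace periods, an anomaly score afterwards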
Example #7
#json_file = open("encoder_advanced.json","r")
#encoder_model_json = json_file.read()
#json_file.close()

#encoder_model = model_from_json(encoder_model_json)
#encoder_model.load_weights("model_advanced.h5")
#print("loaded model from disk")

#encoder_model.compile(loss="binary_crossentropy",optimizer="adam")

#X_encoded_train = encoder_model.predict(X_train)
#X_encoded_test = encoder_model.predict(X_test)
dbn = UnsupervisedDBN(hidden_layers_structure=[50, 100, 200],
                      batch_size=200,
                      learning_rate_rbm=0.06,
                      n_epochs_rbm=10,
                      activation_function="sigmoid")
sae = StackedAutoEncoder(network_architecture=[100, 50],
                         batch_size=200,
                         learning_rate_ae=0.001,
                         n_epochs_ae=1,
                         activation_function="sigmoid")

RFClassifier = RandomForestClassifier(n_estimators=25)
classifier_dbn = Pipeline(steps=[('dbn', dbn), ('rfc', RFClassifier)])
classifier_dbn.fit(X_train, Y_train)
print("Random Forest Classification using DBN features:\n%s\n" %
      (metrics.classification_report(Y_test, classifier_dbn.predict(X_test))))

print()
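The sae model above is constructed but never used; a parallel sketch, assuming StackedAutoEncoder is sklearn-compatible the way UnsupervisedDBN is, would evaluate Random Forest on SAE features the same way:

classifier_sae = Pipeline(steps=[('sae', sae), ('rfc', RandomForestClassifier(n_estimators=25))])
classifier_sae.fit(X_train, Y_train)
print("Random Forest Classification using SAE features:\n%s\n" %
      (metrics.classification_report(Y_test, classifier_sae.predict(X_test))))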
Example #8
                       header=None,
                       encoding='ISO-8859-1')
u_item_DF['movie desription'] = [val[2] for i, val in data_new.iterrows()]

sklearn_tfidf = TfidfVectorizer(norm='l2',
                                min_df=0,
                                use_idf=True,
                                max_features=5000,
                                smooth_idf=False,
                                sublinear_tf=True,
                                tokenizer=tokeniser)
item_feature_matrix = sklearn_tfidf.fit_transform(
    u_item_DF['movie desription'].values.astype('U'))
print('dimension of the item-feature matrix', item_feature_matrix.shape)

# Train DBN model
from dbn.models import UnsupervisedDBN

# alternative hidden-layer structure: [4604, 2000, 4000, 3000, 1000]
dbn = UnsupervisedDBN(hidden_layers_structure=[5000, 400],
                      batch_size=10,
                      learning_rate_rbm=0.06,
                      n_epochs_rbm=20,
                      activation_function='sigmoid')
dbn.fit(item_feature_matrix.A)

# Save the model
print('Saving Model ...')
dbn.save('model-1.pkl')
print('Model Saved')
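To reuse the saved model later, a short sketch relying on the UnsupervisedDBN.load class method seen in Example #2:

dbn = UnsupervisedDBN.load('model-1.pkl')
item_embeddings = dbn.transform(item_feature_matrix.A)  # dense (n_items, 400) representation
print('dimension of the item embeddings', item_embeddings.shape)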
Example #9
Y = Y.T[0]

# Splitting data
X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                    Y,
                                                    test_size=0.2,
                                                    random_state=0)

##########################################################################################

### RBM-Logistic Models we will use ##########################################
logistic = linear_model.LogisticRegression()
dbn = UnsupervisedDBN(hidden_layers_structure=[500, 500],
                      batch_size=32,
                      learning_rate_rbm=0.05,
                      n_epochs_rbm=10,
                      activation_function='relu')

classifier = Pipeline(steps=[('dbn', dbn), ('logistic', logistic)])

classifier.fit(X_train, Y_train)

DBNprediction = classifier.predict(X_test)

###############################################################################
# Evaluation

print("Logistic regression using RBM features:\n%s\n" %
      (classification_report(Y_test, DBNprediction)))
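For comparison, a baseline sketch fitting the same logistic regression directly on the raw features, mirroring the baseline in Example #5:

raw_logistic = linear_model.LogisticRegression()
raw_logistic.fit(X_train, Y_train)
print("Logistic regression on raw features:\n%s\n" %
      (classification_report(Y_test, raw_logistic.predict(X_test))))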
Example #10
def main(tfidfModel=None, tfidfMatrix=None, dbn_model=None, kmeans_model=None):
    # 0. read the data and split it into 80% for training and 20% for testing
    items_info = pd.read_csv('input/items.csv', sep=';', encoding='ISO-8859-1')
    u_base1 = pd.read_csv('input/u1.base', sep='\t', header=None)
    train = pd.DataFrame(u_base1[1].drop_duplicates())
    u_test1 = pd.read_csv('input/u1.test', sep='\t', header=None)
    test = pd.DataFrame(u_test1[1].drop_duplicates())

    train_desc = [
        items_info[items_info['movie id'] == df[1]]
        ['movie desription'].values[0] for i, df in train.iterrows()
    ]
    test_desc = [
        items_info[items_info['movie id'] == df[1]]
        ['movie desription'].values[0] for i, df in test.iterrows()
    ]

    # 1. train tf-idf model and save it under model/tf-idf-model.pickle with the result
    if not tfidfModel:
        print('training tf-idf model ...')
        tfidf_model = TfidfVectorizer(norm='l2',
                                      min_df=0,
                                      use_idf=True,
                                      max_features=5000,
                                      smooth_idf=False,
                                      sublinear_tf=True,
                                      tokenizer=tokeniser)
        tfidf_model.fit(train_desc)
        print('- Saving tf-idf model ...')
        save_model('model/tfidf_model.pickle', tfidf_model)
    else:
        print('# Loading tf-idf model ...')
        tfidf_model = load_model(tfidfModel)

    if not tfidfMatrix:
        item_feature_matrix = tfidf_model.transform(train_desc)
        # 1.2. saving tf-idf matrix result
        print('- Saving tf-idf matrix result ...')
        save_model('result/item_feature_matrix.pickle', item_feature_matrix)
    else:
        print('# Loading tf-idf matrix result ...')
        item_feature_matrix = load_model(tfidfMatrix)

    if not dbn_model:
        dbn = UnsupervisedDBN(hidden_layers_structure=[5000, 1000, 1000, 500],
                              batch_size=10,
                              learning_rate_rbm=0.06,
                              n_epochs_rbm=20,
                              activation_function='sigmoid')
        # 2.2. fit dbn model
        dbn.fit(item_feature_matrix.A)
        # 2.3. save dbn model
        print('saving DBN model ...')
        dbn.save('model/dbn-model.pkl')
    else:
        print('Loading DBN model ...')
        dbn = UnsupervisedDBN.load(dbn_model)

    # 3. Clustering with k-means and save model and results
    if not kmeans_model:
        kmeans = KMeans(n_clusters=5, random_state=0).fit(
            dbn.transform(item_feature_matrix.A))
        print('saving k-means model ...')
        save_model('model/kmeans-model.pkl', kmeans)
    else:
        print('loading k-means model ...')
        kmeans = load_model(kmeans_model)

    print("Done!")