Example #1
# The wrapper delegates to scikit-learn's NMF.
from sklearn.decomposition import NMF as SKLModel


class NMFImpl:

    def __init__(self,
                 n_components=None,
                 init=None,
                 solver='cd',
                 beta_loss='frobenius',
                 tol=0.0001,
                 max_iter=200,
                 random_state=None,
                 alpha=0.0,
                 l1_ratio=0.0,
                 verbose=0,
                 shuffle=False):
        self._hyperparams = {
            'n_components': n_components,
            'init': init,
            'solver': solver,
            'beta_loss': beta_loss,
            'tol': tol,
            'max_iter': max_iter,
            'random_state': random_state,
            'alpha': alpha,
            'l1_ratio': l1_ratio,
            'verbose': verbose,
            'shuffle': shuffle}

    def fit(self, X, y=None):
        self._sklearn_model = SKLModel(**self._hyperparams)
        if y is not None:
            self._sklearn_model.fit(X, y)
        else:
            self._sklearn_model.fit(X)
        return self

    def transform(self, X):
        return self._sklearn_model.transform(X)
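A minimal usage sketch for this wrapper (toy data; assumes a scikit-learn version that still accepts the alpha and shuffle parameters, i.e. pre-1.2):

import numpy as np

X = np.random.RandomState(0).rand(6, 4)  # values in [0, 1); NMF needs non-negative input
model = NMFImpl(n_components=2, random_state=0).fit(X)
W = model.transform(X)  # per-sample weights, shape (6, 2)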
import numpy
from sklearn.decomposition import NMF


def a():
    # Toy 7x5 user-item rating matrix with two blocks of similar users.
    data_matrix = numpy.ones((7, 5)) * 2.5
    data_matrix[0, 0] = data_matrix[0, 1] = data_matrix[0, 2] = 1
    data_matrix[1, 0] = data_matrix[1, 1] = data_matrix[1, 2] = 3
    data_matrix[2, 0] = data_matrix[2, 1] = data_matrix[2, 2] = 4
    data_matrix[3, 0] = data_matrix[3, 1] = data_matrix[3, 2] = 5
    data_matrix[4, 3] = data_matrix[4, 4] = 4
    data_matrix[4, 1] = 2
    data_matrix[5, 3] = data_matrix[5, 4] = 5
    data_matrix[6, 3] = data_matrix[6, 4] = 2
    print(data_matrix)

    # Factorize with nimfa; `factorize` is a helper defined elsewhere in
    # this project.
    W, H = factorize(data_matrix)
    print('nimfa', numpy.dot(W, H))

    # Fold a new rating vector into the latent space and back to get
    # score estimates for every item.
    rating_vector = numpy.zeros((5, 1))  # all zeros; only one rating observed
    rating_vector[0, 0] = 4
    result = numpy.dot(H, rating_vector)
    result = numpy.dot(numpy.transpose(H), result)
    print(result)

    # Same reconstruction with scikit-learn's NMF.
    svd = NMF(n_components=2)
    W = svd.fit_transform(data_matrix)
    H = svd.components_
    print(numpy.dot(W, H))

    rating_vector = numpy.ones((5, 1)) * 2.5
    rating_vector[0, 0] = 4
    result = numpy.dot(H, rating_vector)
    result = numpy.dot(numpy.transpose(H), result)
    print(result)
    print(result.max())
import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.decomposition import NMF
from tqdm import tqdm


class NMFRecommender(BaseEstimator, RegressorMixin):
    def __init__(self, n_components):
        # Use the requested rank for the factorization.
        self.nmf = NMF(n_components=n_components, init='random', random_state=0)
        self.user_ids_dict = {}
        self.book_isbns_dict = {}

    def fit(self, X, y=None):
        self.sparse_matrix = X['sparse_matrix']
        self.user_ids_dict = X['user_ids_dict']
        self.book_isbns_dict = X['book_isbns_dict']
        self.nmf.fit(X['sparse_matrix'])
        return self  # follow the scikit-learn convention of returning self

    def predict(self, X, y=None):
        ratings = X['ratings']
        user_representations = self.nmf.transform(self.sparse_matrix)
        book_representations = self.nmf.components_
        estimations = []
        for i in tqdm(range(len(ratings))):
            user_idx = self.user_ids_dict[ratings.iloc[i]['User-ID']]
            book_idx = self.book_isbns_dict[ratings.iloc[i]['ISBN']]
            # Estimated rating = (user latent vector) . (book latent vector).
            estimation = np.dot(user_representations[user_idx],
                                book_representations[:, book_idx])
            estimations.append(estimation)
        return estimations

    def fit_predict(self, X, y=None):
        self.fit(X, y)
        return self.predict(X, y)
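A sketch of the dictionary input this recommender expects (hypothetical toy data): a user-by-book ratings matrix, lookup tables mapping User-ID and ISBN to row/column indices, and a ratings DataFrame with 'User-ID' and 'ISBN' columns.

import numpy as np
import pandas as pd

X = {
    'sparse_matrix': np.array([[5., 0., 3.],
                               [4., 0., 0.],
                               [0., 2., 5.]]),
    'user_ids_dict': {'u1': 0, 'u2': 1, 'u3': 2},    # User-ID -> row index
    'book_isbns_dict': {'b1': 0, 'b2': 1, 'b3': 2},  # ISBN -> column index
    'ratings': pd.DataFrame({'User-ID': ['u1', 'u3'], 'ISBN': ['b3', 'b2']}),
}
model = NMFRecommender(n_components=2)
print(model.fit_predict(X))  # one estimated rating per row of X['ratings']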
def factorize_data_matrix(self):
    # Factorize the data matrix once and cache the factors on disk.
    self.logger.info("Going to factorize matrix")
    DM_W_FILE = '../../resources/scikit_dm_W.npy'
    DM_H_FILE = '../../resources/scikit_dm_H.npy'
    if os.path.isfile(DM_W_FILE) and os.path.isfile(DM_H_FILE):
        self.W = numpy.load(DM_W_FILE)
        self.H = numpy.load(DM_H_FILE)
    else:
        svd = NMF(n_components=25)
        self.W = svd.fit_transform(self.data_matrix)
        self.H = svd.components_
        self.rmse()  # log reconstruction error before caching
        numpy.save(DM_W_FILE, self.W)
        numpy.save(DM_H_FILE, self.H)
    return self.W, self.H
Example #5
def fit(self, X, y=None):
    self._sklearn_model = SKLModel(**self._hyperparams)
    if y is not None:
        self._sklearn_model.fit(X, y)
    else:
        self._sklearn_model.fit(X)
    return self
Example #6
def __init__(self,
             n_components=None,
             init=None,
             solver='cd',
             beta_loss='frobenius',
             tol=0.0001,
             max_iter=200,
             random_state=None,
             alpha=0.0,
             l1_ratio=0.0,
             verbose=0,
             shuffle=False):
    self._hyperparams = {
        'n_components': n_components,
        'init': init,
        'solver': solver,
        'beta_loss': beta_loss,
        'tol': tol,
        'max_iter': max_iter,
        'random_state': random_state,
        'alpha': alpha,
        'l1_ratio': l1_ratio,
        'verbose': verbose,
        'shuffle': shuffle
    }
    # `Op` is the wrapped scikit-learn operator class, defined elsewhere
    # in the originating module.
    self._wrapped_model = Op(**self._hyperparams)
Example #7
from sklearn.decomposition import NMF


def nmf(data, components):
    model = NMF(n_components=components, init="random", random_state=0)
    W = model.fit_transform(data)  # per-sample weights, shape (n_samples, components)
    H = model.components_          # latent components, shape (components, n_features)
    # W @ H approximates `data`; only the weights W are returned here.
    return W
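For example, on a small non-negative toy matrix:

import numpy as np

data = np.random.RandomState(0).rand(10, 6)  # non-negative toy data
W = nmf(data, components=3)
print(W.shape)  # (10, 3)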
Example #8
			'MDS':MDS(),
			'MLPClassifier':MLPClassifier(),
			'MLPRegressor':MLPRegressor(),
			'MaxAbsScaler':MaxAbsScaler(),
			'MeanShift':MeanShift(),
			'MinCovDet':MinCovDet(),
			'MinMaxScaler':MinMaxScaler(),
			'MiniBatchDictionaryLearning':MiniBatchDictionaryLearning(),
			'MiniBatchKMeans':MiniBatchKMeans(),
			'MiniBatchSparsePCA':MiniBatchSparsePCA(),
			'MultiTaskElasticNet':MultiTaskElasticNet(),
			'MultiTaskElasticNetCV':MultiTaskElasticNetCV(),
			'MultiTaskLasso':MultiTaskLasso(),
			'MultiTaskLassoCV':MultiTaskLassoCV(),
			'MultinomialNB':MultinomialNB(),
			'NMF':NMF(),
			'NearestCentroid':NearestCentroid(),
			'NearestNeighbors':NearestNeighbors(),
			'Normalizer':Normalizer(),
			'NuSVC':NuSVC(),
			'NuSVR':NuSVR(),
			'Nystroem':Nystroem(),
			'OAS':OAS(),
			'OneClassSVM':OneClassSVM(),
			'OrthogonalMatchingPursuit':OrthogonalMatchingPursuit(),
			'OrthogonalMatchingPursuitCV':OrthogonalMatchingPursuitCV(),
			'PCA':PCA(),
			'PLSCanonical':PLSCanonical(),
			'PLSRegression':PLSRegression(),
			'PLSSVD':PLSSVD(),
			'PassiveAggressiveClassifier':PassiveAggressiveClassifier(),
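This fragment is part of a name-to-estimator registry, which lets code instantiate a model from a string key at runtime; a minimal sketch of the pattern (hypothetical excerpt, not the original registry):

from sklearn.datasets import load_iris
from sklearn.neighbors import NearestCentroid

models = {'NearestCentroid': NearestCentroid()}  # registry excerpt
X, y = load_iris(return_X_y=True)
clf = models['NearestCentroid'].fit(X, y)  # look the model up by name, then fit
print(clf.score(X, y))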
Example #10
from collections import defaultdict
from time import time
import gc

import numpy as np
from sklearn.datasets import make_low_rank_matrix
from sklearn.decomposition import NMF

# `alt_nnmf` and `report` are helpers defined elsewhere in the benchmark script.


def benchmark(samples_range, features_range, rank=50, tolerance=1e-5):
    timeset = defaultdict(lambda: [])
    err = defaultdict(lambda: [])

    for n_samples in samples_range:
        for n_features in features_range:
            print("%2d samples, %2d features" % (n_samples, n_features))
            print('=======================')
            X = np.abs(
                make_low_rank_matrix(n_samples,
                                     n_features,
                                     effective_rank=rank,
                                     tail_strength=0.2))

            gc.collect()
            print("benchmarking nndsvd-nmf: ")
            tstart = time()
            m = NMF(n_components=30, tol=tolerance, init='nndsvd').fit(X)
            tend = time() - tstart
            timeset['nndsvd-nmf'].append(tend)
            err['nndsvd-nmf'].append(m.reconstruction_err_)
            report(m.reconstruction_err_, tend)

            gc.collect()
            print("benchmarking nndsvda-nmf: ")
            tstart = time()
            m = NMF(n_components=30, init='nndsvda', tol=tolerance).fit(X)
            tend = time() - tstart
            timeset['nndsvda-nmf'].append(tend)
            err['nndsvda-nmf'].append(m.reconstruction_err_)
            report(m.reconstruction_err_, tend)

            gc.collect()
            print("benchmarking nndsvdar-nmf: ")
            tstart = time()
            m = NMF(n_components=30, init='nndsvdar', tol=tolerance).fit(X)
            tend = time() - tstart
            timeset['nndsvdar-nmf'].append(tend)
            err['nndsvdar-nmf'].append(m.reconstruction_err_)
            report(m.reconstruction_err_, tend)

            gc.collect()
            print("benchmarking random-nmf")
            tstart = time()
            m = NMF(n_components=30,
                    init='random',
                    max_iter=1000,
                    tol=tolerance).fit(X)
            tend = time() - tstart
            timeset['random-nmf'].append(tend)
            err['random-nmf'].append(m.reconstruction_err_)
            report(m.reconstruction_err_, tend)

            gc.collect()
            print("benchmarking alt-random-nmf")
            tstart = time()
            W, H = alt_nnmf(X, r=30, init='random', tol=tolerance)
            tend = time() - tstart
            timeset['alt-random-nmf'].append(tend)
            reconstruction_error = np.linalg.norm(X - np.dot(W, H))
            err['alt-random-nmf'].append(reconstruction_error)
            report(reconstruction_error, tend)

    return timeset, err
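A quick invocation sketch, assuming the report and alt_nnmf helpers noted above are available; small ranges keep the run short:

if __name__ == '__main__':
    timeset, err = benchmark(samples_range=[100, 200], features_range=[100])
    for name, times in sorted(timeset.items()):
        print(name, times)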
def compute_bench(samples_range, features_range, rank=50, tolerance=1e-7):
    it = 0
    timeset = defaultdict(lambda: [])
    err = defaultdict(lambda: [])

    max_it = len(samples_range) * len(features_range)
    for n_samples in samples_range:
        for n_features in features_range:
            it += 1
            print('====================')
            print('Iteration %03d of %03d' % (it, max_it))
            print('====================')
            X = np.abs(
                make_low_rank_matrix(n_samples,
                                     n_features,
                                     effective_rank=rank,
                                     tail_strength=0.2))

            gc.collect()
            print "benching nndsvd-nmf: "
            tstart = time()
            m = NMF(n_components=30, tol=tolerance, init='nndsvd').fit(X)
            tend = time() - tstart
            timeset['nndsvd-nmf'].append(tend)
            err['nndsvd-nmf'].append(m.reconstruction_err_)
            print(m.reconstruction_err_, tend)

            gc.collect()
            print "benching nndsvda-nmf: "
            tstart = time()
            m = NMF(n_components=30, init='nndsvda', tol=tolerance).fit(X)
            tend = time() - tstart
            timeset['nndsvda-nmf'].append(tend)
            err['nndsvda-nmf'].append(m.reconstruction_err_)
            print(m.reconstruction_err_, tend)

            gc.collect()
            print "benching nndsvdar-nmf: "
            tstart = time()
            m = NMF(n_components=30, init='nndsvdar', tol=tolerance).fit(X)
            tend = time() - tstart
            timeset['nndsvdar-nmf'].append(tend)
            err['nndsvdar-nmf'].append(m.reconstruction_err_)
            print(m.reconstruction_err_, tend)

            gc.collect()
            print "benching random-nmf"
            tstart = time()
            m = NMF(n_components=30, init='random', max_iter=1000,
                    tol=tolerance).fit(X)
            tend = time() - tstart
            timeset['random-nmf'].append(tend)
            err['random-nmf'].append(m.reconstruction_err_)
            print(m.reconstruction_err_, tend)

            gc.collect()
            print "benching alt-random-nmf"
            tstart = time()
            W, H = alt_nnmf(X, r=30, R=None, tol=tolerance)
            tend = time() - tstart
            timeset['alt-random-nmf'].append(tend)
            err['alt-random-nmf'].append(np.linalg.norm(X - np.dot(W, H)))
            print(np.linalg.norm(X - np.dot(W, H)), tend)

    return timeset, err
Example #12
scorer = make_scorer(score_func=singleLabelScore, greater_is_better=False)

# PREPROCESSING
# SCALING
minMaxScaler = MinMaxScaler(feature_range=(0.0, 1.0))
#normalizer = skprep.Normalizer()
columnDeleter = fs.FeatureDeleter()

# FEATURE SELECTION
varianceThresholdSelector = VarianceThreshold(threshold=0.0)
percentileSelector = SelectPercentile(score_func=f_classif, percentile=20)
kBestSelector = SelectKBest(f_classif, k=1000)

# FEATURE EXTRACTION
#rbmPipe = skpipe.Pipeline(steps=[('scaling', minMaxScaler), ('rbm', rbm)])
nmf = NMF(n_components=150)
pca = PCA(n_components=80)
sparse_pca = SparsePCA(n_components=700, max_iter=3, verbose=2)
kernel_pca = KernelPCA(n_components=150)  # Costs huge amounts of ram
randomized_pca = RandomizedPCA(n_components=500)  # removed in scikit-learn 0.20; use PCA(svd_solver='randomized') instead

# REGRESSORS
random_forest_regressor = RandomForestRegressor(n_estimators=256)
gradient_boosting_regressor = GradientBoostingRegressor(n_estimators=60)
support_vector_regressor = svm.SVR()

# CLASSIFIERS
support_vector_classifier = svm.SVC(probability=True, verbose=True)
linear_support_vector_classifier = svm.LinearSVC(dual=False)
nearest_neighbor_classifier = KNeighborsClassifier()
extra_trees_classifier = ExtraTreesClassifier(n_estimators=256)
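The commented-out rbmPipe line suggests these pieces are composed with a scikit-learn Pipeline; a sketch of one plausible chain (an assumption, not taken from the original source):

from sklearn.pipeline import Pipeline

pipe = Pipeline(steps=[
    ('scale', minMaxScaler),               # map features into [0, 1]; NMF needs non-negative input
    ('select', percentileSelector),        # keep the top 20% of features by F-score
    ('extract', nmf),                      # 150 latent NMF components
    ('classify', nearest_neighbor_classifier),
])
# pipe.fit(X_train, y_train); pipe.predict(X_test)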