def compare_learning_rate(version='100k', n_jobs=1, random_state=0):
    """Benchmark ``DictMF`` over a grid of learning rates and dump the results.

    Ten learning rates evenly spaced in [0.5, 1] are evaluated in parallel
    with ``single_learning_rate``; the outcomes are written as JSON to
    ``~/output/recommender/<timestamp>/learning_rate/results_<version>.json``.

    Parameters
    ----------
    version : {'100k', '1m', '10m', 'netflix'}
        Dataset to use. MovieLens versions are loaded and split 75/25;
        'netflix' loads a pre-split train/test pair from ``~/spira_data``.
    n_jobs : int
        Number of parallel jobs handed to ``Parallel``.
    random_state : int
        Seed for the MovieLens train/test split.

    Raises
    ------
    ValueError
        If ``version`` is not one of the supported datasets.
    """
    movielens_versions = ('100k', '1m', '10m')
    # Validate first: an unknown version used to surface much later as an
    # unbound-variable error on X_tr.
    if version not in movielens_versions and version != 'netflix':
        raise ValueError(
            "Unknown dataset version %r; expected one of %r or 'netflix'"
            % (version, movielens_versions))

    if version in movielens_versions:
        X = load_movielens(version)
        X_tr, X_te = train_test_split(X,
                                      train_size=0.75,
                                      random_state=random_state)
        X_tr = X_tr.tocsr()
        X_te = X_te.tocsr()
    else:
        # Compared with ``==`` upstream: ``is`` on a str literal only works
        # by accident of interning (e.g. it fails for values from sys.argv).
        X_tr = load(expanduser('~/spira_data/nf_prize/X_tr.pkl'))
        X_te = load(expanduser('~/spira_data/nf_prize/X_te.pkl'))

    # The 10m dataset uses its own epochs / regularisation / batch size.
    is_10m = version == '10m'
    mf = DictMF(
        n_components=30,
        n_epochs=20 if is_10m else 7,
        alpha=0.1373823795883263 if is_10m else 0.16681005372000587,
        verbose=5,
        batch_size=600 if is_10m else 4000,
        normalize=True,
        fit_intercept=True,
        random_state=0,
        learning_rate=.75,
        impute=False,
        partial=True,
        backend='c')

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H' '-%M-%S')
    subdir = 'learning_rate'
    output_dir = expanduser(join('~/output/recommender/', timestamp, subdir))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Build the grid once so the jobs and the result keys cannot drift apart.
    learning_rates = np.linspace(0.5, 1, 10)
    par_res = Parallel(n_jobs=n_jobs, max_nbytes=None)(
        delayed(single_learning_rate)(mf, learning_rate, X_tr, X_te)
        for learning_rate in learning_rates)
    results = dict(zip(learning_rates, par_res))

    with open(join(output_dir, 'results_%s.json' % version), 'w+') as f:
        json.dump(results, f)
def compare_learning_rate(version='100k', n_jobs=1, random_state=0):
    """Benchmark ``DictMF`` over a grid of learning rates and dump the results.

    Ten learning rates evenly spaced in [0.5, 1] are evaluated in parallel
    with ``single_learning_rate``; the outcomes are written as JSON to
    ``~/output/recommender/<timestamp>/learning_rate/results_<version>.json``.

    Parameters
    ----------
    version : {'100k', '1m', '10m', 'netflix'}
        Dataset to use. MovieLens versions are loaded and split 75/25;
        'netflix' loads a pre-split train/test pair from ``~/spira_data``.
    n_jobs : int
        Number of parallel jobs handed to ``Parallel``.
    random_state : int
        Seed for the MovieLens train/test split.

    Raises
    ------
    ValueError
        If ``version`` is not one of the supported datasets.
    """
    movielens_versions = ('100k', '1m', '10m')
    # Validate first: an unknown version used to surface much later as an
    # unbound-variable error on X_tr.
    if version not in movielens_versions and version != 'netflix':
        raise ValueError(
            "Unknown dataset version %r; expected one of %r or 'netflix'"
            % (version, movielens_versions))

    if version in movielens_versions:
        X = load_movielens(version)
        X_tr, X_te = train_test_split(X, train_size=0.75,
                                      random_state=random_state)
        X_tr = X_tr.tocsr()
        X_te = X_te.tocsr()
    else:
        # Compared with ``==`` upstream: ``is`` on a str literal only works
        # by accident of interning (e.g. it fails for values from sys.argv).
        X_tr = load(expanduser('~/spira_data/nf_prize/X_tr.pkl'))
        X_te = load(expanduser('~/spira_data/nf_prize/X_te.pkl'))

    # The 10m dataset uses its own epochs / regularisation / batch size.
    is_10m = version == '10m'
    mf = DictMF(n_components=30,
                n_epochs=20 if is_10m else 7,
                alpha=0.1373823795883263 if is_10m else 0.16681005372000587,
                verbose=5,
                batch_size=600 if is_10m else 4000,
                normalize=True,
                fit_intercept=True,
                random_state=0,
                learning_rate=.75,
                impute=False,
                partial=True,
                backend='c')

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H'
                                                 '-%M-%S')
    subdir = 'learning_rate'
    output_dir = expanduser(join('~/output/recommender/', timestamp, subdir))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Build the grid once so the jobs and the result keys cannot drift apart.
    learning_rates = np.linspace(0.5, 1, 10)
    par_res = Parallel(n_jobs=n_jobs, max_nbytes=None)(
        delayed(single_learning_rate)(mf, learning_rate, X_tr, X_te)
        for learning_rate in learning_rates)
    results = dict(zip(learning_rates, par_res))

    with open(join(output_dir, 'results_%s.json' % version), 'w+') as f:
        json.dump(results, f)
        loss = 0.5 * error(self.X_tr, mf.P_, mf.Q_)
        regul = 0.5 * mf.alpha * (sqnorm(mf.P_) + sqnorm(mf.Q_))
        self.obj.append(loss + regul)

        X_pred = mf.predict(self.X_te)
        self.f1.append(f1_score(self.X_te, X_pred))

        self.test_time += time.clock() - test_time
        self.times.append(time.clock() -  self.start_time - self.test_time)

# Dataset version comes from the first command-line argument; fall back to
# the smallest MovieLens dataset when none is given.  Only IndexError is
# caught so that unrelated failures (and Ctrl-C) are not silently swallowed.
try:
    version = sys.argv[1]
except IndexError:
    version = "100k"

X = load_movielens(version)
print(X.shape)

# Binarize and pretend this is implicit feedback: stored ratings strictly
# above the mean of the stored values become 1, all others become 0.
cond = X.data > X.data.mean()
X.data[cond] = 1
X.data[~cond] = 0

# Fixed seed so the 75/25 split is reproducible across runs.
X_tr, X_te = train_test_split(X, train_size=0.75, random_state=0)
X_tr = X_tr.tocsr()
X_te = X_te.tocsr()

# The callback is given both splits and is passed to the estimator below.
cb = Callback(X_tr, X_te)
mf = ImplicitMF(n_components=30, max_iter=50, alpha=0.1, callback=cb,
                random_state=0)
mf.fit(X_tr)
        dump(estimator, join(debug_folder, 'estimator'), compress=9)

    return score


# Per-run output directory: ~/output/dl_recommender/<YYYY-mm-dd_HH-MM-SS>.
output_dir = expanduser(join('~/output/dl_recommender/',
                             datetime.datetime.now().strftime('%Y-%m-%d_%H'
                                                              '-%M-%S'))
                        )
# Created unconditionally (no existence check beforehand).
os.makedirs(output_dir)

# A single RandomState (seed 0) is shared by the row permutation and the
# ShuffleSplit below, so the order of these statements affects the draws.
random_state = check_random_state(0)
# NOTE(review): `mem` is only referenced by the commented-out loader below.
mem = Memory(cachedir=expanduser("~/cache"), verbose=10)
# X_csr = mem.cache(fetch_ml_10m)(expanduser('~/data/own/ml-10M100K'),
#                                 remove_empty=True)
X_csr = sp.csr_matrix(load_movielens('1m'))
permutation = random_state.permutation(X_csr.shape[0])

# Shuffle the rows of the ratings matrix.
X_csr = X_csr[permutation]

# presumably converts the sparse matrix into (samples, targets) arrays for
# the FM-style estimators -- TODO confirm against array_to_fm_format.
X, y = array_to_fm_format(X_csr)

# One shuffled split holding out 25% of the samples for testing.
uniform_split = ShuffleSplit(n_iter=1,
                             test_size=.25, random_state=random_state)

# The decoder is sized from the dimensions of the (permuted) matrix.
fm_decoder = FMDecoder(n_samples=X_csr.shape[0], n_features=X_csr.shape[1])

base_estimator = BaseRecommender(fm_decoder)

dl_list = [DLRecommender(fm_decoder,
                         n_components=n_components,
def main(version='100k', n_jobs=1, random_state=0, cross_val=False):
    """Fit the benchmarked matrix-factorisation models and dump their curves.

    For every estimator in the benchmark list, hyper-parameters are either
    taken from the per-dataset tables below or, when ``cross_val`` is true,
    ``alpha`` is chosen from a log-spaced grid by minimising the mean score
    returned by ``single_fit``.  The estimator is then fit on the training
    split with a ``Callback``, and the callback's ``times`` and ``rmse`` are
    written to ``results_<version>_<random_state>.json`` under
    ``~/output/recommender/<timestamp>/<benches|benches_ncv>/``.

    Parameters
    ----------
    version : {'100k', '1m', '10m', 'netflix'}
        Dataset to use. MovieLens versions are loaded and split 75/25;
        'netflix' loads a pre-split train/test pair from ``~/spira_data``.
    n_jobs : int
        Number of parallel jobs for the ``alpha`` search.
    random_state : int
        Seed for the MovieLens train/test split; also part of the output
        file name.
    cross_val : bool
        Whether to select ``alpha`` by search instead of using the table.

    Raises
    ------
    ValueError
        If ``version`` is not one of the supported datasets.
    """
    movielens_versions = ('100k', '1m', '10m')
    # Validate first: an unknown version used to surface much later as an
    # unbound-variable or KeyError.
    if version not in movielens_versions and version != 'netflix':
        raise ValueError(
            "Unknown dataset version %r; expected one of %r or 'netflix'"
            % (version, movielens_versions))

    # Hand-tuned per-dataset settings for the DictMF estimators.
    dl_params = {
        '100k': dict(learning_rate=1, batch_size=10, offset=0, alpha=1),
        '1m': dict(learning_rate=.75, batch_size=60, offset=0, alpha=.8),
        '10m': dict(learning_rate=.75, batch_size=600, offset=0, alpha=3),
        'netflix': dict(learning_rate=.8, batch_size=4000, offset=0,
                        alpha=0.16),
    }
    # Per-dataset regularisation for the coordinate-descent baseline.
    cd_params = {
        '100k': dict(alpha=.1),
        '1m': dict(alpha=.03),
        '10m': dict(alpha=.04),
        'netflix': dict(alpha=.1)
    }

    if version in movielens_versions:
        X = load_movielens(version)
        X_tr, X_te = train_test_split(X,
                                      train_size=0.75,
                                      random_state=random_state)
        X_tr = X_tr.tocsr()
        X_te = X_te.tocsr()
    else:
        # Compared with ``==`` upstream: ``is`` on a str literal only works
        # by accident of interning (e.g. it fails for values from sys.argv).
        X_tr = load(expanduser('~/spira_data/nf_prize/X_tr.pkl'))
        X_te = load(expanduser('~/spira_data/nf_prize/X_te.pkl'))

    cd_mf = ExplicitMF(
        n_components=60,
        max_iter=50,
        alpha=.1,
        normalize=True,
        verbose=1,
    )
    dl_mf = DictMF(n_components=30,
                   n_epochs=20,
                   alpha=1.17,
                   verbose=5,
                   batch_size=10000,
                   normalize=True,
                   fit_intercept=True,
                   random_state=0,
                   learning_rate=.75,
                   impute=False,
                   partial=False,
                   backend='python')
    dl_mf_partial = DictMF(n_components=60,
                           n_epochs=20,
                           alpha=1.17,
                           verbose=5,
                           batch_size=10000,
                           normalize=True,
                           fit_intercept=True,
                           random_state=0,
                           learning_rate=.75,
                           impute=False,
                           partial=True,
                           backend='python')

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H' '-%M-%S')
    subdir = 'benches_ncv' if cross_val else 'benches'
    output_dir = expanduser(join('~/output/recommender/', timestamp, subdir))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    results_path = join(output_dir,
                        'results_%s_%s.json' % (version, random_state))

    alphas = np.logspace(-2, 1, 10)
    # Only the partial-projection variant is currently benchmarked; the
    # other estimators stay registered so they can be re-enabled here.
    mf_list = [dl_mf_partial]
    dict_id = {cd_mf: 'cd', dl_mf: 'dl', dl_mf_partial: 'dl_partial'}
    names = {
        'cd': 'Coordinate descent',
        'dl': 'Proposed online masked MF',
        'dl_partial': 'Proposed algorithm (with partial projection)'
    }

    # Merge into an existing results file if one is already present.
    if os.path.exists(results_path):
        with open(results_path, 'r') as f:
            results = json.load(f)
    else:
        results = {}

    for mf in mf_list:
        mf_id = dict_id[mf]
        if not cross_val:
            if isinstance(mf, DictMF):
                mf.set_params(
                    learning_rate=dl_params[version]['learning_rate'],
                    batch_size=dl_params[version]['batch_size'],
                    alpha=dl_params[version]['alpha'])
            else:
                mf.set_params(alpha=cd_params[version]['alpha'])
        else:
            if isinstance(mf, DictMF):
                mf.set_params(
                    learning_rate=dl_params[version]['learning_rate'],
                    batch_size=dl_params[version]['batch_size'])
            if version != 'netflix':
                cv = ShuffleSplit(n_iter=3, train_size=0.66, random_state=0)
                mf_scores = Parallel(n_jobs=n_jobs, verbose=10)(
                    delayed(single_fit)(mf, alpha, X_tr, cv)
                    for alpha in alphas)
            else:
                # Netflix ships with a fixed test split, so score on it
                # directly instead of re-splitting the training data.
                mf_scores = Parallel(n_jobs=n_jobs, verbose=10)(
                    delayed(single_fit)(mf, alpha, X_tr, X_te, nested=False)
                    for alpha in alphas)
            # One row per alpha; average across columns, lower is better.
            mf_scores = np.array(mf_scores).mean(axis=1)
            best_alpha = alphas[mf_scores.argmin()]
            mf.set_params(alpha=best_alpha)

        cb = Callback(X_tr, X_te, refit=False)
        mf.set_params(callback=cb)
        mf.fit(X_tr)
        results[mf_id] = dict(name=names[mf_id],
                              time=cb.times,
                              rmse=cb.rmse)
        if cross_val:
            results[mf_id]['alphas'] = alphas.tolist()
            results[mf_id]['cv_alpha'] = mf_scores.tolist()
            results[mf_id]['best_alpha'] = mf.alpha

        # Rewrite the file after every estimator so partial runs are kept.
        with open(results_path, 'w+') as f:
            json.dump(results, f)

        print('Done')
def main(version='100k', n_jobs=1, random_state=0, cross_val=False):
    """Fit the benchmarked matrix-factorisation models and dump their curves.

    For every estimator in the benchmark list, hyper-parameters are either
    taken from the per-dataset tables below or, when ``cross_val`` is true,
    ``alpha`` is chosen from a log-spaced grid by minimising the mean score
    returned by ``single_fit``.  The estimator is then fit on the training
    split with a ``Callback``, and the callback's ``times`` and ``rmse`` are
    written to ``results_<version>_<random_state>.json`` under
    ``~/output/recommender/<timestamp>/<benches|benches_ncv>/``.

    Parameters
    ----------
    version : {'100k', '1m', '10m', 'netflix'}
        Dataset to use. MovieLens versions are loaded and split 75/25;
        'netflix' loads a pre-split train/test pair from ``~/spira_data``.
    n_jobs : int
        Number of parallel jobs for the ``alpha`` search.
    random_state : int
        Seed for the MovieLens train/test split; also part of the output
        file name.
    cross_val : bool
        Whether to select ``alpha`` by search instead of using the table.

    Raises
    ------
    ValueError
        If ``version`` is not one of the supported datasets.
    """
    movielens_versions = ('100k', '1m', '10m')
    # Validate first: an unknown version used to surface much later as an
    # unbound-variable or KeyError.
    if version not in movielens_versions and version != 'netflix':
        raise ValueError(
            "Unknown dataset version %r; expected one of %r or 'netflix'"
            % (version, movielens_versions))

    # Hand-tuned per-dataset settings for the DictMF estimators.
    dl_params = {
        '100k': dict(learning_rate=1, batch_size=10, offset=0, alpha=1),
        '1m': dict(learning_rate=.75, batch_size=60, offset=0, alpha=.8),
        '10m': dict(learning_rate=.75, batch_size=600, offset=0, alpha=3),
        'netflix': dict(learning_rate=.8, batch_size=4000, offset=0,
                        alpha=0.16),
    }
    # Per-dataset regularisation for the coordinate-descent baseline.
    cd_params = {'100k': dict(alpha=.1), '1m': dict(alpha=.03),
                 '10m': dict(alpha=.04),
                 'netflix': dict(alpha=.1)}

    if version in movielens_versions:
        X = load_movielens(version)
        X_tr, X_te = train_test_split(X, train_size=0.75,
                                      random_state=random_state)
        X_tr = X_tr.tocsr()
        X_te = X_te.tocsr()
    else:
        # Compared with ``==`` upstream: ``is`` on a str literal only works
        # by accident of interning (e.g. it fails for values from sys.argv).
        X_tr = load(expanduser('~/spira_data/nf_prize/X_tr.pkl'))
        X_te = load(expanduser('~/spira_data/nf_prize/X_te.pkl'))

    cd_mf = ExplicitMF(n_components=60, max_iter=50, alpha=.1, normalize=True,
                       verbose=1, )
    dl_mf = DictMF(n_components=30, n_epochs=20, alpha=1.17, verbose=5,
                   batch_size=10000, normalize=True,
                   fit_intercept=True,
                   random_state=0,
                   learning_rate=.75,
                   impute=False,
                   partial=False,
                   backend='python')
    dl_mf_partial = DictMF(n_components=60, n_epochs=20, alpha=1.17, verbose=5,
                           batch_size=10000, normalize=True,
                           fit_intercept=True,
                           random_state=0,
                           learning_rate=.75,
                           impute=False,
                           partial=True,
                           backend='python')

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H'
                                                 '-%M-%S')
    subdir = 'benches_ncv' if cross_val else 'benches'
    output_dir = expanduser(join('~/output/recommender/', timestamp, subdir))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    results_path = join(output_dir,
                        'results_%s_%s.json' % (version, random_state))

    alphas = np.logspace(-2, 1, 10)
    # Only the partial-projection variant is currently benchmarked; the
    # other estimators stay registered so they can be re-enabled here.
    mf_list = [dl_mf_partial]
    dict_id = {cd_mf: 'cd', dl_mf: 'dl', dl_mf_partial: 'dl_partial'}
    names = {'cd': 'Coordinate descent', 'dl': 'Proposed online masked MF',
             'dl_partial': 'Proposed algorithm (with partial projection)'}

    # Merge into an existing results file if one is already present.
    if os.path.exists(results_path):
        with open(results_path, 'r') as f:
            results = json.load(f)
    else:
        results = {}

    for mf in mf_list:
        mf_id = dict_id[mf]
        if not cross_val:
            if isinstance(mf, DictMF):
                mf.set_params(
                    learning_rate=dl_params[version]['learning_rate'],
                    batch_size=dl_params[version]['batch_size'],
                    alpha=dl_params[version]['alpha'])
            else:
                mf.set_params(alpha=cd_params[version]['alpha'])
        else:
            if isinstance(mf, DictMF):
                mf.set_params(
                    learning_rate=dl_params[version]['learning_rate'],
                    batch_size=dl_params[version]['batch_size'])
            if version != 'netflix':
                cv = ShuffleSplit(n_iter=3, train_size=0.66, random_state=0)
                mf_scores = Parallel(n_jobs=n_jobs, verbose=10)(
                    delayed(single_fit)(mf, alpha, X_tr, cv)
                    for alpha in alphas)
            else:
                # Netflix ships with a fixed test split, so score on it
                # directly instead of re-splitting the training data.
                mf_scores = Parallel(n_jobs=n_jobs, verbose=10)(
                    delayed(single_fit)(mf, alpha, X_tr, X_te, nested=False)
                    for alpha in alphas)
            # One row per alpha; average across columns, lower is better.
            mf_scores = np.array(mf_scores).mean(axis=1)
            best_alpha = alphas[mf_scores.argmin()]
            mf.set_params(alpha=best_alpha)

        cb = Callback(X_tr, X_te, refit=False)
        mf.set_params(callback=cb)
        mf.fit(X_tr)
        results[mf_id] = dict(name=names[mf_id],
                              time=cb.times,
                              rmse=cb.rmse)
        if cross_val:
            results[mf_id]['alphas'] = alphas.tolist()
            results[mf_id]['cv_alpha'] = mf_scores.tolist()
            results[mf_id]['best_alpha'] = mf.alpha

        # Rewrite the file after every estimator so partial runs are kept.
        with open(results_path, 'w+') as f:
            json.dump(results, f)

        print('Done')